feat: ✨ AI Read File Tool, configurable system prompts, and loading multiple LLMs
@@ -1,4 +1,5 @@
 data/*
+*.log

 # Python-generated files
 __pycache__/
@@ -1,8 +1,10 @@
-Read File Tool for Retrieve Agent
+---Read File Tool for Retrieve Agent---

-Easy Config of system prompts
+---Easy Config of system prompts---
 examples into prompts & better prompts

 LMS CLI script to load multiple models and to make each model accept multiple inferences

 context engineering, - only include vector hits that are x distance?

 AI in the middle - make the ai generate the string for vector search
@@ -19,7 +21,3 @@ QA specific embedding models?

 Evaluation metrics, how good is it doing?
 rate my response!?
-examples into prompts & better prompts

 common model attributes - temp & top-k
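The "only include vector hits that are x distance" idea maps directly onto the `distance` column that `retrieve_from_turso` (later in this diff) already selects. A minimal sketch of such a post-filter; the cutoff value is a placeholder, not something this commit defines:

```python
MAX_DISTANCE = 0.5  # placeholder cutoff; would need tuning against real queries

def filter_hits(rows: list[tuple], max_distance: float = MAX_DISTANCE) -> list[tuple]:
    """Drop vector hits whose cosine distance exceeds the cutoff.

    Each row is (file_path, synopsis, tags, entities, chunk_data, distance),
    matching the columns selected in retrieve_from_turso.
    """
    return [row for row in rows if row[-1] <= max_distance]
```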
Executable
+10
@@ -0,0 +1,10 @@
+lms load qwen/qwen3.5-4b --parallel 4 --identifier "qwen-0" --ttl 1800
+lms load qwen/qwen3.5-4b --parallel 4 --identifier "qwen-1" --ttl 1800
+lms load qwen/qwen3.5-4b --parallel 4 --identifier "qwen-2" --ttl 1800
+lms load qwen/qwen3.5-4b --parallel 4 --identifier "qwen-3" --ttl 1800
+lms load qwen/qwen3.5-4b --parallel 4 --identifier "qwen-4" --ttl 1800
+lms load qwen/qwen3.5-4b --parallel 4 --identifier "qwen-5" --ttl 1800
+lms load qwen/qwen3.5-4b --parallel 4 --identifier "qwen-6" --ttl 1800
+lms load qwen/qwen3.5-4b --parallel 4 --identifier "qwen-7" --ttl 1800
+lms load qwen/qwen3.5-4b --parallel 4 --identifier "qwen-8" --ttl 1800
+lms load qwen/qwen3.5-4b --parallel 4 --identifier "qwen-9" --ttl 1800
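The ten `lms load` lines differ only in the identifier suffix, so the script can be generated from the same numbers the config uses. A sketch in Python (shelling out to the `lms` CLI with the exact flags above; the constant names are mine):

```python
import subprocess

MODEL = "qwen/qwen3.5-4b"
ACTIVE_LLMS = 10   # mirrors ingestion.active_llms in the config
PARALLEL = 4       # mirrors ingestion.parallel_requests_per_llm

for i in range(ACTIVE_LLMS):
    # --ttl 1800 unloads an instance after 30 idle minutes.
    subprocess.run(
        ["lms", "load", MODEL,
         "--parallel", str(PARALLEL),
         "--identifier", f"qwen-{i}",
         "--ttl", "1800"],
        check=True,
    )
```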
+19
-6
@@ -5,7 +5,7 @@ api:

 # --- Model Settings ---
 models:
-  enrich: "lm_studio/qwen/qwen3-8b"
+  enrich: "lm_studio/qwen-"
   embedding: "text-embedding-qwen3-embedding-8b"
   retrieval: "lm_studio/qwen/qwen3-30b-a3b-2507"

@@ -13,13 +13,26 @@ models:
 ingestion:
   data_dir: "/home/devin/DnD"
   db_path: "./data/dmv.db"
-  max_workers: 8
+  active_llms: 10
+  parallel_requests_per_llm: 4
   chunk_size: 800
   chunk_overlap: 100
   embedding_batch_size: 32
   time_file_location: "./data/time_file.txt"

 # --- Retrieval Settings ---
 retrieval:
   top_k: 4
   context_limit: 10000  # Max characters from full file context
+# ---- Agent Settings ----
+ingestion_agent:
+  ingestion_signature: |
+    You are an expert Dungeon Master's assistant.
+    Analyze the provided notes and extract a concise synopsis and relevant metadata.
+    synopsis = A one-sentence summary of the document.
+    tags = Relevant tags (NPCs, Locations, Items, Plot Points).
+    entities = A list of key names of people, places, or factions.
+    "note -> synopsis:str, tags: list[str], entities: list[str]"
+
+retrieval_agent:
+  retrieval_signature: |
+    You are an expert Dungeon Master's assistant.
+    Given the context and the question, answer the question.
+    Do not make things up; base all of your answers on the context.
+    Always cite your sources.
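`load_config` is imported throughout this commit but its module is not part of the diff. A plausible minimal reconstruction, assuming the settings above live in a YAML file next to the code (the file name and the PyYAML dependency are assumptions):

```python
# config_loader.py -- hypothetical reconstruction, not from this commit
from pathlib import Path

import yaml  # PyYAML, assumed


def load_config(path: str = "config.yaml") -> dict:
    """Parse the YAML settings file into a plain dict."""
    with Path(path).open() as fh:
        return yaml.safe_load(fh)
```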
+7
-3
@@ -1,12 +1,16 @@
 import requests
 from langchain_core.embeddings import Embeddings
+from config_loader import load_config
+
+CFG = load_config()
+API_BASE = CFG["api"]["base_url"]
+API_VERSION = CFG["api"]["api_version"]

 class LocalLMEmbeddings(Embeddings):
     def __init__(
-        self, model: str, base_url: str = "http://192.168.0.49:1234", batch_size: int = 32
+        self, model: str, base_url: str = API_BASE, batch_size: int = 32
     ):
-        self.url = f"{base_url}/v1/embeddings"
+        self.url = f"{base_url}/{API_VERSION}/embeddings"
         self.model = model
         self.batch_size = batch_size

@@ -27,7 +31,7 @@ class LocalLMEmbeddings(Embeddings):
         return [[] for _ in input_texts]

     def embed_documents(self, texts: list[str]) -> list[list[float]]:
-        """Splits 500+ chunks into batches of 32 and processes them."""
+        """Splits chunks into batches of 32 and processes them."""
         all_embeddings = []

         for i in range(0, len(texts), self.batch_size):
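The request itself sits outside the hunk. For reference, a batch call against an OpenAI-compatible `/embeddings` endpoint looks roughly like this; the helper name and error handling are assumptions, not the file's actual code:

```python
import requests

def embed_batch(url: str, model: str, input_texts: list[str]) -> list[list[float]]:
    """POST one batch to an OpenAI-compatible embeddings endpoint."""
    resp = requests.post(url, json={"model": model, "input": input_texts}, timeout=120)
    resp.raise_for_status()
    # The server returns one embedding object per input, in request order.
    return [item["embedding"] for item in resp.json()["data"]]
```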
@@ -1,21 +1,16 @@
 import dspy
 from typing import List
+from config_loader import load_config
+
+CFG = load_config()
+INGESTION_CONFIG = CFG["ingestion_agent"]

 class IngestionSignature(dspy.Signature):
-    """You are an expert Dungeon Master's assistant.
-    Analyze the provided notes and extract a concise synopsis and relevant metadata.
-    synopsis = A one-sentence summary of the document.
-    tags = Relevant tags (NPCs, Locations, Items, Plot Points).
-    entities = Key names of people, places, or factions.
-    "note -> synopsis:str, tags: list[str], entities: list[str]"
-    /no_think
-    """
+    # Store the configured prompt as the class docstring; a bare f-string here
+    # would not become __doc__, so the instructions would silently be dropped.
+    __doc__ = INGESTION_CONFIG["ingestion_signature"]

     note: str = dspy.InputField(desc="The DM notes or session recap content.")
     answer: dict[str, str | List] = dspy.OutputField(desc="The metadata dictionary with the keys: synopsis, tags, entities.")


 class IngestionAgent(dspy.Module):
     def __init__(self):
         self.ingest = dspy.Predict(IngestionSignature)
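A single enrichment call against one of the loaded instances would look roughly like this (a sketch: the module path is a guess and the note text is invented sample input; the model identifier and URL echo the rest of the commit):

```python
import dspy
from experts.ingestion_agent import IngestionAgent  # module path assumed

lm = dspy.LM(
    model="lm_studio/qwen-0",
    api_base="http://framework.tawny-bellatrix.ts.net:1234/v1/",
)
with dspy.context(lm=lm):
    result = IngestionAgent().ingest(note="The party met Lady Vex at the Gilded Stag.")
print(result.answer)  # {"synopsis": ..., "tags": [...], "entities": [...]}
```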
@@ -1,7 +1,7 @@
 # from pathlib import Path
+import os
 import turso
 import dspy
 # from langchain_community.vectorstores import FAISS


 from config_loader import load_config
 from embedding import LocalLMEmbeddings
@@ -11,11 +11,10 @@ CFG = load_config()
 DATABASE_PATH = CFG["ingestion"]["db_path"]
 EMBEDDING_MODEL = CFG["models"]["embedding"]
 API_BASE = CFG["api"]["base_url"]
+RETRIEVAL_CONFIG = CFG["retrieval_agent"]


 # Inside your retrieval logic:
 def retrieve_from_turso(embedded_question, k=5):
     # Example query: search for relevant notes using full-text search or embedding similarity
     # Note: Turso supports SQLite, so you can use FTS5 or a vector extension if available
     query = f"""
         SELECT file_path, synopsis, tags, entities, chunk_data,
             vector_distance_cos(embedding, vector32('{embedded_question[0]}')) AS distance
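The hunk ends mid-query, so the ordering and limit clauses are not visible. A sketch of how a libSQL cosine-distance query is typically completed; the table name and cursor handling are assumptions:

```python
# Hypothetical completion of the query above; 'notes' is an assumed table name.
def retrieve_sketch(conn, embedded_question, k=5):
    query = f"""
        SELECT file_path, synopsis, tags, entities, chunk_data,
            vector_distance_cos(embedding, vector32('{embedded_question[0]}')) AS distance
        FROM notes
        ORDER BY distance ASC
        LIMIT {k};
    """
    return conn.execute(query).fetchall()
```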
@@ -31,8 +30,7 @@ def retrieve_from_turso(embedded_question, k=5):

 # --- DSPy Signature ---
 class DnDContextQA(dspy.Signature):
-    """Answer DnD campaign questions using provided details.
-    """
+    __doc__ = RETRIEVAL_CONFIG["retrieval_signature"]

     context = dspy.InputField(
         desc="Relevant chunks and metadata from the campaign notes."
@@ -49,7 +47,11 @@ class DnDRAG(dspy.Module):
             base_url=API_BASE,
             batch_size=1,  # we only send 1 question at a time.
         )
-        self.generate_answer = dspy.ChainOfThought(DnDContextQA)
+        # Tools exposed to the ReAct loop
+        self.tools = [
+            self.load_file
+        ]
+        self.generate_answer = dspy.ReAct(signature=DnDContextQA, tools=self.tools)

     def forward(self, question):
         # Use Turso to retrieve relevant notes
@@ -73,12 +75,23 @@ tags: {tags},
 entities: {entities}
 {content}
 """)

-        print('Closest embedding hits')
-        for part in context_parts:
-            print(part)
+        # print('Closest embedding hits')
+        # for part in context_parts:
+        #     print(part)

         context = "\n\n".join(context_parts)

         prediction = self.generate_answer(context=context, question=question)
         return dspy.Prediction(answer=prediction.answer, context=context)

+    def load_file(self, file_path) -> str | None:
+        """Load and return specified file."""
+        if os.path.exists(file_path):
+            try:
+                with open(file_path) as file:
+                    return file.read()
+            except Exception:
+                return None
+        else:
+            return None
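`dspy.ReAct` derives a tool's name and description from the callable's name and docstring, so the one-liner on `load_file` is what the model sees when deciding whether to open a file. Usage is unchanged from the ChainOfThought version; a minimal sketch, assuming an LM has already been configured:

```python
import dspy
from experts.retrieval_agent import DnDRAG

# Assumes dspy.configure(lm=dspy.LM(...)) has run, as in main().
rag = DnDRAG()
pred = rag(question="Who rules the city of Emberfall?")  # invented sample question
print(pred.answer)
```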
+6
-4
@@ -19,7 +19,9 @@ MODEL_BASE = CFG["models"]["enrich"]
 EMBEDDING_MODEL = CFG["models"]["embedding"]
 API_BASE = CFG["api"]["base_url"]
 API_VERSION = CFG["api"]["api_version"]
-MAX_WORKERS = CFG["ingestion"]["max_workers"]
+# MAX_WORKERS = CFG["ingestion"]["max_workers"]
+ACTIVE_LLMS = CFG["ingestion"]["active_llms"]
+PARALLEL_REQUESTS_PER_LLM = CFG["ingestion"]["parallel_requests_per_llm"]
 CHUNK_SIZE = CFG["ingestion"]["chunk_size"]
 CHUNK_OVERLAP = CFG["ingestion"]["chunk_overlap"]
 EMBEDDING_BATCH_SIZE = CFG["ingestion"]["embedding_batch_size"]
@@ -75,10 +77,10 @@ def chunk_documents(docs):
 def enrich_chunks(chunks: list) -> list:
     def process_single_chunk(indexed_chunk):
         idx, chunk = indexed_chunk
-        lm_index = idx % 8
+        lm_index = idx % ACTIVE_LLMS

         try:
-            with dspy.context(lm=dspy.LM(model=MODEL_BASE, api_base=API_BASE + API_VERSION)):
+            with dspy.context(lm=dspy.LM(model=f"{MODEL_BASE}{lm_index}", api_base=API_BASE + API_VERSION), chat_template_kwargs={"enable_thinking": False}):
                 response = IngestionAgent().ingest(note=chunk.page_content)

             # This is now an object, not a string!
@@ -92,7 +94,7 @@ def enrich_chunks(chunks: list) -> list:
             return (idx, chunk)

     enriched_results = []
-    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
+    with ThreadPoolExecutor(max_workers=PARALLEL_REQUESTS_PER_LLM * ACTIVE_LLMS) as executor:
         # Wrap chunks in enumerate to keep track of order
         futures = [executor.submit(process_single_chunk, (i, c)) for i, c in enumerate(chunks)]
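The thread count is now derived instead of hard-coded: 10 active instances × 4 parallel requests each = 40 workers, and `idx % ACTIVE_LLMS` spreads chunks round-robin across the `qwen-0` … `qwen-9` identifiers loaded by the script above. A toy illustration:

```python
ACTIVE_LLMS = 10
PARALLEL_REQUESTS_PER_LLM = 4

max_workers = PARALLEL_REQUESTS_PER_LLM * ACTIVE_LLMS  # 40 threads
for idx in range(12):
    print(idx, "->", f"lm_studio/qwen-{idx % ACTIVE_LLMS}")
# 0 -> lm_studio/qwen-0 ... 9 -> lm_studio/qwen-9, 10 -> lm_studio/qwen-0, ...
```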
+82
-6
@@ -1,16 +1,93 @@
 import sys
 import dspy
 # import turso
+import logging
+from dspy.utils.callback import BaseCallback

+from logging.handlers import RotatingFileHandler

 from config_loader import load_config
-from experts.dnd_agent import DnDRAG
+from experts.retrieval_agent import DnDRAG

 CFG = load_config()
 RETRIEVE_MODEL = CFG["models"]["retrieval"]
 API_BASE = CFG["api"]["base_url"]
 API_VERSION = CFG["api"]["api_version"]

+class CallbackHandler(BaseCallback):
+    """Custom callback class for logging agent interactions."""
+
+    def __init__(self, logger):
+        """Initialize the callback with a logger instance."""
+        super().__init__()
+        self.logger = logger
+
+    def on_module_end(self, call_id, outputs, exception):
+        """Handle module end events for logging."""
+        step = "Reasoning" if self._is_reasoning_output(outputs) else "Acting"
+        self.logger.debug(f"=== {step} Step ===")
+        for k, v in outputs.items():
+            self.logger.debug(f" {k}: {v}")
+
+    def on_lm_start(self, call_id, instance, inputs):
+        """Handle language model start events for logging."""
+        self.logger.debug(f"LM is called with inputs: {inputs}")
+
+    def on_tool_start(self, call_id, instance, inputs):
+        """Handle tool start events for logging."""
+        self.logger.debug(f"Tool {instance} called with inputs: {inputs}")
+
+    def on_tool_end(self, call_id, outputs, exception):
+        """Handle tool end events for logging."""
+        self.logger.debug(f"Tool finished with outputs: {outputs}")
+
+    def on_lm_end(self, call_id, outputs, exception):
+        """Handle language model end events for logging."""
+        self.logger.debug(f"LM is finished with outputs: {outputs}")
+
+    def _is_reasoning_output(self, outputs):
+        return any(k.startswith("Thought") for k in outputs)
+
+
+def setup_logging():
+    """Set up logging configuration for Merlin."""
+    # Create a custom logger
+    logger = logging.getLogger(__name__)
+
+    # Set the minimum level for the logger
+    logger.setLevel(logging.DEBUG)
+
+    # Create a console handler
+    console_handler = logging.StreamHandler()
+    console_handler.setLevel(logging.INFO)
+
+    # Create a file handler with rotation every 5MB
+    file_handler = RotatingFileHandler(
+        "dmv.log", maxBytes=5 * 1024 * 1024, backupCount=3
+    )
+    file_handler.setLevel(logging.DEBUG)
+
+    # Create a formatter
+    formatter = logging.Formatter(
+        "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+    )
+
+    # Set the formatter for the handlers
+    console_handler.setFormatter(formatter)
+    file_handler.setFormatter(formatter)
+
+    # Add the handlers to the logger
+    logger.addHandler(console_handler)
+    logger.addHandler(file_handler)
+
+    return logger


 def main():
+    logger = setup_logging()
+    logger.debug("main application started")
+
+    # Add verbose errors and the logging callback
+    dspy.configure(verbose_errors=True)
+    dspy.configure(callbacks=[CallbackHandler(logger)])
     # 1. Setup the LLM
     print("🚀 Initializing Qwen-8B via LM Studio...")
     lm = dspy.LM(RETRIEVE_MODEL, api_base=API_BASE + API_VERSION)
@@ -32,7 +109,7 @@ def main():
             query = input("📝 Query: ").strip()

             # Exit conditions
-            if query.lower() in ["exit", "quit", "q"]:
+            if query.lower() in ["exit", "quit", "q", "bye"]:
                 print("Farewell, traveler. Good luck on your quest!")
                 break

@@ -47,11 +124,10 @@ def main():
             print(response.answer)

         except KeyboardInterrupt:
-            print("\n\nExiting... See you next session!")
+            print("\n\nRude?!.... Exiting...")
             sys.exit(0)
         except Exception as e:
             print(f"\n⚠️ An error occurred: {e}")


 if __name__ == "__main__":
-    main()
+    main()
@@ -0,0 +1,25 @@
+import dspy
+
+base_url = "http://framework.tawny-bellatrix.ts.net:1234"
+model_name = "lm_studio/qwen-0"
+
+
+lm = dspy.LM(
+    model=model_name,
+    api_base=f"{base_url}/v1/"
+)
+dspy.configure(lm=lm)
+
+
+# question = "How can I use the dspy framework to add 'chat_template_kwargs={\"enable_thinking\": False}' to my API call to LM Studio? I know it uses litellm under the hood"
+# question = "Hi there, do you have a name? If not, I want you to name yourself."
+question = "How long would it take light to travel from the sun to the earth? /no_think"
+
+# Call with request_kwargs to inject the template kwargs
+response = lm(
+    messages=[{"role": "user", "content": question}]
+    # extra_body={"enable_thinking": False}
+    # enable_thinking=False
+)
+
+print(response)
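One untested avenue for the commented-out experiment above: `dspy.LM` forwards extra keyword arguments to LiteLLM, and LiteLLM passes `extra_body` through to OpenAI-compatible servers, so the template kwargs could plausibly ride along at construction time. A sketch; whether LM Studio honors `chat_template_kwargs` sent this way is an assumption:

```python
import dspy

lm = dspy.LM(
    model="lm_studio/qwen-0",
    api_base="http://framework.tawny-bellatrix.ts.net:1234/v1/",
    # Unverified: relies on LiteLLM forwarding extra_body to the server.
    extra_body={"chat_template_kwargs": {"enable_thinking": False}},
)
dspy.configure(lm=lm)
```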