feat: AI Read File Tool, Configurable system prompts and loading lots of llms

This commit is contained in:
2026-03-04 15:48:25 +00:00
parent bbaebf1f70
commit 0d0e747682
10 changed files with 184 additions and 47 deletions
+19 -6
View File
@@ -5,7 +5,7 @@ api:
# --- Model Settings ---
models:
enrich: "lm_studio/qwen/qwen3-8b"
enrich: "lm_studio/qwen-"
embedding: "text-embedding-qwen3-embedding-8b"
retrieval: "lm_studio/qwen/qwen3-30b-a3b-2507"
@@ -13,13 +13,26 @@ models:
ingestion:
data_dir: "/home/devin/DnD"
db_path: "./data/dmv.db"
max_workers: 8
active_llms: 10
parallel_requests_per_llm: 4
chunk_size: 800
chunk_overlap: 100
embedding_batch_size: 32
time_file_location: "./data/time_file.txt"
# --- Retrieval Settings ---
retrieval:
top_k: 4
context_limit: 10000 # Max characters from full file context
# ---- Agent Settings ----
ingestion_agent:
ingestion_signature: |
You are an expert Dungeon Master's assistant.
Analyze the provided notes and extract a concise synopsis and relevant metadata.
synopsis = A one-sentence summary of the document.
tags = Relevant tags (NPCs, Locations, Items, Plot Points).
entities = a list of Key names of people, places, or factions.
"note -> synopsis:str, tags: list[str], entities: list[str]"
retrieval_agent:
retrieval_signature: |
You are an expert Dungeon Master's assistant.
Given the context and the question, answer the question.
Do not make things up, base all of your answers on the context.
Always cite your sources
+7 -3
View File
@@ -1,12 +1,16 @@
import requests
from langchain_core.embeddings import Embeddings
from config_loader import load_config
CFG = load_config()
API_BASE = CFG["api"]["base_url"]
API_VERSION = CFG["api"]["api_version"]
class LocalLMEmbeddings(Embeddings):
def __init__(
self, model: str, base_url: str = "http://192.168.0.49:1234", batch_size: int = 32
self, model: str, base_url: str = API_BASE, batch_size: int = 32
):
self.url = f"{base_url}/v1/embeddings"
self.url = f"{base_url}/{API_VERSION}/embeddings"
self.model = model
self.batch_size = batch_size
@@ -27,7 +31,7 @@ class LocalLMEmbeddings(Embeddings):
return [[] for _ in input_texts]
def embed_documents(self, texts: list[str]) -> list[list[float]]:
"""Splits 500+ chunks into batches of 32 and processes them."""
"""Splits chunks into batches of 32 and processes them."""
all_embeddings = []
for i in range(0, len(texts), self.batch_size):
+5 -10
View File
@@ -1,21 +1,16 @@
import dspy
from typing import List
from config_loader import load_config
CFG = load_config()
INGESTION_CONFIG = CFG["ingestion_agent"]
class IngestionSignature(dspy.Signature):
    # dspy reads a Signature's instructions from the class docstring. A bare
    # f-string expression in the class body is NOT a docstring (it leaves
    # __doc__ unset), and f"{INGESTION_CONFIG["ingestion_signature"]}" with
    # nested double quotes is a SyntaxError before Python 3.12. Assigning
    # __doc__ explicitly fixes both and keeps the prompt configurable.
    __doc__ = INGESTION_CONFIG["ingestion_signature"]

    # Raw DM note text to be analyzed.
    note: str = dspy.InputField(desc="The DM notes or session recap content.")
    # Structured extraction result keyed by synopsis / tags / entities.
    answer: dict[str, str | List] = dspy.OutputField(desc="the metadata dictionary with the keys; synopsis, tags, entities")
class IngestionAgent(dspy.Module):
    """Thin dspy module that extracts synopsis/tags/entities from a note."""

    def __init__(self):
        # dspy.Module's initializer sets up sub-module/parameter tracking;
        # skipping it breaks named_predictors() and state save/load.
        super().__init__()
        self.ingest = dspy.Predict(IngestionSignature)
@@ -1,7 +1,7 @@
# from pathlib import Path
import os
import turso
import dspy
# from langchain_community.vectorstores import FAISS
from config_loader import load_config
from embedding import LocalLMEmbeddings
@@ -11,11 +11,10 @@ CFG = load_config()
DATABASE_PATH = CFG["ingestion"]["db_path"]
EMBEDDING_MODEL = CFG["models"]["embedding"]
API_BASE = CFG["api"]["base_url"]
RETRIEVAL_CONFIG = CFG["retrieval_agent"]
# Inside your retrieval logic:
def retrieve_from_turso(embedded_question, k=5):
# Example query: search for relevant notes using full-text search or embedding similarity
# Note: Turso supports SQLite, so you can use FTS5 or a vector extension if available
query = f"""
SELECT file_path, synopsis, tags, entities, chunk_data,
vector_distance_cos(embedding, vector32('{embedded_question[0]}')) AS distance
@@ -31,8 +30,7 @@ def retrieve_from_turso(embedded_question, k=5):
# --- DSPy Signature ---
class DnDContextQA(dspy.Signature):
"""Answer DnD campaign questions using provided details.
"""
f"{RETRIEVAL_CONFIG["retrieval_signature"]}"
context = dspy.InputField(
desc="Relevant chunks and metadata from the campaign notes."
@@ -49,7 +47,11 @@ class DnDRAG(dspy.Module):
base_url=API_BASE,
batch_size=1, # we only send 1 question at a time.
)
self.generate_answer = dspy.ChainOfThought(DnDContextQA)
# Tools exposed to the ReAct loop
self.tools = [
self.load_file
]
self.generate_answer = dspy.ReAct(signature=DnDContextQA,tools=self.tools)
def forward(self, question):
# Use Turso to retrieve relevant notes
@@ -73,12 +75,23 @@ tags: {tags},
entities: {entities}
{content}
""")
print('Closest embedding hits')
for part in context_parts:
print(part)
# print('Closest embedding hits')
# for part in context_parts:
# print(part)
context = "\n\n".join(context_parts)
prediction = self.generate_answer(context=context, question=question)
return dspy.Prediction(answer=prediction.answer, context=context)
def load_file(self, file_path) -> str | None:
"""Load and return specified file."""
if os.path.exists(file_path):
try:
with open(file_path) as file:
return file.read()
except Exception:
return None
else:
return None
+6 -4
View File
@@ -19,7 +19,9 @@ MODEL_BASE = CFG["models"]["enrich"]
EMBEDDING_MODEL = CFG["models"]["embedding"]
API_BASE = CFG["api"]["base_url"]
API_VERSION = CFG["api"]["api_version"]
MAX_WORKERS = CFG["ingestion"]["max_workers"]
# MAX_WORKERS = CFG["ingestion"]["max_workers"]
ACTIVE_LLMS = CFG["ingestion"]["active_llms"]
PARALLEL_REQUESTS_PER_LLM = CFG["ingestion"]["parallel_requests_per_llm"]
CHUNK_SIZE = CFG["ingestion"]["chunk_size"]
CHUNK_OVERLAP = CFG["ingestion"]["chunk_overlap"]
EMBEDDING_BATCH_SIZE = CFG["ingestion"]["embedding_batch_size"]
@@ -75,10 +77,10 @@ def chunk_documents(docs):
def enrich_chunks(chunks: list) -> list:
def process_single_chunk(indexed_chunk):
idx, chunk = indexed_chunk
lm_index = idx % 8
lm_index = idx % ACTIVE_LLMS
try:
with dspy.context(lm=dspy.LM(model=MODEL_BASE, api_base=API_BASE + API_VERSION)):
with dspy.context(lm=dspy.LM(model=f"{MODEL_BASE}{lm_index}", api_base=API_BASE + API_VERSION), chat_template_kwargs={"enable_thinking": False}):
response = IngestionAgent().ingest(note=chunk.page_content)
# This is now an object, not a string!
@@ -92,7 +94,7 @@ def enrich_chunks(chunks: list) -> list:
return (idx, chunk)
enriched_results = []
with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
with ThreadPoolExecutor(max_workers=PARALLEL_REQUESTS_PER_LLM*ACTIVE_LLMS) as executor:
# Wrap chunks in enumerate to keep track of order
futures = [executor.submit(process_single_chunk, (i, c)) for i, c in enumerate(chunks)]
+82 -6
View File
@@ -1,16 +1,93 @@
import sys
import dspy
# import turso
import logging
from dspy.utils.callback import BaseCallback
from logging.handlers import RotatingFileHandler
from config_loader import load_config
from experts.dnd_agent import DnDRAG
from experts.retrieval_agent import DnDRAG
CFG = load_config()
RETRIEVE_MODEL = CFG["models"]["retrieval"]
API_BASE = CFG["api"]["base_url"]
API_VERSION = CFG["api"]["api_version"]
class CallbackHandler(BaseCallback):
    """DSPy callback that mirrors agent activity into a logger.

    Every hook forwards its payload to ``self.logger`` at DEBUG level so
    the full LM / tool / module trace lands in the log file.
    """

    def __init__(self, logger):
        """Store the logger that all hooks write to."""
        super().__init__()
        self.logger = logger

    def on_module_end(self, call_id, outputs, exception):
        """Log each module result, tagged as a Reasoning or Acting step."""
        if self._is_reasoning_output(outputs):
            step = "Reasoning"
        else:
            step = "Acting"
        self.logger.debug(f"== {step} Step ===")
        for key, value in outputs.items():
            self.logger.debug(f" {key}: {value}")

    def on_lm_start(self, call_id, instance, inputs):
        """Log the inputs handed to the language model."""
        self.logger.debug(f"LM is called with inputs: {inputs}")

    def on_tool_start(self, call_id, instance, inputs):
        """Log which tool is invoked and with what inputs."""
        self.logger.debug(f"Tool {instance} called with inputs: {inputs}")

    def on_tool_end(self, call_id, outputs, exception):
        """Log a tool's outputs once it returns."""
        self.logger.debug(f"Tool finished with outputs: {outputs}")

    def on_lm_end(self, call_id, outputs, exception):
        """Log the language model's outputs once it returns."""
        self.logger.debug(f"LM is finished with outputs: {outputs}")

    def _is_reasoning_output(self, outputs):
        # A key beginning with "Thought" marks a reasoning step.
        for key in outputs:
            if key.startswith("Thought"):
                return True
        return False
def setup_logging():
    """Configure and return the application logger.

    Console handler at INFO, rotating file handler ("dmv.log", 5 MB x 3
    backups) at DEBUG, one shared timestamped format. Idempotent: handlers
    are attached only on the first call, so calling this again does not
    duplicate every log line.
    """
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)

    # Guard: the original re-added a console + file handler on every call,
    # which multiplies output when setup runs more than once per process.
    if logger.handlers:
        return logger

    # Console shows the INFO-and-up summary.
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.INFO)

    # File keeps the full DEBUG trace, rotated at 5 MB with 3 backups.
    file_handler = RotatingFileHandler(
        "dmv.log", maxBytes=5 * 1024 * 1024, backupCount=3
    )
    file_handler.setLevel(logging.DEBUG)

    formatter = logging.Formatter(
        "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    )
    console_handler.setFormatter(formatter)
    file_handler.setFormatter(formatter)

    logger.addHandler(console_handler)
    logger.addHandler(file_handler)
    return logger
def main():
logger = setup_logging()
logger.debug("main application started")
# Add verbose callback
dspy.configure(verbose_errors=True)
dspy.configure(callbacks=[CallbackHandler(logger)])
# 1. Setup the LLM
print("🚀 Initializing Qwen-8B via LM Studio...")
lm = dspy.LM(RETRIEVE_MODEL, api_base=API_BASE + API_VERSION)
@@ -32,7 +109,7 @@ def main():
query = input("📝 Query: ").strip()
# Exit conditions
if query.lower() in ["exit", "quit", "q"]:
if query.lower() in ["exit", "quit", "q", "bye"]:
print("Farewell, traveler. Good luck on your quest!")
break
@@ -47,11 +124,10 @@ def main():
print(response.answer)
except KeyboardInterrupt:
print("\n\nExiting... See you next session!")
print("\n\nRude?!.... Exiting...")
sys.exit(0)
except Exception as e:
print(f"\n⚠️ An error occurred: {e}")
if __name__ == "__main__":
main()
main()