3 Commits

Author SHA1 Message Date
Jake 1e20a5452f fix: 🐛 more stable ingestion 2026-03-08 17:28:29 +00:00
Jake-Pullen 90c88b068b Merge pull request #1 from Jake-Pullen/ai_in_the_middle
feat:  AI Powered enhanced queries to get better results
2026-03-07 11:22:11 +00:00
Jake 26c0049fd8 feat: AI Powered enhanced queries to get better results 2026-03-07 11:08:21 +00:00
6 changed files with 108 additions and 51 deletions
+7 -1
View File
@@ -11,9 +11,15 @@
## Planned Next ## Planned Next
* AI in the middle - make the llm generate multiple queries for a wider search * database retrieve for tag or entity
## Planned Later ## Planned Later
* entity chunking & re-ranking * entity chunking & re-ranking
* Logging in Ingestion * Logging in Ingestion
* More robust ingestion - llm response sometimes out of expected
## Done
* AI in the middle - make the llm generate multiple queries for a wider search
+36 -13
View File
@@ -8,13 +8,15 @@ models:
enrich: "lm_studio/qwen-" # will have an identifier, based on amount of active LLMs see ./load_ingestion_llms.sh enrich: "lm_studio/qwen-" # will have an identifier, based on amount of active LLMs see ./load_ingestion_llms.sh
embedding: "text-embedding-qwen3-embedding-8b" embedding: "text-embedding-qwen3-embedding-8b"
retrieval: "lm_studio/qwen/qwen3-30b-a3b-2507" retrieval: "lm_studio/qwen/qwen3-30b-a3b-2507"
expansion: "lm_studio/qwen/qwen3-30b-a3b-2507"
# --- Ingestion Settings --- # --- Ingestion Settings ---
ingestion: ingestion:
data_dir: "/home/cosmic/DnD" data_dir: "/home/jake/DnD"
db_path: "./data/dmv.db" db_path: "./data/"
db_name: "dmv.db"
active_llms: 2 active_llms: 2
parallel_requests_per_llm: 2 parallel_requests_per_llm: 4
chunk_size: 800 chunk_size: 800
chunk_overlap: 100 chunk_overlap: 100
embedding_batch_size: 32 embedding_batch_size: 32
@@ -23,16 +25,37 @@ ingestion:
# ---- Agent Settings ---- # ---- Agent Settings ----
ingestion_agent: ingestion_agent:
ingestion_signature: | ingestion_signature: |
You are an expert Dungeon Master's assistant. You are an expert Dungeon Master's assistant specialized in campaign note enrichment.
Analyze the provided notes and extract a concise synopsis and relevant metadata. Your task is to analyze DnD session notes and extract structured metadata.
synopsis = A one-sentence summary of the document.
tags = Relevant tags (NPCs, Locations, Items, Plot Points). Follow these guidelines:
entities = a list of Key names of people, places, or factions. - SYNOPSIS: One concise sentence capturing the key event or development (use active voice)
"note -> synopsis:str, tags: list[str], entities: list[str]" - TAGS: Extract 3-7 relevant tags from: Campaign arcs, NPC names, Locations, Items, Spells, Factions, Plot hooks, Themes
- ENTITIES: List all proper nouns (NPCs, locations, organizations) - be specific and consistent with naming
The TAGS and ENTITIES must be a list of strings, not json objects
Format output as JSON with keys: synopsis, tags, entities
retrieval_agent: retrieval_agent:
retrieval_signature: | retrieval_signature: |
You are an expert Dungeon Master's assistant. You are an expert Dungeon Master's assistant helping to run a campaign.
Given the context and the question, answer the question. When answering questions about your DnD world:
Do not make things up, base all of your answers on the context.
Always site the file location of your source of information. 1. Strictly use ONLY the provided context from campaign notes
2. If information is incomplete, infer plausibly based on established lore (flag inferences)
3. Always cite sources: "Per [filename], [quote/summary]"
4. Maintain character voice and narrative style when appropriate
5. For rules questions, distinguish between rules-as-written and DM interpretation
Provide comprehensive answers that help you run the game, including relevant details about NPCs, locations, or plot points.
expansion_agent:
expansion_signature: |
You are a query expansion expert specialized in Dungeons & Dragons campaign management.
Given a user question about their DnD world, generate 3-5 enhanced search queries that:
- Cover different aspects (characters, locations, lore, rules)
- Include synonyms and related terms (e.g., "dragon" → "wyrm", "scales" → "armor")
- Address potential follow-up questions the DM might have
- Vary specificity (broad to narrow)
Return ONLY a JSON array with key "queries". Keep queries concise (5-10 words each).
+2 -1
View File
@@ -1,5 +1,6 @@
import requests import requests
from langchain_core.embeddings import Embeddings from langchain_core.embeddings import Embeddings
from config_loader import load_config from config_loader import load_config
CFG = load_config() CFG = load_config()
@@ -37,7 +38,7 @@ class LocalLMEmbeddings(Embeddings):
for i in range(0, len(texts), self.batch_size): for i in range(0, len(texts), self.batch_size):
batch = texts[i : i + self.batch_size] batch = texts[i : i + self.batch_size]
print(f"🚀 Processing batch {(i // self.batch_size) + 1} (Size: {len(batch)})...") # print(f"🚀 Processing batch {(i // self.batch_size) + 1} (Size: {len(batch)})...")
batch_vectors = self._post_request(batch) batch_vectors = self._post_request(batch)
all_embeddings.extend(batch_vectors) all_embeddings.extend(batch_vectors)
View File
+50 -25
View File
@@ -1,7 +1,7 @@
import os import os
import turso
import dspy
import dspy
import turso
from config_loader import load_config from config_loader import load_config
from embedding import LocalLMEmbeddings from embedding import LocalLMEmbeddings
@@ -9,27 +9,28 @@ from embedding import LocalLMEmbeddings
CFG = load_config() CFG = load_config()
DATABASE_PATH = CFG["ingestion"]["db_path"] DATABASE_PATH = CFG["ingestion"]["db_path"]
DATABASE_NAME = CFG["ingestion"]["db_name"]
EMBEDDING_MODEL = CFG["models"]["embedding"] EMBEDDING_MODEL = CFG["models"]["embedding"]
API_BASE = CFG["api"]["base_url"] API_BASE = CFG["api"]["base_url"]
RETRIEVAL_CONFIG = CFG["retrieval_agent"] RETRIEVAL_CONFIG = CFG["retrieval_agent"]
EXPANSION_CONFIG = CFG["expansion_agent"]
def retrieve_from_turso(embedded_question, k=5): def retrieve_from_turso(embedded_question, k=5):
query = f""" query = f"""
SELECT file_path, synopsis, tags, entities, chunk_data, SELECT file_path, synopsis, tags, entities, chunk_data,
vector_distance_cos(embedding, vector32('{embedded_question[0]}')) AS distance vector_distance_cos(embedding, vector32('{embedded_question}')) AS distance
FROM notes FROM notes
ORDER BY distance ASC ORDER BY distance ASC
LIMIT {k}; LIMIT {k};
""" """
con = turso.connect(DATABASE_PATH) con = turso.connect(DATABASE_PATH + DATABASE_NAME)
cur = con.cursor() cur = con.cursor()
cur.execute(query) cur.execute(query)
rows = cur.fetchall() rows = cur.fetchall()
return rows return rows
# --- DSPy Signature ---
class DnDContextQA(dspy.Signature): class DnDContextQA(dspy.Signature):
f"{RETRIEVAL_CONFIG['retrieval_signature']}" f"{RETRIEVAL_CONFIG['retrieval_signature']}"
@@ -38,47 +39,71 @@ class DnDContextQA(dspy.Signature):
answer = dspy.OutputField(desc="A detailed answer based on the notes, citing the source file.") answer = dspy.OutputField(desc="A detailed answer based on the notes, citing the source file.")
class ExpansionSignature(dspy.Signature):
f"{EXPANSION_CONFIG['expansion_signature']}"
question = dspy.InputField()
answer = dspy.OutputField(
desc="A list of questions that will be used to vector search the database."
)
class DnDRAG(dspy.Module): class DnDRAG(dspy.Module):
def __init__(self): def __init__(self):
super().__init__() super().__init__()
self.embeddings_model = LocalLMEmbeddings( self.embeddings_model = LocalLMEmbeddings(
model=EMBEDDING_MODEL, model=EMBEDDING_MODEL,
base_url=API_BASE, base_url=API_BASE,
batch_size=1, # we only send 1 question at a time. # batch_size=1,
) )
# Tools exposed to the ReAct loop self.retrieval_lm = dspy.LM(
model=CFG["models"]["retrieval"], api_base=API_BASE + CFG["api"]["api_version"]
)
with dspy.context(lm=self.retrieval_lm, signature=ExpansionSignature):
self.query_expander = dspy.Predict("question -> queries:list[str]")
self.tools = [self.load_file] self.tools = [self.load_file]
self.generate_answer = dspy.ReAct(signature=DnDContextQA, tools=self.tools) self.generate_answer = dspy.ReAct(signature=DnDContextQA, tools=self.tools)
def forward(self, question): def forward(self, question):
# TODO: Add step here to LLM Expand print("Enhancing Question")
# given the current question, generate 3-5 distinct search queries. with dspy.context(lm=self.retrieval_lm):
# embed all the questions expanded_queries = self.query_expander(question=question).queries
embedded_question = self.embeddings_model._post_request(question) print("Enhanced Queries:")
# store the 5 from all 3-5 questions (15 - 25 results) for q in expanded_queries:
results = retrieve_from_turso(embedded_question, k=5) # k is limit to return print(" ", q)
all_embeddings = self.embeddings_model.embed_documents([question] + expanded_queries)
# print(all_embeddings)
all_results = []
for embedded_question in all_embeddings:
results = retrieve_from_turso(embedded_question, k=5)
all_results.extend(results)
seen = set()
unique_results = []
for row in all_results:
key = (row[0], row[4])
if key not in seen:
seen.add(key)
unique_results.append(row)
# Format context as before
context_parts = [] context_parts = []
for i, row in enumerate(results): for i, row in enumerate(unique_results):
source = row[0] # file_path source = row[0]
synopsis = row[1] # synopsis synopsis = row[1]
tags = row[2] # tags tags = row[2]
entities = row[3] # entities entities = row[3]
content = row[4] # chunk_data content = row[4]
closeness = row[5]
context_parts.append(f""" context_parts.append(f"""
--- Chunk {i + 1} from {source} --- --- Chunk {i + 1} from {source} ---
synopsis: {synopsis}, synopsis: {synopsis},
tags: {tags}, tags: {tags},
entities: {entities} entities: {entities},
closeness: {closeness},
{content} {content}
""") """)
# print('Closest embedding hits')
# for part in context_parts:
# print(part)
context = "\n\n".join(context_parts) context = "\n\n".join(context_parts)
prediction = self.generate_answer(context=context, question=question) prediction = self.generate_answer(context=context, question=question)
+11 -9
View File
@@ -16,6 +16,7 @@ from experts.ingestion_agent import IngestionAgent
CFG = load_config() CFG = load_config()
DATA_DIR = CFG["ingestion"]["data_dir"] DATA_DIR = CFG["ingestion"]["data_dir"]
DATABASE_PATH = CFG["ingestion"]["db_path"] DATABASE_PATH = CFG["ingestion"]["db_path"]
DATABASE_NAME = CFG["ingestion"]["db_name"]
MODEL_BASE = CFG["models"]["enrich"] MODEL_BASE = CFG["models"]["enrich"]
EMBEDDING_MODEL = CFG["models"]["embedding"] EMBEDDING_MODEL = CFG["models"]["embedding"]
API_BASE = CFG["api"]["base_url"] API_BASE = CFG["api"]["base_url"]
@@ -139,13 +140,10 @@ def embed_chunks(chunks: List[Any], batch_size: int = EMBEDDING_BATCH_SIZE) -> L
# Process chunks in batches # Process chunks in batches
for i in tqdm(range(0, total_chunks, batch_size), desc="Embedding batches"): for i in tqdm(range(0, total_chunks, batch_size), desc="Embedding batches"):
batch = chunks[i : i + batch_size] batch = chunks[i : i + batch_size]
print(f"🚀 Processing batch {(i // batch_size) + 1} (Size: {len(batch)})...")
batch_content = [chunk.page_content for chunk in batch] batch_content = [chunk.page_content for chunk in batch]
try: try:
# Use model's batched embedding method
# batch_embeddings = embeddings_model.embed_query(batch_content)
batch_embeddings = embeddings_model.embed_documents(batch_content) batch_embeddings = embeddings_model.embed_documents(batch_content)
# Process each chunk in the batch # Process each chunk in the batch
for j, (chunk, embedding) in enumerate(zip(batch, batch_embeddings)): for j, (chunk, embedding) in enumerate(zip(batch, batch_embeddings)):
# Extract metadata # Extract metadata
@@ -178,8 +176,8 @@ def embed_chunks(chunks: List[Any], batch_size: int = EMBEDDING_BATCH_SIZE) -> L
print(f"⚠️ Batch processing failed at index {i}: {e}") print(f"⚠️ Batch processing failed at index {i}: {e}")
# Fallback: process individually (if needed) # Fallback: process individually (if needed)
for j, chunk in enumerate(batch): for j, chunk in enumerate(batch):
try:
content = chunk.page_content content = chunk.page_content
try:
embedding = embeddings_model.embed_query(content) embedding = embeddings_model.embed_query(content)
file_path_orig = chunk.metadata.get("full_path", "unknown") file_path_orig = chunk.metadata.get("full_path", "unknown")
@@ -228,7 +226,7 @@ def save_to_db(chunk_dicts):
Each dict maps to a row in the 'notes' table. Each dict maps to a row in the 'notes' table.
""" """
print("connecting to db") print("connecting to db")
con = turso.connect(DATABASE_PATH) con = turso.connect(DATABASE_PATH + DATABASE_NAME)
print("opening cursor") print("opening cursor")
cur = con.cursor() cur = con.cursor()
@@ -252,7 +250,10 @@ def save_to_db(chunk_dicts):
entry["chunk_data"], entry["chunk_data"],
entry["synopsis"], entry["synopsis"],
",".join(entry["tags"]), # Store as comma-separated string ",".join(entry["tags"]), # Store as comma-separated string
",".join(entry["entities"]), # Store as comma-separated string ",".join(
str(e) if isinstance(e, str) else e.get("name", str(e))
for e in entry["entities"]
), # Store as comma-separated string
embedding_str, embedding_str,
entry["timestamp"], entry["timestamp"],
) )
@@ -267,7 +268,8 @@ def save_to_db(chunk_dicts):
def create_db(): def create_db():
con = turso.connect(DATABASE_PATH) Path(DATABASE_PATH).mkdir(exist_ok=True)
con = turso.connect(DATABASE_PATH + DATABASE_NAME)
cur = con.cursor() cur = con.cursor()
cur.execute(""" cur.execute("""
@@ -334,7 +336,7 @@ def delete_from_db(embedded_chunks):
print(f"Deleting existing rows for {len(file_paths)} file(s)") print(f"Deleting existing rows for {len(file_paths)} file(s)")
con = turso.connect(DATABASE_PATH) con = turso.connect(DATABASE_PATH + DATABASE_NAME)
cur = con.cursor() cur = con.cursor()
# Use a single DELETE statement with IN clause for efficiency # Use a single DELETE statement with IN clause for efficiency