From 26c0049fd8e4656eb0e90787bf82afe130b005b0 Mon Sep 17 00:00:00 2001 From: Jake Pullen Date: Sat, 7 Mar 2026 11:08:21 +0000 Subject: [PATCH] =?UTF-8?q?feat:=20=E2=9C=A8=20AI=20Powered=20enhanced=20q?= =?UTF-8?q?ueries=20to=20get=20better=20results?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ROADMAP.md | 4 +- config.yaml | 15 +++++-- src/embedding.py | 3 +- src/experts/expansion_agent.py | 0 src/experts/retrieval_agent.py | 75 ++++++++++++++++++++++------------ src/ingest.py | 13 +++--- 6 files changed, 73 insertions(+), 37 deletions(-) create mode 100644 src/experts/expansion_agent.py diff --git a/ROADMAP.md b/ROADMAP.md index d0ce349..06c3de4 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -16,4 +16,6 @@ ## Planned Later * entity chunking & re-ranking -* Logging in Ingestion \ No newline at end of file +* Logging in Ingestion +* database retrieve for tag or entity +* diff --git a/config.yaml b/config.yaml index 0aa67cc..6864ca5 100644 --- a/config.yaml +++ b/config.yaml @@ -8,11 +8,13 @@ models: enrich: "lm_studio/qwen-" # will have an identifier, based on amount of active LLMs see ./load_ingestion_llms.sh embedding: "text-embedding-qwen3-embedding-8b" retrieval: "lm_studio/qwen/qwen3-30b-a3b-2507" + expansion: "lm_studio/qwen/qwen3-30b-a3b-2507" # --- Ingestion Settings --- ingestion: - data_dir: "/home/cosmic/DnD" - db_path: "./data/dmv.db" + data_dir: "/home/jake/DnD" + db_path: "./data/" + db_name: "dmv.db" active_llms: 2 parallel_requests_per_llm: 2 chunk_size: 800 @@ -21,7 +23,7 @@ ingestion: time_file_location: "./data/time_file.txt" # ---- Agent Settings ---- -ingestion_agent: +ingestion_agent: ingestion_signature: | You are an expert Dungeon Master's assistant. Analyze the provided notes and extract a concise synopsis and relevant metadata. @@ -36,3 +38,10 @@ retrieval_agent: Given the context and the question, answer the question. Do not make things up, base all of your answers on the context. 
Always site the file location of your source of information. + +expansion_agent: + expansion_signature: | + You are a query expansion expert, specialised in Dungeons and Dragons. + Given a user's question, generate 3-5 similar but enhanced search queries that would help find more relevant information. + Each expanded query should be distinct and add a different perspective to the original question. + Return only the queries as a JSON list with key "queries". diff --git a/src/embedding.py b/src/embedding.py index f0908a1..291131d 100644 --- a/src/embedding.py +++ b/src/embedding.py @@ -1,5 +1,6 @@ import requests from langchain_core.embeddings import Embeddings + from config_loader import load_config CFG = load_config() @@ -37,7 +38,7 @@ class LocalLMEmbeddings(Embeddings): for i in range(0, len(texts), self.batch_size): batch = texts[i : i + self.batch_size] - print(f"🚀 Processing batch {(i // self.batch_size) + 1} (Size: {len(batch)})...") + # print(f"🚀 Processing batch {(i // self.batch_size) + 1} (Size: {len(batch)})...") batch_vectors = self._post_request(batch) all_embeddings.extend(batch_vectors) diff --git a/src/experts/expansion_agent.py b/src/experts/expansion_agent.py new file mode 100644 index 0000000..e69de29 diff --git a/src/experts/retrieval_agent.py b/src/experts/retrieval_agent.py index f42d111..7e0f837 100644 --- a/src/experts/retrieval_agent.py +++ b/src/experts/retrieval_agent.py @@ -1,7 +1,7 @@ import os -import turso -import dspy +import dspy +import turso from config_loader import load_config from embedding import LocalLMEmbeddings @@ -9,27 +9,28 @@ from embedding import LocalLMEmbeddings CFG = load_config() DATABASE_PATH = CFG["ingestion"]["db_path"] +DATABASE_NAME = CFG["ingestion"]["db_name"] EMBEDDING_MODEL = CFG["models"]["embedding"] API_BASE = CFG["api"]["base_url"] RETRIEVAL_CONFIG = CFG["retrieval_agent"] +EXPANSION_CONFIG = CFG["expansion_agent"] def retrieve_from_turso(embedded_question, k=5): query = f""" SELECT file_path, 
synopsis, tags, entities, chunk_data, - vector_distance_cos(embedding, vector32('{embedded_question[0]}')) AS distance + vector_distance_cos(embedding, vector32('{embedded_question}')) AS distance FROM notes ORDER BY distance ASC LIMIT {k}; """ - con = turso.connect(DATABASE_PATH) + con = turso.connect(DATABASE_PATH + DATABASE_NAME) cur = con.cursor() cur.execute(query) rows = cur.fetchall() return rows -# --- DSPy Signature --- class DnDContextQA(dspy.Signature): f"{RETRIEVAL_CONFIG['retrieval_signature']}" @@ -38,47 +39,71 @@ class DnDContextQA(dspy.Signature): answer = dspy.OutputField(desc="A detailed answer based on the notes, citing the source file.") +class ExpansionSignature(dspy.Signature): + f"{EXPANSION_CONFIG['expansion_signature']}" + question = dspy.InputField() + answer = dspy.OutputField( + desc="A list of questions that will be used to vector search the database." + ) + + class DnDRAG(dspy.Module): def __init__(self): super().__init__() self.embeddings_model = LocalLMEmbeddings( model=EMBEDDING_MODEL, base_url=API_BASE, - batch_size=1, # we only send 1 question at a time. + # batch_size=1, ) - # Tools exposed to the ReAct loop + self.retrieval_lm = dspy.LM( + model=CFG["models"]["retrieval"], api_base=API_BASE + CFG["api"]["api_version"] + ) + with dspy.context(lm=self.retrieval_lm, signature=ExpansionSignature): + self.query_expander = dspy.Predict("question -> queries:list[str]") + self.tools = [self.load_file] self.generate_answer = dspy.ReAct(signature=DnDContextQA, tools=self.tools) def forward(self, question): - # TODO: Add step here to LLM Expand - # given the current question, generate 3-5 distinct search queries. 
- # embed all the questions - embedded_question = self.embeddings_model._post_request(question) - # store the 5 from all 3-5 questions (15 - 25 results) - results = retrieve_from_turso(embedded_question, k=5) # k is limit to return + print("Enhancing Question") + with dspy.context(lm=self.retrieval_lm): + expanded_queries = self.query_expander(question=question).queries + print("Enhanced Queries:") + for q in expanded_queries: + print(" ", q) + all_embeddings = self.embeddings_model.embed_documents([question] + expanded_queries) + # print(all_embeddings) + all_results = [] + for embedded_question in all_embeddings: + results = retrieve_from_turso(embedded_question, k=5) + all_results.extend(results) + + seen = set() + unique_results = [] + for row in all_results: + key = (row[0], row[4]) + if key not in seen: + seen.add(key) + unique_results.append(row) - # Format context as before context_parts = [] - for i, row in enumerate(results): - source = row[0] # file_path - synopsis = row[1] # synopsis - tags = row[2] # tags - entities = row[3] # entities - content = row[4] # chunk_data + for i, row in enumerate(unique_results): + source = row[0] + synopsis = row[1] + tags = row[2] + entities = row[3] + content = row[4] + closeness = row[5] context_parts.append(f""" --- Chunk {i + 1} from {source} --- synopsis: {synopsis}, tags: {tags}, -entities: {entities} +entities: {entities}, +closeness: {closeness}, {content} """) - # print('Closest embedding hits') - # for part in context_parts: - # print(part) - context = "\n\n".join(context_parts) prediction = self.generate_answer(context=context, question=question) diff --git a/src/ingest.py b/src/ingest.py index 19a53b6..b12b2b3 100644 --- a/src/ingest.py +++ b/src/ingest.py @@ -16,6 +16,7 @@ from experts.ingestion_agent import IngestionAgent CFG = load_config() DATA_DIR = CFG["ingestion"]["data_dir"] DATABASE_PATH = CFG["ingestion"]["db_path"] +DATABASE_NAME = CFG["ingestion"]["db_name"] MODEL_BASE = CFG["models"]["enrich"] 
EMBEDDING_MODEL = CFG["models"]["embedding"] API_BASE = CFG["api"]["base_url"] @@ -139,13 +140,10 @@ def embed_chunks(chunks: List[Any], batch_size: int = EMBEDDING_BATCH_SIZE) -> L # Process chunks in batches for i in tqdm(range(0, total_chunks, batch_size), desc="Embedding batches"): batch = chunks[i : i + batch_size] + print(f"🚀 Processing batch {(i // batch_size) + 1} (Size: {len(batch)})...") batch_content = [chunk.page_content for chunk in batch] - try: - # Use model's batched embedding method - # batch_embeddings = embeddings_model.embed_query(batch_content) batch_embeddings = embeddings_model.embed_documents(batch_content) - # Process each chunk in the batch for j, (chunk, embedding) in enumerate(zip(batch, batch_embeddings)): # Extract metadata @@ -228,7 +226,7 @@ def save_to_db(chunk_dicts): Each dict maps to a row in the 'notes' table. """ print("connecting to db") - con = turso.connect(DATABASE_PATH) + con = turso.connect(DATABASE_PATH + DATABASE_NAME) print("opening cursor") cur = con.cursor() @@ -267,7 +265,8 @@ def save_to_db(chunk_dicts): def create_db(): - con = turso.connect(DATABASE_PATH) + Path(DATABASE_PATH).mkdir(exist_ok=True) + con = turso.connect(DATABASE_PATH + DATABASE_NAME) cur = con.cursor() cur.execute(""" @@ -334,7 +333,7 @@ def delete_from_db(embedded_chunks): print(f"Deleting existing rows for {len(file_paths)} file(s)") - con = turso.connect(DATABASE_PATH) + con = turso.connect(DATABASE_PATH + DATABASE_NAME) cur = con.cursor() # Use a single DELETE statement with IN clause for efficiency