From d162aa59dc92225e086c08efade222c3b019d555 Mon Sep 17 00:00:00 2001
From: Jake Pullen <hello@jake-is.me>
Date: Thu, 5 Mar 2026 17:30:02 +0000
Subject: [PATCH] =?UTF-8?q?feat:=20=E2=9C=A8=20Embedding=20working=20again?=
 =?UTF-8?q?,=20have=20next=20steps?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 TODO                           | 4 ++--
 src/embedding.py               | 4 +++-
 src/experts/retrieval_agent.py | 5 ++++-
 3 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/TODO b/TODO
index e2429c2..099d2c1 100644
--- a/TODO
+++ b/TODO
@@ -3,9 +3,9 @@
 ---Easy Config of system prompts---
     examples into prompts & better prompts
 
-LMS CLI script to load multiple models and to make each model accept multiple inferences
+---LMS CLI script to load multiple models and to make each model accept multiple inferences---
 
-context engineering, - only include vector hits that are x distance?
+---context engineering - only include vector hits within x distance?--- decided: top 5 is good enough
 
 AI in the middle - make the ai generate the string for vector search
 
diff --git a/src/embedding.py b/src/embedding.py
index 8faac55..6fe4585 100644
--- a/src/embedding.py
+++ b/src/embedding.py
@@ -10,7 +10,7 @@ class LocalLMEmbeddings(Embeddings):
     def __init__(
         self, model: str, base_url: str = API_BASE, batch_size: int = 32
     ):
-        self.url = f"{base_url}/{API_VERSION}/embeddings"
+        self.url = f"{base_url}/{API_VERSION}embeddings"
         self.model = model
         self.batch_size = batch_size
 
@@ -18,11 +18,13 @@ class LocalLMEmbeddings(Embeddings):
         """Handles the actual HTTP POST to the local server."""
         payload = {"model": self.model, "input": input_texts}
         try:
+            # print(f'payload: {payload}')
             response = requests.post(
                 self.url, json=payload, timeout=120
             )  # Longer timeout for batches
             response.raise_for_status()
             data = response.json()
+            # print(data)
             return [item["embedding"] for item in data["data"]]
         except Exception as e:
             print(f"❌ Batch request failed: {e}")
diff --git a/src/experts/retrieval_agent.py b/src/experts/retrieval_agent.py
index 7282417..9ef9d88 100644
--- a/src/experts/retrieval_agent.py
+++ b/src/experts/retrieval_agent.py
@@ -54,8 +54,11 @@ class DnDRAG(dspy.Module):
         self.generate_answer = dspy.ReAct(signature=DnDContextQA,tools=self.tools)
 
     def forward(self, question):
-        # Use Turso to retrieve relevant notes
+        # TODO: add an LLM query-expansion step here:
+        # given the current question, generate 3-5 distinct search queries,
+        # then embed every generated query instead of only the original question.
         embedded_question = self.embeddings_model._post_request(question)
+        # TODO: keep the top 5 hits for each of the 3-5 queries (15-25 results in total)
         results = retrieve_from_turso(embedded_question, k=5)  # k is limit to return
 
         # Format context as before