From d162aa59dc92225e086c08efade222c3b019d555 Mon Sep 17 00:00:00 2001 From: Jake Pullen Date: Thu, 5 Mar 2026 17:30:02 +0000 Subject: [PATCH] =?UTF-8?q?feat:=20=E2=9C=A8=20Embedding=20working=20again?= =?UTF-8?q?,=20have=20next=20steps?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- TODO | 4 ++-- src/embedding.py | 4 +++- src/experts/retrieval_agent.py | 5 ++++- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/TODO b/TODO index e2429c2..099d2c1 100644 --- a/TODO +++ b/TODO @@ -3,9 +3,9 @@ ---Easy Config of system prompts--- examples into prompts & better prompts -LMS CLI script to load multiple models and to make each model accept multiple inferences +---LMS CLI script to load multiple models and to make each model accept multiple inferences--- -context engineering, - only include vector hits that are x distance? +---context engineering, - only include vector hits that are x distance? --- top 5 is good enough AI in the middle - make the ai generate the string for vector search diff --git a/src/embedding.py b/src/embedding.py index 8faac55..6fe4585 100644 --- a/src/embedding.py +++ b/src/embedding.py @@ -10,7 +10,7 @@ class LocalLMEmbeddings(Embeddings): def __init__( self, model: str, base_url: str = API_BASE, batch_size: int = 32 ): - self.url = f"{base_url}/{API_VERSION}/embeddings" + self.url = f"{base_url}/{API_VERSION}embeddings" self.model = model self.batch_size = batch_size @@ -18,11 +18,13 @@ class LocalLMEmbeddings(Embeddings): """Handles the actual HTTP POST to the local server.""" payload = {"model": self.model, "input": input_texts} try: + # print(f'payload: {payload}') response = requests.post( self.url, json=payload, timeout=120 ) # Longer timeout for batches response.raise_for_status() data = response.json() + # print(data) return [item["embedding"] for item in data["data"]] except Exception as e: print(f"❌ Batch request failed: {e}") diff --git a/src/experts/retrieval_agent.py b/src/experts/retrieval_agent.py index 7282417..9ef9d88 100644 --- a/src/experts/retrieval_agent.py +++ b/src/experts/retrieval_agent.py @@ -54,8 +54,11 @@ class DnDRAG(dspy.Module): self.generate_answer = dspy.ReAct(signature=DnDContextQA,tools=self.tools) def forward(self, question): - # Use Turso to retrieve relevant notes + # TODO: Add step here to LLM Expand + # given the current question, generate 3-5 distinct search queries. + # embed all the questions embedded_question = self.embeddings_model._post_request(question) + # store the 5 from all 3-5 questions (15 - 25 results) results = retrieve_from_turso(embedded_question, k=5) # k is limit to return # Format context as before