diff --git a/TODO b/TODO index e2429c2..099d2c1 100644 --- a/TODO +++ b/TODO @@ -3,9 +3,9 @@ ---Easy Config of system prompts--- examples into prompts & better prompts -LMS CLI script to load multiple models and to make each model accept multiple inferences +---LMS CLI script to load multiple models and to make each model accept multiple inferences--- -context engineering, - only include vector hits that are x distance? +---context engineering, - only include vector hits that are x distance? --- top 5 is good enough AI in the middle - make the ai generate the string for vector search diff --git a/src/embedding.py b/src/embedding.py index 8faac55..6fe4585 100644 --- a/src/embedding.py +++ b/src/embedding.py @@ -10,7 +10,7 @@ class LocalLMEmbeddings(Embeddings): def __init__( self, model: str, base_url: str = API_BASE, batch_size: int = 32 ): - self.url = f"{base_url}/{API_VERSION}/embeddings" + self.url = f"{base_url}/{API_VERSION}embeddings" self.model = model self.batch_size = batch_size @@ -18,11 +18,13 @@ class LocalLMEmbeddings(Embeddings): """Handles the actual HTTP POST to the local server.""" payload = {"model": self.model, "input": input_texts} try: + # print(f'payload: {payload}') response = requests.post( self.url, json=payload, timeout=120 ) # Longer timeout for batches response.raise_for_status() data = response.json() + # print(data) return [item["embedding"] for item in data["data"]] except Exception as e: print(f"❌ Batch request failed: {e}") diff --git a/src/experts/retrieval_agent.py b/src/experts/retrieval_agent.py index 7282417..9ef9d88 100644 --- a/src/experts/retrieval_agent.py +++ b/src/experts/retrieval_agent.py @@ -54,8 +54,11 @@ class DnDRAG(dspy.Module): self.generate_answer = dspy.ReAct(signature=DnDContextQA,tools=self.tools) def forward(self, question): - # Use Turso to retrieve relevant notes + # TODO: Add step here to LLM Expand + # given the current question, generate 3-5 distinct search queries. 
+ # embed each of the generated search queries embedded_question = self.embeddings_model._post_request(question) + # keep the top 5 hits for each of the 3-5 queries (15-25 results in total) results = retrieve_from_turso(embedded_question, k=5) # k is limit to return # Format context as before