feat: ✨ Embedding working again, have next steps
This commit is contained in:
@@ -3,9 +3,9 @@
|
||||
---Easy Config of system prompts---
|
||||
Add examples into the prompts & write better prompts
|
||||
|
||||
LMS CLI script to load multiple models and make each model accept multiple inferences
|
||||
---LMS CLI script to load multiple models and to make each model accept multiple inferences---
|
||||
|
||||
Context engineering — only include vector hits within a distance threshold x?
|
||||
---context engineering, - only include vector hits that are x distance? --- top 5 is good enough
|
||||
|
||||
AI in the middle — have the AI generate the query string for the vector search
|
||||
|
||||
|
||||
+3
-1
@@ -10,7 +10,7 @@ class LocalLMEmbeddings(Embeddings):
|
||||
def __init__(
|
||||
self, model: str, base_url: str = API_BASE, batch_size: int = 32
|
||||
):
|
||||
self.url = f"{base_url}/{API_VERSION}/embeddings"
|
||||
self.url = f"{base_url}/{API_VERSION}embeddings"
|
||||
self.model = model
|
||||
self.batch_size = batch_size
|
||||
|
||||
@@ -18,11 +18,13 @@ class LocalLMEmbeddings(Embeddings):
|
||||
"""Handles the actual HTTP POST to the local server."""
|
||||
payload = {"model": self.model, "input": input_texts}
|
||||
try:
|
||||
# print(f'payload: {payload}')
|
||||
response = requests.post(
|
||||
self.url, json=payload, timeout=120
|
||||
) # Longer timeout for batches
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
# print(data)
|
||||
return [item["embedding"] for item in data["data"]]
|
||||
except Exception as e:
|
||||
print(f"❌ Batch request failed: {e}")
|
||||
|
||||
@@ -54,8 +54,11 @@ class DnDRAG(dspy.Module):
|
||||
self.generate_answer = dspy.ReAct(signature=DnDContextQA,tools=self.tools)
|
||||
|
||||
def forward(self, question):
|
||||
# Use Turso to retrieve relevant notes
|
||||
# TODO: Add step here to LLM Expand
|
||||
# given the current question, generate 3-5 distinct search queries.
|
||||
# embed all the questions
|
||||
embedded_question = self.embeddings_model._post_request(question)
|
||||
# store the 5 from all 3-5 questions (15 - 25 results)
|
||||
results = retrieve_from_turso(embedded_question, k=5) # k is limit to return
|
||||
|
||||
# Format context as before
|
||||
|
||||
Reference in New Issue
Block a user