feat: ✨ Embedding working again, have next steps
This commit is contained in:
@@ -3,9 +3,9 @@
|
||||
---Easy Config of system prompts---
|
||||
Add examples into the prompts & write better prompts
|
||||
|
||||
LMS CLI script to load multiple models and make each model accept multiple inferences
|
||||
---LMS CLI script to load multiple models and to make each model accept multiple inferences---
|
||||
|
||||
Context engineering — only include vector hits within a distance threshold x?
|
||||
---context engineering, - only include vector hits that are x distance? --- top 5 is good enough
|
||||
|
||||
AI in the middle — have the AI generate the query string for the vector search
|
||||
|
||||
|
||||
+3
-1
@@ -10,7 +10,7 @@ class LocalLMEmbeddings(Embeddings):
|
||||
def __init__(
|
||||
self, model: str, base_url: str = API_BASE, batch_size: int = 32
|
||||
):
|
||||
self.url = f"{base_url}/{API_VERSION}/embeddings"
|
||||
self.url = f"{base_url}/{API_VERSION}embeddings"
|
||||
self.model = model
|
||||
self.batch_size = batch_size
|
||||
|
||||
@@ -18,11 +18,13 @@ class LocalLMEmbeddings(Embeddings):
|
||||
"""Handles the actual HTTP POST to the local server."""
|
||||
payload = {"model": self.model, "input": input_texts}
|
||||
try:
|
||||
# print(f'payload: {payload}')
|
||||
response = requests.post(
|
||||
self.url, json=payload, timeout=120
|
||||
) # Longer timeout for batches
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
# print(data)
|
||||
return [item["embedding"] for item in data["data"]]
|
||||
except Exception as e:
|
||||
print(f"❌ Batch request failed: {e}")
|
||||
|
||||
@@ -54,8 +54,11 @@ class DnDRAG(dspy.Module):
|
||||
self.generate_answer = dspy.ReAct(signature=DnDContextQA,tools=self.tools)
|
||||
|
||||
def forward(self, question):
|
||||
# Use Turso to retrieve relevant notes
|
||||
# TODO: Add step here to LLM Expand
|
||||
# given the current question, generate 3-5 distinct search queries.
|
||||
# embed all the questions
|
||||
embedded_question = self.embeddings_model._post_request(question)
|
||||
# store the 5 from all 3-5 questions (15 - 25 results)
|
||||
results = retrieve_from_turso(embedded_question, k=5) # k is limit to return
|
||||
|
||||
# Format context as before
|
||||
|
||||
Reference in New Issue
Block a user