feat: ✨ Embedding working again, have next steps
This commit is contained in:
@@ -3,9 +3,9 @@
|
|||||||
---Easy Config of system prompts---
|
---Easy Config of system prompts---
|
||||||
examples into prompts & better prompts
|
examples into prompts & better prompts
|
||||||
|
|
||||||
LMS CLI script to load multiple models and to make each model accept multiple inferences
|
---LMS CLI script to load multiple models and to make each model accept multiple inferences---
|
||||||
|
|
||||||
context engineering, - only include vector hits that are x distance?
|
---context engineering, - only include vector hits that are x distance? --- top 5 is good enough
|
||||||
|
|
||||||
AI in the middle - make the ai generate the string for vector search
|
AI in the middle - make the ai generate the string for vector search
|
||||||
|
|
||||||
|
|||||||
+3
-1
@@ -10,7 +10,7 @@ class LocalLMEmbeddings(Embeddings):
|
|||||||
def __init__(
|
def __init__(
|
||||||
self, model: str, base_url: str = API_BASE, batch_size: int = 32
|
self, model: str, base_url: str = API_BASE, batch_size: int = 32
|
||||||
):
|
):
|
||||||
self.url = f"{base_url}/{API_VERSION}/embeddings"
|
self.url = f"{base_url}/{API_VERSION}embeddings"
|
||||||
self.model = model
|
self.model = model
|
||||||
self.batch_size = batch_size
|
self.batch_size = batch_size
|
||||||
|
|
||||||
@@ -18,11 +18,13 @@ class LocalLMEmbeddings(Embeddings):
|
|||||||
"""Handles the actual HTTP POST to the local server."""
|
"""Handles the actual HTTP POST to the local server."""
|
||||||
payload = {"model": self.model, "input": input_texts}
|
payload = {"model": self.model, "input": input_texts}
|
||||||
try:
|
try:
|
||||||
|
# print(f'payload: {payload}')
|
||||||
response = requests.post(
|
response = requests.post(
|
||||||
self.url, json=payload, timeout=120
|
self.url, json=payload, timeout=120
|
||||||
) # Longer timeout for batches
|
) # Longer timeout for batches
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
data = response.json()
|
data = response.json()
|
||||||
|
# print(data)
|
||||||
return [item["embedding"] for item in data["data"]]
|
return [item["embedding"] for item in data["data"]]
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"❌ Batch request failed: {e}")
|
print(f"❌ Batch request failed: {e}")
|
||||||
|
|||||||
@@ -54,8 +54,11 @@ class DnDRAG(dspy.Module):
|
|||||||
self.generate_answer = dspy.ReAct(signature=DnDContextQA,tools=self.tools)
|
self.generate_answer = dspy.ReAct(signature=DnDContextQA,tools=self.tools)
|
||||||
|
|
||||||
def forward(self, question):
|
def forward(self, question):
|
||||||
# Use Turso to retrieve relevant notes
|
# TODO: Add an LLM query-expansion step here
|
||||||
|
# given the current question, generate 3-5 distinct search queries.
|
||||||
|
# embed all the questions
|
||||||
embedded_question = self.embeddings_model._post_request(question)
|
embedded_question = self.embeddings_model._post_request(question)
|
||||||
|
# store the 5 results returned for each of the 3-5 queries (15-25 results in total)
|
||||||
results = retrieve_from_turso(embedded_question, k=5) # k is limit to return
|
results = retrieve_from_turso(embedded_question, k=5) # k is limit to return
|
||||||
|
|
||||||
# Format context as before
|
# Format context as before
|
||||||
|
|||||||
Reference in New Issue
Block a user