feat: Working again with proper use of enriched content

This commit is contained in:
2026-02-01 09:02:41 +00:00
parent 6b9eecf24c
commit cabf4f5eab
7 changed files with 49 additions and 36 deletions
+17
View File
@@ -0,0 +1,17 @@
Test new embeddings
Benchmark / rate embeddings & vectors
Is RAG still the "thing"? - What is the cutting edge
- "Context Engineering" is the current evolution, although GraphRAG has been a thing?
- Context Engineering seems to be finding the balance of how to provide just the right amount of context to get best results.
Too little context and the LLM doesn't have enough info to give an accurate answer
Too much conflicting context (poison)
too much context (confusion)
Turso - better? vector store, but sqlite so data <3
How can we RAG in AW? DSAR
Binary file not shown.
Binary file not shown.
+1 -1
View File
@@ -1,6 +1,6 @@
# --- Connection Settings --- # --- Connection Settings ---
api: api:
base_url: "http://192.168.0.49:1234" base_url: "http://framework.tawny-bellatrix.ts.net:1234"
api_version: "/v1/" api_version: "/v1/"
# --- Model Settings --- # --- Model Settings ---
+11 -1
View File
@@ -58,7 +58,17 @@ class DnDRAG(dspy.Module):
context_parts = [] context_parts = []
for i, doc in enumerate(results): for i, doc in enumerate(results):
source = doc.metadata.get("source", "Unknown") source = doc.metadata.get("source", "Unknown")
context_parts.append(f"--- Chunk {i + 1} from {source} ---\n{doc.page_content}") synopsis = doc.metadata.get("synopsis", "None")
tags = doc.metadata.get("tags", "None")
entities = doc.metadata.get("entities", "None")
context_parts.append(f"""
--- Chunk {i+1} from {source} ---
synpsis: {synopsis},
tags: {tags},
entities: {entities}
{doc.page_content}
""")
#print(context_parts)
# 3. Add the Full Content of the top match (optional, but requested!) # 3. Add the Full Content of the top match (optional, but requested!)
# We'll just take the top 1 file to avoid context window explosion # We'll just take the top 1 file to avoid context window explosion
+9 -21
View File
@@ -1,33 +1,21 @@
import dspy import dspy
from pydantic import BaseModel, Field from typing import List
# 1. Define the structure of your metadata
class DocMetadata(BaseModel):
synopsis: str = Field(description="A one-sentence summary of the document.")
tags: list[str] = Field(description="Relevant tags (NPCs, Locations, Items, Plot Points).")
entities: list[str] = Field(description="Key names of people, places, or factions.")
class IngestionSignature(dspy.Signature): class IngestionSignature(dspy.Signature):
"""You are an expert Dungeon Master's assistant. """You are an expert Dungeon Master's assistant.
Analyze the provided notes and extract a concise synopsis and relevant metadata. Analyze the provided notes and extract a concise synopsis and relevant metadata.
synopsis = A one-sentence summary of the document.
tags = Relevant tags (NPCs, Locations, Items, Plot Points).
entities = Key names of people, places, or factions.
"note -> synopsis:str, tags: list[str], entities: list[str]"
/no_think
""" """
note: str = dspy.InputField(desc="The DM notes or session recap content.") note: str = dspy.InputField(desc="The DM notes or session recap content.")
# By using the Pydantic model as the type, DSPy handles the JSON formatting for you answer: dict[str,str|List] = dspy.OutputField(desc="the metadata dictionary with the keys; synopsis, tags, entities")
answer: DocMetadata = dspy.OutputField()
class IngestionAgent(dspy.Module): class IngestionAgent(dspy.Module):
def __init__(self): def __init__(self):
super().__init__() self.ingest = dspy.Predict(IngestionSignature)
# We use TypedPredictor to enforce the Pydantic schema
# We use ChainOfThought because it helps 8B models "reason" through the tags
# before committing to the final JSON structure.
self.process = dspy.TypedPredictor(IngestionSignature)
def forward(self, note: str):
# The .answer will now be a DocMetadata object, not a string!
prediction = self.process(note=note)
return prediction
+11 -13
View File
@@ -18,11 +18,10 @@ MODEL_BASE = CFG["models"]["enrich"]
EMBEDDING_MODEL = CFG["models"]["embedding"] EMBEDDING_MODEL = CFG["models"]["embedding"]
API_BASE = CFG["api"]["base_url"] API_BASE = CFG["api"]["base_url"]
API_VERSION = CFG["api"]["api_version"] API_VERSION = CFG["api"]["api_version"]
MAX_WORKERS = CFG["ingestion"]["max_workers"] MAX_WORKERS=CFG["ingestion"]["max_workers"]
CHUNK_SIZE = (CFG["ingestion"]["chunk_size"],) CHUNK_SIZE=CFG["ingestion"]["chunk_size"]
CHUNK_OVERLAP = CFG["ingestion"]["chunk_overlap"] CHUNK_OVERLAP=CFG["ingestion"]["chunk_overlap"]
EMBEDDING_BATCH_SIZE = CFG["ingestion"]["embedding_batch_size"] EMBEDDING_BATCH_SIZE=CFG["ingestion"]["embedding_batch_size"]
def load_documents(): def load_documents():
docs = [] docs = []
@@ -64,20 +63,19 @@ def enrich_chunks(chunks: list) -> list:
lm_index = idx % 8 lm_index = idx % 8
try: try:
with dspy.context( with dspy.context(lm=dspy.LM(model=MODEL_BASE, api_base=API_BASE+API_VERSION)):
lm=dspy.LM(f"{MODEL_BASE}:{lm_index}", api_base=API_BASE + API_VERSION) response = IngestionAgent().ingest(note=chunk.page_content)
):
response = IngestionAgent().forward(note=chunk.page_content)
# This is now an object, not a string! # This is now an object, not a string!
metadata = response.answer.dict() metadata = response.answer
except Exception as e: except Exception as e:
print(f"⚠️ Failed for chunk {idx}: {e}") print(f"⚠️ Failed for chunk {idx}: {e}")
metadata = {"synopsis": "Summary failed", "tags": ["error"], "entities": []} metadata = {"synopsis": "Summary failed", "tags": ["error"], "entities": []}
chunk.metadata.update(metadata) chunk.metadata.update(metadata)
return chunk return (idx, chunk)
enriched_results = [] enriched_results = []
with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor: with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor: