feat: Working again with proper use of enriched content

This commit is contained in:
2026-02-01 09:02:41 +00:00
parent 6b9eecf24c
commit cabf4f5eab
7 changed files with 49 additions and 36 deletions
+17
View File
@@ -0,0 +1,17 @@
Test new embeddings
Benchmark / rate embeddings & vectors
Is RAG still the "thing"? - What is the cutting edge
- "Context Engineering" is the current evolution, although GraphRAG has been a thing?
- Context Engineering seems to be finding the balance of how to provide just the right amount of context to get best results.
Too little context and the LLM doesn't have enough info to give an accurate answer
Too much conflicting context (poison)
too much context (confusion)
Turso - better? vector store, but sqlite so data <3
How can we RAG in AW? DSAR
Binary file not shown.
Binary file not shown.
+1 -1
View File
@@ -1,6 +1,6 @@
# --- Connection Settings --- # --- Connection Settings ---
api: api:
base_url: "http://192.168.0.49:1234" base_url: "http://framework.tawny-bellatrix.ts.net:1234"
api_version: "/v1/" api_version: "/v1/"
# --- Model Settings --- # --- Model Settings ---
+11 -1
View File
@@ -58,7 +58,17 @@ class DnDRAG(dspy.Module):
context_parts = [] context_parts = []
for i, doc in enumerate(results): for i, doc in enumerate(results):
source = doc.metadata.get("source", "Unknown") source = doc.metadata.get("source", "Unknown")
context_parts.append(f"--- Chunk {i + 1} from {source} ---\n{doc.page_content}") synopsis = doc.metadata.get("synopsis", "None")
tags = doc.metadata.get("tags", "None")
entities = doc.metadata.get("entities", "None")
context_parts.append(f"""
--- Chunk {i+1} from {source} ---
synpsis: {synopsis},
tags: {tags},
entities: {entities}
{doc.page_content}
""")
#print(context_parts)
# 3. Add the Full Content of the top match (optional, but requested!) # 3. Add the Full Content of the top match (optional, but requested!)
# We'll just take the top 1 file to avoid context window explosion # We'll just take the top 1 file to avoid context window explosion
+9 -21
View File
@@ -1,33 +1,21 @@
import dspy import dspy
from pydantic import BaseModel, Field from typing import List
# 1. Define the structure of your metadata
class DocMetadata(BaseModel):
synopsis: str = Field(description="A one-sentence summary of the document.")
tags: list[str] = Field(description="Relevant tags (NPCs, Locations, Items, Plot Points).")
entities: list[str] = Field(description="Key names of people, places, or factions.")
class IngestionSignature(dspy.Signature): class IngestionSignature(dspy.Signature):
"""You are an expert Dungeon Master's assistant. """You are an expert Dungeon Master's assistant.
Analyze the provided notes and extract a concise synopsis and relevant metadata. Analyze the provided notes and extract a concise synopsis and relevant metadata.
synopsis = A one-sentence summary of the document.
tags = Relevant tags (NPCs, Locations, Items, Plot Points).
entities = Key names of people, places, or factions.
"note -> synopsis:str, tags: list[str], entities: list[str]"
/no_think
""" """
note: str = dspy.InputField(desc="The DM notes or session recap content.") note: str = dspy.InputField(desc="The DM notes or session recap content.")
# By using the Pydantic model as the type, DSPy handles the JSON formatting for you answer: dict[str,str|List] = dspy.OutputField(desc="the metadata dictionary with the keys; synopsis, tags, entities")
answer: DocMetadata = dspy.OutputField()
class IngestionAgent(dspy.Module): class IngestionAgent(dspy.Module):
def __init__(self): def __init__(self):
super().__init__() self.ingest = dspy.Predict(IngestionSignature)
# We use TypedPredictor to enforce the Pydantic schema
# We use ChainOfThought because it helps 8B models "reason" through the tags
# before committing to the final JSON structure.
self.process = dspy.TypedPredictor(IngestionSignature)
def forward(self, note: str):
# The .answer will now be a DocMetadata object, not a string!
prediction = self.process(note=note)
return prediction
+11 -13
View File
@@ -18,11 +18,10 @@ MODEL_BASE = CFG["models"]["enrich"]
EMBEDDING_MODEL = CFG["models"]["embedding"] EMBEDDING_MODEL = CFG["models"]["embedding"]
API_BASE = CFG["api"]["base_url"] API_BASE = CFG["api"]["base_url"]
API_VERSION = CFG["api"]["api_version"] API_VERSION = CFG["api"]["api_version"]
MAX_WORKERS = CFG["ingestion"]["max_workers"] MAX_WORKERS=CFG["ingestion"]["max_workers"]
CHUNK_SIZE = (CFG["ingestion"]["chunk_size"],) CHUNK_SIZE=CFG["ingestion"]["chunk_size"]
CHUNK_OVERLAP = CFG["ingestion"]["chunk_overlap"] CHUNK_OVERLAP=CFG["ingestion"]["chunk_overlap"]
EMBEDDING_BATCH_SIZE = CFG["ingestion"]["embedding_batch_size"] EMBEDDING_BATCH_SIZE=CFG["ingestion"]["embedding_batch_size"]
def load_documents(): def load_documents():
docs = [] docs = []
@@ -64,20 +63,19 @@ def enrich_chunks(chunks: list) -> list:
lm_index = idx % 8 lm_index = idx % 8
try: try:
with dspy.context( with dspy.context(lm=dspy.LM(model=MODEL_BASE, api_base=API_BASE+API_VERSION)):
lm=dspy.LM(f"{MODEL_BASE}:{lm_index}", api_base=API_BASE + API_VERSION) response = IngestionAgent().ingest(note=chunk.page_content)
):
response = IngestionAgent().forward(note=chunk.page_content)
# This is now an object, not a string! # This is now an object, not a string!
metadata = response.answer.dict() metadata = response.answer
except Exception as e: except Exception as e:
print(f"⚠️ Failed for chunk {idx}: {e}") print(f"⚠️ Failed for chunk {idx}: {e}")
metadata = {"synopsis": "Summary failed", "tags": ["error"], "entities": []} metadata = {"synopsis": "Summary failed", "tags": ["error"], "entities": []}
chunk.metadata.update(metadata) chunk.metadata.update(metadata)
return chunk return (idx, chunk)
enriched_results = [] enriched_results = []
with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor: with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor: