diff --git a/TODO b/TODO new file mode 100644 index 0000000..f428a07 --- /dev/null +++ b/TODO @@ -0,0 +1,17 @@ +Test new embeddings + +Benchmark / rate embeddings & vectors + +Is RAG still the "thing"? - What is the cutting edge + - "Context Engineering" is the current evolution, although GraphRAG has been a thing? + - Context Engineering seems to be finding the balance of how to provide just the right amount of context to get best results. + Too little context and the llm doesn't have enough info to give an accurate answer + Too much conflicting context (poison) + too much context (confusion) + + + +Turso - better? vector store, but sqlite so data <3 + +How can we RAG in AW? DSAR + diff --git a/local_faiss_db/index.faiss b/local_faiss_db/index.faiss index 8c999db..ab55694 100644 Binary files a/local_faiss_db/index.faiss and b/local_faiss_db/index.faiss differ diff --git a/local_faiss_db/index.pkl b/local_faiss_db/index.pkl index 8857224..cc6cec0 100644 Binary files a/local_faiss_db/index.pkl and b/local_faiss_db/index.pkl differ diff --git a/src/config.yaml b/src/config.yaml index 8ee22c9..9f53e5d 100644 --- a/src/config.yaml +++ b/src/config.yaml @@ -1,6 +1,6 @@ # --- Connection Settings --- api: - base_url: "http://192.168.0.49:1234" + base_url: "http://framework.tawny-bellatrix.ts.net:1234" api_version: "/v1/" # --- Model Settings --- diff --git a/src/experts/dnd_agent.py b/src/experts/dnd_agent.py index 655a37c..ec92cc0 100644 --- a/src/experts/dnd_agent.py +++ b/src/experts/dnd_agent.py @@ -58,7 +58,17 @@ class DnDRAG(dspy.Module): context_parts = [] for i, doc in enumerate(results): source = doc.metadata.get("source", "Unknown") - context_parts.append(f"--- Chunk {i + 1} from {source} ---\n{doc.page_content}") + synopsis = doc.metadata.get("synopsis", "None") + tags = doc.metadata.get("tags", "None") + entities = doc.metadata.get("entities", "None") + context_parts.append(f""" +--- Chunk {i+1} from {source} --- +synopsis: {synopsis}, +tags: {tags}, +entities: 
{entities} +{doc.page_content} +""") + #print(context_parts) # 3. Add the Full Content of the top match (optional, but requested!) # We'll just take the top 1 file to avoid context window explosion diff --git a/src/experts/ingestion_agent.py b/src/experts/ingestion_agent.py index 8239fdc..960f74f 100644 --- a/src/experts/ingestion_agent.py +++ b/src/experts/ingestion_agent.py @@ -1,33 +1,21 @@ import dspy -from pydantic import BaseModel, Field - - -# 1. Define the structure of your metadata -class DocMetadata(BaseModel): - synopsis: str = Field(description="A one-sentence summary of the document.") - tags: list[str] = Field(description="Relevant tags (NPCs, Locations, Items, Plot Points).") - entities: list[str] = Field(description="Key names of people, places, or factions.") - +from typing import List class IngestionSignature(dspy.Signature): """You are an expert Dungeon Master's assistant. Analyze the provided notes and extract a concise synopsis and relevant metadata. + synopsis = A one-sentence summary of the document. + tags = Relevant tags (NPCs, Locations, Items, Plot Points). + entities = Key names of people, places, or factions. + "note -> synopsis:str, tags: list[str], entities: list[str]" + /no_think """ note: str = dspy.InputField(desc="The DM notes or session recap content.") - # By using the Pydantic model as the type, DSPy handles the JSON formatting for you - answer: DocMetadata = dspy.OutputField() + answer: dict[str,str|List] = dspy.OutputField(desc="the metadata dictionary with the keys; synopsis, tags, entities") + class IngestionAgent(dspy.Module): def __init__(self): - super().__init__() - # We use TypedPredictor to enforce the Pydantic schema - # We use ChainOfThought because it helps 8B models "reason" through the tags - # before committing to the final JSON structure. - self.process = dspy.TypedPredictor(IngestionSignature) - - def forward(self, note: str): - # The .answer will now be a DocMetadata object, not a string! 
- prediction = self.process(note=note) - return prediction + self.ingest = dspy.Predict(IngestionSignature) diff --git a/src/ingest.py b/src/ingest.py index 9c49ffa..eb69e27 100644 --- a/src/ingest.py +++ b/src/ingest.py @@ -18,11 +18,10 @@ MODEL_BASE = CFG["models"]["enrich"] EMBEDDING_MODEL = CFG["models"]["embedding"] API_BASE = CFG["api"]["base_url"] API_VERSION = CFG["api"]["api_version"] -MAX_WORKERS = CFG["ingestion"]["max_workers"] -CHUNK_SIZE = (CFG["ingestion"]["chunk_size"],) -CHUNK_OVERLAP = CFG["ingestion"]["chunk_overlap"] -EMBEDDING_BATCH_SIZE = CFG["ingestion"]["embedding_batch_size"] - +MAX_WORKERS=CFG["ingestion"]["max_workers"] +CHUNK_SIZE=CFG["ingestion"]["chunk_size"] +CHUNK_OVERLAP=CFG["ingestion"]["chunk_overlap"] +EMBEDDING_BATCH_SIZE=CFG["ingestion"]["embedding_batch_size"] def load_documents(): docs = [] @@ -64,20 +63,19 @@ def enrich_chunks(chunks: list) -> list: lm_index = idx % 8 try: - with dspy.context( - lm=dspy.LM(f"{MODEL_BASE}:{lm_index}", api_base=API_BASE + API_VERSION) - ): - response = IngestionAgent().forward(note=chunk.page_content) - + with dspy.context(lm=dspy.LM(model=MODEL_BASE, api_base=API_BASE+API_VERSION)): + response = IngestionAgent().ingest(note=chunk.page_content) + # This is now an object, not a string! - metadata = response.answer.dict() + metadata = response.answer except Exception as e: print(f"⚠️ Failed for chunk {idx}: {e}") metadata = {"synopsis": "Summary failed", "tags": ["error"], "entities": []} - chunk.metadata.update(metadata) - return chunk + chunk.metadata.update(metadata) + return (idx, chunk) + enriched_results = [] with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor: