From d5f8d72e4648b34849ecc576c680b9ec4f7acee6 Mon Sep 17 00:00:00 2001
From: Jake Pullen
Date: Tue, 27 Jan 2026 22:04:31 +0000
Subject: [PATCH] =?UTF-8?q?chore:=20=F0=9F=A7=B9=20removing=20clutter?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 README.md                      |  97 ++++++++++++++++++++++++++++++
 delete_me.py                   |  66 ---------------------
 src/config.yaml                |  22 +++++++
 src/config_loader.py           |  10 ++++
 src/core/model_factory.py      |  50 ----------------
 src/experts/dnd_agent.py       |  62 +++++++++++++++++++
 src/experts/ingestion_agent.py |  39 +++++++-----
 src/experts/orchestrator.py    |  33 -----------
 src/ingest.py                  |  44 ++++++--------
 src/main.py                    | 105 ---------------------------------
 src/retrieve.py                |  63 +-------------------
 src/temp.py                    |  31 ----------
 12 files changed, 235 insertions(+), 387 deletions(-)
 delete mode 100644 delete_me.py
 create mode 100644 src/config.yaml
 create mode 100644 src/config_loader.py
 delete mode 100644 src/core/model_factory.py
 create mode 100644 src/experts/dnd_agent.py
 delete mode 100644 src/experts/orchestrator.py
 delete mode 100644 src/main.py
 delete mode 100644 src/temp.py

diff --git a/README.md b/README.md
index e69de29..586956e 100644
--- a/README.md
+++ b/README.md
@@ -0,0 +1,97 @@
+# πŸ‰ DnD Campaign Oracle: Local RAG Assistant
+
+An advanced Retrieval-Augmented Generation (RAG) system for Dungeon Masters. It ingests markdown-based campaign notes, enriches them with AI-generated metadata, and provides an interactive terminal interface for querying your world's lore using **DSPy** and **local LLMs**.
+
+## βš”οΈ Key Features
+
+* **Parallel Enrichment:** Uses a `ThreadPoolExecutor` to process multiple document chunks simultaneously across local LLM slots for high-speed ingestion.
+* **Structured Metadata:** Uses **DSPy TypedPredictors** and **Pydantic** to force LLMs to output valid JSON synopses, tags, and entity lists.
+* **Deep Context Retrieval:** Unlike standard RAG, this system retrieves relevant chunks and then "peeks" at the full source file to give the LLM broader narrative context.
+* **Local-First:** Runs entirely on your hardware using **LM Studio** and **FAISS**, keeping your campaign secrets private.
+
+---
+
+## πŸ—οΈ Architecture
+
+1. **Ingestion:** Scans `DATA_DIR` for `.md` files.
+2. **Chunking:** Splits documents into 800-character segments with overlap.
+3. **Enrichment:** A DSPy `IngestionAgent` analyzes each chunk to extract the fields below (see the sketch after this list):
+   * **Synopsis:** A one-sentence summary.
+   * **Tags:** Plot points, item names, or themes.
+   * **Entities:** Specific NPCs, locations, or factions.
+4. **Vector Store:** Chunks and metadata are embedded using `text-embedding-qwen3` and stored in a local **FAISS** index.
+5. **Interactive RAG:** A terminal loop that uses **Chain of Thought (CoT)** reasoning to answer queries based on retrieved context.
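+
+Enrichment targets the `DocMetadata` schema defined in `src/experts/ingestion_agent.py`. A minimal sketch of what one enriched chunk carries (the field values here are hypothetical):
+
+```python
+from experts.ingestion_agent import DocMetadata
+
+# Hypothetical enrichment result for a single chunk
+meta = DocMetadata(
+    synopsis="The party talks its way into free bread at the Golden Grain Inn.",
+    tags=["Golden Grain Inn", "roleplay", "downtime"],
+    entities=["Golden Grain Inn"],
+)
+print(meta.dict())  # this dict is merged into the chunk's metadata before FAISS storage
+```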
+
+---
+
+## πŸ› οΈ Setup
+
+### Prerequisites
+
+* **Python 3.10+**
+* **LM Studio:** Running a local server at `http://192.168.0.49:1234` (or your specific IP).
+* **Models:**
+  * Inference: `qwen3-8b` (or similar).
+  * Embedding: `text-embedding-qwen3-embedding-8b`.
+
+### Installation
+
+```bash
+uv sync
+```
+
+---
+
+## πŸš€ Usage
+
+### 1. Ingest & Enrich
+
+Run the ingestion script to process your markdown files and build the vector database.
+
+```bash
+uv run src/ingest.py
+```
+
+### 2. Query the Oracle
+
+Launch the interactive session to ask questions about your campaign.
+
+```bash
+uv run src/retrieve.py
+```
+
+**Example Query:**
+
+> `πŸ“ Query: Why did the party get free bread at the Golden Grain Inn?`
+> `πŸ“œ AI RESPONSE: Based on the session notes from 'Session_12.md', the party received free bread because the Rogue successfully intimidated the baker's assistant, and the Cleric later performed a minor miracle (Thaumaturgy) that impressed the owner.`
+
+---
+
+## πŸ“‚ File Structure
+
+* `ingest.py`: Handles file loading, multi-threaded enrichment, and FAISS storage.
+* `retrieve.py`: The interactive terminal-based retrieval loop.
+* `experts/ingestion_agent.py`: Contains the `IngestionAgent` and Pydantic schemas.
+* `experts/dnd_agent.py`: The `DnDRAG` module that performs retrieval and answer generation.
+* `embedding.py`: Custom wrapper for `LocalLMEmbeddings` with batch processing support.
+* `local_faiss_db/`: Directory where the vector index and metadata are persisted.
+
+---
+
+## βš™οΈ Configuration
+
+In `src/config.yaml`, you can tune the processing speed:
+
+* `max_workers: 8`: Adjust based on your GPU/CPU capability to handle concurrent LLM requests.
+* `chunk_size: 800`: Increase for more context per chunk, decrease for more granular searching.
+
+---
diff --git a/delete_me.py b/delete_me.py
deleted file mode 100644
index 14e0051..0000000
--- a/delete_me.py
+++ /dev/null
@@ -1,66 +0,0 @@
-# class PrecomputedEmbeddings(Embeddings):
-#     def __init__(self, embeddings: List[List[float]]):
-#         self.embeddings = embeddings  # Store all precomputed vectors
-
-#     def embed_documents(self, texts: List[str]) -> List[List[float]]:
-#         return self.embeddings  # Return the precomputed ones (order must match!)
-
-#     def embed_query(self, text):
-#         return self.embeddings[0]
-
-# def embedder(texts: List[str]) -> List[List[float]]:
-#     embeddings = []
-#     base_url = "http://192.168.0.49:1234"  # βœ… Add 'http://'
-#     embed_url = f"{base_url}/v1/embeddings"
-#     headers = {"Content-Type": "application/json"}
-
-#     for text in texts:
-#         payload = {
-#             "model": "text-embedding-qwen3-embedding-8b",
-#             "input": text
-#         }
-
-#         try:
-#             response = requests.post(embed_url, json=payload, headers=headers)  # βœ… POST not GET
-#             if response.status_code == 200:
-#                 data = response.json()  # βœ… Parse JSON!
-#                 embedding = data["data"][0]["embedding"]  # βœ… Extract the actual vector
-#                 embeddings.append(embedding)
-#             else:
-#                 print(f"❌ Embedding failed for '{text[:30]}...': {response.status_code} - {response.text}")
-#                 # Optionally: insert placeholder zeros if you need to continue
-#                 # embeddings.append([0.0] * 768)  # ← adjust dimension as needed!
-#         except Exception as e:
-#             print(f"⚠️ Exception embedding '{text[:30]}...': {e}")
-#             # embeddings.append([0.0] * 768)  # fallback
-
-#     return embeddings
-
-# def store_chunks_with_embeddings_locally(chunks, db_path="./local_faiss_db"):
-#     """
-#     Stores pre-computed chunks and their embeddings into a local FAISS database.
-
-#     Args:
-#         chunks: list of LangChain Document objects (with page_content and metadata)
-#         embeddings: list of embedding vectors (list of lists of floats) β€” must match length of chunks
-#         db_path: where to save the FAISS index files locally
-#     """
-
-#     texts = [chunk.page_content for chunk in chunks]
-#     embeddings = embedder(texts)
-#     if len(chunks) != len(embeddings):
-#         raise ValueError(f"Mismatch! Got {len(chunks)} chunks but {len(embeddings)} embeddings.")
-
-#     # Create LangChain Document list (we already have this)
-#     documents = chunks  # assuming they're already Document objects
-
-#     # Build FAISS vectorstore using precomputed embeddings
-#     # FAISS.from_embeddings() lets us pass our own embeddings + texts
-#     vectorstore = FAISS.from_embeddings(
-#         text_embeddings=list(zip([doc.page_content for doc in documents], embeddings)),
-#         embedding=PrecomputedEmbeddings(embeddings[0])  # We'll define this next
-#     )
-
-#     # Save to disk
-#     vectorstore.save_local(db_path)
-#     print(f"βœ… Successfully stored {len(chunks)} chunks + embeddings into local FAISS DB at '{db_path}'")
\ No newline at end of file
diff --git a/src/config.yaml b/src/config.yaml
new file mode 100644
index 0000000..d3da9ff
--- /dev/null
+++ b/src/config.yaml
@@ -0,0 +1,22 @@
+# --- Connection Settings ---
+api:
+  base_url: "http://192.168.0.49:1234"
+  api_version: "/v1/"
+
+# --- Model Settings ---
+models:
+  inference: "lm_studio/qwen/qwen3-8b"
+  embedding: "text-embedding-qwen3-embedding-8b"
+
+# --- Ingestion Settings ---
+ingestion:
+  data_dir: "/home/cosmic/DnD"
+  db_path: "./local_faiss_db"
+  max_workers: 8
+  chunk_size: 800
+  chunk_overlap: 100
+
+# --- Retrieval Settings ---
+retrieval:
+  top_k: 4
+  context_limit: 10000  # Max characters from full file context
\ No newline at end of file
diff --git a/src/config_loader.py b/src/config_loader.py
new file mode 100644
index 0000000..7198ccb
--- /dev/null
+++ b/src/config_loader.py
@@ -0,0 +1,10 @@
+import yaml
+from pathlib import Path
+
+def load_config(config_path="src/config.yaml"):
+    with open(config_path, "r") as f:
+        return yaml.safe_load(f)
+
+# Usage example:
+# CFG = load_config()
+# print(CFG['api']['base_url'])
\ No newline at end of file
diff --git a/src/core/model_factory.py b/src/core/model_factory.py
deleted file mode 100644
index 7743152..0000000
--- a/src/core/model_factory.py
+++ /dev/null
@@ -1,50 +0,0 @@
-"""Model Factory for creating language model instances.
-
-Separates model creation logic from configuration.
-"""
-
-import dspy
-from config import Config
-
-
-class ModelFactory:
-    """Factory class for creating language model instances based on configuration."""
-
-    @staticmethod
-    def create_dspy_model(agent_type: str, agent_name: str = None) -> dspy.LM:
-        """Create a dspy.LM object for a specific agent with conditional parameters.
-
-        Only includes api_base and api_key if they are configured.
-
-        Args:
-            agent_type (str): 'orchestrator' or 'expert'
-            agent_name (str): For experts, specific agent name like 'weather', 'games'
-
-        Returns:
-            dspy.LM: Configured language model object
-
-        """
-        config = Config.Model.get_agent_config(agent_type, agent_name)
-
-        # Build dspy.LM parameters conditionally
-        lm_params = {"model": f"{config['provider']}/{config['model_name']}"}
-
-        # Only add api_base if it's configured (not None)
-        if config.get("api_base"):
-            lm_params["api_base"] = config["api_base"]
-
-        # Only add api_key if it's configured (not None)
-        if config.get("api_key"):
-            lm_params["api_key"] = config["api_key"]
-
-        return dspy.LM(**lm_params)
-
-    @staticmethod
-    def create_orchestrator_model() -> dspy.LM:
-        """Create orchestrator model."""
-        return ModelFactory.create_dspy_model("orchestrator")
-
-    @staticmethod
-    def create_weather_model() -> dspy.LM:
-        """Create weather expert model."""
-        return ModelFactory.create_dspy_model("expert", "ingest")
diff --git a/src/experts/dnd_agent.py b/src/experts/dnd_agent.py
new file mode 100644
index 0000000..f6af137
--- /dev/null
+++ b/src/experts/dnd_agent.py
@@ -0,0 +1,62 @@
+import dspy
+from langchain_community.vectorstores import FAISS
+from embedding import LocalLMEmbeddings
+from pathlib import Path
+
+# --- DSPy Signature ---
+class DnDContextQA(dspy.Signature):
+    """Answer DnD campaign questions using provided snippets and full file context.
+    /no_think"""
+    context = dspy.InputField(desc="Relevant chunks and full file contents from the campaign notes.")
+    question = dspy.InputField()
+    answer = dspy.OutputField(desc="A detailed answer based on the notes, citing the source file.")
+
+# --- DSPy Module ---
+class DnDRAG(dspy.Module):
+    def __init__(self, db_path="./local_faiss_db", k=3):
+        super().__init__()
+        # 1. Setup Embeddings & Load FAISS
+        self.embeddings = LocalLMEmbeddings(
+            model="text-embedding-qwen3-embedding-8b",
+            base_url="http://192.168.0.49:1234"
+        )
+        self.vectorstore = FAISS.load_local(
+            db_path, self.embeddings, allow_dangerous_deserialization=True
+        )
+        self.k = k
+
+        # 2. Setup the Predictor (Chain of Thought for better reasoning)
+        self.generate_answer = dspy.ChainOfThought(DnDContextQA)
+
+    def get_full_file_content(self, file_path):
+        """Helper to read the full source file if it exists."""
+        try:
+            return Path(file_path).read_text(encoding='utf-8')
+        except Exception:
+            return ""
+
+    def forward(self, question):
+        # 1. Search for top-k chunks
+        results = self.vectorstore.similarity_search(question, k=self.k)
+
+        # 2. Collect unique file paths in retrieval order (skipping chunks with
+        # no recorded path) to load "Full Context". This prevents the LLM from
+        # being 'blind' to the rest of a relevant session note.
+        seen = set()
+        unique_paths = []
+        for doc in results:
+            path = doc.metadata.get("full_path")
+            if path and path not in seen:
+                seen.add(path)
+                unique_paths.append(path)
+
+        context_parts = []
+        for i, doc in enumerate(results):
+            source = doc.metadata.get("source", "Unknown")
+            context_parts.append(f"--- Chunk {i+1} from {source} ---\n{doc.page_content}")
+
+        # 3. Add the full content of the top-ranked file only, to avoid
+        # blowing out the context window.
+        if unique_paths:
+            top_file_content = self.get_full_file_content(unique_paths[0])
+            context_parts.append(f"\n=== FULL SOURCE FILE: {Path(unique_paths[0]).name} ===\n{top_file_content[:10000]}")
+
+        # 4. Join everything into one context string
+        context_str = "\n\n".join(context_parts)
+
+        # 5. Generate Response
+        prediction = self.generate_answer(context=context_str, question=question)
+        return dspy.Prediction(answer=prediction.answer, context=context_str)
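+
+# Example usage (a sketch; assumes an LM has already been configured via
+# dspy.configure(...) and that ./local_faiss_db was built by a prior
+# `uv run src/ingest.py`):
+#
+#   rag = DnDRAG(db_path="./local_faiss_db", k=3)
+#   result = rag(question="Why did the party get free bread?")
+#   print(result.answer)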
diff --git a/src/experts/ingestion_agent.py b/src/experts/ingestion_agent.py
index 150d13c..acd3f64 100644
--- a/src/experts/ingestion_agent.py
+++ b/src/experts/ingestion_agent.py
@@ -1,22 +1,31 @@
 import dspy
+from pydantic import BaseModel, Field
+from typing import List
 
+# 1. Define the structure of your metadata
+class DocMetadata(BaseModel):
+    synopsis: str = Field(description="A one-sentence summary of the document.")
+    tags: List[str] = Field(description="Relevant tags (NPCs, Locations, Items, Plot Points).")
+    entities: List[str] = Field(description="Key names of people, places, or factions.")
 
-class ingestionSignature(dspy.Signature):
-    """You are going to be given dungeon masters notes, on session plans, recaps, npcs, players.
-    You must summarize these document in one sentence
-    and extract as many relevant tags aspossible as a JSON list:
-    {{'synopsis': '...', 'tags': [...]}}\n\nDocument:\n{content}"
-    /no_think
+class IngestionSignature(dspy.Signature):
     """
-
-    note: str = dspy.InputField()
-    answer: str = dspy.OutputField()
-
+    You are an expert Dungeon Master's assistant.
+    Analyze the provided notes and extract a concise synopsis and relevant metadata.
+    """
+    note: str = dspy.InputField(desc="The DM notes or session recap content.")
+    # By using the Pydantic model as the type, DSPy handles the JSON formatting for you
+    answer: DocMetadata = dspy.OutputField()
 
 class IngestionAgent(dspy.Module):
-    """The Ingestion Agent is responsible for Document tagging and summarising."""
-
-    def __init__(self):
-        """Initialize the Oracle with available expert tools."""
-        # self.tools = []
-        self.ingest = dspy.Predict(signature=ingestionSignature)
+    def __init__(self):
+        super().__init__()
+        # TypedPredictor enforces the Pydantic schema, so the model's output
+        # must parse into a valid DocMetadata object before it is returned.
+        self.process = dspy.TypedPredictor(IngestionSignature)
+
+    def forward(self, note: str):
+        # prediction.answer is a DocMetadata object, not a raw string
+        prediction = self.process(note=note)
+        return prediction
\ No newline at end of file
diff --git a/src/experts/orchestrator.py b/src/experts/orchestrator.py
deleted file mode 100644
index f773366..0000000
--- a/src/experts/orchestrator.py
+++ /dev/null
@@ -1,33 +0,0 @@
-import dspy
-
-from core import ModelFactory
-
-from .file import FileAgent
-
-
-class OrchestratorSignature(dspy.Signature):
-    """ """
-
-    question: str = dspy.InputField()
-    history: dspy.History = dspy.InputField()
-    answer: str = dspy.OutputField()
-
-
-class TheOracle(dspy.Module):
-    """The Oracle is the orchestrator of all the agents."""
-
-    def __init__(self):
-        """Initialize the Oracle with available expert tools."""
-        self.tools = [
-            self.consult_file_expert,
-        ]
-        self.oracle = dspy.ReAct(signature=OrchestratorSignature, tools=self.tools, max_iters=10)
-
-    def consult_file_expert(self, command: str) -> str:
-        """Use this expert when you want to save or retrieve information from files.
- - Also used to find files and update files - """ - with dspy.context(lm=ModelFactory.create_file_model()): - result = FileAgent().file_agent(command=command) - return result.answer diff --git a/src/ingest.py b/src/ingest.py index ba5fca2..cb2733e 100644 --- a/src/ingest.py +++ b/src/ingest.py @@ -10,8 +10,11 @@ from tqdm import tqdm from embedding import LocalLMEmbeddings from experts.ingestion_agent import IngestionAgent +from config_loader import load_config -DATA_DIR = "/home/cosmic/DnD" + +CFG = load_config() +DATA_DIR = CFG["ingestion"]["data_dir"] def load_documents(): docs = [] @@ -41,47 +44,38 @@ def load_documents(): def chunk_documents(docs): # LangChain preserves metadata during splitting automatically text_splitter = RecursiveCharacterTextSplitter( - chunk_size=800, - chunk_overlap=100, + chunk_size=CFG["ingestion"]["chunk_size"], + chunk_overlap=CFG["ingestion"]["chunk_overlap"], separators=["\n\n", "\n", ". ", " ", ""] ) return text_splitter.split_documents(docs) def enrich_chunks(chunks: list) -> list: - MODEL_BASE = "lm_studio/qwen/qwen3-8b" - API_BASE = "http://192.168.0.49:1234/v1/" + MODEL_BASE = CFG["models"]["inference"] + API_BASE = CFG["api"]["base_url"] + API_VERSION = CFG["api"]["api_version"] def process_single_chunk(indexed_chunk): idx, chunk = indexed_chunk lm_index = idx % 8 try: - # Configure context for this specific thread - with dspy.context(lm=dspy.LM(f"{MODEL_BASE}:{lm_index}", api_base=API_BASE)): - # Pass the text, but we will update the original chunk object - response = IngestionAgent().ingest(note=chunk.page_content) + with dspy.context(lm=dspy.LM(f"{MODEL_BASE}:{lm_index}", api_base=API_BASE+API_VERSION)): + response = IngestionAgent().forward(note=chunk.page_content) - answer = response.answer - start = answer.find("{") - end = answer.rfind("}") + 1 - metadata_extracted = json.loads(answer[start:end]) - - # UPDATE: Put AI data in a sub-key to avoid overwriting 'source' - chunk.metadata["enrichment"] = metadata_extracted - # Also flatten tags for easier searching if needed - if "tags" in metadata_extracted: - chunk.metadata["tags"] = metadata_extracted["tags"] + # This is now an object, not a string! + metadata = response.answer.dict() except Exception as e: - # If enrichment fails, we KEEP the chunk but flag the error - # This ensures 'source' and 'full_path' are NEVER lost - chunk.metadata["enrichment_error"] = str(e) - chunk.metadata["tags"] = ["error"] + print(f"⚠️ Failed for chunk {idx}: {e}") + metadata = {"synopsis": "Summary failed", "tags": ["error"], "entities": []} - return idx, chunk + chunk.metadata.update(metadata) + return chunk + enriched_results = [] - with ThreadPoolExecutor(max_workers=8) as executor: + with ThreadPoolExecutor(max_workers=CFG["ingestion"]["max_workers"]) as executor: # Wrap chunks in enumerate to keep track of order futures = [executor.submit(process_single_chunk, (i, c)) for i, c in enumerate(chunks)] diff --git a/src/main.py b/src/main.py deleted file mode 100644 index 58751af..0000000 --- a/src/main.py +++ /dev/null @@ -1,105 +0,0 @@ -import chromadb -import streamlit as st -from langchain.embeddings import HuggingFaceEmbeddings -from langchain_community.llms import Ollama -from langchain_core.prompts import PromptTemplate - -# CONFIG -BASE_IP = "192.168.0.49" -LM_STUDIO_PORT = "1234" -CHROMA_PATH = "vector_db" -MODEL_NAME = ( - "lmstudio-community/qwen/qwen3-next-80b-a3b-instruct-q8_0.gguf" # Use "llama3", "phi3", etc. 
-) -EMBEDDING_MODEL = "all-MiniLM-L6-v2" - -# Load embedding model -embedder = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL) - -# Load local LLM for answering -llm = Ollama(model=MODEL_NAME, temperature=0.3) - -# Initialize Chroma client -client = chromadb.PersistentClient(path=CHROMA_PATH) -collection = client.get_collection("documents") - -# Prompt template -prompt_template = """ -You are a helpful assistant that answers questions using ONLY the context provided. -Do not make up information or use external knowledge. - -Question: {question} - -Context: -{context} - -If you cannot find an answer, say "I don't know based on the provided documents." - -Answer: -""" - -prompt = PromptTemplate.from_template(prompt_template) - -# Streamlit UI -st.title("πŸ“„ Local RAG Knowledge Assistant") -st.write("Upload files to `documents/` and run `ingest.py` first.") - -query = st.text_input( - "Ask a question about your documents:", placeholder="What are the key financial metrics?" -) - -if query: - with st.spinner("Searching for relevant info..."): - # Embed query - query_embedding = embedder.embed_query(query) - - # Retrieve top 5 most similar chunks - results = collection.query( - query_embeddings=[query_embedding], n_results=5, include=["documents", "metadatas"] - ) - - documents = results["documents"][0] - metadatas = results["metadatas"][0] - - # Build context from retrieved chunks + metadata - context = "" - for i, doc in enumerate(documents): - meta = metadatas[i] - synopsis = meta.get("synopsis", "No summary") - tags = ( - ", ".join(meta.get("tags", [])) - if isinstance(meta.get("tags"), list) - else str(meta.get("tags")) - ) - source = meta.get("source", "Unknown") - - context += f""" ---- Document Snippet --- -{doc} - -Synopsis: {synopsis} -Tags: {tags} -Source: {source} ---- -""" - - # Ask LLM - full_prompt = prompt.format(question=query, context=context) - - with st.spinner("Generating answer..."): - response = llm.invoke(full_prompt) - - st.subheader("πŸ” Answer:") - st.write(response) - - st.subheader("πŸ“š Sources (retrieved chunks):") - for i, doc in enumerate(documents): - meta = metadatas[i] - source = meta.get("source", "Unknown") - tags = ( - ", ".join(meta.get("tags", [])) - if isinstance(meta.get("tags"), list) - else str(meta.get("tags")) - ) - st.markdown(f"**Source**: `{source}` | **Tags**: {tags}") - st.text_area(f"Snippet {i + 1}", doc, height=120, disabled=True) diff --git a/src/retrieve.py b/src/retrieve.py index f3f3c9f..b5dda6a 100644 --- a/src/retrieve.py +++ b/src/retrieve.py @@ -1,67 +1,6 @@ import sys import dspy -from langchain_community.vectorstores import FAISS -from embedding import LocalLMEmbeddings -from pathlib import Path - -# --- DSPy Signature --- -class DnDContextQA(dspy.Signature): - """Answer DnD campaign questions using provided snippets and full file context.""" - context = dspy.InputField(desc="Relevant chunks and full file contents from the campaign notes.") - question = dspy.InputField() - answer = dspy.OutputField(desc="A detailed answer based on the notes, citing the source file.") - -# --- DSPy Module --- -class DnDRAG(dspy.Module): - def __init__(self, db_path="./local_faiss_db", k=3): - super().__init__() - # 1. Setup Embeddings & Load FAISS - self.embeddings = LocalLMEmbeddings( - model="text-embedding-qwen3-embedding-8b", - base_url="http://192.168.0.49:1234" - ) - self.vectorstore = FAISS.load_local( - db_path, self.embeddings, allow_dangerous_deserialization=True - ) - self.k = k - - # 2. 
-        self.generate_answer = dspy.ChainOfThought(DnDContextQA)
-
-    def get_full_file_content(self, file_path):
-        """Helper to read the full source file if it exists."""
-        try:
-            return Path(file_path).read_text(encoding='utf-8')
-        except Exception:
-            return ""
-
-    def forward(self, question):
-        # 1. Search for top-k chunks
-        results = self.vectorstore.similarity_search(question, k=self.k)
-
-        # 2. Extract unique file paths to load "Full Context"
-        # This prevents the LLM from being 'blind' to the rest of a relevant session note
-        unique_paths = list(set([doc.metadata.get("full_path") for doc in results]))
-
-        context_parts = []
-        for i, doc in enumerate(results):
-            source = doc.metadata.get("source", "Unknown")
-            context_parts.append(f"--- Chunk {i+1} from {source} ---\n{doc.page_content}")
-
-        # 3. Add the Full Content of the top match (optional, but requested!)
-        # We'll just take the top 1 file to avoid context window explosion
-        if unique_paths:
-            top_file_content = self.get_full_file_content(unique_paths[0])
-            context_parts.append(f"\n=== FULL SOURCE FILE: {Path(unique_paths[0]).name} ===\n{top_file_content[:10000]}")
-
-        # 4. Join everything into one context string
-        context_str = "\n\n".join(context_parts)
-
-        # 5. Generate Response
-        prediction = self.generate_answer(context=context_str, question=question)
-        return dspy.Prediction(answer=prediction.answer, context=context_str)
-
-
+from experts.dnd_agent import DnDRAG
 
 def main():
     # 1. Setup the LLM
diff --git a/src/temp.py b/src/temp.py
deleted file mode 100644
index da1f7d5..0000000
--- a/src/temp.py
+++ /dev/null
@@ -1,31 +0,0 @@
-from langchain_community.vectorstores import FAISS
-
-from embedding import LocalLMEmbeddings
-
-
-def retrieve_enriched_context(query, db_path="./local_faiss_db"):
-    # 1. Re-initialize the same embedding model
-    embeddings_model = LocalLMEmbeddings(
-        model="text-embedding-qwen3-embedding-8b", base_url="http://192.168.0.49:1234"
-    )
-
-    # 2. Load the index from disk
-    # allow_dangerous_deserialization is required because FAISS uses pickle
-    vectorstore = FAISS.load_local(db_path, embeddings_model, allow_dangerous_deserialization=True)
-
-    # 3. Perform the search
-    # k=4 means "bring back the top 4 most relevant chunks"
-    results_with_scores = vectorstore.similarity_search_with_score(query, k=4)
-
-    return results_with_scores
-
-
-# --- Example Usage ---
-query = "the party get free bread but i cant remember why?"
-hits = retrieve_enriched_context(query)
-
-for doc, score in hits:
-    print(f"\n🎯 [Score: {score:.4f}]")
-    print(f"πŸ“„ Content: {doc.page_content[:200]}...")
-    print(f"πŸ› οΈ Metadata (Enrichment): {doc.metadata}")
-# print(f"doc: {doc}")