feat: ✨ Ingestion PoC success

2026-01-27 19:04:50 +00:00
parent 7fa8df92e3
commit 645e9461ce
15 changed files with 2866 additions and 8 deletions
@@ -0,0 +1,231 @@
+import os
+from pathlib import Path
+from types import SimpleNamespace
+
+import yaml
+
+from .user_config import UserConfig
+
+
+class Config:
+    """Main Config Class for application-level configuration."""
+
+    ENVIRONMENT = "dev"
+    DEBUG = True
+    LOG_LEVEL = "DEBUG"
+    FILE_SEARCH_DIRECTORIES = [os.path.expanduser("~")]
+    FILE_SEARCH_DIRECTORIES.extend(UserConfig.FILE_SEARCH_DIRECTORIES)
+
+    class Model:
+        """Application-level model configuration with inheritance support."""
+
+        # Application-level defaults for all agents
+        # TODO: We need to decide on what we want our defaults to be,
+        # would we advise shipping with lm_studio or ollama?
+        # These can be overridden by user_config.py
+        PROVIDER = "ollama_chat"
+        MODEL_NAME = "qwen3:latest"
+
+        # Default connection settings (None = no custom connection)
+        HOST_ADDRESS = None
+        HOST_PORT = None
+        HOST_API_KEY = None
+        HOST_API_PATH = None  # e.g., "v1" for OpenAI-compatible APIs
+
+        # Application-level agent configurations (usually empty)
+        ORCHESTRATOR = {}
+
+        EXPERTS = {"default": {}, "weather": {}, "games": {}, "lighting": {}}
+
+        # Helper method to get merged configuration (app + user)
+        @classmethod
+        def _get_base_config(cls):
+            """Get base configuration with provider and model settings."""
+            base_config = {
+                "provider": cls.PROVIDER,
+                "model_name": cls.MODEL_NAME,
+            }
+
+            # Add base connection settings only if they exist
+            if hasattr(cls, "HOST_ADDRESS") and cls.HOST_ADDRESS:
+                api_path = getattr(cls, "HOST_API_PATH", "") or ""
+                base_config["api_base"] = (
+                    f"http://{cls.HOST_ADDRESS}:{cls.HOST_PORT}/{api_path}"
+                )
+            if hasattr(cls, "HOST_API_KEY") and cls.HOST_API_KEY:
+                base_config["api_key"] = cls.HOST_API_KEY
+
+            return base_config
+
+        @classmethod
+        def _merge_user_config(cls, base_config):
+            """Merge user configuration overrides with base config."""
+            try:
+                user_model_config = UserConfig.Model
+
+                # Override base config with user settings
+                if hasattr(user_model_config, "PROVIDER"):
+                    base_config["provider"] = user_model_config.PROVIDER
+                if hasattr(user_model_config, "MODEL_NAME"):
+                    base_config["model_name"] = user_model_config.MODEL_NAME
+                if (
+                    hasattr(user_model_config, "HOST_ADDRESS")
+                    and user_model_config.HOST_ADDRESS
+                ):
+                    api_path = getattr(user_model_config, "HOST_API_PATH", "") or ""
+                    base_config["api_base"] = (
+                        f"http://{user_model_config.HOST_ADDRESS}:"
+                        f"{user_model_config.HOST_PORT}/{api_path}"
+                    )
+                if (
+                    hasattr(user_model_config, "HOST_API_KEY")
+                    and user_model_config.HOST_API_KEY
+                ):
+                    base_config["api_key"] = user_model_config.HOST_API_KEY
+
+                return user_model_config
+            except ImportError:
+                return None
+
+        @classmethod
+        def get_agent_config(cls, agent_type, agent_name=None):
+            """Get configuration for a specific agent type and name.
+
+            Merges application config with user config overrides.
+
+            Args:
+                agent_type (str): 'orchestrator' or 'expert'
+                agent_name (str): For experts, specific agent name like
+                    'weather', 'games'
+
+            Returns:
+                dict: Complete configuration for the agent
+
+            """
+            base_config = cls._get_base_config()
+            user_model_config = cls._merge_user_config(base_config)
+
+            # Get application-level agent config
+            if agent_type.lower() == "orchestrator":
+                return cls._get_orchestrator_config(base_config, user_model_config)
+            elif agent_type.lower() == "expert":
+                return cls._get_expert_config(
+                    base_config, user_model_config, agent_name
+                )
+            else:
+                return base_config
+
+        @classmethod
+        def _get_orchestrator_config(cls, base_config, user_model_config):
+            """Get orchestrator-specific configuration."""
+            app_agent_config = getattr(cls, "ORCHESTRATOR", {})
+            user_agent_config = (
+                getattr(user_model_config, "ORCHESTRATOR", {})
+                if user_model_config
+                else {}
+            )
+            return {**base_config, **app_agent_config, **user_agent_config}
+
+        @classmethod
+        def _get_expert_config(cls, base_config, user_model_config, agent_name):
+            """Get expert-specific configuration."""
+            app_experts_config = getattr(cls, "EXPERTS", {})
+            user_experts_config = (
+                getattr(user_model_config, "EXPERTS", {}) if user_model_config else {}
+            )
+
+            # Start with default expert config
+            app_expert_config = app_experts_config.get("default", {})
+            user_expert_default = user_experts_config.get("default", {})
+            expert_config = {**app_expert_config, **user_expert_default}
+
+            # If specific agent name provided, merge its config
+            if agent_name:
+                app_specific_config = app_experts_config.get(agent_name, {})
+                user_specific_config = user_experts_config.get(agent_name, {})
+                expert_config = {
+                    **expert_config,
+                    **app_specific_config,
+                    **user_specific_config,
+                }
+
+            return {**base_config, **expert_config}
+
+    class Weather:
+        """Weather-related configuration and mappings."""
+
+        CODE_MAP = {
+            0: "Clear sky",
+            1: "Mainly clear",
+            2: "Partly cloudy",
+            3: "Overcast",
+            45: "Fog",
+            48: "Depositing rime fog",
+            51: "Light drizzle",
+            53: "Moderate drizzle",
+            55: "Dense drizzle",
+            56: "Light freezing drizzle",
+            57: "Dense freezing drizzle",
+            61: "Slight rain",
+            63: "Moderate rain",
+            65: "Heavy rain",
+            66: "Light freezing rain",
+            67: "Heavy freezing rain",
+            71: "Slight snow",
+            73: "Moderate snow",
+            75: "Heavy snow",
+            77: "Snow grains",
+            80: "Slight rain showers",
+            81: "Moderate rain showers",
+            82: "Violent rain showers",
+            85: "Slight snow showers",
+            86: "Heavy snow showers",
+            95: "Thunderstorm",
+            96: "Thunderstorm with slight hail",
+            99: "Thunderstorm with heavy hail",
+        }
+
+    @classmethod  # Load from YAML
+    def load_yaml(cls, file_path="src/config/config.yaml"):
+        """Load configuration from YAML file.
+
+        Args:
+            file_path (str): Path to the YAML configuration file.
+
+        Returns:
+            AppConfig: Configuration instance with loaded settings.
+
+        """
+        yaml_file = Path(file_path)
+        if not yaml_file.exists():
+            default_dict = {"DEBUG": True}
+            with open(yaml_file, "w") as f:
+                yaml.dump(default_dict, f)
+
+        with open(yaml_file) as f:
+            config_data = yaml.safe_load(f)
+
+        # Populate lights and rooms
+        lights_data = config_data.get("lights", {})
+        rooms_data = config_data.get("rooms", {})
+
+        class Lights:
+            pass
+
+        class Rooms:
+            pass
+
+        for lightname, light_config in lights_data.items():
+            light_obj = SimpleNamespace(light_config)
+            setattr(Lights, lightname.replace(" ", "").lower(), light_obj)
+
+        for roomname, room_config in rooms_data.items():
+            room_obj = SimpleNamespace(room_config)
+            setattr(Rooms, roomname.replace(" ", "").lower(), room_obj)
+
+        cls.Lights = Lights
+        cls.Rooms = Rooms
+
+
+# Load the YAML config when the module is imported
+Config.load_yaml()
@@ -0,0 +1,31 @@
+"""User-specific configuration file.
+
+DO NOT commit user_config.py to version control!
+"""
+
+
+class UserConfig:
+    """User-specific model configurations - override application defaults."""
+
+    class Model:
+        """Personal model preferences and overrides."""
+
+        # Base model overrides (affects all agents unless specifically overridden)
+        PROVIDER = "lm_studio"
+        MODEL_NAME = "openai/gpt-oss-20b"
+        HOST_ADDRESS = "192.168.0.49"
+        HOST_PORT = "1234"
+        HOST_API_KEY = "no-key"
+        HOST_API_PATH = "v1"
+
+        # Orchestrator personal config
+        ORCHESTRATOR = {}
+        # Expert agents personal config
+        EXPERTS = {
+            "default": {
+                "model_name": "qwen/qwen3-coder-30b",
+            },
+            "ingest": {},
+            "ask": {},
+        }
+
@@ -0,0 +1,97 @@
+"""User-specific configuration file.
+
+Copy this to user_config.py and customize with your personal settings.
+DO NOT commit user_config.py to version control!
+"""
+
+
+class UserConfig:
+    """User-specific model configurations - override application defaults."""
+
+    # List of file paths you want the AI to start in when searching for files
+    # We already default to your user home folder
+    FILE_SEARCH_DIRECTORIES = []
+
+    class Model:
+        """Personal model preferences and overrides."""
+
+        # Personal model preferences
+        # Uncomment and modify as needed
+
+        # Base model overrides (affects all agents unless specifically overridden)
+        PROVIDER = "lm_studio"
+        MODEL_NAME = "openai/gpt-oss-20b"
+        HOST_ADDRESS = "127.0.0.1"
+        HOST_PORT = "1234"
+        HOST_API_KEY = "your-personal-key"
+        HOST_API_PATH = "v1"
+
+        # Orchestrator personal config
+        ORCHESTRATOR = {
+            # 'model_name': 'gpt-4',
+            # 'api_base': 'https://api.openai.com/v1',
+            # 'api_key': 'your-openai-key'
+        }
+
+        # Expert agents personal config
+        # if using multiple models from your host set above
+        # you only need to add the model name.
+        EXPERTS = {
+            "default": {
+                # 'model_name': 'claude-3-sonnet',
+                # 'api_base': 'https://api.anthropic.com',
+                # 'api_key': 'your-anthropic-key'
+            },
+            "weather": {
+                # 'model_name': 'gpt-4-turbo',
+                # 'api_base': 'https://api.openai.com/v1',
+                # 'api_key': 'your-openai-key'
+            },
+            "games": {
+                # 'model_name': 'claude-3-opus',
+                # 'api_base': 'https://api.anthropic.com',
+                # 'api_key': 'your-anthropic-key'
+            },
+        }
+
+
+# Example configurations:
+#
+# Use local Ollama with custom port:
+# class Model:
+#     HOST_ADDRESS = '127.0.0.1'
+#     HOST_PORT = '11434'
+#     HOST_API_KEY = 'local'
+#
+# Use OpenAI for everything:
+# class Model:
+#     PROVIDER = 'openai_chat'
+#     MODEL_NAME = 'gpt-4'
+#     ORCHESTRATOR = {
+#         'api_base': 'https://api.openai.com/v1',
+#         'api_key': 'your-openai-key'
+#     }
+#     EXPERTS = {
+#         'default': {
+#             'api_base': 'https://api.openai.com/v1',
+#             'api_key': 'your-openai-key'
+#         }
+#     }
+#
+# Mixed providers:
+# class Model:
+#     ORCHESTRATOR = {
+#         'model_name': 'gpt-4',
+#         'api_base': 'https://api.openai.com/v1',
+#         'api_key': 'your-openai-key'
+#     }
+#     EXPERTS = {
+#         'weather': {
+#             'model_name': 'claude-3-sonnet',
+#             'api_base': 'https://api.anthropic.com',
+#             'api_key': 'your-anthropic-key'
+#         },
+#         'games': {
+#             'model_name': 'llama3:8b'  # Uses local Ollama
+#         }
+#     }
@@ -0,0 +1,51 @@
+"""Model Factory for creating language model instances.
+
+Separates model creation logic from configuration.
+"""
+
+import dspy
+
+from config import Config
+
+
+class ModelFactory:
+    """Factory class for creating language model instances based on configuration."""
+
+    @staticmethod
+    def create_dspy_model(agent_type: str, agent_name: str = None) -> dspy.LM:
+        """Create a dspy.LM object for a specific agent with conditional parameters.
+
+        Only includes api_base and api_key if they are configured.
+
+        Args:
+            agent_type (str): 'orchestrator' or 'expert'
+            agent_name (str): For experts, specific agent name like 'weather', 'games'
+
+        Returns:
+            dspy.LM: Configured language model object
+
+        """
+        config = Config.Model.get_agent_config(agent_type, agent_name)
+
+        # Build dspy.LM parameters conditionally
+        lm_params = {"model": f"{config['provider']}/{config['model_name']}"}
+
+        # Only add api_base if it's configured (not None)
+        if config.get("api_base"):
+            lm_params["api_base"] = config["api_base"]
+
+        # Only add api_key if it's configured (not None)
+        if config.get("api_key"):
+            lm_params["api_key"] = config["api_key"]
+
+        return dspy.LM(**lm_params)
+
+    @staticmethod
+    def create_orchestrator_model() -> dspy.LM:
+        """Create orchestrator model."""
+        return ModelFactory.create_dspy_model("orchestrator")
+
+    @staticmethod
+    def create_weather_model() -> dspy.LM:
+        """Create weather expert model."""
+        return ModelFactory.create_dspy_model("expert", "ingest")
@@ -0,0 +1,23 @@
+import dspy
+
+class ingestionSignature(dspy.Signature):
+    """You are going to be given dungeon masters notes, on session plans, recaps, npcs, players.
+    You must summarize these document in one sentence 
+    and extract as many relevant tags aspossible as a JSON list:
+    {{'synopsis': '...', 'tags': [...]}}\n\nDocument:\n{content}"
+    /no_think
+    """
+
+    note: str = dspy.InputField()
+    answer: str = dspy.OutputField()
+
+
+class IngestionAgent(dspy.Module):
+    """The Ingestion Agent is responsible for Document tagging and summarising."""
+
+    def __init__(self):
+        """Initialize the Oracle with available expert tools."""
+        # self.tools = []
+        self.ingest = dspy.Predict(
+            signature=ingestionSignature
+        )
@@ -0,0 +1,36 @@
+import dspy
+
+from core import ModelFactory
+
+from .file import FileAgent
+
+
+class OrchestratorSignature(dspy.Signature):
+    """
+    """
+
+    question: str = dspy.InputField()
+    history: dspy.History = dspy.InputField()
+    answer: str = dspy.OutputField()
+
+
+class TheOracle(dspy.Module):
+    """The Oracle is the orchestrator of all the agents."""
+
+    def __init__(self):
+        """Initialize the Oracle with available expert tools."""
+        self.tools = [
+            self.consult_file_expert,
+        ]
+        self.oracle = dspy.ReAct(
+            signature=OrchestratorSignature, tools=self.tools, max_iters=10
+        )
+
+    def consult_file_expert(self, command: str) -> str:
+        """Use this expert when you want to save or retrieve information from files.
+
+        Also used to find files and update files
+        """
+        with dspy.context(lm=ModelFactory.create_file_model()):
+            result = FileAgent().file_agent(command=command)
+            return result.answer
@@ -0,0 +1,229 @@
+# ingest.py
+
+import os
+import json
+import dspy
+import turso
+import requests
+
+import json
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from typing import List
+from tqdm import tqdm
+from langchain_core.embeddings import Embeddings
+from langchain_community.vectorstores import FAISS
+from langchain_core.documents import Document
+from typing import List
+from pathlib import Path
+from langchain_community.document_loaders import TextLoader
+from langchain_text_splitters import RecursiveCharacterTextSplitter
+
+from experts.ingestion_agent import IngestionAgent
+
+# exit() 
+
+CHROMA_PATH = "vector_vault"
+DATA_DIR = "/home/cosmic/DnD"
+
+def load_documents():
+    """
+    Recursively walk through DATA_DIR and load all .md files as plain text.
+    Each document gets metadata including source filename and full path.
+    Ideal for RAG embedding pipelines.
+    """
+    docs = []
+    
+    # Define loader mapping
+    loaders = {
+        ".md": TextLoader,
+    }
+
+    data_path = Path(DATA_DIR)  # Ensure DATA_DIR is defined elsewhere as a string or Path
+
+    if not data_path.exists() or not data_path.is_dir():
+        print(f"⚠️ Data directory '{DATA_DIR}' does not exist or is not a directory.")
+        return docs
+
+    # Walk recursively through all files
+    for file_path in data_path.rglob("*"):
+        if file_path.is_file() and file_path.suffix.lower() == ".md":
+            try:
+                loader = loaders[file_path.suffix](file_path)
+                loaded_docs = loader.load()
+
+                # Add metadata to each document
+                for doc in loaded_docs:
+                    doc.metadata["source"] = file_path.name      # e.g., "document.md"
+                    doc.metadata["full_path"] = str(file_path)   # e.g., "/data/docs/document.md"
+
+                docs.extend(loaded_docs)
+                print(f"✅ Loaded: {file_path}")  # Remove this line if you want it silent
+
+            except Exception as e:
+                print(f"❌ Failed to load {file_path}: {e}")
+
+    print(f"📊 Total documents loaded: {len(docs)}")
+    return docs
+
+
+def chunk_documents(docs):
+    text_splitter = RecursiveCharacterTextSplitter(
+        chunk_size=800,
+        chunk_overlap=100,
+        separators=["\n\n", "\n", ". ", " ", ""]
+    )
+    return text_splitter.split_documents(docs)
+
+
+
+
+
+
+def enrich_chunks(chunks: List) -> List:
+    enriched = []
+    # Define your base model name — the same for all 10 slots
+    MODEL_BASE = "lm_studio/qwen/qwen3-8b"
+    API_BASE = "http://192.168.0.49:1234/v1/"
+    dspy.configure(lm=dspy.LM("lm_studio/qwen/qwen3-8b", api_base="http://192.168.0.49:1234/v1/"))
+
+    def process_single_chunk(args):
+        i, chunk = args
+        lm_index = i % 8
+        print(f"Processing chunk {i+1}/{len(chunks)} | using model {lm_index}")
+
+        try:
+            with dspy.context(lm=dspy.LM(f"{MODEL_BASE}:{lm_index}", api_base = API_BASE)):
+                response = IngestionAgent().ingest(note=chunk)  # ← Uses thread's selected LM!
+            
+            answer = response.answer
+            start = answer.find('{')
+            end = answer.rfind('}') + 1
+            json_str = answer[start:end]
+            metadata = json.loads(json_str)
+
+        except Exception as e:
+            print(f"⚠️ Failed to parse JSON for chunk {i}: {e}")
+            metadata = {"synopsis": "Summary failed", "tags": ["error"]}
+
+        # Update the chunk's metadata
+        chunk.metadata.update(metadata)
+        return chunk
+
+    # Run 10 parallel workers — each will pick a different model slot
+    with ThreadPoolExecutor(max_workers=8) as executor:
+        futures = [executor.submit(process_single_chunk, (i, chunk)) for i, chunk in enumerate(chunks)]
+
+        for future in tqdm(as_completed(futures), total=len(chunks), desc="Enriching chunks"):
+            enriched.append(future.result())
+
+    # Restore original order
+    enriched.sort(key=lambda x: chunks.index(x))
+
+    return enriched
+
+class PrecomputedEmbeddings(Embeddings):
+    def __init__(self, embeddings: List[List[float]]):
+        self.embeddings = embeddings  # Store all precomputed vectors
+
+    def embed_documents(self, texts: List[str]) -> List[List[float]]:
+        return self.embeddings  # Return the precomputed ones (order must match!)
+    
+    def embed_query(self, text):
+        return self.embeddings[0]
+
+def embedder(texts: List[str]) -> List[List[float]]:
+    embeddings = []
+    base_url = "http://192.168.0.49:1234"  # ✅ Add 'http://'
+    embed_url = f"{base_url}/v1/embeddings"
+    headers = {"Content-Type": "application/json"}
+
+    for text in texts:
+        payload = {
+            "model": "text-embedding-qwen3-embedding-8b",
+            "input": text
+        }
+
+        try:
+            response = requests.post(embed_url, json=payload, headers=headers)  # ✅ POST not GET
+            if response.status_code == 200:
+                data = response.json()  # ✅ Parse JSON!
+                embedding = data["data"][0]["embedding"]  # ✅ Extract the actual vector
+                embeddings.append(embedding)
+            else:
+                print(f"❌ Embedding failed for '{text[:30]}...': {response.status_code} - {response.text}")
+                # Optionally: insert placeholder zeros if you need to continue
+                # embeddings.append([0.0] * 768)  # ← adjust dimension as needed!
+        except Exception as e:
+            print(f"⚠️ Exception embedding '{text[:30]}...': {e}")
+            # embeddings.append([0.0] * 768)  # fallback
+
+    return embeddings
+
+def store_chunks_with_embeddings_locally(chunks, db_path="./local_faiss_db"):
+    """
+    Stores pre-computed chunks and their embeddings into a local FAISS database.
+    
+    Args:
+        chunks: list of LangChain Document objects (with page_content and metadata)
+        embeddings: list of embedding vectors (list of lists of floats) — must match length of chunks
+        db_path: where to save the FAISS index files locally
+    """
+
+    texts = [chunk.page_content for chunk in chunks]
+    embeddings = embedder(texts)
+    if len(chunks) != len(embeddings):
+        raise ValueError(f"Mismatch! Got {len(chunks)} chunks but {len(embeddings)} embeddings.")
+
+    # Create LangChain Document list (we already have this)
+    documents = chunks  # assuming they're already Document objects
+
+    # Build FAISS vectorstore using precomputed embeddings
+    # FAISS.from_embeddings() lets us pass our own embeddings + texts
+    vectorstore = FAISS.from_embeddings(
+        text_embeddings=list(zip([doc.page_content for doc in documents], embeddings)),
+        embedding=PrecomputedEmbeddings(embeddings[0])  # We’ll define this next
+    )
+
+    # Save to disk
+    vectorstore.save_local(db_path)
+    print(f"✅ Successfully stored {len(chunks)} chunks + embeddings into local FAISS DB at '{db_path}'")
+
+# # Store in Turso
+# def store_in_turso(chunks):
+#     ## needs refactor, not using chroma
+#     client = turso.PersistentClient(path=CHROMA_PATH)
+#     collection = client.get_or_create_collection("documents")
+
+#     ids = [f"doc_{i}" for i in range(len(chunks))]
+#     metadatas = [chunk.metadata for chunk in chunks]
+#     embeddings = embedder(texts)
+
+#     collection.add(
+#         ids=ids,
+#         documents=texts,
+#         embeddings=embeddings,
+#         metadatas=metadatas
+#     )
+#     print(f"✅ Successfully stored {len(chunks)} chunks in Chroma DB.")
+
+def main():
+    print("🔍 Loading documents...")
+    docs = load_documents()
+    if not docs:
+        print("⚠️ No files found in 'documents/'. Add some PDFs, TXT, or DOCX.")
+        return
+
+    print(f"📄 Loaded {len(docs)} documents. Splitting into chunks...")
+    chunks = chunk_documents(docs)
+    print(f"🧩 Created {len(chunks)} chunks.")
+
+    print("🧠 Generating summaries and tags using local LLM... (this may take a few minutes)")
+    enriched_chunks = enrich_chunks(chunks)
+
+    print("💾 Storing in vector database...")
+    store_chunks_with_embeddings_locally(enriched_chunks)
+
+    print("🎉 Ingestion complete!")
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,98 @@
+import streamlit as st
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain_community.llms import Ollama
+from langchain_core.prompts import PromptTemplate
+import chromadb
+
+# CONFIG
+BASE_IP = "192.168.0.49"
+LM_STUDIO_PORT = "1234"
+CHROMA_PATH = "vector_db"
+MODEL_NAME = "lmstudio-community/qwen/qwen3-next-80b-a3b-instruct-q8_0.gguf"  # Use "llama3", "phi3", etc.
+EMBEDDING_MODEL = "all-MiniLM-L6-v2"
+
+# Load embedding model
+embedder = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
+
+# Load local LLM for answering
+llm = Ollama(model=MODEL_NAME, temperature=0.3)
+
+# Initialize Chroma client
+client = chromadb.PersistentClient(path=CHROMA_PATH)
+collection = client.get_collection("documents")
+
+# Prompt template
+prompt_template = """
+You are a helpful assistant that answers questions using ONLY the context provided.
+Do not make up information or use external knowledge.
+
+Question: {question}
+
+Context:
+{context}
+
+If you cannot find an answer, say "I don't know based on the provided documents."
+
+Answer:
+"""
+
+prompt = PromptTemplate.from_template(prompt_template)
+
+# Streamlit UI
+st.title("📄 Local RAG Knowledge Assistant")
+st.write("Upload files to `documents/` and run `ingest.py` first.")
+
+query = st.text_input("Ask a question about your documents:",
+                      placeholder="What are the key financial metrics?")
+
+if query:
+    with st.spinner("Searching for relevant info..."):
+        # Embed query
+        query_embedding = embedder.embed_query(query)
+
+        # Retrieve top 5 most similar chunks
+        results = collection.query(
+            query_embeddings=[query_embedding],
+            n_results=5,
+            include=["documents", "metadatas"]
+        )
+
+        documents = results["documents"][0]
+        metadatas = results["metadatas"][0]
+
+        # Build context from retrieved chunks + metadata
+        context = ""
+        for i, doc in enumerate(documents):
+            meta = metadatas[i]
+            synopsis = meta.get("synopsis", "No summary")
+            tags = ", ".join(meta.get("tags", [])) if isinstance(
+                meta.get("tags"), list) else str(meta.get("tags"))
+            source = meta.get("source", "Unknown")
+
+            context += f"""
+--- Document Snippet ---
+{doc}
+
+Synopsis: {synopsis}
+Tags: {tags}
+Source: {source}
+---
+"""
+
+        # Ask LLM
+        full_prompt = prompt.format(question=query, context=context)
+
+    with st.spinner("Generating answer..."):
+        response = llm.invoke(full_prompt)
+
+    st.subheader("🔍 Answer:")
+    st.write(response)
+
+    st.subheader("📚 Sources (retrieved chunks):")
+    for i, doc in enumerate(documents):
+        meta = metadatas[i]
+        source = meta.get("source", "Unknown")
+        tags = ", ".join(meta.get("tags", [])) if isinstance(
+            meta.get("tags"), list) else str(meta.get("tags"))
+        st.markdown(f"**Source**: `{source}` | **Tags**: {tags}")
+        st.text_area(f"Snippet {i+1}", doc, height=120, disabled=True)