just sync

2026-02-08 15:09:39 +00:00
parent a1612864e2
commit 8d0a74e865
4 changed files with 48 additions and 71 deletions
-4
@@ -8,7 +8,3 @@ Is RAG still the "thing"? - What is the cutting edge
 Too little context and the llm doesnt have enough info to give an accurate answer
 Too much conflicting context (poison)
 too much context (confusion)
-Turso - better? vector store, but sqlite so data <3
-How can we RaG in AW?
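The two lines removed here are what the rest of this commit implements: retrieval now goes through Turso instead of FAISS. For orientation, a minimal sketch of the table the new query below assumes. The column names come from the SELECT in dnd_agent; the F32_BLOB column type and the 768 dimension are assumptions (the dimension must match the embedding model), and the db path is a placeholder.

import turso  # DB-API-style client, same as the agent code below uses

con = turso.connect("notes.db")  # placeholder path; the real one comes from config
con.cursor().execute("""
    CREATE TABLE IF NOT EXISTS notes (
        file_path  TEXT,
        synopsis   TEXT,
        tags       TEXT,
        entities   TEXT,
        chunk_data TEXT,
        embedding  F32_BLOB(768)  -- libSQL-style vector column; type and size assumed
    );
""")
con.commit()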
+2
@@ -4,3 +4,5 @@ git-sync:
     git add .
     git commit -m 'just sync'
     git push
+
+sync-git: git-sync
+44 -64
@@ -1,7 +1,7 @@
-from pathlib import Path
+# from pathlib import Path
+import turso
 import dspy
-from langchain_community.vectorstores import FAISS
+# from langchain_community.vectorstores import FAISS
 from config_loader import load_config
 from embedding import LocalLMEmbeddings
@@ -12,93 +12,73 @@ DATABASE_PATH = CFG["ingestion"]["db_path"]
 EMBEDDING_MODEL = CFG["models"]["embedding"]
 API_BASE = CFG["api"]["base_url"]
-import turso
 # Inside your retrieval logic:
-def retrieve_from_turso(question, k=5):
+def retrieve_from_turso(embedded_question, k=5):
     # Example query: search for relevant notes using full-text search or embedding similarity
     # Note: Turso supports SQLite, so you can use FTS5 or a vector extension if available
     query = f"""
-    SELECT source, synopsis, tags, entities, content, embedding
+    SELECT file_path, synopsis, tags, entities, chunk_data,
+           vector_distance_cos(embedding, vector32('{embedded_question[0]}')) AS distance
     FROM notes
-    WHERE content LIKE ? OR synopsis LIKE ?
-    ORDER BY (similarity(embedding, ?)) DESC
-    LIMIT {k}
+    ORDER BY distance ASC
+    LIMIT {k};
     """
-    # You'll need to generate or store embeddings in the DB or use a function to compute similarity
-    # If embeddings are stored, you can query them directly
-    # Otherwise, you'll need to compute embeddings in Python and compare
-    results = turso.execute(query, (f"%{question}%", f"%{question}%", question))
-    return results
+    con = turso.connect(DATABASE_PATH)
+    cur = con.cursor()
+    cur.execute(query)
+    rows = cur.fetchall()
+    return rows

 # --- DSPy Signature ---
 class DnDContextQA(dspy.Signature):
-    """Answer DnD campaign questions using provided snippets and full file context.
+    """Answer DnD campaign questions using provided details.
+    /no_think
     """
     context = dspy.InputField(
-        desc="Relevant chunks and full file contents from the campaign notes."
+        desc="Relevant chunks and metadata from the campaign notes."
     )
     question = dspy.InputField()
     answer = dspy.OutputField(desc="A detailed answer based on the notes, citing the source file.")

-# --- DSPy Module ---
 class DnDRAG(dspy.Module):
-    def __init__(self, db_path=DATABASE_PATH, k=3):
+    def __init__(self):
         super().__init__()
-        # 1. Setup Embeddings & Load FAISS
-        self.embeddings = LocalLMEmbeddings(model=EMBEDDING_MODEL, base_url=API_BASE)
-        self.vectorstore = FAISS.load_local(
-            db_path, self.embeddings, allow_dangerous_deserialization=True
-        )
-        self.k = k
-        # 2. Setup the Predictor (Chain of Thought for better reasoning)
+        self.embeddings_model = LocalLMEmbeddings(
+            model=EMBEDDING_MODEL,
+            base_url=API_BASE,
+            batch_size=1,  # we only send 1 question at a time.
+        )
         self.generate_answer = dspy.ChainOfThought(DnDContextQA)

-    def get_full_file_content(self, file_path):
-        """Helper to read the full source file if it exists."""
-        try:
-            return Path(file_path).read_text(encoding="utf-8")
-        except Exception:
-            return ""

     def forward(self, question):
-        # 1. Search for top-k chunks
-        results = self.vectorstore.similarity_search(question, k=self.k)
+        # Use Turso to retrieve relevant notes
+        embedded_question = self.embeddings_model._post_request(question)
+        results = retrieve_from_turso(embedded_question, k=5)  # k is limit to return

-        # 2. Extract unique file paths to load "Full Context"
-        # This prevents the LLM from being 'blind' to the rest of a relevant session note
-        unique_paths = list(set([doc.metadata.get("full_path") for doc in results]))

+        # Format context as before
         context_parts = []
-        for i, doc in enumerate(results):
-            source = doc.metadata.get("source", "Unknown")
-            synopsis = doc.metadata.get("synopsis", "None")
-            tags = doc.metadata.get("tags", "None")
-            entities = doc.metadata.get("entities", "None")
+        for i, row in enumerate(results):
+            source = row[0]    # file_path
+            synopsis = row[1]  # synopsis
+            tags = row[2]      # tags
+            entities = row[3]  # entities
+            content = row[4]   # chunk_data
             context_parts.append(f"""
 --- Chunk {i+1} from {source} ---
-synpsis: {synopsis},
+synopsis: {synopsis},
 tags: {tags},
 entities: {entities}
-{doc.page_content}
+{content}
 """)

-        # 3. Add the Full Content of the top match (optional, but requested!)
-        # We'll just take the top 1 file to avoid context window explosion
-        if unique_paths:
-            top_file_content = self.get_full_file_content(unique_paths[0])
-            context_parts.append(
-                f"\n=== FULL SOURCE FILE: {Path(unique_paths[0]).name} ===\n{top_file_content[:10000]}"
-            )
-        # 4. Join everything into one context string
-        context_str = "\n\n".join(context_parts)
-        # 5. Generate Response
-        prediction = self.generate_answer(context=context_str, question=question)
-        return dspy.Prediction(answer=prediction.answer, context=context_str)
+        #print(context_parts)
+        print('Closest embedding hits')
+        for part in context_parts:
+            print(part)
+        context = "\n\n".join(context_parts)
+        prediction = self.generate_answer(context=context, question=question)
+        return dspy.Prediction(answer=prediction.answer, context=context)
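One design note on the new retrieve_from_turso: it interpolates the embedding and k straight into the SQL via f-string. The pre-refactor code already passed a params tuple to execute, so if the turso cursor accepts SQLite-style ? placeholders (an assumption), a bound-parameter variant avoids quoting and injection issues. A sketch:

def retrieve_from_turso_safe(embedded_question, k=5):
    # Same query as above, but with bound parameters instead of f-string
    # interpolation. Assumes `?` placeholders behave as in sqlite3.
    query = """
        SELECT file_path, synopsis, tags, entities, chunk_data,
               vector_distance_cos(embedding, vector32(?)) AS distance
        FROM notes
        ORDER BY distance ASC
        LIMIT ?;
    """
    con = turso.connect(DATABASE_PATH)
    cur = con.cursor()
    # str(embedded_question[0]) serializes the single question vector the
    # same way the f-string version handed it to vector32().
    cur.execute(query, (str(embedded_question[0]), k))
    return cur.fetchall()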
+2 -3
@@ -1,6 +1,6 @@
 import sys
 import dspy
+# import turso
 from config_loader import load_config
 from experts.dnd_agent import DnDRAG
@@ -10,7 +10,6 @@ RETRIEVE_MODEL = CFG["models"]["retrieval"]
 API_BASE = CFG["api"]["base_url"]
 API_VERSION = CFG["api"]["api_version"]

 def main():
     # 1. Setup the LLM
     print("🚀 Initializing Qwen-8B via LM Studio...")
@@ -18,7 +17,7 @@ def main():
     dspy.configure(lm=lm)

     # 2. Load the RAG System (only happens once!)
-    print("📚 Loading FAISS index and campaign notes...")
+    print("📚 Loading campaign notes...")
     try:
         rag_system = DnDRAG()
         print("✅ Ready! Ask me anything about the campaign. (Type 'exit' or 'q' to quit)")