just sync

2026-02-08 15:09:39 +00:00
parent a1612864e2
commit 8d0a74e865
4 changed files with 48 additions and 71 deletions
-4
@@ -8,7 +8,3 @@ Is RAG still the "thing"? - What is the cutting edge
 Too little context and the llm doesnt have enough info to give an accurate answer
 Too much conflicting context (poison)
 too much context (confusion)
-Turso - better? vector store, but sqlite so data <3
-How can we RaG in AW?
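The two lines removed here are what the rest of this commit implements: retrieval now goes through Turso instead of FAISS. For orientation, a minimal sketch of the table the new query below assumes. The column names come from the SELECT in dnd_agent; the F32_BLOB column type and the 768 dimension are assumptions (the dimension must match the embedding model), and the db path is a placeholder.

import turso  # DB-API-style client, same as the agent code below uses

con = turso.connect("notes.db")  # placeholder path; the real one comes from config
con.cursor().execute("""
    CREATE TABLE IF NOT EXISTS notes (
        file_path  TEXT,
        synopsis   TEXT,
        tags       TEXT,
        entities   TEXT,
        chunk_data TEXT,
        embedding  F32_BLOB(768)  -- libSQL-style vector column; type and size assumed
    );
""")
con.commit()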
+2
@@ -4,3 +4,5 @@ git-sync:
     git add .
     git commit -m 'just sync'
     git push
+
+sync-git: git-sync
+44 -64
@@ -1,7 +1,7 @@
-from pathlib import Path
+# from pathlib import Path
+import turso
 import dspy
-from langchain_community.vectorstores import FAISS
+# from langchain_community.vectorstores import FAISS
 from config_loader import load_config
 from embedding import LocalLMEmbeddings
@@ -12,93 +12,73 @@ DATABASE_PATH = CFG["ingestion"]["db_path"]
 EMBEDDING_MODEL = CFG["models"]["embedding"]
 API_BASE = CFG["api"]["base_url"]
-import turso
 # Inside your retrieval logic:
-def retrieve_from_turso(question, k=5):
+def retrieve_from_turso(embedded_question, k=5):
     # Example query: search for relevant notes using full-text search or embedding similarity
     # Note: Turso supports SQLite, so you can use FTS5 or a vector extension if available
     query = f"""
-    SELECT source, synopsis, tags, entities, content, embedding
+    SELECT file_path, synopsis, tags, entities, chunk_data,
+           vector_distance_cos(embedding, vector32('{embedded_question[0]}')) AS distance
     FROM notes
-    WHERE content LIKE ? OR synopsis LIKE ?
-    ORDER BY (similarity(embedding, ?)) DESC
-    LIMIT {k}
+    ORDER BY distance ASC
+    LIMIT {k};
     """
-    # You'll need to generate or store embeddings in the DB or use a function to compute similarity
-    # If embeddings are stored, you can query them directly
-    # Otherwise, you'll need to compute embeddings in Python and compare
-    results = turso.execute(query, (f"%{question}%", f"%{question}%", question))
-    return results
+    con = turso.connect(DATABASE_PATH)
+    cur = con.cursor()
+    cur.execute(query)
+    rows = cur.fetchall()
+    return rows

 # --- DSPy Signature ---
 class DnDContextQA(dspy.Signature):
-    """Answer DnD campaign questions using provided snippets and full file context.
+    """Answer DnD campaign questions using provided details.
+    /no_think
     """
     context = dspy.InputField(
-        desc="Relevant chunks and full file contents from the campaign notes."
+        desc="Relevant chunks and metadata from the campaign notes."
     )
     question = dspy.InputField()
     answer = dspy.OutputField(desc="A detailed answer based on the notes, citing the source file.")

-# --- DSPy Module ---
 class DnDRAG(dspy.Module):
-    def __init__(self, db_path=DATABASE_PATH, k=3):
+    def __init__(self):
         super().__init__()
-        # 1. Setup Embeddings & Load FAISS
-        self.embeddings = LocalLMEmbeddings(model=EMBEDDING_MODEL, base_url=API_BASE)
-        self.vectorstore = FAISS.load_local(
-            db_path, self.embeddings, allow_dangerous_deserialization=True
-        )
-        self.k = k
-        # 2. Setup the Predictor (Chain of Thought for better reasoning)
+        self.embeddings_model = LocalLMEmbeddings(
+            model=EMBEDDING_MODEL,
+            base_url=API_BASE,
+            batch_size=1,  # we only send 1 question at a time.
+        )
         self.generate_answer = dspy.ChainOfThought(DnDContextQA)

-    def get_full_file_content(self, file_path):
-        """Helper to read the full source file if it exists."""
-        try:
-            return Path(file_path).read_text(encoding="utf-8")
-        except Exception:
-            return ""

     def forward(self, question):
-        # 1. Search for top-k chunks
-        results = self.vectorstore.similarity_search(question, k=self.k)
+        # Use Turso to retrieve relevant notes
+        embedded_question = self.embeddings_model._post_request(question)
+        results = retrieve_from_turso(embedded_question, k=5)  # k is limit to return

-        # 2. Extract unique file paths to load "Full Context"
-        # This prevents the LLM from being 'blind' to the rest of a relevant session note
-        unique_paths = list(set([doc.metadata.get("full_path") for doc in results]))

+        # Format context as before
         context_parts = []
-        for i, doc in enumerate(results):
-            source = doc.metadata.get("source", "Unknown")
-            synopsis = doc.metadata.get("synopsis", "None")
-            tags = doc.metadata.get("tags", "None")
-            entities = doc.metadata.get("entities", "None")
+        for i, row in enumerate(results):
+            source = row[0]    # file_path
+            synopsis = row[1]  # synopsis
+            tags = row[2]      # tags
+            entities = row[3]  # entities
+            content = row[4]   # chunk_data
             context_parts.append(f"""
 --- Chunk {i+1} from {source} ---
-synpsis: {synopsis},
+synopsis: {synopsis},
 tags: {tags},
 entities: {entities}
-{doc.page_content}
+{content}
 """)

-        # 3. Add the Full Content of the top match (optional, but requested!)
-        # We'll just take the top 1 file to avoid context window explosion
-        if unique_paths:
-            top_file_content = self.get_full_file_content(unique_paths[0])
-            context_parts.append(
-                f"\n=== FULL SOURCE FILE: {Path(unique_paths[0]).name} ===\n{top_file_content[:10000]}"
-            )
-        # 4. Join everything into one context string
-        context_str = "\n\n".join(context_parts)
-        # 5. Generate Response
-        prediction = self.generate_answer(context=context_str, question=question)
-        return dspy.Prediction(answer=prediction.answer, context=context_str)
+        #print(context_parts)
+        print('Closest embedding hits')
+        for part in context_parts:
+            print(part)
+        context = "\n\n".join(context_parts)
+        prediction = self.generate_answer(context=context, question=question)
+        return dspy.Prediction(answer=prediction.answer, context=context)
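One design note on the new retrieve_from_turso: it interpolates the embedding and k straight into the SQL via f-string. The pre-refactor code already passed a params tuple to execute, so if the turso cursor accepts SQLite-style ? placeholders (an assumption), a bound-parameter variant avoids quoting and injection issues. A sketch:

def retrieve_from_turso_safe(embedded_question, k=5):
    # Same query as above, but with bound parameters instead of f-string
    # interpolation. Assumes `?` placeholders behave as in sqlite3.
    query = """
        SELECT file_path, synopsis, tags, entities, chunk_data,
               vector_distance_cos(embedding, vector32(?)) AS distance
        FROM notes
        ORDER BY distance ASC
        LIMIT ?;
    """
    con = turso.connect(DATABASE_PATH)
    cur = con.cursor()
    # str(embedded_question[0]) serializes the single question vector the
    # same way the f-string version handed it to vector32().
    cur.execute(query, (str(embedded_question[0]), k))
    return cur.fetchall()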
+2 -3
@@ -1,6 +1,6 @@
 import sys
 import dspy
+# import turso
 from config_loader import load_config
 from experts.dnd_agent import DnDRAG
@@ -10,7 +10,6 @@ RETRIEVE_MODEL = CFG["models"]["retrieval"]
 API_BASE = CFG["api"]["base_url"]
 API_VERSION = CFG["api"]["api_version"]

 def main():
     # 1. Setup the LLM
     print("🚀 Initializing Qwen-8B via LM Studio...")
@@ -18,7 +17,7 @@ def main():
     dspy.configure(lm=lm)

     # 2. Load the RAG System (only happens once!)
-    print("📚 Loading FAISS index and campaign notes...")
+    print("📚 Loading campaign notes...")
     try:
         rag_system = DnDRAG()
         print("✅ Ready! Ask me anything about the campaign. (Type 'exit' or 'q' to quit)")