just sync
This commit is contained in:
@@ -8,7 +8,3 @@ Is RAG still the "thing"? - What is the cutting edge
|
|||||||
Too little context and the LLM doesn't have enough info to give an accurate answer
|
Too little context and the LLM doesn't have enough info to give an accurate answer
|
||||||
Too much conflicting context (poison)
|
Too much conflicting context (poison)
|
||||||
Too much context (confusion)
|
Too much context (confusion)
|
||||||
|
|
||||||
Turso - better? vector store, but sqlite so data <3
|
|
||||||
|
|
||||||
How can we RAG in AW?
|
|
||||||
|
|||||||
@@ -4,3 +4,5 @@ git-sync:
|
|||||||
git add .
|
git add .
|
||||||
git commit -m 'just sync'
|
git commit -m 'just sync'
|
||||||
git push
|
git push
|
||||||
|
|
||||||
|
sync-git: git-sync
|
||||||
+41
-61
@@ -1,7 +1,7 @@
|
|||||||
from pathlib import Path
|
# from pathlib import Path
|
||||||
|
import turso
|
||||||
import dspy
|
import dspy
|
||||||
from langchain_community.vectorstores import FAISS
|
# from langchain_community.vectorstores import FAISS
|
||||||
|
|
||||||
from config_loader import load_config
|
from config_loader import load_config
|
||||||
from embedding import LocalLMEmbeddings
|
from embedding import LocalLMEmbeddings
|
||||||
@@ -12,93 +12,73 @@ DATABASE_PATH = CFG["ingestion"]["db_path"]
|
|||||||
EMBEDDING_MODEL = CFG["models"]["embedding"]
|
EMBEDDING_MODEL = CFG["models"]["embedding"]
|
||||||
API_BASE = CFG["api"]["base_url"]
|
API_BASE = CFG["api"]["base_url"]
|
||||||
|
|
||||||
import turso
|
|
||||||
|
|
||||||
# Inside your retrieval logic:
|
# Inside your retrieval logic:
|
||||||
def retrieve_from_turso(question, k=5):
|
def retrieve_from_turso(embedded_question, k=5):
|
||||||
# Example query: search for relevant notes using full-text search or embedding similarity
|
# Example query: search for relevant notes using full-text search or embedding similarity
|
||||||
# Note: Turso supports SQLite, so you can use FTS5 or a vector extension if available
|
# Note: Turso supports SQLite, so you can use FTS5 or a vector extension if available
|
||||||
query = f"""
|
query = f"""
|
||||||
SELECT source, synopsis, tags, entities, content, embedding
|
SELECT file_path, synopsis, tags, entities, chunk_data,
|
||||||
|
vector_distance_cos(embedding, vector32('{embedded_question[0]}')) AS distance
|
||||||
FROM notes
|
FROM notes
|
||||||
WHERE content LIKE ? OR synopsis LIKE ?
|
ORDER BY distance ASC
|
||||||
ORDER BY (similarity(embedding, ?)) DESC
|
LIMIT {k};
|
||||||
LIMIT {k}
|
|
||||||
"""
|
"""
|
||||||
# You'll need to generate or store embeddings in the DB or use a function to compute similarity
|
con = turso.connect(DATABASE_PATH)
|
||||||
# If embeddings are stored, you can query them directly
|
cur = con.cursor()
|
||||||
# Otherwise, you'll need to compute embeddings in Python and compare
|
cur.execute(query)
|
||||||
results = turso.execute(query, (f"%{question}%", f"%{question}%", question))
|
rows = cur.fetchall()
|
||||||
return results
|
return rows
|
||||||
|
|
||||||
# --- DSPy Signature ---
|
# --- DSPy Signature ---
|
||||||
class DnDContextQA(dspy.Signature):
|
class DnDContextQA(dspy.Signature):
|
||||||
"""Answer DnD campaign questions using provided snippets and full file context.
|
"""Answer DnD campaign questions using provided details.
|
||||||
/no_think
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
context = dspy.InputField(
|
context = dspy.InputField(
|
||||||
desc="Relevant chunks and full file contents from the campaign notes."
|
desc="Relevant chunks and metadata from the campaign notes."
|
||||||
)
|
)
|
||||||
question = dspy.InputField()
|
question = dspy.InputField()
|
||||||
answer = dspy.OutputField(desc="A detailed answer based on the notes, citing the source file.")
|
answer = dspy.OutputField(desc="A detailed answer based on the notes, citing the source file.")
|
||||||
|
|
||||||
|
|
||||||
# --- DSPy Module ---
|
|
||||||
class DnDRAG(dspy.Module):
|
class DnDRAG(dspy.Module):
|
||||||
def __init__(self, db_path=DATABASE_PATH, k=3):
|
def __init__(self):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
# 1. Setup Embeddings & Load FAISS
|
self.embeddings_model = LocalLMEmbeddings(
|
||||||
self.embeddings = LocalLMEmbeddings(model=EMBEDDING_MODEL, base_url=API_BASE)
|
model=EMBEDDING_MODEL,
|
||||||
self.vectorstore = FAISS.load_local(
|
base_url=API_BASE,
|
||||||
db_path, self.embeddings, allow_dangerous_deserialization=True
|
batch_size=1, # we only send 1 question at a time.
|
||||||
)
|
)
|
||||||
self.k = k
|
|
||||||
|
|
||||||
# 2. Setup the Predictor (Chain of Thought for better reasoning)
|
|
||||||
self.generate_answer = dspy.ChainOfThought(DnDContextQA)
|
self.generate_answer = dspy.ChainOfThought(DnDContextQA)
|
||||||
|
|
||||||
def get_full_file_content(self, file_path):
|
|
||||||
"""Helper to read the full source file if it exists."""
|
|
||||||
try:
|
|
||||||
return Path(file_path).read_text(encoding="utf-8")
|
|
||||||
except Exception:
|
|
||||||
return ""
|
|
||||||
|
|
||||||
def forward(self, question):
|
def forward(self, question):
|
||||||
# 1. Search for top-k chunks
|
# Use Turso to retrieve relevant notes
|
||||||
results = self.vectorstore.similarity_search(question, k=self.k)
|
embedded_question = self.embeddings_model._post_request(question)
|
||||||
|
results = retrieve_from_turso(embedded_question, k=5) # k is limit to return
|
||||||
# 2. Extract unique file paths to load "Full Context"
|
|
||||||
# This prevents the LLM from being 'blind' to the rest of a relevant session note
|
|
||||||
unique_paths = list(set([doc.metadata.get("full_path") for doc in results]))
|
|
||||||
|
|
||||||
|
# Format context as before
|
||||||
context_parts = []
|
context_parts = []
|
||||||
for i, doc in enumerate(results):
|
for i, row in enumerate(results):
|
||||||
source = doc.metadata.get("source", "Unknown")
|
source = row[0] # file_path
|
||||||
synopsis = doc.metadata.get("synopsis", "None")
|
synopsis = row[1] # synopsis
|
||||||
tags = doc.metadata.get("tags", "None")
|
tags = row[2] # tags
|
||||||
entities = doc.metadata.get("entities", "None")
|
entities = row[3] # entities
|
||||||
|
content = row[4] # chunk_data
|
||||||
|
|
||||||
|
|
||||||
context_parts.append(f"""
|
context_parts.append(f"""
|
||||||
--- Chunk {i+1} from {source} ---
|
--- Chunk {i+1} from {source} ---
|
||||||
synpsis: {synopsis},
|
synopsis: {synopsis},
|
||||||
tags: {tags},
|
tags: {tags},
|
||||||
entities: {entities}
|
entities: {entities}
|
||||||
{doc.page_content}
|
{content}
|
||||||
""")
|
""")
|
||||||
#print(context_parts)
|
|
||||||
|
|
||||||
# 3. Add the Full Content of the top match (optional, but requested!)
|
print('Closest embedding hits')
|
||||||
# We'll just take the top 1 file to avoid context window explosion
|
for part in context_parts:
|
||||||
if unique_paths:
|
print(part)
|
||||||
top_file_content = self.get_full_file_content(unique_paths[0])
|
|
||||||
context_parts.append(
|
|
||||||
f"\n=== FULL SOURCE FILE: {Path(unique_paths[0]).name} ===\n{top_file_content[:10000]}"
|
|
||||||
)
|
|
||||||
|
|
||||||
# 4. Join everything into one context string
|
context = "\n\n".join(context_parts)
|
||||||
context_str = "\n\n".join(context_parts)
|
|
||||||
|
|
||||||
# 5. Generate Response
|
prediction = self.generate_answer(context=context, question=question)
|
||||||
prediction = self.generate_answer(context=context_str, question=question)
|
return dspy.Prediction(answer=prediction.answer, context=context)
|
||||||
return dspy.Prediction(answer=prediction.answer, context=context_str)
|
|
||||||
|
|||||||
+2
-3
@@ -1,6 +1,6 @@
|
|||||||
import sys
|
import sys
|
||||||
|
|
||||||
import dspy
|
import dspy
|
||||||
|
# import turso
|
||||||
|
|
||||||
from config_loader import load_config
|
from config_loader import load_config
|
||||||
from experts.dnd_agent import DnDRAG
|
from experts.dnd_agent import DnDRAG
|
||||||
@@ -10,7 +10,6 @@ RETRIEVE_MODEL = CFG["models"]["retrieval"]
|
|||||||
API_BASE = CFG["api"]["base_url"]
|
API_BASE = CFG["api"]["base_url"]
|
||||||
API_VERSION = CFG["api"]["api_version"]
|
API_VERSION = CFG["api"]["api_version"]
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
# 1. Setup the LLM
|
# 1. Setup the LLM
|
||||||
print("🚀 Initializing Qwen-8B via LM Studio...")
|
print("🚀 Initializing Qwen-8B via LM Studio...")
|
||||||
@@ -18,7 +17,7 @@ def main():
|
|||||||
dspy.configure(lm=lm)
|
dspy.configure(lm=lm)
|
||||||
|
|
||||||
# 2. Load the RAG System (only happens once!)
|
# 2. Load the RAG System (only happens once!)
|
||||||
print("📚 Loading FAISS index and campaign notes...")
|
print("📚 Loading campaign notes...")
|
||||||
try:
|
try:
|
||||||
rag_system = DnDRAG()
|
rag_system = DnDRAG()
|
||||||
print("✅ Ready! Ask me anything about the campaign. (Type 'exit' or 'q' to quit)")
|
print("✅ Ready! Ask me anything about the campaign. (Type 'exit' or 'q' to quit)")
|
||||||
|
|||||||
Reference in New Issue
Block a user