chore: 🧹 removing clutter
@@ -0,0 +1,22 @@
+# --- Connection Settings ---
+api:
+  base_url: "http://192.168.0.49:1234"
+  api_version: "/v1/"
+
+# --- Model Settings ---
+models:
+  inference: "lm_studio/qwen/qwen3-8b"
+  embedding: "text-embedding-qwen3-embedding-8b"
+
+# --- Ingestion Settings ---
+ingestion:
+  data_dir: "/home/cosmic/DnD"
+  db_path: "./local_faiss_db"
+  max_workers: 8
+  chunk_size: 800
+  chunk_overlap: 100
+
+# --- Retrieval Settings ---
+retrieval:
+  top_k: 4
+  context_limit: 10000  # Max characters from full file context
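
A quick sketch of how these settings are consumed downstream (assumes the load_config helper from the loader file below; the endpoint concatenation mirrors what ingest.py does later in this commit):

    from config_loader import load_config

    cfg = load_config()
    # base_url and api_version are joined to form the LM Studio endpoint:
    # "http://192.168.0.49:1234" + "/v1/" -> "http://192.168.0.49:1234/v1/"
    endpoint = cfg["api"]["base_url"] + cfg["api"]["api_version"]
    workers = cfg["ingestion"]["max_workers"]  # thread pool size for enrichment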
@@ -0,0 +1,10 @@
+import yaml
+from pathlib import Path
+
+def load_config(config_path="src/config.yaml"):
+    with open(config_path, "r") as f:
+        return yaml.safe_load(f)
+
+# Usage example:
+# CFG = load_config()
+# print(CFG['api']['base_url'])
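
If hardening is ever wanted, a hedged variant of the loader (a sketch, not part of this commit) could fail fast on a missing file or missing sections:

    import yaml
    from pathlib import Path

    def load_config(config_path="src/config.yaml"):
        path = Path(config_path)
        if not path.exists():
            raise FileNotFoundError(f"Config not found: {path.resolve()}")
        cfg = yaml.safe_load(path.read_text())
        # Fail fast if a required section is missing
        for section in ("api", "models", "ingestion", "retrieval"):
            if section not in cfg:
                raise KeyError(f"Missing config section: {section}")
        return cfg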
@@ -1,50 +0,0 @@
-"""Model Factory for creating language model instances.
-
-Separates model creation logic from configuration.
-"""
-
-import dspy
-from config import Config
-
-
-class ModelFactory:
-    """Factory class for creating language model instances based on configuration."""
-
-    @staticmethod
-    def create_dspy_model(agent_type: str, agent_name: str = None) -> dspy.LM:
-        """Create a dspy.LM object for a specific agent with conditional parameters.
-
-        Only includes api_base and api_key if they are configured.
-
-        Args:
-            agent_type (str): 'orchestrator' or 'expert'
-            agent_name (str): For experts, specific agent name like 'weather', 'games'
-
-        Returns:
-            dspy.LM: Configured language model object
-
-        """
-        config = Config.Model.get_agent_config(agent_type, agent_name)
-
-        # Build dspy.LM parameters conditionally
-        lm_params = {"model": f"{config['provider']}/{config['model_name']}"}
-
-        # Only add api_base if it's configured (not None)
-        if config.get("api_base"):
-            lm_params["api_base"] = config["api_base"]
-
-        # Only add api_key if it's configured (not None)
-        if config.get("api_key"):
-            lm_params["api_key"] = config["api_key"]
-
-        return dspy.LM(**lm_params)
-
-    @staticmethod
-    def create_orchestrator_model() -> dspy.LM:
-        """Create orchestrator model."""
-        return ModelFactory.create_dspy_model("orchestrator")
-
-    @staticmethod
-    def create_weather_model() -> dspy.LM:
-        """Create weather expert model."""
-        return ModelFactory.create_dspy_model("expert", "ingest")
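
With the factory removed, the same conditional-kwargs idea now lives inline; a minimal sketch of the replacement pattern built on the new YAML config (make_lm is a hypothetical name, not in this commit):

    import dspy
    from config_loader import load_config

    def make_lm(cfg):
        # Same idea the old factory used: only pass api_base when one is configured
        params = {"model": cfg["models"]["inference"]}
        api_base = cfg["api"]["base_url"] + cfg["api"]["api_version"]
        if api_base:
            params["api_base"] = api_base
        return dspy.LM(**params)

    lm = make_lm(load_config())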
@@ -0,0 +1,62 @@
+import dspy
+from langchain_community.vectorstores import FAISS
+from embedding import LocalLMEmbeddings
+from pathlib import Path
+
+# --- DSPy Signature ---
+class DnDContextQA(dspy.Signature):
+    """Answer DnD campaign questions using provided snippets and full file context.
+    /no_think"""
+    context = dspy.InputField(desc="Relevant chunks and full file contents from the campaign notes.")
+    question = dspy.InputField()
+    answer = dspy.OutputField(desc="A detailed answer based on the notes, citing the source file.")
+
+# --- DSPy Module ---
+class DnDRAG(dspy.Module):
+    def __init__(self, db_path="./local_faiss_db", k=3):
+        super().__init__()
+        # 1. Setup Embeddings & Load FAISS
+        self.embeddings = LocalLMEmbeddings(
+            model="text-embedding-qwen3-embedding-8b",
+            base_url="http://192.168.0.49:1234"
+        )
+        self.vectorstore = FAISS.load_local(
+            db_path, self.embeddings, allow_dangerous_deserialization=True
+        )
+        self.k = k
+
+        # 2. Setup the Predictor (Chain of Thought for better reasoning)
+        self.generate_answer = dspy.ChainOfThought(DnDContextQA)
+
+    def get_full_file_content(self, file_path):
+        """Helper to read the full source file if it exists."""
+        try:
+            return Path(file_path).read_text(encoding='utf-8')
+        except Exception:
+            return ""
+
+    def forward(self, question):
+        # 1. Search for top-k chunks
+        results = self.vectorstore.similarity_search(question, k=self.k)
+
+        # 2. Extract unique file paths to load "Full Context"
+        # This prevents the LLM from being 'blind' to the rest of a relevant session note
+        unique_paths = list(set([doc.metadata.get("full_path") for doc in results]))
+
+        context_parts = []
+        for i, doc in enumerate(results):
+            source = doc.metadata.get("source", "Unknown")
+            context_parts.append(f"--- Chunk {i+1} from {source} ---\n{doc.page_content}")
+
+        # 3. Add the Full Content of the top match (optional, but requested!)
+        # We'll just take the top 1 file to avoid context window explosion
+        if unique_paths:
+            top_file_content = self.get_full_file_content(unique_paths[0])
+            context_parts.append(f"\n=== FULL SOURCE FILE: {Path(unique_paths[0]).name} ===\n{top_file_content[:10000]}")
+
+        # 4. Join everything into one context string
+        context_str = "\n\n".join(context_parts)
+
+        # 5. Generate Response
+        prediction = self.generate_answer(context=context_str, question=question)
+        return dspy.Prediction(answer=prediction.answer, context=context_str)
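
Usage sketch for the new module (not part of the commit; the endpoint and model name are assumed from config.yaml above):

    import dspy
    from experts.dnd_agent import DnDRAG

    dspy.configure(lm=dspy.LM("lm_studio/qwen/qwen3-8b",
                              api_base="http://192.168.0.49:1234/v1/"))
    rag = DnDRAG(db_path="./local_faiss_db", k=3)
    result = rag(question="Why did the party get free bread?")
    print(result.answer)  # answer text, with the source file cited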
@@ -1,22 +1,31 @@
 import dspy
+from pydantic import BaseModel, Field
+from typing import List
+
+# 1. Define the structure of your metadata
+class DocMetadata(BaseModel):
+    synopsis: str = Field(description="A one-sentence summary of the document.")
+    tags: List[str] = Field(description="Relevant tags (NPCs, Locations, Items, Plot Points).")
+    entities: List[str] = Field(description="Key names of people, places, or factions.")
 
-class ingestionSignature(dspy.Signature):
-    """You are going to be given dungeon masters notes, on session plans, recaps, npcs, players.
-    You must summarize these document in one sentence
-    and extract as many relevant tags aspossible as a JSON list:
-    {{'synopsis': '...', 'tags': [...]}}\n\nDocument:\n{content}"
-    /no_think
-    """
-
-    note: str = dspy.InputField()
-    answer: str = dspy.OutputField()
+class IngestionSignature(dspy.Signature):
+    """
+    You are an expert Dungeon Master's assistant.
+    Analyze the provided notes and extract a concise synopsis and relevant metadata.
+    """
+    note: str = dspy.InputField(desc="The DM notes or session recap content.")
+    # By using the Pydantic model as the type, DSPy handles the JSON formatting for you
+    answer: DocMetadata = dspy.OutputField()
 
 class IngestionAgent(dspy.Module):
     """The Ingestion Agent is responsible for Document tagging and summarising."""
 
     def __init__(self):
         """Initialize the Oracle with available expert tools."""
-        # self.tools = []
-        self.ingest = dspy.Predict(signature=ingestionSignature)
         super().__init__()
+        # We use TypedPredictor to enforce the Pydantic schema
+        # We use ChainOfThought because it helps 8B models "reason" through the tags
+        # before committing to the final JSON structure.
+        self.process = dspy.TypedPredictor(IngestionSignature)
+
+    def forward(self, note: str):
+        # The .answer will now be a DocMetadata object, not a string!
+        prediction = self.process(note=note)
+        return prediction
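
What the typed output buys, in a short hedged example (the note text and printed values here are invented):

    agent = IngestionAgent()
    pred = agent.forward(note="Session 12: the party rescued the town baker...")
    meta = pred.answer    # a DocMetadata instance, not raw JSON text
    print(meta.synopsis)  # one-sentence summary
    print(meta.tags)      # e.g. ["NPCs", "Locations"]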
@@ -1,33 +0,0 @@
-import dspy
-
-from core import ModelFactory
-
-from .file import FileAgent
-
-
-class OrchestratorSignature(dspy.Signature):
-    """ """
-
-    question: str = dspy.InputField()
-    history: dspy.History = dspy.InputField()
-    answer: str = dspy.OutputField()
-
-
-class TheOracle(dspy.Module):
-    """The Oracle is the orchestrator of all the agents."""
-
-    def __init__(self):
-        """Initialize the Oracle with available expert tools."""
-        self.tools = [
-            self.consult_file_expert,
-        ]
-        self.oracle = dspy.ReAct(signature=OrchestratorSignature, tools=self.tools, max_iters=10)
-
-    def consult_file_expert(self, command: str) -> str:
-        """Use this expert when you want to save or retrieve information from files.
-
-        Also used to find files and update files
-        """
-        with dspy.context(lm=ModelFactory.create_file_model()):
-            result = FileAgent().file_agent(command=command)
-        return result.answer
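
For the record, the deleted orchestrator followed the stock dspy.ReAct tool pattern; a stripped-down sketch of that pattern (tool and variable names invented):

    import dspy

    def consult_notes(query: str) -> str:
        """Tool: look something up in the campaign notes."""
        return "..."  # a real tool would call a retriever here

    oracle = dspy.ReAct("question -> answer", tools=[consult_notes], max_iters=5)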
+19 -25
@@ -10,8 +10,11 @@ from tqdm import tqdm
 
 from embedding import LocalLMEmbeddings
 from experts.ingestion_agent import IngestionAgent
+from config_loader import load_config
 
-DATA_DIR = "/home/cosmic/DnD"
+CFG = load_config()
+DATA_DIR = CFG["ingestion"]["data_dir"]
 
 def load_documents():
     docs = []
@@ -41,47 +44,38 @@ def load_documents():
 def chunk_documents(docs):
     # LangChain preserves metadata during splitting automatically
     text_splitter = RecursiveCharacterTextSplitter(
-        chunk_size=800,
-        chunk_overlap=100,
+        chunk_size=CFG["ingestion"]["chunk_size"],
+        chunk_overlap=CFG["ingestion"]["chunk_overlap"],
         separators=["\n\n", "\n", ". ", " ", ""]
     )
     return text_splitter.split_documents(docs)
 
 def enrich_chunks(chunks: list) -> list:
-    MODEL_BASE = "lm_studio/qwen/qwen3-8b"
-    API_BASE = "http://192.168.0.49:1234/v1/"
+    MODEL_BASE = CFG["models"]["inference"]
+    API_BASE = CFG["api"]["base_url"]
+    API_VERSION = CFG["api"]["api_version"]
 
     def process_single_chunk(indexed_chunk):
        idx, chunk = indexed_chunk
        lm_index = idx % 8
 
        try:
-            # Configure context for this specific thread
-            with dspy.context(lm=dspy.LM(f"{MODEL_BASE}:{lm_index}", api_base=API_BASE)):
-                # Pass the text, but we will update the original chunk object
-                response = IngestionAgent().ingest(note=chunk.page_content)
+            with dspy.context(lm=dspy.LM(f"{MODEL_BASE}:{lm_index}", api_base=API_BASE+API_VERSION)):
+                response = IngestionAgent().forward(note=chunk.page_content)
 
-            answer = response.answer
-            start = answer.find("{")
-            end = answer.rfind("}") + 1
-            metadata_extracted = json.loads(answer[start:end])
-
-            # UPDATE: Put AI data in a sub-key to avoid overwriting 'source'
-            chunk.metadata["enrichment"] = metadata_extracted
-            # Also flatten tags for easier searching if needed
-            if "tags" in metadata_extracted:
-                chunk.metadata["tags"] = metadata_extracted["tags"]
+            # This is now an object, not a string!
+            metadata = response.answer.dict()
 
        except Exception as e:
-            # If enrichment fails, we KEEP the chunk but flag the error
-            # This ensures 'source' and 'full_path' are NEVER lost
-            chunk.metadata["enrichment_error"] = str(e)
-            chunk.metadata["tags"] = ["error"]
            print(f"⚠️ Failed for chunk {idx}: {e}")
+            metadata = {"synopsis": "Summary failed", "tags": ["error"], "entities": []}
 
-        return idx, chunk
+        chunk.metadata.update(metadata)
+        return chunk
 
 
    enriched_results = []
-    with ThreadPoolExecutor(max_workers=8) as executor:
+    with ThreadPoolExecutor(max_workers=CFG["ingestion"]["max_workers"]) as executor:
        # Wrap chunks in enumerate to keep track of order
        futures = [executor.submit(process_single_chunk, (i, c)) for i, c in enumerate(chunks)]
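
One detail worth keeping in mind: the f"{MODEL_BASE}:{lm_index}" suffix fans requests out across eight loaded model copies, one per worker slot, while idx % 8 stays hard-coded even though max_workers is now configurable. A sketch of the idea (assumes the server exposes the copies as model:0 ... model:7):

    def pick_model(idx: int, base: str = "lm_studio/qwen/qwen3-8b", copies: int = 8) -> str:
        # Worker idx talks to copy idx % copies, spreading load round-robin
        return f"{base}:{idx % copies}"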
-105
@@ -1,105 +0,0 @@
-import chromadb
-import streamlit as st
-from langchain.embeddings import HuggingFaceEmbeddings
-from langchain_community.llms import Ollama
-from langchain_core.prompts import PromptTemplate
-
-# CONFIG
-BASE_IP = "192.168.0.49"
-LM_STUDIO_PORT = "1234"
-CHROMA_PATH = "vector_db"
-MODEL_NAME = (
-    "lmstudio-community/qwen/qwen3-next-80b-a3b-instruct-q8_0.gguf"  # Use "llama3", "phi3", etc.
-)
-EMBEDDING_MODEL = "all-MiniLM-L6-v2"
-
-# Load embedding model
-embedder = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
-
-# Load local LLM for answering
-llm = Ollama(model=MODEL_NAME, temperature=0.3)
-
-# Initialize Chroma client
-client = chromadb.PersistentClient(path=CHROMA_PATH)
-collection = client.get_collection("documents")
-
-# Prompt template
-prompt_template = """
-You are a helpful assistant that answers questions using ONLY the context provided.
-Do not make up information or use external knowledge.
-
-Question: {question}
-
-Context:
-{context}
-
-If you cannot find an answer, say "I don't know based on the provided documents."
-
-Answer:
-"""
-
-prompt = PromptTemplate.from_template(prompt_template)
-
-# Streamlit UI
-st.title("📄 Local RAG Knowledge Assistant")
-st.write("Upload files to `documents/` and run `ingest.py` first.")
-
-query = st.text_input(
-    "Ask a question about your documents:", placeholder="What are the key financial metrics?"
-)
-
-if query:
-    with st.spinner("Searching for relevant info..."):
-        # Embed query
-        query_embedding = embedder.embed_query(query)
-
-        # Retrieve top 5 most similar chunks
-        results = collection.query(
-            query_embeddings=[query_embedding], n_results=5, include=["documents", "metadatas"]
-        )
-
-        documents = results["documents"][0]
-        metadatas = results["metadatas"][0]
-
-        # Build context from retrieved chunks + metadata
-        context = ""
-        for i, doc in enumerate(documents):
-            meta = metadatas[i]
-            synopsis = meta.get("synopsis", "No summary")
-            tags = (
-                ", ".join(meta.get("tags", []))
-                if isinstance(meta.get("tags"), list)
-                else str(meta.get("tags"))
-            )
-            source = meta.get("source", "Unknown")
-
-            context += f"""
---- Document Snippet ---
-{doc}
-
-Synopsis: {synopsis}
-Tags: {tags}
-Source: {source}
----
-"""
-
-    # Ask LLM
-    full_prompt = prompt.format(question=query, context=context)
-
-    with st.spinner("Generating answer..."):
-        response = llm.invoke(full_prompt)
-
-    st.subheader("🔍 Answer:")
-    st.write(response)
-
-    st.subheader("📚 Sources (retrieved chunks):")
-    for i, doc in enumerate(documents):
-        meta = metadatas[i]
-        source = meta.get("source", "Unknown")
-        tags = (
-            ", ".join(meta.get("tags", []))
-            if isinstance(meta.get("tags"), list)
-            else str(meta.get("tags"))
-        )
-        st.markdown(f"**Source**: `{source}` | **Tags**: {tags}")
-        st.text_area(f"Snippet {i + 1}", doc, height=120, disabled=True)
+1 -62
@@ -1,67 +1,6 @@
 import sys
 import dspy
-from langchain_community.vectorstores import FAISS
-from embedding import LocalLMEmbeddings
-from pathlib import Path
-
-# --- DSPy Signature ---
-class DnDContextQA(dspy.Signature):
-    """Answer DnD campaign questions using provided snippets and full file context."""
-    context = dspy.InputField(desc="Relevant chunks and full file contents from the campaign notes.")
-    question = dspy.InputField()
-    answer = dspy.OutputField(desc="A detailed answer based on the notes, citing the source file.")
-
-# --- DSPy Module ---
-class DnDRAG(dspy.Module):
-    def __init__(self, db_path="./local_faiss_db", k=3):
-        super().__init__()
-        # 1. Setup Embeddings & Load FAISS
-        self.embeddings = LocalLMEmbeddings(
-            model="text-embedding-qwen3-embedding-8b",
-            base_url="http://192.168.0.49:1234"
-        )
-        self.vectorstore = FAISS.load_local(
-            db_path, self.embeddings, allow_dangerous_deserialization=True
-        )
-        self.k = k
-
-        # 2. Setup the Predictor (Chain of Thought for better reasoning)
-        self.generate_answer = dspy.ChainOfThought(DnDContextQA)
-
-    def get_full_file_content(self, file_path):
-        """Helper to read the full source file if it exists."""
-        try:
-            return Path(file_path).read_text(encoding='utf-8')
-        except Exception:
-            return ""
-
-    def forward(self, question):
-        # 1. Search for top-k chunks
-        results = self.vectorstore.similarity_search(question, k=self.k)
-
-        # 2. Extract unique file paths to load "Full Context"
-        # This prevents the LLM from being 'blind' to the rest of a relevant session note
-        unique_paths = list(set([doc.metadata.get("full_path") for doc in results]))
-
-        context_parts = []
-        for i, doc in enumerate(results):
-            source = doc.metadata.get("source", "Unknown")
-            context_parts.append(f"--- Chunk {i+1} from {source} ---\n{doc.page_content}")
-
-        # 3. Add the Full Content of the top match (optional, but requested!)
-        # We'll just take the top 1 file to avoid context window explosion
-        if unique_paths:
-            top_file_content = self.get_full_file_content(unique_paths[0])
-            context_parts.append(f"\n=== FULL SOURCE FILE: {Path(unique_paths[0]).name} ===\n{top_file_content[:10000]}")
-
-        # 4. Join everything into one context string
-        context_str = "\n\n".join(context_parts)
-
-        # 5. Generate Response
-        prediction = self.generate_answer(context=context_str, question=question)
-        return dspy.Prediction(answer=prediction.answer, context=context_str)
-
-
+from experts.dnd_agent import DnDRAG
 
 def main():
     # 1. Setup the LLM
-31
@@ -1,31 +0,0 @@
-from langchain_community.vectorstores import FAISS
-
-from embedding import LocalLMEmbeddings
-
-
-def retrieve_enriched_context(query, db_path="./local_faiss_db"):
-    # 1. Re-initialize the same embedding model
-    embeddings_model = LocalLMEmbeddings(
-        model="text-embedding-qwen3-embedding-8b", base_url="http://192.168.0.49:1234"
-    )
-
-    # 2. Load the index from disk
-    # allow_dangerous_deserialization is required because FAISS uses pickle
-    vectorstore = FAISS.load_local(db_path, embeddings_model, allow_dangerous_deserialization=True)
-
-    # 3. Perform the search
-    # k=4 means "bring back the top 4 most relevant chunks"
-    results_with_scores = vectorstore.similarity_search_with_score(query, k=4)
-
-    return results_with_scores
-
-
-# --- Example Usage ---
-query = "the party get free bread but i cant remember why?"
-hits = retrieve_enriched_context(query)
-
-for doc, score in hits:
-    print(f"\n🎯 [Score: {score:.4f}]")
-    print(f"📄 Content: {doc.page_content[:200]}...")
-    print(f"🛠️ Metadata (Enrichment): {doc.metadata}")
-    # print(f"doc: {doc}")