From 8ca23187e35b316e1896f21ae78511c5a8203171 Mon Sep 17 00:00:00 2001 From: Jake Pullen Date: Wed, 28 Jan 2026 12:36:47 +0000 Subject: [PATCH] =?UTF-8?q?feat:=20=F0=9F=94=97=20Tidy?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/config.yaml | 4 +++- src/experts/dnd_agent.py | 16 +++++++++++++--- src/ingest.py | 26 ++++++++++++++++---------- src/retrieve.py | 10 +++++++++- 4 files changed, 41 insertions(+), 15 deletions(-) diff --git a/src/config.yaml b/src/config.yaml index d3da9ff..8ee22c9 100644 --- a/src/config.yaml +++ b/src/config.yaml @@ -5,8 +5,9 @@ api: # --- Model Settings --- models: - inference: "lm_studio/qwen/qwen3-8b" + enrich: "lm_studio/qwen/qwen3-8b" embedding: "text-embedding-qwen3-embedding-8b" + retrieval: "lm_studio/qwen/qwen3-next-80b" # --- Ingestion Settings --- ingestion: @@ -15,6 +16,7 @@ ingestion: max_workers: 8 chunk_size: 800 chunk_overlap: 100 + embedding_batch_size: 32 # --- Retrieval Settings --- retrieval: diff --git a/src/experts/dnd_agent.py b/src/experts/dnd_agent.py index f6af137..d246020 100644 --- a/src/experts/dnd_agent.py +++ b/src/experts/dnd_agent.py @@ -3,6 +3,16 @@ from langchain_community.vectorstores import FAISS from embedding import LocalLMEmbeddings from pathlib import Path +from config_loader import load_config + + +CFG = load_config() + +DATABASE_PATH = CFG["ingestion"]["db_path"] +EMBEDDING_MODEL = CFG["models"]["embedding"] +API_BASE = CFG["api"]["base_url"] + + # --- DSPy Signature --- class DnDContextQA(dspy.Signature): """Answer DnD campaign questions using provided snippets and full file context. @@ -13,12 +23,12 @@ class DnDContextQA(dspy.Signature): # --- DSPy Module --- class DnDRAG(dspy.Module): - def __init__(self, db_path="./local_faiss_db", k=3): + def __init__(self, db_path=DATABASE_PATH, k=3): super().__init__() # 1. 
Setup Embeddings & Load FAISS self.embeddings = LocalLMEmbeddings( - model="text-embedding-qwen3-embedding-8b", - base_url="http://192.168.0.49:1234" + model=EMBEDDING_MODEL, + base_url=API_BASE ) self.vectorstore = FAISS.load_local( db_path, self.embeddings, allow_dangerous_deserialization=True diff --git a/src/ingest.py b/src/ingest.py index cb2733e..b6c1494 100644 --- a/src/ingest.py +++ b/src/ingest.py @@ -15,6 +15,15 @@ from config_loader import load_config CFG = load_config() DATA_DIR = CFG["ingestion"]["data_dir"] +DATABASE_PATH = CFG["ingestion"]["db_path"] +MODEL_BASE = CFG["models"]["enrich"] +EMBEDDING_MODEL = CFG["models"]["embedding"] +API_BASE = CFG["api"]["base_url"] +API_VERSION = CFG["api"]["api_version"] +MAX_WORKERS = CFG["ingestion"]["max_workers"] +CHUNK_SIZE = CFG["ingestion"]["chunk_size"] +CHUNK_OVERLAP = CFG["ingestion"]["chunk_overlap"] +EMBEDDING_BATCH_SIZE = CFG["ingestion"]["embedding_batch_size"] def load_documents(): docs = [] @@ -44,16 +53,13 @@ def load_documents(): def chunk_documents(docs): # LangChain preserves metadata during splitting automatically text_splitter = RecursiveCharacterTextSplitter( - chunk_size=CFG["ingestion"]["chunk_size"], - chunk_overlap=CFG["ingestion"]["chunk_overlap"], + chunk_size=CHUNK_SIZE, + chunk_overlap=CHUNK_OVERLAP, separators=["\n\n", "\n", ". 
", " ", ""] ) return text_splitter.split_documents(docs) def enrich_chunks(chunks: list) -> list: - MODEL_BASE = CFG["models"]["inference"] - API_BASE = CFG["api"]["base_url"] - API_VERSION = CFG["api"]["api_version"] def process_single_chunk(indexed_chunk): idx, chunk = indexed_chunk @@ -75,7 +81,7 @@ def enrich_chunks(chunks: list) -> list: enriched_results = [] - with ThreadPoolExecutor(max_workers=CFG["ingestion"]["max_workers"]) as executor: + with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor: # Wrap chunks in enumerate to keep track of order futures = [executor.submit(process_single_chunk, (i, c)) for i, c in enumerate(chunks)] @@ -86,11 +92,11 @@ def enrich_chunks(chunks: list) -> list: enriched_results.sort(key=lambda x: x[0]) return [item[1] for item in enriched_results] -def store_chunks_locally(chunks, db_path="./local_faiss_db"): +def store_chunks_locally(chunks, db_path=DATABASE_PATH): embeddings_model = LocalLMEmbeddings( - model="text-embedding-qwen3-embedding-8b", - base_url="http://192.168.0.49:1234", - batch_size=32, + model=EMBEDDING_MODEL, + base_url=API_BASE, + batch_size=EMBEDDING_BATCH_SIZE, ) print(f"Index creation started for {len(chunks)} chunks...") diff --git a/src/retrieve.py b/src/retrieve.py index b5dda6a..bfaed72 100644 --- a/src/retrieve.py +++ b/src/retrieve.py @@ -1,11 +1,19 @@ import sys import dspy from experts.dnd_agent import DnDRAG +from config_loader import load_config + + +CFG = load_config() +RETRIEVE_MODEL = CFG["models"]["retrieval"] +API_BASE = CFG["api"]["base_url"] +API_VERSION = CFG["api"]["api_version"] + def main(): # 1. Setup the LLM print("🚀 Initializing Qwen-8B via LM Studio...") - lm = dspy.LM("lm_studio/qwen/qwen3-8b", api_base="http://192.168.0.49:1234/v1/") + lm = dspy.LM(RETRIEVE_MODEL, api_base=API_BASE+API_VERSION) dspy.configure(lm=lm) # 2. Load the RAG System (only happens once!)