feat: ✨ AI Read File Tool, Configurable system prompts and loading lots of llms
This commit is contained in:
@@ -1,4 +1,5 @@
|
|||||||
data/*
|
data/*
|
||||||
|
*.log
|
||||||
|
|
||||||
# Python-generated files
|
# Python-generated files
|
||||||
__pycache__/
|
__pycache__/
|
||||||
|
|||||||
@@ -1,8 +1,10 @@
|
|||||||
Read File Tool for Retrieve Agent
|
---Read File Tool for Retrieve Agent---
|
||||||
|
|
||||||
Easy Config of system prompts
|
---Easy Config of system prompts---
|
||||||
examples into prompts & better prompts
|
examples into prompts & better prompts
|
||||||
|
|
||||||
|
LMS CLI script to load multiple models and to make each model accept multiple inferences
|
||||||
|
|
||||||
context engineering, - only include vector hits that are x distance?
|
context engineering, - only include vector hits that are x distance?
|
||||||
|
|
||||||
AI in the middle - make the ai generate the string for vector search
|
AI in the middle - make the ai generate the string for vector search
|
||||||
@@ -19,7 +21,3 @@ QA specific embedding models?
|
|||||||
|
|
||||||
Evaluation metrics, how good is it doing?
|
Evaluation metrics, how good is it doing?
|
||||||
rate my response!?
|
rate my response!?
|
||||||
examples into prompts & better prompts
|
|
||||||
|
|
||||||
common model attributes - temp & top-k
|
|
||||||
|
|
||||||
|
|||||||
Executable
+10
@@ -0,0 +1,10 @@
|
|||||||
|
lms load qwen/qwen3.5-4b --parallel 4 --identifier "qwen-0" --ttl 1800
|
||||||
|
lms load qwen/qwen3.5-4b --parallel 4 --identifier "qwen-1" --ttl 1800
|
||||||
|
lms load qwen/qwen3.5-4b --parallel 4 --identifier "qwen-2" --ttl 1800
|
||||||
|
lms load qwen/qwen3.5-4b --parallel 4 --identifier "qwen-3" --ttl 1800
|
||||||
|
lms load qwen/qwen3.5-4b --parallel 4 --identifier "qwen-4" --ttl 1800
|
||||||
|
lms load qwen/qwen3.5-4b --parallel 4 --identifier "qwen-5" --ttl 1800
|
||||||
|
lms load qwen/qwen3.5-4b --parallel 4 --identifier "qwen-6" --ttl 1800
|
||||||
|
lms load qwen/qwen3.5-4b --parallel 4 --identifier "qwen-7" --ttl 1800
|
||||||
|
lms load qwen/qwen3.5-4b --parallel 4 --identifier "qwen-8" --ttl 1800
|
||||||
|
lms load qwen/qwen3.5-4b --parallel 4 --identifier "qwen-9" --ttl 1800
|
||||||
+19
-6
@@ -5,7 +5,7 @@ api:
|
|||||||
|
|
||||||
# --- Model Settings ---
|
# --- Model Settings ---
|
||||||
models:
|
models:
|
||||||
enrich: "lm_studio/qwen/qwen3-8b"
|
enrich: "lm_studio/qwen-"
|
||||||
embedding: "text-embedding-qwen3-embedding-8b"
|
embedding: "text-embedding-qwen3-embedding-8b"
|
||||||
retrieval: "lm_studio/qwen/qwen3-30b-a3b-2507"
|
retrieval: "lm_studio/qwen/qwen3-30b-a3b-2507"
|
||||||
|
|
||||||
@@ -13,13 +13,26 @@ models:
|
|||||||
ingestion:
|
ingestion:
|
||||||
data_dir: "/home/devin/DnD"
|
data_dir: "/home/devin/DnD"
|
||||||
db_path: "./data/dmv.db"
|
db_path: "./data/dmv.db"
|
||||||
max_workers: 8
|
active_llms: 10
|
||||||
|
parallel_requests_per_llm: 4
|
||||||
chunk_size: 800
|
chunk_size: 800
|
||||||
chunk_overlap: 100
|
chunk_overlap: 100
|
||||||
embedding_batch_size: 32
|
embedding_batch_size: 32
|
||||||
time_file_location: "./data/time_file.txt"
|
time_file_location: "./data/time_file.txt"
|
||||||
|
|
||||||
# --- Retrieval Settings ---
|
# ---- Agent Settings ----
|
||||||
retrieval:
|
ingestion_agent:
|
||||||
top_k: 4
|
ingestion_signature: |
|
||||||
context_limit: 10000 # Max characters from full file context
|
You are an expert Dungeon Master's assistant.
|
||||||
|
Analyze the provided notes and extract a concise synopsis and relevant metadata.
|
||||||
|
synopsis = A one-sentence summary of the document.
|
||||||
|
tags = Relevant tags (NPCs, Locations, Items, Plot Points).
|
||||||
|
entities = A list of key names of people, places, or factions.
|
||||||
|
"note -> synopsis:str, tags: list[str], entities: list[str]"
|
||||||
|
|
||||||
|
retrieval_agent:
|
||||||
|
retrieval_signature: |
|
||||||
|
You are an expert Dungeon Master's assistant.
|
||||||
|
Given the context and the question, answer the question.
|
||||||
|
Do not make things up, base all of your answers on the context.
|
||||||
|
Always cite your sources.
|
||||||
|
|||||||
+7
-3
@@ -1,12 +1,16 @@
|
|||||||
import requests
|
import requests
|
||||||
from langchain_core.embeddings import Embeddings
|
from langchain_core.embeddings import Embeddings
|
||||||
|
from config_loader import load_config
|
||||||
|
|
||||||
|
CFG = load_config()
|
||||||
|
API_BASE = CFG["api"]["base_url"]
|
||||||
|
API_VERSION = CFG["api"]["api_version"]
|
||||||
|
|
||||||
class LocalLMEmbeddings(Embeddings):
|
class LocalLMEmbeddings(Embeddings):
|
||||||
def __init__(
|
def __init__(
|
||||||
self, model: str, base_url: str = "http://192.168.0.49:1234", batch_size: int = 32
|
self, model: str, base_url: str = API_BASE, batch_size: int = 32
|
||||||
):
|
):
|
||||||
self.url = f"{base_url}/v1/embeddings"
|
self.url = f"{base_url}/{API_VERSION}/embeddings"
|
||||||
self.model = model
|
self.model = model
|
||||||
self.batch_size = batch_size
|
self.batch_size = batch_size
|
||||||
|
|
||||||
@@ -27,7 +31,7 @@ class LocalLMEmbeddings(Embeddings):
|
|||||||
return [[] for _ in input_texts]
|
return [[] for _ in input_texts]
|
||||||
|
|
||||||
def embed_documents(self, texts: list[str]) -> list[list[float]]:
|
def embed_documents(self, texts: list[str]) -> list[list[float]]:
|
||||||
"""Splits 500+ chunks into batches of 32 and processes them."""
|
"""Splits chunks into batches of 32 and processes them."""
|
||||||
all_embeddings = []
|
all_embeddings = []
|
||||||
|
|
||||||
for i in range(0, len(texts), self.batch_size):
|
for i in range(0, len(texts), self.batch_size):
|
||||||
|
|||||||
@@ -1,21 +1,16 @@
|
|||||||
import dspy
|
import dspy
|
||||||
from typing import List
|
from typing import List
|
||||||
|
from config_loader import load_config
|
||||||
|
|
||||||
|
CFG = load_config()
|
||||||
|
INGESTION_CONFIG = CFG["ingestion_agent"]
|
||||||
|
|
||||||
class IngestionSignature(dspy.Signature):
    # The instruction text is configurable and read from the
    # ``ingestion_agent.ingestion_signature`` config entry.
    #
    # It is assigned to ``__doc__`` explicitly because an f-string placed in
    # docstring position is just an expression statement: Python never treats
    # a formatted string literal as a docstring, so the configured prompt
    # would be silently dropped. (Nesting the same quote character inside an
    # f-string is also a syntax error before Python 3.12.)
    # NOTE(review): assumes DSPy takes the signature instructions from the
    # class ``__doc__``, as the previous literal docstring relied on.
    __doc__ = INGESTION_CONFIG["ingestion_signature"]

    # Raw note text handed to the model.
    note: str = dspy.InputField(desc="The DM notes or session recap content.")
    # Structured metadata extracted from the note.
    answer: dict[str, str | List] = dspy.OutputField(
        desc="the metadata dictionary with the keys; synopsis, tags, entities"
    )
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class IngestionAgent(dspy.Module):
|
class IngestionAgent(dspy.Module):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.ingest = dspy.Predict(IngestionSignature)
|
self.ingest = dspy.Predict(IngestionSignature)
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
# from pathlib import Path
|
import os
|
||||||
import turso
|
import turso
|
||||||
import dspy
|
import dspy
|
||||||
# from langchain_community.vectorstores import FAISS
|
|
||||||
|
|
||||||
from config_loader import load_config
|
from config_loader import load_config
|
||||||
from embedding import LocalLMEmbeddings
|
from embedding import LocalLMEmbeddings
|
||||||
@@ -11,11 +11,10 @@ CFG = load_config()
|
|||||||
DATABASE_PATH = CFG["ingestion"]["db_path"]
|
DATABASE_PATH = CFG["ingestion"]["db_path"]
|
||||||
EMBEDDING_MODEL = CFG["models"]["embedding"]
|
EMBEDDING_MODEL = CFG["models"]["embedding"]
|
||||||
API_BASE = CFG["api"]["base_url"]
|
API_BASE = CFG["api"]["base_url"]
|
||||||
|
RETRIEVAL_CONFIG = CFG["retrieval_agent"]
|
||||||
|
|
||||||
|
|
||||||
# Inside your retrieval logic:
|
|
||||||
def retrieve_from_turso(embedded_question, k=5):
|
def retrieve_from_turso(embedded_question, k=5):
|
||||||
# Example query: search for relevant notes using full-text search or embedding similarity
|
|
||||||
# Note: Turso supports SQLite, so you can use FTS5 or a vector extension if available
|
|
||||||
query = f"""
|
query = f"""
|
||||||
SELECT file_path, synopsis, tags, entities, chunk_data,
|
SELECT file_path, synopsis, tags, entities, chunk_data,
|
||||||
vector_distance_cos(embedding, vector32('{embedded_question[0]}')) AS distance
|
vector_distance_cos(embedding, vector32('{embedded_question[0]}')) AS distance
|
||||||
@@ -31,8 +30,7 @@ def retrieve_from_turso(embedded_question, k=5):
|
|||||||
|
|
||||||
# --- DSPy Signature ---
|
# --- DSPy Signature ---
|
||||||
class DnDContextQA(dspy.Signature):
|
class DnDContextQA(dspy.Signature):
|
||||||
"""Answer DnD campaign questions using provided details.
|
f"{RETRIEVAL_CONFIG["retrieval_signature"]}"
|
||||||
"""
|
|
||||||
|
|
||||||
context = dspy.InputField(
|
context = dspy.InputField(
|
||||||
desc="Relevant chunks and metadata from the campaign notes."
|
desc="Relevant chunks and metadata from the campaign notes."
|
||||||
@@ -49,7 +47,11 @@ class DnDRAG(dspy.Module):
|
|||||||
base_url=API_BASE,
|
base_url=API_BASE,
|
||||||
batch_size=1, # we only send 1 question at a time.
|
batch_size=1, # we only send 1 question at a time.
|
||||||
)
|
)
|
||||||
self.generate_answer = dspy.ChainOfThought(DnDContextQA)
|
# Tools exposed to the ReAct loop
|
||||||
|
self.tools = [
|
||||||
|
self.load_file
|
||||||
|
]
|
||||||
|
self.generate_answer = dspy.ReAct(signature=DnDContextQA,tools=self.tools)
|
||||||
|
|
||||||
def forward(self, question):
|
def forward(self, question):
|
||||||
# Use Turso to retrieve relevant notes
|
# Use Turso to retrieve relevant notes
|
||||||
@@ -74,11 +76,22 @@ entities: {entities}
|
|||||||
{content}
|
{content}
|
||||||
""")
|
""")
|
||||||
|
|
||||||
print('Closest embedding hits')
|
# print('Closest embedding hits')
|
||||||
for part in context_parts:
|
# for part in context_parts:
|
||||||
print(part)
|
# print(part)
|
||||||
|
|
||||||
context = "\n\n".join(context_parts)
|
context = "\n\n".join(context_parts)
|
||||||
|
|
||||||
prediction = self.generate_answer(context=context, question=question)
|
prediction = self.generate_answer(context=context, question=question)
|
||||||
return dspy.Prediction(answer=prediction.answer, context=context)
|
return dspy.Prediction(answer=prediction.answer, context=context)
|
||||||
|
|
||||||
|
def load_file(self, file_path) -> str | None:
|
||||||
|
"""Load and return specified file."""
|
||||||
|
if os.path.exists(file_path):
|
||||||
|
try:
|
||||||
|
with open(file_path) as file:
|
||||||
|
return file.read()
|
||||||
|
except Exception:
|
||||||
|
return None
|
||||||
|
else:
|
||||||
|
return None
|
||||||
+6
-4
@@ -19,7 +19,9 @@ MODEL_BASE = CFG["models"]["enrich"]
|
|||||||
EMBEDDING_MODEL = CFG["models"]["embedding"]
|
EMBEDDING_MODEL = CFG["models"]["embedding"]
|
||||||
API_BASE = CFG["api"]["base_url"]
|
API_BASE = CFG["api"]["base_url"]
|
||||||
API_VERSION = CFG["api"]["api_version"]
|
API_VERSION = CFG["api"]["api_version"]
|
||||||
MAX_WORKERS = CFG["ingestion"]["max_workers"]
|
# MAX_WORKERS = CFG["ingestion"]["max_workers"]
|
||||||
|
ACTIVE_LLMS = CFG["ingestion"]["active_llms"]
|
||||||
|
PARALLEL_REQUESTS_PER_LLM = CFG["ingestion"]["parallel_requests_per_llm"]
|
||||||
CHUNK_SIZE = CFG["ingestion"]["chunk_size"]
|
CHUNK_SIZE = CFG["ingestion"]["chunk_size"]
|
||||||
CHUNK_OVERLAP = CFG["ingestion"]["chunk_overlap"]
|
CHUNK_OVERLAP = CFG["ingestion"]["chunk_overlap"]
|
||||||
EMBEDDING_BATCH_SIZE = CFG["ingestion"]["embedding_batch_size"]
|
EMBEDDING_BATCH_SIZE = CFG["ingestion"]["embedding_batch_size"]
|
||||||
@@ -75,10 +77,10 @@ def chunk_documents(docs):
|
|||||||
def enrich_chunks(chunks: list) -> list:
|
def enrich_chunks(chunks: list) -> list:
|
||||||
def process_single_chunk(indexed_chunk):
|
def process_single_chunk(indexed_chunk):
|
||||||
idx, chunk = indexed_chunk
|
idx, chunk = indexed_chunk
|
||||||
lm_index = idx % 8
|
lm_index = idx % ACTIVE_LLMS
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with dspy.context(lm=dspy.LM(model=MODEL_BASE, api_base=API_BASE + API_VERSION)):
|
with dspy.context(lm=dspy.LM(model=f"{MODEL_BASE}{lm_index}", api_base=API_BASE + API_VERSION), chat_template_kwargs={"enable_thinking": False}):
|
||||||
response = IngestionAgent().ingest(note=chunk.page_content)
|
response = IngestionAgent().ingest(note=chunk.page_content)
|
||||||
|
|
||||||
# This is now an object, not a string!
|
# This is now an object, not a string!
|
||||||
@@ -92,7 +94,7 @@ def enrich_chunks(chunks: list) -> list:
|
|||||||
return (idx, chunk)
|
return (idx, chunk)
|
||||||
|
|
||||||
enriched_results = []
|
enriched_results = []
|
||||||
with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
|
with ThreadPoolExecutor(max_workers=PARALLEL_REQUESTS_PER_LLM*ACTIVE_LLMS) as executor:
|
||||||
# Wrap chunks in enumerate to keep track of order
|
# Wrap chunks in enumerate to keep track of order
|
||||||
futures = [executor.submit(process_single_chunk, (i, c)) for i, c in enumerate(chunks)]
|
futures = [executor.submit(process_single_chunk, (i, c)) for i, c in enumerate(chunks)]
|
||||||
|
|
||||||
|
|||||||
+81
-5
@@ -1,16 +1,93 @@
|
|||||||
import sys
|
import sys
|
||||||
import dspy
|
import dspy
|
||||||
# import turso
|
import logging
|
||||||
|
from dspy.utils.callback import BaseCallback
|
||||||
|
|
||||||
|
from logging.handlers import RotatingFileHandler
|
||||||
|
|
||||||
from config_loader import load_config
|
from config_loader import load_config
|
||||||
from experts.dnd_agent import DnDRAG
|
from experts.retrieval_agent import DnDRAG
|
||||||
|
|
||||||
CFG = load_config()
|
CFG = load_config()
|
||||||
RETRIEVE_MODEL = CFG["models"]["retrieval"]
|
RETRIEVE_MODEL = CFG["models"]["retrieval"]
|
||||||
API_BASE = CFG["api"]["base_url"]
|
API_BASE = CFG["api"]["base_url"]
|
||||||
API_VERSION = CFG["api"]["api_version"]
|
API_VERSION = CFG["api"]["api_version"]
|
||||||
|
|
||||||
|
class CallbackHandler(BaseCallback):
    """DSPy callback that records module, LM and tool events on a logger.

    Every hook writes at DEBUG level to the logger supplied at construction.
    """

    def __init__(self, logger):
        """Store the logger that all hooks write to.

        Args:
            logger: Logger-like object exposing ``debug``.
        """
        super().__init__()
        self.logger = logger

    def on_module_end(self, call_id, outputs, exception):
        """Log the outputs of a finished module call, one line per key.

        Args:
            call_id: Identifier of the call (unused).
            outputs: Mapping-like result of the module, or ``None``.
            exception: Exception raised by the module, if any.
        """
        if outputs is None:
            # The hook also fires for failed calls, where there is nothing to
            # iterate; record the failure instead of raising a TypeError
            # inside the callback.
            self.logger.debug(f"== Module ended without outputs: {exception!r} ===")
            return

        step = "Reasoning" if self._is_reasoning_output(outputs) else "Acting"
        self.logger.debug(f"== {step} Step ===")
        for key, value in outputs.items():
            self.logger.debug(f" {key}: {value}")

    def on_lm_start(self, call_id, instance, inputs):
        """Log the inputs a language model is about to be called with."""
        self.logger.debug(f"LM is called with inputs: {inputs}")

    def on_tool_start(self, call_id, instance, inputs):
        """Log which tool is being invoked and with what inputs."""
        self.logger.debug(f"Tool {instance} called with inputs: {inputs}")

    def on_tool_end(self, call_id, outputs, exception):
        """Log the outputs a tool returned."""
        self.logger.debug(f"Tool finished with outputs: {outputs}")

    def on_lm_end(self, call_id, outputs, exception):
        """Log the outputs a language model returned."""
        self.logger.debug(f"LM is finished with outputs: {outputs}")

    def _is_reasoning_output(self, outputs):
        """Return True when any output key starts with ``"Thought"``."""
        return any(k.startswith("Thought") for k in outputs)
|
||||||
|
|
||||||
|
def setup_logging(log_file="dmv.log", *, max_bytes=5 * 1024 * 1024, backup_count=3):
    """Configure and return this module's logger.

    The logger accepts DEBUG and above. It gets a console handler that emits
    INFO and above, and a size-rotated file handler that emits DEBUG and
    above. Both handlers share one timestamped format.

    The function is idempotent: if the logger already has handlers it is
    returned as-is, so repeated calls do not duplicate every log line. In
    that case the arguments of the later call are ignored.

    Args:
        log_file: Path of the rotating log file.
        max_bytes: Size at which the log file is rotated.
        backup_count: Number of rotated files to keep.

    Returns:
        The configured ``logging.Logger`` named after this module.
    """
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)

    # getLogger returns the same object on every call; attaching handlers
    # again would emit each record once per attached copy.
    if logger.handlers:
        return logger

    formatter = logging.Formatter(
        "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    )

    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.INFO)

    file_handler = RotatingFileHandler(
        log_file, maxBytes=max_bytes, backupCount=backup_count
    )
    file_handler.setLevel(logging.DEBUG)

    for handler in (console_handler, file_handler):
        handler.setFormatter(formatter)
        logger.addHandler(handler)

    return logger
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
logger = setup_logging()
|
||||||
|
logger.debug("main application started")
|
||||||
|
|
||||||
|
# Add verbose callback
|
||||||
|
dspy.configure(verbose_errors=True)
|
||||||
|
dspy.configure(callbacks=[CallbackHandler(logger)])
|
||||||
# 1. Setup the LLM
|
# 1. Setup the LLM
|
||||||
print("🚀 Initializing Qwen-8B via LM Studio...")
|
print("🚀 Initializing Qwen-8B via LM Studio...")
|
||||||
lm = dspy.LM(RETRIEVE_MODEL, api_base=API_BASE + API_VERSION)
|
lm = dspy.LM(RETRIEVE_MODEL, api_base=API_BASE + API_VERSION)
|
||||||
@@ -32,7 +109,7 @@ def main():
|
|||||||
query = input("📝 Query: ").strip()
|
query = input("📝 Query: ").strip()
|
||||||
|
|
||||||
# Exit conditions
|
# Exit conditions
|
||||||
if query.lower() in ["exit", "quit", "q"]:
|
if query.lower() in ["exit", "quit", "q", "bye"]:
|
||||||
print("Farewell, traveler. Good luck on your quest!")
|
print("Farewell, traveler. Good luck on your quest!")
|
||||||
break
|
break
|
||||||
|
|
||||||
@@ -47,11 +124,10 @@ def main():
|
|||||||
print(response.answer)
|
print(response.answer)
|
||||||
|
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
print("\n\nExiting... See you next session!")
|
print("\n\nRude?!.... Exiting...")
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"\n⚠️ An error occurred: {e}")
|
print(f"\n⚠️ An error occurred: {e}")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
@@ -0,0 +1,25 @@
|
|||||||
|
import dspy
|
||||||
|
|
||||||
|
base_url= "http://framework.tawny-bellatrix.ts.net:1234"
|
||||||
|
model_name= "lm_studio/qwen-0"
|
||||||
|
|
||||||
|
|
||||||
|
lm = dspy.LM(
|
||||||
|
model=model_name,
|
||||||
|
api_base=f"{base_url}/v1/"
|
||||||
|
)
|
||||||
|
dspy.configure(lm=lm)
|
||||||
|
|
||||||
|
|
||||||
|
# question = "How can i use dspy framework to add 'chat_template_kwargs={\"enable_thinking\": False}' to my API call to LM Studio? i know it uses litellm under the hood"
|
||||||
|
# question = "Hi there, do you have a name? if not i want you to name yourself."
|
||||||
|
question = "how long would it take light to travel from the sun to the earth? /no_think"
|
||||||
|
|
||||||
|
# Call with request_kwargs to inject the template kwargs
|
||||||
|
response = lm(
|
||||||
|
messages=[{"role": "user", "content": question}]
|
||||||
|
# extra_body={"enable_thinking": False}
|
||||||
|
# enable_thinking=False
|
||||||
|
)
|
||||||
|
|
||||||
|
print(response)
|
||||||
Reference in New Issue
Block a user