feat: AI read-file tool, configurable system prompts, and loading multiple LLMs

This commit is contained in:
2026-03-04 15:48:25 +00:00
parent bbaebf1f70
commit 0d0e747682
10 changed files with 184 additions and 47 deletions
+6 -4
View File
@@ -19,7 +19,9 @@ MODEL_BASE = CFG["models"]["enrich"]
EMBEDDING_MODEL = CFG["models"]["embedding"]
API_BASE = CFG["api"]["base_url"]
API_VERSION = CFG["api"]["api_version"]
MAX_WORKERS = CFG["ingestion"]["max_workers"]
# MAX_WORKERS = CFG["ingestion"]["max_workers"]
ACTIVE_LLMS = CFG["ingestion"]["active_llms"]
PARALLEL_REQUESTS_PER_LLM = CFG["ingestion"]["parallel_requests_per_llm"]
CHUNK_SIZE = CFG["ingestion"]["chunk_size"]
CHUNK_OVERLAP = CFG["ingestion"]["chunk_overlap"]
EMBEDDING_BATCH_SIZE = CFG["ingestion"]["embedding_batch_size"]
@@ -75,10 +77,10 @@ def chunk_documents(docs):
def enrich_chunks(chunks: list) -> list:
def process_single_chunk(indexed_chunk):
idx, chunk = indexed_chunk
lm_index = idx % 8
lm_index = idx % ACTIVE_LLMS
try:
with dspy.context(lm=dspy.LM(model=MODEL_BASE, api_base=API_BASE + API_VERSION)):
with dspy.context(lm=dspy.LM(model=f"{MODEL_BASE}{lm_index}", api_base=API_BASE + API_VERSION), chat_template_kwargs={"enable_thinking": False}):
response = IngestionAgent().ingest(note=chunk.page_content)
# This is now an object, not a string!
@@ -92,7 +94,7 @@ def enrich_chunks(chunks: list) -> list:
return (idx, chunk)
enriched_results = []
with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
with ThreadPoolExecutor(max_workers=PARALLEL_REQUESTS_PER_LLM*ACTIVE_LLMS) as executor:
# Wrap chunks in enumerate to keep track of order
futures = [executor.submit(process_single_chunk, (i, c)) for i, c in enumerate(chunks)]