feat: AI read-file tool, configurable system prompts, and loading multiple LLMs

This commit is contained in:
2026-03-04 15:48:25 +00:00
parent bbaebf1f70
commit 0d0e747682
10 changed files with 184 additions and 47 deletions
+6 -4
View File
@@ -19,7 +19,9 @@ MODEL_BASE = CFG["models"]["enrich"]
EMBEDDING_MODEL = CFG["models"]["embedding"]
API_BASE = CFG["api"]["base_url"]
API_VERSION = CFG["api"]["api_version"]
MAX_WORKERS = CFG["ingestion"]["max_workers"]
# MAX_WORKERS = CFG["ingestion"]["max_workers"]
ACTIVE_LLMS = CFG["ingestion"]["active_llms"]
PARALLEL_REQUESTS_PER_LLM = CFG["ingestion"]["parallel_requests_per_llm"]
CHUNK_SIZE = CFG["ingestion"]["chunk_size"]
CHUNK_OVERLAP = CFG["ingestion"]["chunk_overlap"]
EMBEDDING_BATCH_SIZE = CFG["ingestion"]["embedding_batch_size"]
@@ -75,10 +77,10 @@ def chunk_documents(docs):
def enrich_chunks(chunks: list) -> list:
def process_single_chunk(indexed_chunk):
idx, chunk = indexed_chunk
lm_index = idx % 8
lm_index = idx % ACTIVE_LLMS
try:
with dspy.context(lm=dspy.LM(model=MODEL_BASE, api_base=API_BASE + API_VERSION)):
with dspy.context(lm=dspy.LM(model=f"{MODEL_BASE}{lm_index}", api_base=API_BASE + API_VERSION), chat_template_kwargs={"enable_thinking": False}):
response = IngestionAgent().ingest(note=chunk.page_content)
# This is now an object, not a string!
@@ -92,7 +94,7 @@ def enrich_chunks(chunks: list) -> list:
return (idx, chunk)
enriched_results = []
with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
with ThreadPoolExecutor(max_workers=PARALLEL_REQUESTS_PER_LLM*ACTIVE_LLMS) as executor:
# Wrap chunks in enumerate to keep track of order
futures = [executor.submit(process_single_chunk, (i, c)) for i, c in enumerate(chunks)]