feat: ✨ Working again with proper use of enriched content

2026-02-01 09:02:41 +00:00
parent 6b9eecf24c
commit cabf4f5eab
7 changed files with 49 additions and 36 deletions
@@ -18,11 +18,10 @@ MODEL_BASE = CFG["models"]["enrich"]
 EMBEDDING_MODEL = CFG["models"]["embedding"]
 API_BASE = CFG["api"]["base_url"]
 API_VERSION = CFG["api"]["api_version"]
-MAX_WORKERS = CFG["ingestion"]["max_workers"]
-CHUNK_SIZE = (CFG["ingestion"]["chunk_size"],)
-CHUNK_OVERLAP = CFG["ingestion"]["chunk_overlap"]
-EMBEDDING_BATCH_SIZE = CFG["ingestion"]["embedding_batch_size"]
-
+MAX_WORKERS=CFG["ingestion"]["max_workers"]
+CHUNK_SIZE=CFG["ingestion"]["chunk_size"]
+CHUNK_OVERLAP=CFG["ingestion"]["chunk_overlap"]
+EMBEDDING_BATCH_SIZE=CFG["ingestion"]["embedding_batch_size"]

 def load_documents():
    docs = []
@@ -64,20 +63,19 @@ def enrich_chunks(chunks: list) -> list:
        lm_index = idx % 8

        try:
-            with dspy.context(
-                lm=dspy.LM(f"{MODEL_BASE}:{lm_index}", api_base=API_BASE + API_VERSION)
-            ):
-                response = IngestionAgent().forward(note=chunk.page_content)
-
+            with dspy.context(lm=dspy.LM(model=MODEL_BASE, api_base=API_BASE+API_VERSION)):
+                response = IngestionAgent().ingest(note=chunk.page_content)
+                
                # This is now an object, not a string!
-                metadata = response.answer.dict()
+                metadata = response.answer 

        except Exception as e:
            print(f"⚠️ Failed for chunk {idx}: {e}")
            metadata = {"synopsis": "Summary failed", "tags": ["error"], "entities": []}

-            chunk.metadata.update(metadata)
-            return chunk
+        chunk.metadata.update(metadata)
+        return (idx, chunk)
+            

    enriched_results = []
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor: