feat: ✨ Working again with proper use of enriched content
This commit is contained in:
+11
-13
@@ -18,11 +18,10 @@ MODEL_BASE = CFG["models"]["enrich"]
|
||||
EMBEDDING_MODEL = CFG["models"]["embedding"]
|
||||
API_BASE = CFG["api"]["base_url"]
|
||||
API_VERSION = CFG["api"]["api_version"]
|
||||
MAX_WORKERS = CFG["ingestion"]["max_workers"]
|
||||
CHUNK_SIZE = (CFG["ingestion"]["chunk_size"],)
|
||||
CHUNK_OVERLAP = CFG["ingestion"]["chunk_overlap"]
|
||||
EMBEDDING_BATCH_SIZE = CFG["ingestion"]["embedding_batch_size"]
|
||||
|
||||
MAX_WORKERS=CFG["ingestion"]["max_workers"]
|
||||
CHUNK_SIZE=CFG["ingestion"]["chunk_size"]
|
||||
CHUNK_OVERLAP=CFG["ingestion"]["chunk_overlap"]
|
||||
EMBEDDING_BATCH_SIZE=CFG["ingestion"]["embedding_batch_size"]
|
||||
|
||||
def load_documents():
|
||||
docs = []
|
||||
@@ -64,20 +63,19 @@ def enrich_chunks(chunks: list) -> list:
|
||||
lm_index = idx % 8
|
||||
|
||||
try:
|
||||
with dspy.context(
|
||||
lm=dspy.LM(f"{MODEL_BASE}:{lm_index}", api_base=API_BASE + API_VERSION)
|
||||
):
|
||||
response = IngestionAgent().forward(note=chunk.page_content)
|
||||
|
||||
with dspy.context(lm=dspy.LM(model=MODEL_BASE, api_base=API_BASE+API_VERSION)):
|
||||
response = IngestionAgent().ingest(note=chunk.page_content)
|
||||
|
||||
# This is now an object, not a string!
|
||||
metadata = response.answer.dict()
|
||||
metadata = response.answer
|
||||
|
||||
except Exception as e:
|
||||
print(f"⚠️ Failed for chunk {idx}: {e}")
|
||||
metadata = {"synopsis": "Summary failed", "tags": ["error"], "entities": []}
|
||||
|
||||
chunk.metadata.update(metadata)
|
||||
return chunk
|
||||
chunk.metadata.update(metadata)
|
||||
return (idx, chunk)
|
||||
|
||||
|
||||
enriched_results = []
|
||||
with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
|
||||
|
||||
Reference in New Issue
Block a user