feat: ✨ AI read file tool, configurable system prompts, and spreading load across multiple LLMs
+6 −4
@@ -19,7 +19,9 @@ MODEL_BASE = CFG["models"]["enrich"]
 EMBEDDING_MODEL = CFG["models"]["embedding"]
 API_BASE = CFG["api"]["base_url"]
 API_VERSION = CFG["api"]["api_version"]
-MAX_WORKERS = CFG["ingestion"]["max_workers"]
+# MAX_WORKERS = CFG["ingestion"]["max_workers"]
+ACTIVE_LLMS = CFG["ingestion"]["active_llms"]
+PARALLEL_REQUESTS_PER_LLM = CFG["ingestion"]["parallel_requests_per_llm"]
 CHUNK_SIZE = CFG["ingestion"]["chunk_size"]
 CHUNK_OVERLAP = CFG["ingestion"]["chunk_overlap"]
 EMBEDDING_BATCH_SIZE = CFG["ingestion"]["embedding_batch_size"]
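The two new constants replace the single MAX_WORKERS knob. For orientation, a minimal sketch of the shape CFG is assumed to take after this change; the key names come from the lookups above, while the loader, format, and every value shown are hypothetical:

    # Hypothetical CFG contents; only the key names come from the diff.
    CFG = {
        "models": {
            # Assumed replica-name prefix: a replica index gets appended later.
            "enrich": "hosted_vllm/qwen3-enrich-",
            "embedding": "hosted_vllm/bge-m3",
        },
        "api": {"base_url": "http://localhost:8000", "api_version": "/v1"},
        "ingestion": {
            "active_llms": 8,                # model replicas to fan out over
            "parallel_requests_per_llm": 4,  # target in-flight requests per replica
            "chunk_size": 1000,
            "chunk_overlap": 200,
            "embedding_batch_size": 64,
        },
    }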
@@ -75,10 +77,10 @@ def chunk_documents(docs):
 def enrich_chunks(chunks: list) -> list:
     def process_single_chunk(indexed_chunk):
         idx, chunk = indexed_chunk
-        lm_index = idx % 8
+        lm_index = idx % ACTIVE_LLMS
 
         try:
-            with dspy.context(lm=dspy.LM(model=MODEL_BASE, api_base=API_BASE + API_VERSION)):
+            with dspy.context(lm=dspy.LM(model=f"{MODEL_BASE}{lm_index}", api_base=API_BASE + API_VERSION), chat_template_kwargs={"enable_thinking": False}):
                 response = IngestionAgent().ingest(note=chunk.page_content)
 
                 # This is now an object, not a string!
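The modulo assignment is the core of the change: chunk i is pinned to replica i % ACTIVE_LLMS, and that index is appended to the model name so each request targets a differently named served model. A minimal sketch of the selection logic, assuming replicas are registered as f"{MODEL_BASE}0" through f"{MODEL_BASE}{ACTIVE_LLMS - 1}" on the same OpenAI-compatible endpoint:

    import dspy

    def lm_for_chunk(idx: int) -> dspy.LM:
        # Round-robin: chunk index modulo replica count picks the served model.
        lm_index = idx % ACTIVE_LLMS
        return dspy.LM(
            model=f"{MODEL_BASE}{lm_index}",  # e.g. "...-0", "...-1", ...
            api_base=API_BASE + API_VERSION,
        )

Note that chat_template_kwargs={"enable_thinking": False} is passed to dspy.context rather than to the dspy.LM constructor; whether it reaches the serving backend from there depends on how dspy.settings is consumed downstream, so the sketch leaves it out.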
@@ -92,7 +94,7 @@ def enrich_chunks(chunks: list) -> list:
         return (idx, chunk)
 
     enriched_results = []
-    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
+    with ThreadPoolExecutor(max_workers=PARALLEL_REQUESTS_PER_LLM*ACTIVE_LLMS) as executor:
         # Wrap chunks in enumerate to keep track of order
         futures = [executor.submit(process_single_chunk, (i, c)) for i, c in enumerate(chunks)]
 
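The executor is now sized from the two new config values rather than a fixed MAX_WORKERS, so total concurrency scales with the replica count. A quick check of the arithmetic, using the hypothetical values from the config sketch above:

    from concurrent.futures import ThreadPoolExecutor

    ACTIVE_LLMS = 8                 # hypothetical, as in the config sketch
    PARALLEL_REQUESTS_PER_LLM = 4

    # 4 in-flight requests per replica * 8 replicas = 32 worker threads.
    with ThreadPoolExecutor(max_workers=PARALLEL_REQUESTS_PER_LLM * ACTIVE_LLMS) as executor:
        pass  # one executor.submit(process_single_chunk, (i, c)) per enumerated chunk

Because a replica is chosen by chunk index rather than by worker thread, the per-replica limit is an average, not a hard cap: if one replica is slow, more than PARALLEL_REQUESTS_PER_LLM workers can end up waiting on it at once.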