diff --git a/load_ingestion_llms.sh b/load_ingestion_llms.sh index 9cd74f8..a190f05 100755 --- a/load_ingestion_llms.sh +++ b/load_ingestion_llms.sh @@ -1,10 +1,5 @@ -lms load qwen-4b-instruct-2507 --parallel 4 --identifier "qwen-0" --ttl 1800 -lms load qwen-4b-instruct-2507 --parallel 4 --identifier "qwen-1" --ttl 1800 -lms load qwen-4b-instruct-2507 --parallel 4 --identifier "qwen-2" --ttl 1800 -lms load qwen-4b-instruct-2507 --parallel 4 --identifier "qwen-3" --ttl 1800 -lms load qwen-4b-instruct-2507 --parallel 4 --identifier "qwen-4" --ttl 1800 -lms load qwen-4b-instruct-2507 --parallel 4 --identifier "qwen-5" --ttl 1800 -lms load qwen-4b-instruct-2507 --parallel 4 --identifier "qwen-6" --ttl 1800 -lms load qwen-4b-instruct-2507 --parallel 4 --identifier "qwen-7" --ttl 1800 -lms load qwen-4b-instruct-2507 --parallel 4 --identifier "qwen-8" --ttl 1800 -lms load qwen-4b-instruct-2507 --parallel 4 --identifier "qwen-9" --ttl 1800 \ No newline at end of file +lms load qwen-4b-instruct-2507 --parallel 2 --identifier "qwen-0" --ttl 1800 +lms load qwen-4b-instruct-2507 --parallel 2 --identifier "qwen-1" --ttl 1800 +lms load qwen-4b-instruct-2507 --parallel 2 --identifier "qwen-2" --ttl 1800 +lms load qwen-4b-instruct-2507 --parallel 2 --identifier "qwen-3" --ttl 1800 +lms load qwen-4b-instruct-2507 --parallel 2 --identifier "qwen-4" --ttl 1800 \ No newline at end of file diff --git a/src/config.yaml b/src/config.yaml index f50e1aa..43b463b 100644 --- a/src/config.yaml +++ b/src/config.yaml @@ -13,8 +13,8 @@ models: ingestion: data_dir: "/home/cosmic/DnD" db_path: "./data/dmv.db" - active_llms: 10 - parallel_requests_per_llm: 4 + active_llms: 5 + parallel_requests_per_llm: 2 chunk_size: 800 chunk_overlap: 100 embedding_batch_size: 32 diff --git a/src/ingest.py b/src/ingest.py index c50f4f5..19a53b6 100644 --- a/src/ingest.py +++ b/src/ingest.py @@ -1,12 +1,13 @@ -import turso from concurrent.futures import ThreadPoolExecutor, as_completed -from pathlib import Path from datetime import datetime +from pathlib import Path +from typing import Any, Dict, List + import dspy +import turso from langchain_community.document_loaders import TextLoader from langchain_text_splitters import RecursiveCharacterTextSplitter from tqdm import tqdm -from typing import List, Dict, Any from config_loader import load_config from embedding import LocalLMEmbeddings @@ -80,7 +81,10 @@ def enrich_chunks(chunks: list) -> list: lm_index = idx % ACTIVE_LLMS try: - with dspy.context(lm=dspy.LM(model=f"{MODEL_BASE}{lm_index}", api_base=API_BASE + API_VERSION), chat_template_kwargs={"enable_thinking": False}): + with dspy.context( + lm=dspy.LM(model=f"{MODEL_BASE}{lm_index}", api_base=API_BASE + API_VERSION), + chat_template_kwargs={"enable_thinking": False}, + ): response = IngestionAgent().ingest(note=chunk.page_content) # This is now an object, not a string! @@ -94,7 +98,7 @@ def enrich_chunks(chunks: list) -> list: return (idx, chunk) enriched_results = [] - with ThreadPoolExecutor(max_workers=PARALLEL_REQUESTS_PER_LLM*ACTIVE_LLMS) as executor: + with ThreadPoolExecutor(max_workers=PARALLEL_REQUESTS_PER_LLM * ACTIVE_LLMS) as executor: # Wrap chunks in enumerate to keep track of order futures = [executor.submit(process_single_chunk, (i, c)) for i, c in enumerate(chunks)] @@ -379,4 +383,3 @@ def main(): if __name__ == "__main__": main() - diff --git a/src/retrieve.py b/src/retrieve.py index cd4e00f..2882a21 100644 --- a/src/retrieve.py +++ b/src/retrieve.py @@ -1,10 +1,10 @@ -import sys -import dspy import logging -from dspy.utils.callback import BaseCallback - +import sys from logging.handlers import RotatingFileHandler +import dspy +from dspy.utils.callback import BaseCallback + from config_loader import load_config from experts.retrieval_agent import DnDRAG