chore: 🧹 llm running tweaks
This commit is contained in:
+5
-10
@@ -1,10 +1,5 @@
|
||||
lms load qwen-4b-instruct-2507 --parallel 4 --identifier "qwen-0" --ttl 1800
|
||||
lms load qwen-4b-instruct-2507 --parallel 4 --identifier "qwen-1" --ttl 1800
|
||||
lms load qwen-4b-instruct-2507 --parallel 4 --identifier "qwen-2" --ttl 1800
|
||||
lms load qwen-4b-instruct-2507 --parallel 4 --identifier "qwen-3" --ttl 1800
|
||||
lms load qwen-4b-instruct-2507 --parallel 4 --identifier "qwen-4" --ttl 1800
|
||||
lms load qwen-4b-instruct-2507 --parallel 4 --identifier "qwen-5" --ttl 1800
|
||||
lms load qwen-4b-instruct-2507 --parallel 4 --identifier "qwen-6" --ttl 1800
|
||||
lms load qwen-4b-instruct-2507 --parallel 4 --identifier "qwen-7" --ttl 1800
|
||||
lms load qwen-4b-instruct-2507 --parallel 4 --identifier "qwen-8" --ttl 1800
|
||||
lms load qwen-4b-instruct-2507 --parallel 4 --identifier "qwen-9" --ttl 1800
|
||||
lms load qwen-4b-instruct-2507 --parallel 2 --identifier "qwen-0" --ttl 1800
|
||||
lms load qwen-4b-instruct-2507 --parallel 2 --identifier "qwen-1" --ttl 1800
|
||||
lms load qwen-4b-instruct-2507 --parallel 2 --identifier "qwen-2" --ttl 1800
|
||||
lms load qwen-4b-instruct-2507 --parallel 2 --identifier "qwen-3" --ttl 1800
|
||||
lms load qwen-4b-instruct-2507 --parallel 2 --identifier "qwen-4" --ttl 1800
|
||||
+2
-2
@@ -13,8 +13,8 @@ models:
|
||||
ingestion:
|
||||
data_dir: "/home/cosmic/DnD"
|
||||
db_path: "./data/dmv.db"
|
||||
active_llms: 10
|
||||
parallel_requests_per_llm: 4
|
||||
active_llms: 5
|
||||
parallel_requests_per_llm: 2
|
||||
chunk_size: 800
|
||||
chunk_overlap: 100
|
||||
embedding_batch_size: 32
|
||||
|
||||
+9
-6
@@ -1,12 +1,13 @@
|
||||
import turso
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List
|
||||
|
||||
import dspy
|
||||
import turso
|
||||
from langchain_community.document_loaders import TextLoader
|
||||
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
||||
from tqdm import tqdm
|
||||
from typing import List, Dict, Any
|
||||
|
||||
from config_loader import load_config
|
||||
from embedding import LocalLMEmbeddings
|
||||
@@ -80,7 +81,10 @@ def enrich_chunks(chunks: list) -> list:
|
||||
lm_index = idx % ACTIVE_LLMS
|
||||
|
||||
try:
|
||||
with dspy.context(lm=dspy.LM(model=f"{MODEL_BASE}{lm_index}", api_base=API_BASE + API_VERSION), chat_template_kwargs={"enable_thinking": False}):
|
||||
with dspy.context(
|
||||
lm=dspy.LM(model=f"{MODEL_BASE}{lm_index}", api_base=API_BASE + API_VERSION),
|
||||
chat_template_kwargs={"enable_thinking": False},
|
||||
):
|
||||
response = IngestionAgent().ingest(note=chunk.page_content)
|
||||
|
||||
# This is now an object, not a string!
|
||||
@@ -94,7 +98,7 @@ def enrich_chunks(chunks: list) -> list:
|
||||
return (idx, chunk)
|
||||
|
||||
enriched_results = []
|
||||
with ThreadPoolExecutor(max_workers=PARALLEL_REQUESTS_PER_LLM*ACTIVE_LLMS) as executor:
|
||||
with ThreadPoolExecutor(max_workers=PARALLEL_REQUESTS_PER_LLM * ACTIVE_LLMS) as executor:
|
||||
# Wrap chunks in enumerate to keep track of order
|
||||
futures = [executor.submit(process_single_chunk, (i, c)) for i, c in enumerate(chunks)]
|
||||
|
||||
@@ -379,4 +383,3 @@ def main():
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
|
||||
+4
-4
@@ -1,10 +1,10 @@
|
||||
import sys
|
||||
import dspy
|
||||
import logging
|
||||
from dspy.utils.callback import BaseCallback
|
||||
|
||||
import sys
|
||||
from logging.handlers import RotatingFileHandler
|
||||
|
||||
import dspy
|
||||
from dspy.utils.callback import BaseCallback
|
||||
|
||||
from config_loader import load_config
|
||||
from experts.retrieval_agent import DnDRAG
|
||||
|
||||
|
||||
Reference in New Issue
Block a user