chore: 🧹 llm running tweaks
This commit is contained in:
+5
-10
@@ -1,10 +1,5 @@
|
|||||||
lms load qwen-4b-instruct-2507 --parallel 4 --identifier "qwen-0" --ttl 1800
|
lms load qwen-4b-instruct-2507 --parallel 2 --identifier "qwen-0" --ttl 1800
|
||||||
lms load qwen-4b-instruct-2507 --parallel 4 --identifier "qwen-1" --ttl 1800
|
lms load qwen-4b-instruct-2507 --parallel 2 --identifier "qwen-1" --ttl 1800
|
||||||
lms load qwen-4b-instruct-2507 --parallel 4 --identifier "qwen-2" --ttl 1800
|
lms load qwen-4b-instruct-2507 --parallel 2 --identifier "qwen-2" --ttl 1800
|
||||||
lms load qwen-4b-instruct-2507 --parallel 4 --identifier "qwen-3" --ttl 1800
|
lms load qwen-4b-instruct-2507 --parallel 2 --identifier "qwen-3" --ttl 1800
|
||||||
lms load qwen-4b-instruct-2507 --parallel 4 --identifier "qwen-4" --ttl 1800
|
lms load qwen-4b-instruct-2507 --parallel 2 --identifier "qwen-4" --ttl 1800
|
||||||
lms load qwen-4b-instruct-2507 --parallel 4 --identifier "qwen-5" --ttl 1800
|
|
||||||
lms load qwen-4b-instruct-2507 --parallel 4 --identifier "qwen-6" --ttl 1800
|
|
||||||
lms load qwen-4b-instruct-2507 --parallel 4 --identifier "qwen-7" --ttl 1800
|
|
||||||
lms load qwen-4b-instruct-2507 --parallel 4 --identifier "qwen-8" --ttl 1800
|
|
||||||
lms load qwen-4b-instruct-2507 --parallel 4 --identifier "qwen-9" --ttl 1800
|
|
||||||
+2
-2
@@ -13,8 +13,8 @@ models:
|
|||||||
ingestion:
|
ingestion:
|
||||||
data_dir: "/home/cosmic/DnD"
|
data_dir: "/home/cosmic/DnD"
|
||||||
db_path: "./data/dmv.db"
|
db_path: "./data/dmv.db"
|
||||||
active_llms: 10
|
active_llms: 5
|
||||||
parallel_requests_per_llm: 4
|
parallel_requests_per_llm: 2
|
||||||
chunk_size: 800
|
chunk_size: 800
|
||||||
chunk_overlap: 100
|
chunk_overlap: 100
|
||||||
embedding_batch_size: 32
|
embedding_batch_size: 32
|
||||||
|
|||||||
+9
-6
@@ -1,12 +1,13 @@
|
|||||||
import turso
|
|
||||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||||
from pathlib import Path
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Dict, List
|
||||||
|
|
||||||
import dspy
|
import dspy
|
||||||
|
import turso
|
||||||
from langchain_community.document_loaders import TextLoader
|
from langchain_community.document_loaders import TextLoader
|
||||||
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
from typing import List, Dict, Any
|
|
||||||
|
|
||||||
from config_loader import load_config
|
from config_loader import load_config
|
||||||
from embedding import LocalLMEmbeddings
|
from embedding import LocalLMEmbeddings
|
||||||
@@ -80,7 +81,10 @@ def enrich_chunks(chunks: list) -> list:
|
|||||||
lm_index = idx % ACTIVE_LLMS
|
lm_index = idx % ACTIVE_LLMS
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with dspy.context(lm=dspy.LM(model=f"{MODEL_BASE}{lm_index}", api_base=API_BASE + API_VERSION), chat_template_kwargs={"enable_thinking": False}):
|
with dspy.context(
|
||||||
|
lm=dspy.LM(model=f"{MODEL_BASE}{lm_index}", api_base=API_BASE + API_VERSION),
|
||||||
|
chat_template_kwargs={"enable_thinking": False},
|
||||||
|
):
|
||||||
response = IngestionAgent().ingest(note=chunk.page_content)
|
response = IngestionAgent().ingest(note=chunk.page_content)
|
||||||
|
|
||||||
# This is now an object, not a string!
|
# This is now an object, not a string!
|
||||||
@@ -94,7 +98,7 @@ def enrich_chunks(chunks: list) -> list:
|
|||||||
return (idx, chunk)
|
return (idx, chunk)
|
||||||
|
|
||||||
enriched_results = []
|
enriched_results = []
|
||||||
with ThreadPoolExecutor(max_workers=PARALLEL_REQUESTS_PER_LLM*ACTIVE_LLMS) as executor:
|
with ThreadPoolExecutor(max_workers=PARALLEL_REQUESTS_PER_LLM * ACTIVE_LLMS) as executor:
|
||||||
# Wrap chunks in enumerate to keep track of order
|
# Wrap chunks in enumerate to keep track of order
|
||||||
futures = [executor.submit(process_single_chunk, (i, c)) for i, c in enumerate(chunks)]
|
futures = [executor.submit(process_single_chunk, (i, c)) for i, c in enumerate(chunks)]
|
||||||
|
|
||||||
@@ -379,4 +383,3 @@ def main():
|
|||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|
||||||
|
|||||||
+4
-4
@@ -1,10 +1,10 @@
|
|||||||
import sys
|
|
||||||
import dspy
|
|
||||||
import logging
|
import logging
|
||||||
from dspy.utils.callback import BaseCallback
|
import sys
|
||||||
|
|
||||||
from logging.handlers import RotatingFileHandler
|
from logging.handlers import RotatingFileHandler
|
||||||
|
|
||||||
|
import dspy
|
||||||
|
from dspy.utils.callback import BaseCallback
|
||||||
|
|
||||||
from config_loader import load_config
|
from config_loader import load_config
|
||||||
from experts.retrieval_agent import DnDRAG
|
from experts.retrieval_agent import DnDRAG
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user