diff --git a/.gitignore b/.gitignore
index 3d689fb..9d9fc24 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 data/*
+*.log
 
 # Python-generated files
 __pycache__/
diff --git a/TODO b/TODO
index de347e3..e2429c2 100644
--- a/TODO
+++ b/TODO
@@ -1,8 +1,10 @@
-Read File Tool for Retrieve Agent
+---Read File Tool for Retrieve Agent---
 
-Easy Config of system prompts
+---Easy Config of system prompts---
 
 examples into prompts & better prompts
 
+LMS CLI script to load multiple models and have each model accept multiple parallel inferences
+
 context engineering, - only include vector hits that are x distance?
 
 AI in the middle - make the ai generate the string for vector search
@@ -19,7 +21,3 @@ QA specific embedding models?
 
 Evaluation metrics, how good is it doing? rate my response!?
 
-examples into prompts & better prompts
-
-common model attributes - temp & top-k
-
diff --git a/load_ingestion_llms.sh b/load_ingestion_llms.sh
new file mode 100755
index 0000000..82f40eb
--- /dev/null
+++ b/load_ingestion_llms.sh
@@ -0,0 +1,10 @@
+lms load qwen/qwen3.5-4b --parallel 4 --identifier "qwen-0" --ttl 1800
+lms load qwen/qwen3.5-4b --parallel 4 --identifier "qwen-1" --ttl 1800
+lms load qwen/qwen3.5-4b --parallel 4 --identifier "qwen-2" --ttl 1800
+lms load qwen/qwen3.5-4b --parallel 4 --identifier "qwen-3" --ttl 1800
+lms load qwen/qwen3.5-4b --parallel 4 --identifier "qwen-4" --ttl 1800
+lms load qwen/qwen3.5-4b --parallel 4 --identifier "qwen-5" --ttl 1800
+lms load qwen/qwen3.5-4b --parallel 4 --identifier "qwen-6" --ttl 1800
+lms load qwen/qwen3.5-4b --parallel 4 --identifier "qwen-7" --ttl 1800
+lms load qwen/qwen3.5-4b --parallel 4 --identifier "qwen-8" --ttl 1800
+lms load qwen/qwen3.5-4b --parallel 4 --identifier "qwen-9" --ttl 1800
\ No newline at end of file
diff --git a/src/config.yaml b/src/config.yaml
index 377153a..71d9146 100644
--- a/src/config.yaml
+++ b/src/config.yaml
@@ -5,7 +5,7 @@ api:
 
 # --- Model Settings ---
 models:
-  enrich: "lm_studio/qwen/qwen3-8b"
+  enrich: "lm_studio/qwen-"
   embedding: "text-embedding-qwen3-embedding-8b"
   retrieval: "lm_studio/qwen/qwen3-30b-a3b-2507"
 
@@ -13,13 +13,26 @@ models:
 ingestion:
   data_dir: "/home/devin/DnD"
   db_path: "./data/dmv.db"
-  max_workers: 8
+  active_llms: 10
+  parallel_requests_per_llm: 4
   chunk_size: 800
   chunk_overlap: 100
   embedding_batch_size: 32
   time_file_location: "./data/time_file.txt"
 
-# --- Retrieval Settings ---
-retrieval:
-  top_k: 4
-  context_limit: 10000 # Max characters from full file context
\ No newline at end of file
+# --- Agent Settings ---
+ingestion_agent:
+  ingestion_signature: |
+    You are an expert Dungeon Master's assistant.
+    Analyze the provided notes and extract a concise synopsis and relevant metadata.
+    synopsis = A one-sentence summary of the document.
+    tags = Relevant tags (NPCs, Locations, Items, Plot Points).
+    entities = A list of key names of people, places, or factions.
+    "note -> synopsis:str, tags: list[str], entities: list[str]"
+
+retrieval_agent:
+  retrieval_signature: |
+    You are an expert Dungeon Master's assistant.
+    Given the context and the question, answer the question.
+    Do not make things up; base all of your answers on the context.
+    Always cite your sources.
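Both agent prompts now live in src/config.yaml and reach the code through config_loader.load_config, a module this diff depends on but does not touch. For orientation, a minimal sketch of what such a loader could look like; the config path and the use of yaml.safe_load are assumptions, not taken from the repo:

    # Hypothetical sketch of src/config_loader.py (not part of this diff).
    import yaml
    from pathlib import Path

    def load_config(path: str | None = None) -> dict:
        """Read the shared YAML config into a plain dict."""
        cfg_path = Path(path) if path else Path(__file__).parent / "config.yaml"
        with open(cfg_path) as fh:
            return yaml.safe_load(fh)
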
diff --git a/src/embedding.py b/src/embedding.py
index d7f0a52..8faac55 100644
--- a/src/embedding.py
+++ b/src/embedding.py
@@ -1,12 +1,16 @@
 import requests
 from langchain_core.embeddings import Embeddings
+from config_loader import load_config
 
+CFG = load_config()
+API_BASE = CFG["api"]["base_url"]
+API_VERSION = CFG["api"]["api_version"]
 
 class LocalLMEmbeddings(Embeddings):
     def __init__(
-        self, model: str, base_url: str = "http://192.168.0.49:1234", batch_size: int = 32
+        self, model: str, base_url: str = API_BASE, batch_size: int = 32
     ):
-        self.url = f"{base_url}/v1/embeddings"
+        self.url = f"{base_url}/{API_VERSION}/embeddings"
         self.model = model
         self.batch_size = batch_size
 
@@ -27,7 +31,7 @@ class LocalLMEmbeddings(Embeddings):
         return [[] for _ in input_texts]
 
     def embed_documents(self, texts: list[str]) -> list[list[float]]:
-        """Splits 500+ chunks into batches of 32 and processes them."""
+        """Splits chunks into batches of `batch_size` and processes them."""
 
         all_embeddings = []
         for i in range(0, len(texts), self.batch_size):
diff --git a/src/experts/ingestion_agent.py b/src/experts/ingestion_agent.py
index 960f74f..b71b541 100644
--- a/src/experts/ingestion_agent.py
+++ b/src/experts/ingestion_agent.py
@@ -1,21 +1,16 @@
 import dspy
 from typing import List
+from config_loader import load_config
+
+CFG = load_config()
+INGESTION_CONFIG = CFG["ingestion_agent"]
 
 class IngestionSignature(dspy.Signature):
-    """You are an expert Dungeon Master's assistant.
-    Analyze the provided notes and extract a concise synopsis and relevant metadata.
-    synopsis = A one-sentence summary of the document.
-    tags = Relevant tags (NPCs, Locations, Items, Plot Points).
-    entities = Key names of people, places, or factions.
-    "note -> synopsis:str, tags: list[str], entities: list[str]"
-    /no_think
-    """
+    # An f-string in class-body position is not a real docstring, so dspy would
+    # see no instructions; assign __doc__ explicitly to load them from config.
+    __doc__ = INGESTION_CONFIG["ingestion_signature"]
 
     note: str = dspy.InputField(desc="The DM notes or session recap content.")
     answer: dict[str, str | List] = dspy.OutputField(desc="The metadata dictionary with the keys: synopsis, tags, entities.")
-
-
 class IngestionAgent(dspy.Module):
     def __init__(self):
         self.ingest = dspy.Predict(IngestionSignature)
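With the instructions coming from config at import time, an equivalent construction that sidesteps the docstring machinery entirely is dspy's functional Signature form, which takes the field spec and the instructions as plain strings. A sketch, reusing the spec string that config.yaml already carries:

    # Functional equivalent of the class-based signature above.
    import dspy
    from config_loader import load_config

    INGESTION_CONFIG = load_config()["ingestion_agent"]

    IngestionSignature = dspy.Signature(
        "note -> synopsis: str, tags: list[str], entities: list[str]",
        INGESTION_CONFIG["ingestion_signature"],  # instructions string from YAML
    )
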
- """ + f"{RETRIEVAL_CONFIG["retrieval_signature"]}" context = dspy.InputField( desc="Relevant chunks and metadata from the campaign notes." @@ -49,7 +47,11 @@ class DnDRAG(dspy.Module): base_url=API_BASE, batch_size=1, # we only send 1 question at a time. ) - self.generate_answer = dspy.ChainOfThought(DnDContextQA) + # Tools exposed to the ReAct loop + self.tools = [ + self.load_file + ] + self.generate_answer = dspy.ReAct(signature=DnDContextQA,tools=self.tools) def forward(self, question): # Use Turso to retrieve relevant notes @@ -73,12 +75,23 @@ tags: {tags}, entities: {entities} {content} """) - - print('Closest embedding hits') - for part in context_parts: - print(part) + + # print('Closest embedding hits') + # for part in context_parts: + # print(part) context = "\n\n".join(context_parts) prediction = self.generate_answer(context=context, question=question) return dspy.Prediction(answer=prediction.answer, context=context) + + def load_file(self, file_path) -> str | None: + """Load and return specified file.""" + if os.path.exists(file_path): + try: + with open(file_path) as file: + return file.read() + except Exception: + return None + else: + return None \ No newline at end of file diff --git a/src/ingest.py b/src/ingest.py index 9188013..c50f4f5 100644 --- a/src/ingest.py +++ b/src/ingest.py @@ -19,7 +19,9 @@ MODEL_BASE = CFG["models"]["enrich"] EMBEDDING_MODEL = CFG["models"]["embedding"] API_BASE = CFG["api"]["base_url"] API_VERSION = CFG["api"]["api_version"] -MAX_WORKERS = CFG["ingestion"]["max_workers"] +# MAX_WORKERS = CFG["ingestion"]["max_workers"] +ACTIVE_LLMS = CFG["ingestion"]["active_llms"] +PARALLEL_REQUESTS_PER_LLM = CFG["ingestion"]["parallel_requests_per_llm"] CHUNK_SIZE = CFG["ingestion"]["chunk_size"] CHUNK_OVERLAP = CFG["ingestion"]["chunk_overlap"] EMBEDDING_BATCH_SIZE = CFG["ingestion"]["embedding_batch_size"] @@ -75,10 +77,10 @@ def chunk_documents(docs): def enrich_chunks(chunks: list) -> list: def process_single_chunk(indexed_chunk): idx, chunk = indexed_chunk - lm_index = idx % 8 + lm_index = idx % ACTIVE_LLMS try: - with dspy.context(lm=dspy.LM(model=MODEL_BASE, api_base=API_BASE + API_VERSION)): + with dspy.context(lm=dspy.LM(model=f"{MODEL_BASE}{lm_index}", api_base=API_BASE + API_VERSION), chat_template_kwargs={"enable_thinking": False}): response = IngestionAgent().ingest(note=chunk.page_content) # This is now an object, not a string! 
diff --git a/src/ingest.py b/src/ingest.py
index 9188013..c50f4f5 100644
--- a/src/ingest.py
+++ b/src/ingest.py
@@ -19,7 +19,9 @@
 MODEL_BASE = CFG["models"]["enrich"]
 EMBEDDING_MODEL = CFG["models"]["embedding"]
 API_BASE = CFG["api"]["base_url"]
 API_VERSION = CFG["api"]["api_version"]
-MAX_WORKERS = CFG["ingestion"]["max_workers"]
+ACTIVE_LLMS = CFG["ingestion"]["active_llms"]
+PARALLEL_REQUESTS_PER_LLM = CFG["ingestion"]["parallel_requests_per_llm"]
 CHUNK_SIZE = CFG["ingestion"]["chunk_size"]
 CHUNK_OVERLAP = CFG["ingestion"]["chunk_overlap"]
 EMBEDDING_BATCH_SIZE = CFG["ingestion"]["embedding_batch_size"]
@@ -75,10 +77,10 @@ def chunk_documents(docs):
 
 def enrich_chunks(chunks: list) -> list:
     def process_single_chunk(indexed_chunk):
         idx, chunk = indexed_chunk
-        lm_index = idx % 8
+        lm_index = idx % ACTIVE_LLMS
 
         try:
-            with dspy.context(lm=dspy.LM(model=MODEL_BASE, api_base=API_BASE + API_VERSION)):
+            # chat_template_kwargs belongs on the LM itself, not on dspy.context,
+            # where it would sit as an unused settings key and be silently ignored.
+            with dspy.context(lm=dspy.LM(model=f"{MODEL_BASE}{lm_index}", api_base=API_BASE + API_VERSION, chat_template_kwargs={"enable_thinking": False})):
                 response = IngestionAgent().ingest(note=chunk.page_content)
                 # This is now an object, not a string!
@@ -92,7 +94,7 @@ def enrich_chunks(chunks: list) -> list:
         return (idx, chunk)
 
     enriched_results = []
-    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
+    with ThreadPoolExecutor(max_workers=PARALLEL_REQUESTS_PER_LLM * ACTIVE_LLMS) as executor:
         # Wrap chunks in enumerate to keep track of order
         futures = [executor.submit(process_single_chunk, (i, c)) for i, c in enumerate(chunks)]
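The executor is sized so every loaded model instance stays saturated: active_llms * parallel_requests_per_llm threads, with chunks dealt round-robin by idx % ACTIVE_LLMS onto the qwen-0 through qwen-9 identifiers registered by load_ingestion_llms.sh. A small sketch of the distribution with the config values above:

    # How 100 chunks fan out across the 10 LM Studio instances.
    ACTIVE_LLMS = 10                # ingestion.active_llms
    PARALLEL_REQUESTS_PER_LLM = 4   # ingestion.parallel_requests_per_llm

    workers = ACTIVE_LLMS * PARALLEL_REQUESTS_PER_LLM  # 40 threads in the pool
    assignments = [f"qwen-{i % ACTIVE_LLMS}" for i in range(100)]
    assert assignments.count("qwen-0") == 10  # each instance gets an even share
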
diff --git a/src/retrieve.py b/src/retrieve.py
index f7cc2af..cd4e00f 100644
--- a/src/retrieve.py
+++ b/src/retrieve.py
@@ -1,16 +1,93 @@
 import sys
 import dspy
-# import turso
+import logging
+from dspy.utils.callback import BaseCallback
+
+from logging.handlers import RotatingFileHandler
 
 from config_loader import load_config
-from experts.dnd_agent import DnDRAG
+from experts.retrieval_agent import DnDRAG
 
 CFG = load_config()
 RETRIEVE_MODEL = CFG["models"]["retrieval"]
 API_BASE = CFG["api"]["base_url"]
 API_VERSION = CFG["api"]["api_version"]
 
+class CallbackHandler(BaseCallback):
+    """Custom callback class for logging agent interactions."""
+
+    def __init__(self, logger):
+        """Initialize the callback with a logger instance."""
+        super().__init__()
+        self.logger = logger
+
+    def on_module_end(self, call_id, outputs, exception):
+        """Handle module end events for logging."""
+        step = "Reasoning" if self._is_reasoning_output(outputs) else "Acting"
+        self.logger.debug(f"=== {step} Step ===")
+        for k, v in outputs.items():
+            self.logger.debug(f"  {k}: {v}")
+
+    def on_lm_start(self, call_id, instance, inputs):
+        """Handle language model start events for logging."""
+        self.logger.debug(f"LM is called with inputs: {inputs}")
+
+    def on_tool_start(self, call_id, instance, inputs):
+        """Handle tool start events for logging."""
+        self.logger.debug(f"Tool {instance} called with inputs: {inputs}")
+
+    def on_tool_end(self, call_id, outputs, exception):
+        """Handle tool end events for logging."""
+        self.logger.debug(f"Tool finished with outputs: {outputs}")
+
+    def on_lm_end(self, call_id, outputs, exception):
+        """Handle language model end events for logging."""
+        self.logger.debug(f"LM finished with outputs: {outputs}")
+
+    def _is_reasoning_output(self, outputs):
+        return any(k.startswith("Thought") for k in outputs)
+
+
+def setup_logging():
+    """Set up console and rotating-file logging."""
+    # Create a custom logger
+    logger = logging.getLogger(__name__)
+
+    # Set the minimum level for the logger
+    logger.setLevel(logging.DEBUG)
+
+    # Create a console handler
+    console_handler = logging.StreamHandler()
+    console_handler.setLevel(logging.INFO)
+
+    # Create a file handler that rotates at 5 MB
+    file_handler = RotatingFileHandler(
+        "dmv.log", maxBytes=5 * 1024 * 1024, backupCount=3
+    )
+    file_handler.setLevel(logging.DEBUG)
+
+    # Create a formatter
+    formatter = logging.Formatter(
+        "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+    )
+
+    # Set the formatter for both handlers
+    console_handler.setFormatter(formatter)
+    file_handler.setFormatter(formatter)
+
+    # Add the handlers to the logger
+    logger.addHandler(console_handler)
+    logger.addHandler(file_handler)
+
+    return logger
+
+
 def main():
+    logger = setup_logging()
+    logger.debug("main application started")
+
+    # Add verbose callback
+    dspy.configure(verbose_errors=True)
+    dspy.configure(callbacks=[CallbackHandler(logger)])
+
     # 1. Set up the LLM
-    print("šŸš€ Initializing Qwen-8B via LM Studio...")
+    print(f"šŸš€ Initializing {RETRIEVE_MODEL} via LM Studio...")
     lm = dspy.LM(RETRIEVE_MODEL, api_base=API_BASE + API_VERSION)
@@ -32,7 +109,7 @@ def main():
         query = input("šŸ“ Query: ").strip()
 
         # Exit conditions
-        if query.lower() in ["exit", "quit", "q"]:
+        if query.lower() in ["exit", "quit", "q", "bye"]:
             print("Farewell, traveler. Good luck on your quest!")
             break
 
@@ -47,11 +124,10 @@
             print(response.answer)
 
     except KeyboardInterrupt:
-        print("\n\nExiting... See you next session!")
+        print("\n\nRude?!.... Exiting...")
         sys.exit(0)
     except Exception as e:
         print(f"\nāš ļø An error occurred: {e}")
 
-
 if __name__ == "__main__":
-    main()
+    main()
\ No newline at end of file
diff --git a/test_no_think.py b/test_no_think.py
new file mode 100644
index 0000000..98f824e
--- /dev/null
+++ b/test_no_think.py
@@ -0,0 +1,25 @@
+import dspy
+
+base_url = "http://framework.tawny-bellatrix.ts.net:1234"
+model_name = "lm_studio/qwen-0"
+
+
+lm = dspy.LM(
+    model=model_name,
+    api_base=f"{base_url}/v1/"
+)
+dspy.configure(lm=lm)
+
+
+# question = "How can I use the dspy framework to add 'chat_template_kwargs={\"enable_thinking\": False}' to my API call to LM Studio? I know it uses litellm under the hood"
+# question = "Hi there, do you have a name? If not, I want you to name yourself."
+question = "How long would it take light to travel from the sun to the earth? /no_think"
+
+# Plain call; the commented kwargs below are alternate attempts to inject enable_thinking
+response = lm(
+    messages=[{"role": "user", "content": question}]
+    # extra_body={"enable_thinking": False}
+    # enable_thinking=False
+)
+
+print(response)
\ No newline at end of file
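test_no_think.py is still probing how to switch off Qwen3's thinking blocks. Two avenues seem worth trying, neither verified against LM Studio here: the /no_think tag already in the prompt, and forwarding chat_template_kwargs through LiteLLM's extra_body, which some OpenAI-compatible servers (vLLM, for one) honor; whether LM Studio does is an assumption:

    import dspy

    # Unverified sketch: dspy.LM forwards extra kwargs to litellm.completion,
    # and extra_body rides along in the request to the OpenAI-compatible server.
    # If LM Studio ignores chat_template_kwargs, fall back to the /no_think tag.
    lm = dspy.LM(
        model="lm_studio/qwen-0",
        api_base="http://framework.tawny-bellatrix.ts.net:1234/v1/",
        extra_body={"chat_template_kwargs": {"enable_thinking": False}},
    )
    print(lm(messages=[{"role": "user", "content": "Name the closest star to the sun."}]))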