feat: AI-powered enhanced queries to get better results

This commit is contained in:
2026-03-07 11:08:21 +00:00
parent 58f20856fd
commit 26c0049fd8
6 changed files with 73 additions and 37 deletions
+6 -7
View File
@@ -16,6 +16,7 @@ from experts.ingestion_agent import IngestionAgent
CFG = load_config()
DATA_DIR = CFG["ingestion"]["data_dir"]
DATABASE_PATH = CFG["ingestion"]["db_path"]
DATABASE_NAME = CFG["ingestion"]["db_name"]
MODEL_BASE = CFG["models"]["enrich"]
EMBEDDING_MODEL = CFG["models"]["embedding"]
API_BASE = CFG["api"]["base_url"]
@@ -139,13 +140,10 @@ def embed_chunks(chunks: List[Any], batch_size: int = EMBEDDING_BATCH_SIZE) -> L
# Process chunks in batches
for i in tqdm(range(0, total_chunks, batch_size), desc="Embedding batches"):
batch = chunks[i : i + batch_size]
print(f"🚀 Processing batch {(i // batch_size) + 1} (Size: {len(batch)})...")
batch_content = [chunk.page_content for chunk in batch]
try:
# Use model's batched embedding method
# batch_embeddings = embeddings_model.embed_query(batch_content)
batch_embeddings = embeddings_model.embed_documents(batch_content)
# Process each chunk in the batch
for j, (chunk, embedding) in enumerate(zip(batch, batch_embeddings)):
# Extract metadata
@@ -228,7 +226,7 @@ def save_to_db(chunk_dicts):
Each dict maps to a row in the 'notes' table.
"""
print("connecting to db")
con = turso.connect(DATABASE_PATH)
con = turso.connect(DATABASE_PATH + DATABASE_NAME)
print("opening cursor")
cur = con.cursor()
@@ -267,7 +265,8 @@ def save_to_db(chunk_dicts):
def create_db():
con = turso.connect(DATABASE_PATH)
Path(DATABASE_PATH).mkdir(exist_ok=True)
con = turso.connect(DATABASE_PATH + DATABASE_NAME)
cur = con.cursor()
cur.execute("""
@@ -334,7 +333,7 @@ def delete_from_db(embedded_chunks):
print(f"Deleting existing rows for {len(file_paths)} file(s)")
con = turso.connect(DATABASE_PATH)
con = turso.connect(DATABASE_PATH + DATABASE_NAME)
cur = con.cursor()
# Use a single DELETE statement with IN clause for efficiency