feat: ✨ AI-powered enhanced queries to get better results
This commit is contained in:
+6
-7
@@ -16,6 +16,7 @@ from experts.ingestion_agent import IngestionAgent
|
||||
CFG = load_config()
|
||||
DATA_DIR = CFG["ingestion"]["data_dir"]
|
||||
DATABASE_PATH = CFG["ingestion"]["db_path"]
|
||||
+ DATABASE_NAME = CFG["ingestion"]["db_name"]
|
||||
MODEL_BASE = CFG["models"]["enrich"]
|
||||
EMBEDDING_MODEL = CFG["models"]["embedding"]
|
||||
API_BASE = CFG["api"]["base_url"]
|
||||
@@ -139,13 +140,10 @@ def embed_chunks(chunks: List[Any], batch_size: int = EMBEDDING_BATCH_SIZE) -> L
|
||||
# Process chunks in batches
|
||||
for i in tqdm(range(0, total_chunks, batch_size), desc="Embedding batches"):
|
||||
batch = chunks[i : i + batch_size]
|
||||
print(f"🚀 Processing batch {(i // batch_size) + 1} (Size: {len(batch)})...")
|
||||
batch_content = [chunk.page_content for chunk in batch]
|
||||
|
||||
try:
|
||||
# Use model's batched embedding method
|
||||
# batch_embeddings = embeddings_model.embed_query(batch_content)
|
||||
batch_embeddings = embeddings_model.embed_documents(batch_content)
|
||||
|
||||
# Process each chunk in the batch
|
||||
for j, (chunk, embedding) in enumerate(zip(batch, batch_embeddings)):
|
||||
# Extract metadata
|
||||
@@ -228,7 +226,7 @@ def save_to_db(chunk_dicts):
|
||||
Each dict maps to a row in the 'notes' table.
|
||||
"""
|
||||
print("connecting to db")
|
||||
- con = turso.connect(DATABASE_PATH)
|
||||
+ con = turso.connect(DATABASE_PATH + DATABASE_NAME)
|
||||
print("opening cursor")
|
||||
cur = con.cursor()
|
||||
|
||||
@@ -267,7 +265,8 @@ def save_to_db(chunk_dicts):
|
||||
|
||||
|
||||
def create_db():
|
||||
- con = turso.connect(DATABASE_PATH)
|
||||
+ Path(DATABASE_PATH).mkdir(exist_ok=True)
|
||||
+ con = turso.connect(DATABASE_PATH + DATABASE_NAME)
|
||||
cur = con.cursor()
|
||||
|
||||
cur.execute("""
|
||||
@@ -334,7 +333,7 @@ def delete_from_db(embedded_chunks):
|
||||
|
||||
print(f"Deleting existing rows for {len(file_paths)} file(s)")
|
||||
|
||||
- con = turso.connect(DATABASE_PATH)
|
||||
+ con = turso.connect(DATABASE_PATH + DATABASE_NAME)
|
||||
cur = con.cursor()
|
||||
|
||||
# Use a single DELETE statement with IN clause for efficiency
|
||||
|
||||
Reference in New Issue
Block a user