feat: 🔒 Starting the refactor

This commit is contained in:
2026-03-22 08:18:49 +00:00
parent 90c88b068b
commit 986c8103c4
10 changed files with 375 additions and 86 deletions
+5 -2
View File
@@ -12,6 +12,7 @@ from tqdm import tqdm
from config_loader import load_config
from embedding import LocalLMEmbeddings
from experts.ingestion_agent import IngestionAgent
from toon_utils import save_entities_from_chunks
CFG = load_config()
DATA_DIR = CFG["ingestion"]["data_dir"]
@@ -206,7 +207,7 @@ def embed_chunks(chunks: List[Any], batch_size: int = EMBEDDING_BATCH_SIZE) -> L
{
"file_path": normalize_path(chunk.metadata.get("full_path", "unknown")),
"file_name": chunk.metadata.get("source", "unknown"),
"chunk_data": content,
"chunk_data": chunk.page_content,
"synopsis": "Embedding failed",
"tags": ["error"],
"entities": [],
@@ -250,7 +251,7 @@ def save_to_db(chunk_dicts):
entry["chunk_data"],
entry["synopsis"],
",".join(entry["tags"]), # Store as comma-separated string
",".join(entry["entities"]), # Store as comma-separated string
",".join(e.get("name", str(e)) if isinstance(e, dict) else str(e) for e in entry["entities"]), # Store as comma-separated string
embedding_str,
entry["timestamp"],
)
@@ -370,6 +371,8 @@ def main():
embedded_chunks = embed_chunks(enriched_chunks)
print(f"Embedded {len(embedded_chunks)} chunks.")
save_entities_from_chunks(embedded_chunks)
# Remove existing rows from the notes table that match the file path.
delete_from_db(embedded_chunks)