pull

2026-03-04 09:11:53 +00:00
parent e24c0cdf33
commit bbaebf1f70
2 changed files with 5 additions and 84 deletions
@@ -19,3 +19,7 @@ QA specific embedding models?

 Evaluation metrics, how good is it doing? 
    rate my response!? 
+add examples to prompts & write better prompts
+
+common model parameters - temperature & top-k
+
@@ -1,83 +0,0 @@
-import turso
-
-from config_loader import load_config
-from embedding import LocalLMEmbeddings
-
-CFG = load_config()
-EMBEDDING_MODEL = CFG["models"]["embedding"]
-API_BASE = CFG["api"]["base_url"]
-EMBEDDING_BATCH_SIZE=CFG["ingestion"]["embedding_batch_size"]
-
-con = turso.connect("dmv.db")
-cur = con.cursor()
-cur.execute("""
-CREATE TABLE IF NOT EXISTS notes (
-    id INTEGER PRIMARY KEY AUTOINCREMENT,
-    file_path TEXT NOT NULL,
-    file_name TEXT NOT NULL,
-    chunk_data TEXT,
-    embedding F32_BLOB(4096),
-    timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
-  )""")
-
-cur.execute("CREATE INDEX IF NOT EXISTS idx_embedding ON notes(embedding);")
-# OR, if using libsql vector extension:
-# cur.execute("CREATE INDEX IF NOT EXISTS idx_embedding_vector ON notes(libsql_vector_idx(embedding));")
-
-embeddings_model = LocalLMEmbeddings(
-    model=EMBEDDING_MODEL,
-    base_url=API_BASE,
-    batch_size=EMBEDDING_BATCH_SIZE,
-)
-
-texts_to_embed = [
-"The quick brown fox jumped over the lazy dog",
-"Tiffany is my wife, she writes books and watches films",
-"Mazie and Bella are my labradour dogs that are two and three years old, they are white and have a pink nose",
-"The movie Titanic is about a love story on a big boat. but the boat sinks in the end"
-]
-
-reply = embeddings_model._post_request(texts_to_embed)
-zipped = zip(texts_to_embed,reply)
-
-
-# Instead of looping and executing one INSERT at a time
-# Batch insert using multiple VALUES
-batch_insert_sql = """
-INSERT INTO notes (file_path, file_name, chunk_data, embedding)
-VALUES (?, ?, ?, vector32(?))
-"""
-
-# Prepare batch data
-batch_data = []
-for number, (text, embed) in enumerate(zipped):
-    batch_data.append((
-        f"path/to/file_{number}",
-        f"file_{number}",
-        text,
-        str(embed)  # format as comma-separated string
-    ))
-
-cur.executemany(batch_insert_sql, batch_data)
-con.commit()
-
-query_string = ["tell me about a film on a ship"]
-query_reply = embeddings_model._post_request(query_string)
-
-
-cur.execute(f"""
-SELECT id,
-       file_path,
-       file_name,
-       chunk_data, 
-       vector_distance_cos(embedding, vector32('{query_reply[0]}')) AS distance
-FROM notes
-ORDER BY distance ASC;
-""")
-       # vector_extract(embedding)
-
-print(query_string[0])
-
-rows = cur.fetchall()
-for row in rows:
-    print(row)