feat: ✨ Working PoC of the Dungeon Masters Vault
This commit is contained in:
@@ -0,0 +1,66 @@
|
||||
# class PrecomputedEmbeddings(Embeddings):
|
||||
# def __init__(self, embeddings: List[List[float]]):
|
||||
# self.embeddings = embeddings # Store all precomputed vectors
|
||||
|
||||
# def embed_documents(self, texts: List[str]) -> List[List[float]]:
|
||||
# return self.embeddings # Return the precomputed ones (order must match!)
|
||||
|
||||
# def embed_query(self, text):
|
||||
# return self.embeddings[0]
|
||||
|
||||
# def embedder(texts: List[str]) -> List[List[float]]:
|
||||
# embeddings = []
|
||||
# base_url = "http://192.168.0.49:1234" # ✅ Add 'http://'
|
||||
# embed_url = f"{base_url}/v1/embeddings"
|
||||
# headers = {"Content-Type": "application/json"}
|
||||
|
||||
# for text in texts:
|
||||
# payload = {
|
||||
# "model": "text-embedding-qwen3-embedding-8b",
|
||||
# "input": text
|
||||
# }
|
||||
|
||||
# try:
|
||||
# response = requests.post(embed_url, json=payload, headers=headers) # ✅ POST not GET
|
||||
# if response.status_code == 200:
|
||||
# data = response.json() # ✅ Parse JSON!
|
||||
# embedding = data["data"][0]["embedding"] # ✅ Extract the actual vector
|
||||
# embeddings.append(embedding)
|
||||
# else:
|
||||
# print(f"❌ Embedding failed for '{text[:30]}...': {response.status_code} - {response.text}")
|
||||
# # Optionally: insert placeholder zeros if you need to continue
|
||||
# # embeddings.append([0.0] * 768) # ← adjust dimension as needed!
|
||||
# except Exception as e:
|
||||
# print(f"⚠️ Exception embedding '{text[:30]}...': {e}")
|
||||
# # embeddings.append([0.0] * 768) # fallback
|
||||
|
||||
# return embeddings
|
||||
|
||||
# def store_chunks_with_embeddings_locally(chunks, db_path="./local_faiss_db"):
|
||||
# """
|
||||
# Stores pre-computed chunks and their embeddings into a local FAISS database.
|
||||
|
||||
# Args:
|
||||
# chunks: list of LangChain Document objects (with page_content and metadata)
|
||||
# Note: embeddings are not a parameter — they are computed internally via embedder(texts) as a list of embedding vectors (list of lists of floats) and must match the length of chunks
|
||||
# db_path: where to save the FAISS index files locally
|
||||
# """
|
||||
|
||||
# texts = [chunk.page_content for chunk in chunks]
|
||||
# embeddings = embedder(texts)
|
||||
# if len(chunks) != len(embeddings):
|
||||
# raise ValueError(f"Mismatch! Got {len(chunks)} chunks but {len(embeddings)} embeddings.")
|
||||
|
||||
# # Create LangChain Document list (we already have this)
|
||||
# documents = chunks # assuming they're already Document objects
|
||||
|
||||
# # Build FAISS vectorstore using precomputed embeddings
|
||||
# # FAISS.from_embeddings() lets us pass our own embeddings + texts
|
||||
# vectorstore = FAISS.from_embeddings(
|
||||
# text_embeddings=list(zip([doc.page_content for doc in documents], embeddings)),
|
||||
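# embedding=PrecomputedEmbeddings(embeddings[0]) # PrecomputedEmbeddings is defined above; NOTE(review): this passes only the first vector, while __init__ is annotated to take the full List[List[float]] — confirm intent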
|
||||
# )
|
||||
|
||||
# # Save to disk
|
||||
# vectorstore.save_local(db_path)
|
||||
# print(f"✅ Successfully stored {len(chunks)} chunks + embeddings into local FAISS DB at '{db_path}'")
|
||||
Reference in New Issue
Block a user