# 66 lines · 2.9 KiB · Python (file consists entirely of commented-out code)
# class PrecomputedEmbeddings(Embeddings):
#
#     def __init__(self, embeddings: List[List[float]]):
#         self.embeddings = embeddings  # Store all precomputed vectors
#
#     def embed_documents(self, texts: List[str]) -> List[List[float]]:
#         return self.embeddings  # Return the precomputed ones (order must match!)
#
#     def embed_query(self, text):
#         return self.embeddings[0]
# def embedder(texts: List[str]) -> List[List[float]]:
#     embeddings = []
#     base_url = "http://192.168.0.49:1234"  # ✅ Add 'http://'
#     embed_url = f"{base_url}/v1/embeddings"
#     headers = {"Content-Type": "application/json"}
#
#     for text in texts:
#         payload = {
#             "model": "text-embedding-qwen3-embedding-8b",
#             "input": text
#         }
#
#         try:
#             response = requests.post(embed_url, json=payload, headers=headers)  # ✅ POST not GET
#             if response.status_code == 200:
#                 data = response.json()  # ✅ Parse JSON!
#                 embedding = data["data"][0]["embedding"]  # ✅ Extract the actual vector
#                 embeddings.append(embedding)
#             else:
#                 print(f"❌ Embedding failed for '{text[:30]}...': {response.status_code} - {response.text}")
#                 # Optionally: insert placeholder zeros if you need to continue
#                 # embeddings.append([0.0] * 768)  # ← adjust dimension as needed!
#         except Exception as e:
#             print(f"⚠️ Exception embedding '{text[:30]}...': {e}")
#             # embeddings.append([0.0] * 768)  # fallback
#
#     return embeddings
# def store_chunks_with_embeddings_locally(chunks, db_path="./local_faiss_db"):
#     """
#     Stores pre-computed chunks and their embeddings into a local FAISS database.
#
#     Args:
#         chunks: list of LangChain Document objects (with page_content and metadata)
#         db_path: where to save the FAISS index files locally
#
#     Note: embeddings are computed internally via embedder() and must match
#     the length of chunks (they are not passed in as an argument).
#     """
#     texts = [chunk.page_content for chunk in chunks]
#     embeddings = embedder(texts)
#     if len(chunks) != len(embeddings):
#         raise ValueError(f"Mismatch! Got {len(chunks)} chunks but {len(embeddings)} embeddings.")
#
#     # Create LangChain Document list (we already have this)
#     documents = chunks  # assuming they're already Document objects
#
#     # Build FAISS vectorstore using precomputed embeddings
#     # FAISS.from_embeddings() lets us pass our own embeddings + texts
#     vectorstore = FAISS.from_embeddings(
#         text_embeddings=list(zip([doc.page_content for doc in documents], embeddings)),
#         embedding=PrecomputedEmbeddings(embeddings[0])  # NOTE(review): passes a single
#         # vector, but PrecomputedEmbeddings.__init__ expects List[List[float]] —
#         # probably should be `embeddings`; confirm before re-enabling this code
#     )
#
#     # Save to disk
#     vectorstore.save_local(db_path)
#     print(f"✅ Successfully stored {len(chunks)} chunks + embeddings into local FAISS DB at '{db_path}'")