import numpy as np
from openai import OpenAI
# OpenAI SDK client pointed at a custom base URL instead of the default one.
# NOTE(review): the api_key literal reads as a placeholder ("your YeLIn AI
# key"); supply a real key before running and keep it out of version control.
client = OpenAI(api_key="sk-你的YeLIn AI密钥", base_url="https://api.yelinai.com/v1")
# Document corpus: the texts that get embedded once and searched against.
documents = [
    "Python 是一种解释型编程语言",
    "JavaScript 常用于网页开发",
    "机器学习是人工智能的一个分支",
    "数据库用于存储和管理数据",
    "深度学习使用多层神经网络",
]
def get_embeddings(texts):
    """Fetch embedding vectors for a batch of texts with one API request.

    Args:
        texts: Value forwarded as ``input`` to the embeddings endpoint; the
            callers in this file pass a list of strings.

    Returns:
        A list holding the ``embedding`` of each item in the response's
        ``data``, in the order the response lists them. An empty ``texts``
        yields an empty list without contacting the service.
    """
    # Nothing to embed: skip the network round trip rather than sending an
    # empty input to the service.
    if not texts:
        return []

    response = client.embeddings.create(
        model="text-embedding-3-small",
        input=texts,
    )
    return [item.embedding for item in response.data]
def semantic_search(query, docs, doc_embeddings, top_k=3):
    """Rank documents by cosine similarity between their embeddings and the query.

    Args:
        query: Text to search for; it is embedded via ``get_embeddings``.
        docs: Documents, positionally aligned with ``doc_embeddings``
            (``docs[i]`` is reported for ``doc_embeddings[i]``).
        doc_embeddings: One precomputed embedding vector per document.
        top_k: Maximum number of results. ``None`` returns every document;
            zero or a negative value returns no results.

    Returns:
        A list of ``(document, similarity)`` tuples ordered from most to
        least similar. Documents with equal similarity keep their original
        relative order.
    """
    # Bail out before the embedding request when no result can be produced.
    if len(doc_embeddings) == 0 or (top_k is not None and top_k <= 0):
        return []

    query_vec = np.asarray(get_embeddings([query])[0], dtype=float)
    doc_matrix = np.asarray(doc_embeddings, dtype=float)

    # Cosine similarity for every document at once; the query norm is
    # computed a single time instead of once per document.
    dots = doc_matrix @ query_vec
    norms = np.linalg.norm(doc_matrix, axis=1) * np.linalg.norm(query_vec)

    # A zero-length vector has no direction: score it 0.0 rather than
    # dividing by zero and letting NaN values disturb the ranking.
    scores = np.zeros_like(dots)
    np.divide(dots, norms, out=scores, where=norms > 0)

    # Stable sort on the negated scores gives descending order while keeping
    # tied documents in their original order.
    order = np.argsort(-scores, kind="stable")
    return [(docs[int(i)], float(scores[i])) for i in order[:top_k]]
# Embed the corpus once up front so the search can reuse the same vectors.
doc_embeddings = get_embeddings(documents)

# Run a sample query against the corpus.
query = "什么是AI?"
results = semantic_search(query, documents, doc_embeddings)

# Report the query, a header line, then one line per match with its score.
print(f"查询: {query}", "搜索结果:", sep="\n")
for doc, score in results:
    print(f" [{score:.4f}] {doc}")