-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathqdrant_url.py
More file actions
executable file
·57 lines (46 loc) · 1.45 KB
/
qdrant_url.py
File metadata and controls
executable file
·57 lines (46 loc) · 1.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import sys

from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance, PointStruct
# One-shot migration script: copies every point (id, vector(s), payload) of a
# single collection from an on-disk (local mode) Qdrant store to a Qdrant
# server reachable over HTTP.
#
# WARNING: a collection with the same name on the destination is dropped and
# rebuilt from scratch before the copy starts.

LOCAL_DB = "/root/data/qdrant_db"  # path to your local qdrant_db directory
REMOTE_URL = "http://127.0.0.1:6333"  # URL of your Qdrant server
COL = "hpqa_corpus"  # collection to migrate (same name on both sides)
BATCH = 2000  # number of points per scroll/upsert round trip

src = QdrantClient(path=LOCAL_DB)
dst = QdrantClient(url=REMOTE_URL)

try:
    if not src.collection_exists(COL):
        raise RuntimeError(f"source collection not found: {COL}")

    # Reuse the source's vector configuration unchanged instead of rebuilding
    # it from the size alone: this keeps the original distance metric and also
    # works when the collection uses named vectors (a dict of configs).
    src_info = src.get_collection(COL)
    vectors_config = src_info.config.params.vectors
    if vectors_config is None:
        raise RuntimeError(f"source collection has no vector config: {COL}")

    # Rebuild the destination collection under the same name. To avoid
    # overwriting existing data, drop the delete and skip creation when the
    # collection already exists.
    if dst.collection_exists(COL):
        dst.delete_collection(COL)
    dst.create_collection(collection_name=COL, vectors_config=vectors_config)

    # Optional: keyword index on the "title" payload field. Best-effort — a
    # failure here must not abort the migration, but it should be visible.
    try:
        dst.create_payload_index(COL, "title", field_schema="keyword")
    except Exception as exc:
        print(
            f"warning: could not create payload index on 'title': {exc}",
            file=sys.stderr,
        )

    # Page through the source with scroll(); each call returns one batch plus
    # the offset of the next page (None once the collection is exhausted).
    offset = None
    total = 0
    while True:
        points, offset = src.scroll(
            collection_name=COL,
            limit=BATCH,
            with_payload=True,
            with_vectors=True,
            offset=offset,
        )
        if not points:
            break

        to_upsert = [
            PointStruct(id=p.id, vector=p.vector, payload=p.payload)
            for p in points
        ]
        # wait=True blocks until the server has applied the batch, so the
        # progress counter only reports points that were actually written.
        dst.upsert(collection_name=COL, points=to_upsert, wait=True)
        total += len(to_upsert)
        print(f"migrated: {total}")

        if offset is None:
            break

    print("done, total:", total)
finally:
    # Release the local storage and the HTTP connection even on failure.
    src.close()
    dst.close()