From fb70e3d1ec711c754e9d23aeff736aef9ddb7f95 Mon Sep 17 00:00:00 2001 From: Shuyou Date: Thu, 28 Nov 2024 18:09:17 +0800 Subject: [PATCH] update docs --- .../run-milvus-k8s/install_cluster-helm.json | 2 +- .../run-milvus-k8s/install_cluster-helm.md | 14 +- .../manage-indexes/index-with-gpu.json | 2 +- .../manage-indexes/index-with-gpu.md | 6 +- .../run-milvus-k8s/install_cluster-helm.json | 2 +- .../run-milvus-k8s/install_cluster-helm.md | 4 +- .../manage-indexes/index-with-gpu.md | 2 +- .../run-milvus-k8s/install_cluster-helm.json | 2 +- .../run-milvus-k8s/install_cluster-helm.md | 4 +- .../manage-indexes/index-with-gpu.json | 2 +- .../manage-indexes/index-with-gpu.md | 6 +- .../run-milvus-k8s/install_cluster-helm.json | 2 +- .../run-milvus-k8s/install_cluster-helm.md | 10 +- .../manage-indexes/index-with-gpu.json | 2 +- .../manage-indexes/index-with-gpu.md | 6 +- .../run-milvus-k8s/install_cluster-helm.json | 2 +- .../run-milvus-k8s/install_cluster-helm.md | 6 +- .../manage-indexes/index-with-gpu.json | 2 +- .../manage-indexes/index-with-gpu.md | 12 +- .../run-milvus-k8s/install_cluster-helm.json | 2 +- .../run-milvus-k8s/install_cluster-helm.md | 10 +- .../manage-indexes/index-with-gpu.json | 2 +- .../manage-indexes/index-with-gpu.md | 14 +- .../run-milvus-k8s/install_cluster-helm.json | 2 +- .../run-milvus-k8s/install_cluster-helm.md | 8 +- .../manage-indexes/index-with-gpu.json | 2 +- .../manage-indexes/index-with-gpu.md | 18 +- .../run-milvus-k8s/install_cluster-helm.json | 2 +- .../run-milvus-k8s/install_cluster-helm.md | 8 +- .../manage-indexes/index-with-gpu.json | 2 +- .../manage-indexes/index-with-gpu.md | 2 +- .../run-milvus-k8s/install_cluster-helm.json | 2 +- .../run-milvus-k8s/install_cluster-helm.md | 10 +- .../manage-indexes/index-with-gpu.json | 2 +- .../manage-indexes/index-with-gpu.md | 14 +- .../site/de/adminGuide/upgrade-pulsar-v3.json | 2 +- .../site/de/adminGuide/upgrade-pulsar-v3.md | 46 +-- .../run-milvus-k8s/install_cluster-helm.json | 2 +- .../run-milvus-k8s/install_cluster-helm.md | 12 +- localization/v2.5.x/site/de/home/home.md | 2 +- .../v2.5.x/site/de/menuStructure/de.json | 134 ++++---- localization/v2.5.x/site/de/release_notes.md | 4 +- .../de/tutorials/hybrid_search_with_milvus.md | 6 +- .../collections/manage-collections.md | 6 +- .../manage-indexes/index-with-gpu.md | 6 +- .../schema/analyzer/analyzer-overview.json | 2 +- .../schema/analyzer/analyzer-overview.md | 8 +- .../de/userGuide/schema/sparse_vector.json | 2 +- .../site/de/userGuide/schema/sparse_vector.md | 61 +++- .../de/userGuide/search-query-get/boolean.md | 10 +- .../search-query-get/full-text-search.json | 2 +- .../search-query-get/full-text-search.md | 299 ++++++++++++++++- .../search-query-get/keyword-match.json | 2 +- .../search-query-get/keyword-match.md | 297 +++++++++++++++-- .../search-query-get/multi-vector-search.md | 8 +- .../search-query-get/single-vector-search.md | 14 +- .../site/es/adminGuide/upgrade-pulsar-v3.json | 2 +- .../site/es/adminGuide/upgrade-pulsar-v3.md | 48 +-- .../run-milvus-k8s/install_cluster-helm.json | 2 +- .../run-milvus-k8s/install_cluster-helm.md | 6 +- localization/v2.5.x/site/es/home/home.md | 2 +- .../v2.5.x/site/es/menuStructure/es.json | 138 ++++---- localization/v2.5.x/site/es/release_notes.md | 2 +- .../es/tutorials/hybrid_search_with_milvus.md | 2 +- .../collections/manage-collections.md | 2 +- .../manage-indexes/index-with-gpu.md | 4 +- .../schema/analyzer/analyzer-overview.md | 6 +- .../es/userGuide/schema/sparse_vector.json | 2 +- 
.../site/es/userGuide/schema/sparse_vector.md | 59 +++- .../es/userGuide/search-query-get/boolean.md | 10 +- .../search-query-get/full-text-search.json | 2 +- .../search-query-get/full-text-search.md | 299 ++++++++++++++++- .../search-query-get/keyword-match.json | 2 +- .../search-query-get/keyword-match.md | 287 ++++++++++++++-- .../search-query-get/multi-vector-search.md | 8 +- .../search-query-get/single-vector-search.md | 12 +- .../site/fr/adminGuide/upgrade-pulsar-v3.json | 2 +- .../site/fr/adminGuide/upgrade-pulsar-v3.md | 50 +-- .../run-milvus-k8s/install_cluster-helm.json | 2 +- .../run-milvus-k8s/install_cluster-helm.md | 4 +- localization/v2.5.x/site/fr/home/home.md | 2 +- .../v2.5.x/site/fr/menuStructure/fr.json | 136 ++++---- localization/v2.5.x/site/fr/release_notes.md | 4 +- .../fr/tutorials/hybrid_search_with_milvus.md | 4 +- .../collections/manage-collections.md | 2 +- .../manage-indexes/index-with-gpu.md | 4 +- .../schema/analyzer/analyzer-overview.md | 8 +- .../fr/userGuide/schema/sparse_vector.json | 2 +- .../site/fr/userGuide/schema/sparse_vector.md | 57 ++++ .../fr/userGuide/search-query-get/boolean.md | 10 +- .../search-query-get/full-text-search.json | 2 +- .../search-query-get/full-text-search.md | 293 ++++++++++++++++- .../search-query-get/keyword-match.json | 2 +- .../search-query-get/keyword-match.md | 285 ++++++++++++++-- .../search-query-get/multi-vector-search.md | 8 +- .../search-query-get/single-vector-search.md | 10 +- .../site/it/adminGuide/upgrade-pulsar-v3.json | 2 +- .../site/it/adminGuide/upgrade-pulsar-v3.md | 46 +-- .../run-milvus-k8s/install_cluster-helm.json | 2 +- .../run-milvus-k8s/install_cluster-helm.md | 4 +- localization/v2.5.x/site/it/home/home.md | 2 +- .../v2.5.x/site/it/menuStructure/it.json | 134 ++++---- localization/v2.5.x/site/it/release_notes.md | 2 +- .../it/tutorials/hybrid_search_with_milvus.md | 10 +- .../collections/manage-collections.md | 6 +- .../manage-indexes/index-with-gpu.md | 8 +- .../schema/analyzer/analyzer-overview.md | 6 +- .../it/userGuide/schema/sparse_vector.json | 2 +- .../site/it/userGuide/schema/sparse_vector.md | 57 ++++ .../it/userGuide/search-query-get/boolean.md | 14 +- .../search-query-get/full-text-search.json | 2 +- .../search-query-get/full-text-search.md | 299 ++++++++++++++++- .../search-query-get/keyword-match.json | 2 +- .../search-query-get/keyword-match.md | 294 +++++++++++++++-- .../search-query-get/multi-vector-search.md | 8 +- .../search-query-get/single-vector-search.md | 12 +- .../site/ja/adminGuide/upgrade-pulsar-v3.json | 2 +- .../site/ja/adminGuide/upgrade-pulsar-v3.md | 52 +-- .../run-milvus-k8s/install_cluster-helm.json | 2 +- .../run-milvus-k8s/install_cluster-helm.md | 4 +- localization/v2.5.x/site/ja/home/home.md | 4 +- .../v2.5.x/site/ja/menuStructure/ja.json | 136 ++++---- localization/v2.5.x/site/ja/release_notes.md | 6 +- .../ja/tutorials/hybrid_search_with_milvus.md | 2 +- .../collections/manage-collections.md | 4 +- .../manage-indexes/index-with-gpu.md | 6 +- .../schema/analyzer/analyzer-overview.md | 12 +- .../ja/userGuide/schema/sparse_vector.json | 2 +- .../site/ja/userGuide/schema/sparse_vector.md | 63 +++- .../ja/userGuide/search-query-get/boolean.md | 10 +- .../search-query-get/full-text-search.json | 2 +- .../search-query-get/full-text-search.md | 307 +++++++++++++++++- .../search-query-get/keyword-match.json | 2 +- .../search-query-get/keyword-match.md | 291 +++++++++++++++-- .../search-query-get/multi-vector-search.md | 10 +- .../search-query-get/single-vector-search.md 
| 10 +- .../site/ko/adminGuide/upgrade-pulsar-v3.json | 2 +- .../site/ko/adminGuide/upgrade-pulsar-v3.md | 46 +-- .../run-milvus-k8s/install_cluster-helm.json | 2 +- .../run-milvus-k8s/install_cluster-helm.md | 10 +- localization/v2.5.x/site/ko/home/home.md | 6 +- .../v2.5.x/site/ko/menuStructure/ko.json | 134 ++++---- localization/v2.5.x/site/ko/release_notes.md | 10 +- .../ko/tutorials/hybrid_search_with_milvus.md | 4 +- .../collections/manage-collections.md | 6 +- .../manage-indexes/index-with-gpu.md | 2 +- .../schema/analyzer/analyzer-overview.md | 6 +- .../ko/userGuide/schema/sparse_vector.json | 2 +- .../site/ko/userGuide/schema/sparse_vector.md | 61 +++- .../ko/userGuide/search-query-get/boolean.md | 10 +- .../search-query-get/full-text-search.json | 2 +- .../search-query-get/full-text-search.md | 299 ++++++++++++++++- .../search-query-get/keyword-match.json | 2 +- .../search-query-get/keyword-match.md | 287 ++++++++++++++-- .../search-query-get/multi-vector-search.md | 6 +- .../search-query-get/single-vector-search.md | 10 +- .../site/pt/adminGuide/upgrade-pulsar-v3.json | 2 +- .../site/pt/adminGuide/upgrade-pulsar-v3.md | 54 +-- .../run-milvus-k8s/install_cluster-helm.json | 2 +- .../run-milvus-k8s/install_cluster-helm.md | 12 +- localization/v2.5.x/site/pt/home/home.md | 4 +- .../v2.5.x/site/pt/menuStructure/pt.json | 134 ++++---- localization/v2.5.x/site/pt/release_notes.md | 10 +- .../pt/tutorials/hybrid_search_with_milvus.md | 4 +- .../collections/manage-collections.md | 14 +- .../manage-indexes/index-with-gpu.md | 6 +- .../schema/analyzer/analyzer-overview.md | 10 +- .../pt/userGuide/schema/sparse_vector.json | 2 +- .../site/pt/userGuide/schema/sparse_vector.md | 67 +++- .../pt/userGuide/search-query-get/boolean.md | 10 +- .../search-query-get/full-text-search.json | 2 +- .../search-query-get/full-text-search.md | 299 ++++++++++++++++- .../search-query-get/keyword-match.json | 2 +- .../search-query-get/keyword-match.md | 285 ++++++++++++++-- .../search-query-get/multi-vector-search.md | 10 +- .../search-query-get/single-vector-search.md | 24 +- .../site/zh/adminGuide/upgrade-pulsar-v3.json | 2 +- .../site/zh/adminGuide/upgrade-pulsar-v3.md | 52 +-- .../run-milvus-k8s/install_cluster-helm.json | 2 +- .../run-milvus-k8s/install_cluster-helm.md | 12 +- localization/v2.5.x/site/zh/home/home.md | 2 +- .../v2.5.x/site/zh/menuStructure/zh.json | 136 ++++---- localization/v2.5.x/site/zh/release_notes.md | 16 +- .../zh/tutorials/hybrid_search_with_milvus.md | 8 +- .../collections/manage-collections.md | 8 +- .../manage-indexes/index-with-gpu.md | 8 +- .../schema/analyzer/analyzer-overview.md | 12 +- .../zh/userGuide/schema/sparse_vector.json | 2 +- .../site/zh/userGuide/schema/sparse_vector.md | 63 +++- .../zh/userGuide/search-query-get/boolean.md | 10 +- .../search-query-get/full-text-search.json | 2 +- .../search-query-get/full-text-search.md | 297 ++++++++++++++++- .../search-query-get/keyword-match.json | 2 +- .../search-query-get/keyword-match.md | 283 ++++++++++++++-- .../search-query-get/multi-vector-search.md | 4 +- .../search-query-get/single-vector-search.md | 18 +- tools/cache.json | 36 +- tools/generate-en.js | 178 +++++----- tools/translate.js | 2 +- 199 files changed, 6190 insertions(+), 1559 deletions(-) diff --git a/localization/v2.4.x/site/de/getstarted/run-milvus-k8s/install_cluster-helm.json b/localization/v2.4.x/site/de/getstarted/run-milvus-k8s/install_cluster-helm.json index cc7954da6..836c8dc88 100644 --- 
a/localization/v2.4.x/site/de/getstarted/run-milvus-k8s/install_cluster-helm.json +++ b/localization/v2.4.x/site/de/getstarted/run-milvus-k8s/install_cluster-helm.json @@ -1 +1 @@ -{"codeList":["$ kubectl get sc\n\nNAME PROVISIONER RECLAIMPOLICY VOLUMEBIINDINGMODE ALLOWVOLUMEEXPANSION AGE\nstandard (default) k8s.io/minikube-hostpath Delete Immediate false \n","$ helm repo add milvus https://github.com/zilliztech/milvus-helm\n","helm repo add zilliztech https://github.com/zilliztech/milvus-helm\nhelm repo update\n# upgrade existing helm release\nhelm upgrade my-release zilliztech/milvus\n","$ helm repo update\n","$ helm install my-release milvus/milvus\n","$ kubectl get pods\n","NAME READY STATUS RESTARTS AGE\nmy-release-etcd-0 1/1 Running 0 3m23s\nmy-release-etcd-1 1/1 Running 0 3m23s\nmy-release-etcd-2 1/1 Running 0 3m23s\nmy-release-milvus-datanode-68cb87dcbd-4khpm 1/1 Running 0 3m23s\nmy-release-milvus-indexnode-5c5f7b5bd9-l8hjg 1/1 Running 0 3m24s\nmy-release-milvus-mixcoord-7fb9488465-dmbbj 1/1 Running 0 3m23s\nmy-release-milvus-proxy-6bd7f5587-ds2xv 1/1 Running 0 3m24s\nmy-release-milvus-querynode-5cd8fff495-k6gtg 1/1 Running 0 3m24s\nmy-release-minio-0 1/1 Running 0 3m23s\nmy-release-minio-1 1/1 Running 0 3m23s\nmy-release-minio-2 1/1 Running 0 3m23s\nmy-release-minio-3 1/1 Running 0 3m23s\nmy-release-pulsar-autorecovery-86f5dbdf77-lchpc 1/1 Running 0 3m24s\nmy-release-pulsar-bookkeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-bookkeeper-1 1/1 Running 0 98s\nmy-release-pulsar-broker-556ff89d4c-2m29m 1/1 Running 0 3m23s\nmy-release-pulsar-proxy-6fbd75db75-nhg4v 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-metadata-98zbr 0/1 Completed 0 3m24s\n","$ kubectl get pod my-release-milvus-proxy-6bd7f5587-ds2xv --template\n='{{(index (index .spec.containers 0).ports 0).containerPort}}{{\"\\n\"}}'\n19530\n","$ kubectl port-forward service/my-release-milvus 27017:19530\nForwarding from 127.0.0.1:27017 -> 19530\n","$ kubectl port-forward --address 0.0.0.0 service/my-release-milvus 27017:19530\nForwarding from 0.0.0.0:27017 -> 19530\n","$ helm template my-release milvus/milvus > milvus_manifest.yaml\n","$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/requirements.txt\n$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/save_image.py\n","$ pip3 install -r requirements.txt\n$ python3 save_image.py --manifest milvus_manifest.yaml\n","$ for image in $(find . 
-type f -name \"*.tar.gz\") ; do gunzip -c $image | docker load; done\n","$ kubectl apply -f milvus_manifest.yaml\n","$ helm repo update\n$ helm upgrade my-release zilliztech/milvus\n","$ helm uninstall my-release\n"],"headingContent":"Run Milvus in Kubernetes with Helm","anchorList":[{"label":"Milvus in Kubernetes mit Helm starten","href":"Run-Milvus-in-Kubernetes-with-Helm","type":1,"isActive":false},{"label":"Übersicht","href":"Overview","type":2,"isActive":false},{"label":"Voraussetzungen","href":"Prerequisites","type":2,"isActive":false},{"label":"Milvus Helm Chart installieren","href":"Install-Milvus-Helm-Chart","type":2,"isActive":false},{"label":"Online-Installation","href":"Online-install","type":2,"isActive":false},{"label":"Offline-Installation","href":"Offline-install","type":2,"isActive":false},{"label":"Upgrade des laufenden Milvus-Clusters","href":"Upgrade-running-Milvus-cluster","type":2,"isActive":false},{"label":"Milvus deinstallieren","href":"Uninstall-Milvus","type":2,"isActive":false},{"label":"Wie geht es weiter?","href":"Whats-next","type":2,"isActive":false}]} \ No newline at end of file +{"codeList":["$ kubectl get sc\n\nNAME PROVISIONER RECLAIMPOLICY VOLUMEBIINDINGMODE ALLOWVOLUMEEXPANSION AGE\nstandard (default) k8s.io/minikube-hostpath Delete Immediate false \n","$ helm repo add milvus https://zilliztech.github.io/milvus-helm/\n","helm repo add zilliztech https://zilliztech.github.io/milvus-helm/\nhelm repo update\n# upgrade existing helm release\nhelm upgrade my-release zilliztech/milvus\n","$ helm repo update\n","$ helm install my-release milvus/milvus\n","$ kubectl get pods\n","NAME READY STATUS RESTARTS AGE\nmy-release-etcd-0 1/1 Running 0 3m23s\nmy-release-etcd-1 1/1 Running 0 3m23s\nmy-release-etcd-2 1/1 Running 0 3m23s\nmy-release-milvus-datanode-68cb87dcbd-4khpm 1/1 Running 0 3m23s\nmy-release-milvus-indexnode-5c5f7b5bd9-l8hjg 1/1 Running 0 3m24s\nmy-release-milvus-mixcoord-7fb9488465-dmbbj 1/1 Running 0 3m23s\nmy-release-milvus-proxy-6bd7f5587-ds2xv 1/1 Running 0 3m24s\nmy-release-milvus-querynode-5cd8fff495-k6gtg 1/1 Running 0 3m24s\nmy-release-minio-0 1/1 Running 0 3m23s\nmy-release-minio-1 1/1 Running 0 3m23s\nmy-release-minio-2 1/1 Running 0 3m23s\nmy-release-minio-3 1/1 Running 0 3m23s\nmy-release-pulsar-autorecovery-86f5dbdf77-lchpc 1/1 Running 0 3m24s\nmy-release-pulsar-bookkeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-bookkeeper-1 1/1 Running 0 98s\nmy-release-pulsar-broker-556ff89d4c-2m29m 1/1 Running 0 3m23s\nmy-release-pulsar-proxy-6fbd75db75-nhg4v 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-metadata-98zbr 0/1 Completed 0 3m24s\n","$ kubectl get pod my-release-milvus-proxy-6bd7f5587-ds2xv --template\n='{{(index (index .spec.containers 0).ports 0).containerPort}}{{\"\\n\"}}'\n19530\n","$ kubectl port-forward service/my-release-milvus 27017:19530\nForwarding from 127.0.0.1:27017 -> 19530\n","$ kubectl port-forward --address 0.0.0.0 service/my-release-milvus 27017:19530\nForwarding from 0.0.0.0:27017 -> 19530\n","$ helm template my-release milvus/milvus > milvus_manifest.yaml\n","$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/requirements.txt\n$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/save_image.py\n","$ pip3 install -r requirements.txt\n$ python3 save_image.py --manifest milvus_manifest.yaml\n","$ for image in $(find . 
-type f -name \"*.tar.gz\") ; do gunzip -c $image | docker load; done\n","$ kubectl apply -f milvus_manifest.yaml\n","$ helm repo update\n$ helm upgrade my-release zilliztech/milvus\n","$ helm uninstall my-release\n"],"headingContent":"Run Milvus in Kubernetes with Helm","anchorList":[{"label":"Milvus in Kubernetes mit Helm starten","href":"Run-Milvus-in-Kubernetes-with-Helm","type":1,"isActive":false},{"label":"Übersicht","href":"Overview","type":2,"isActive":false},{"label":"Voraussetzungen","href":"Prerequisites","type":2,"isActive":false},{"label":"Milvus Helm Chart installieren","href":"Install-Milvus-Helm-Chart","type":2,"isActive":false},{"label":"Online-Installation","href":"Online-install","type":2,"isActive":false},{"label":"Offline-Installation","href":"Offline-install","type":2,"isActive":false},{"label":"Upgrade des laufenden Milvus-Clusters","href":"Upgrade-running-Milvus-cluster","type":2,"isActive":false},{"label":"Milvus deinstallieren","href":"Uninstall-Milvus","type":2,"isActive":false},{"label":"Wie geht es weiter?","href":"Whats-next","type":2,"isActive":false}]} \ No newline at end of file diff --git a/localization/v2.4.x/site/de/getstarted/run-milvus-k8s/install_cluster-helm.md b/localization/v2.4.x/site/de/getstarted/run-milvus-k8s/install_cluster-helm.md index 90025e850..81deeb8dd 100644 --- a/localization/v2.4.x/site/de/getstarted/run-milvus-k8s/install_cluster-helm.md +++ b/localization/v2.4.x/site/de/getstarted/run-milvus-k8s/install_cluster-helm.md @@ -65,7 +65,7 @@ NAME PROVISIONER RECLAIMPOLICY VOLUMEBIINDI
  • Es wird empfohlen, vor der Installation von Milvus das Milvus Sizing Tool zu verwenden, um die Hardware-Anforderungen auf der Grundlage Ihrer Datengröße abzuschätzen. Dies hilft, eine optimale Leistung und Ressourcenzuweisung für Ihre Milvus-Installation zu gewährleisten.

  • - Sollten Sie beim Ziehen des Images auf Probleme stoßen, kontaktieren Sie uns unter community@zilliz.com und schildern Sie uns das Problem.
    + Sollten Sie beim Ziehen des Images auf Probleme stoßen, wenden Sie sich bitte an community@zilliz.com und schildern Sie das Problem, damit wir Ihnen die notwendige Unterstützung bieten können.

    Milvus Helm Chart installieren

    Bevor Sie Milvus Helm Charts installieren, müssen Sie das Milvus Helm Repository hinzufügen.

    - $ helm repo add milvus https://github.com/zilliztech/milvus-helm
    + $ helm repo add milvus https://zilliztech.github.io/milvus-helm/
     

    Das Milvus Helm Charts-Repository unter https://github.com/milvus-io/milvus-helm wurde archiviert und Sie können weitere Aktualisierungen wie folgt von https://github.com/zilliztech/milvus-helm erhalten:

    - helm repo add zilliztech https://github.com/zilliztech/milvus-helm
    + helm repo add zilliztech https://zilliztech.github.io/milvus-helm/
     helm repo update
     # upgrade existing helm release
     helm upgrade my-release zilliztech/milvus
     

    Das archivierte Repository ist weiterhin für die Diagramme bis 4.0.31 verfügbar. Für spätere Versionen verwenden Sie stattdessen das neue Repo.

    - Holen Sie sich dann die Milvus-Diagramme wie folgt aus dem Repository:
    + Holen Sie sich dann die Milvus-Charts wie folgt aus dem Repository:

    $ helm repo update
     

    Sie können diesen Befehl jederzeit ausführen, um die neuesten Milvus Helm-Diagramme abzurufen.

    @@ -120,9 +120,9 @@ helm upgrade my-release zilliztech/milvus

    Mit dem obigen Befehl wird ein Milvus-Cluster mit seinen Komponenten und Abhängigkeiten unter Verwendung von Standardkonfigurationen bereitgestellt. Um diese Einstellungen anzupassen, empfehlen wir Ihnen, das Milvus Sizing Tool zu verwenden, um die Konfigurationen basierend auf Ihrer tatsächlichen Datengröße anzupassen und dann die entsprechende YAML-Datei herunterzuladen. Weitere Informationen zu den Konfigurationsparametern finden Sie in der Milvus System Configurations Checklist.

    - Der Versionsname sollte nur Buchstaben, Zahlen und Bindestriche enthalten. Punkte sind im Versionsnamen nicht erlaubt.
    + Der Release-Name sollte nur Buchstaben, Zahlen und Bindestriche enthalten. Punkte sind im Versionsnamen nicht erlaubt.
    • Die Standard-Befehlszeile installiert die Cluster-Version von Milvus bei der Installation von Milvus mit Helm. Bei der Installation von Milvus als Einzelplatzversion sind weitere Einstellungen erforderlich.
    - Gemäß dem veralteten API-Migrationsleitfaden von Kubernetes wird die API-Version policy/v1beta1 von PodDisruptionBudget ab v1.25 nicht mehr unterstützt. Es wird empfohlen, Manifeste und API-Clients zu migrieren, um stattdessen die policy/v1-API-Version zu verwenden.
      Als Workaround für Benutzer, die noch die API-Version policy/v1beta1 von PodDisruptionBudget auf Kubernetes v1.25 und höher verwenden, können Sie stattdessen den folgenden Befehl ausführen, um Milvus zu installieren:
      helm install my-release milvus/milvus --set pulsar.bookkeeper.pdb.usePolicy=false,pulsar.broker.pdb.usePolicy=false,pulsar.proxy.pdb.usePolicy=false,pulsar.zookeeper.pdb.usePolicy=false
    + Gemäß dem veralteten API-Migrationsleitfaden von Kubernetes wird die API-Version policy/v1beta1 von PodDisruptionBudget ab v1.25 nicht mehr unterstützt. Es wird empfohlen, Manifeste und API-Clients zu migrieren, um stattdessen die policy/v1-API-Version zu verwenden.
      Als Workaround für Benutzer, die noch die API-Version policy/v1beta1 von PodDisruptionBudget auf Kubernetes v1.25 und später verwenden, können Sie stattdessen den folgenden Befehl ausführen, um Milvus zu installieren:
      helm install my-release milvus/milvus --set pulsar.bookkeeper.pdb.usePolicy=false,pulsar.broker.pdb.usePolicy=false,pulsar.proxy.pdb.usePolicy=false,pulsar.zookeeper.pdb.usePolicy=false
    • Siehe Milvus Helm Chart und Helm für weitere Informationen.
    @@ -204,7 +204,7 @@ $ wget https://raw.git $ python3 save_image.py --manifest milvus_manifest.yaml

    Die Bilder werden in einem Unterordner namens images im aktuellen Verzeichnis gespeichert.

    - 4. Bilder laden
      Sie können nun die Bilder wie folgt auf die Hosts in der netzbeschränkten Umgebung laden:
    + 4. Bilder laden
      Sie können die Bilder nun wie folgt auf die Hosts in der netzbeschränkten Umgebung laden:

    $ for image in $(find . -type f -name "*.tar.gz") ; do gunzip -c $image | docker load; done
     

    5. Milvus bereitstellen

    $ kubectl apply -f milvus_manifest.yaml
    diff --git a/localization/v2.4.x/site/de/userGuide/manage-indexes/index-with-gpu.json b/localization/v2.4.x/site/de/userGuide/manage-indexes/index-with-gpu.json
    index ced761bdc..76cfb3a5b 100644
    --- a/localization/v2.4.x/site/de/userGuide/manage-indexes/index-with-gpu.json
    +++ b/localization/v2.4.x/site/de/userGuide/manage-indexes/index-with-gpu.json
    @@ -1 +1 @@
    -{"codeList":["gpu:\n  initMemSize: 0 #set the initial memory pool size.\n  maxMemSize: 0 #maxMemSize sets the maximum memory usage limit. When the memory usage exceed initMemSize, Milvus will attempt to expand the memory pool. \n","gpu:\n  initMemSize: 2048 #set the initial memory pool size.\n  maxMemSize: 4096 #maxMemSize sets the maximum memory usage limit. When the memory usage exceed initMemSize, Milvus will attempt to expand the memory pool. \n","index_params = {\n    \"metric_type\": \"L2\",\n    \"index_type\": \"GPU_CAGRA\",\n    \"params\": {\n        'intermediate_graph_degree': 64,\n        'graph_degree': 32\n    }\n}\n","index_params = {\n    \"metric_type\": \"L2\",\n    \"index_type\": \"GPU_IVF_FLAT\", # Or GPU_IVF_PQ\n    \"params\": {\n        \"nlist\": 1024\n    }\n}\n","index_params = {\n    'index_type': 'GPU_BRUTE_FORCE',\n    'metric_type': 'L2',\n    'params': {}\n}\n","# Get an existing collection\ncollection = Collection(\"YOUR_COLLECTION_NAME\")\n\ncollection.create_index(\n    field_name=\"vector\", # Name of the vector field on which an index is built\n    index_params=index_params\n)\n","search_params = {\n    \"metric_type\": \"L2\",\n    \"params\": {}\n}\n","search_params = {\n    \"metric_type\": \"L2\",\n    \"params\": {\n        \"itopk_size\": 128,\n        \"search_width\": 4,\n        \"min_iterations\": 0,\n        \"max_iterations\": 0,\n        \"team_size\": 0\n    }\n}\n","search_params = {\n    \"metric_type\": \"L2\", \n    \"params\": {\"nprobe\": 10}\n}\n","# Load data into memory\ncollection.load()\n\ncollection.search(\n    data=[[query_vector]], # Your query vector\n    anns_field=\"vector\", # Name of the vector field\n    param=search_params,\n    limit=100 # Number of the results to return\n)\n"],"headingContent":"","anchorList":[{"label":"Index mit GPU","href":"Index-with-GPU","type":1,"isActive":false},{"label":"Konfigurieren Sie die Milvus-Einstellungen für die GPU-Speichersteuerung","href":"Configure-Milvus-settings-for-GPU-memory-control","type":2,"isActive":false},{"label":"Einen Index erstellen","href":"Build-an-index","type":2,"isActive":false},{"label":"Suche","href":"Search","type":2,"isActive":false},{"label":"Grenzen","href":"Limits","type":2,"isActive":false},{"label":"FAQ","href":"FAQ","type":2,"isActive":false}]}
    \ No newline at end of file
    +{"codeList":["gpu:\n  initMemSize: 0 #set the initial memory pool size.\n  maxMemSize: 0 #maxMemSize sets the maximum memory usage limit. When the memory usage exceed initMemSize, Milvus will attempt to expand the memory pool. \n","gpu:\n  initMemSize: 2048 #set the initial memory pool size.\n  maxMemSize: 4096 #maxMemSize sets the maximum memory usage limit. When the memory usage exceed initMemSize, Milvus will attempt to expand the memory pool. \n","index_params = {\n    \"metric_type\": \"L2\",\n    \"index_type\": \"GPU_CAGRA\",\n    \"params\": {\n        'intermediate_graph_degree': 64,\n        'graph_degree': 32\n    }\n}\n","index_params = {\n    \"metric_type\": \"L2\",\n    \"index_type\": \"GPU_IVF_FLAT\", # Or GPU_IVF_PQ\n    \"params\": {\n        \"nlist\": 1024\n    }\n}\n","index_params = {\n    'index_type': 'GPU_BRUTE_FORCE',\n    'metric_type': 'L2',\n    'params': {}\n}\n","# Get an existing collection\ncollection = Collection(\"YOUR_COLLECTION_NAME\")\n\ncollection.create_index(\n    field_name=\"vector\", # Name of the vector field on which an index is built\n    index_params=index_params\n)\n","search_params = {\n    \"metric_type\": \"L2\",\n    \"params\": {}\n}\n","search_params = {\n    \"metric_type\": \"L2\",\n    \"params\": {\n        \"itopk_size\": 128,\n        \"search_width\": 4,\n        \"min_iterations\": 0,\n        \"max_iterations\": 0,\n        \"team_size\": 0\n    }\n}\n","search_params = {\n    \"metric_type\": \"L2\", \n    \"params\": {\"nprobe\": 10}\n}\n","# Load data into memory\ncollection.load()\n\ncollection.search(\n    data=[[query_vector]], # Your query vector\n    anns_field=\"vector\", # Name of the vector field\n    param=search_params,\n    limit=100 # Number of the results to return\n)\n"],"headingContent":"Index with GPU","anchorList":[{"label":"Index mit GPU","href":"Index-with-GPU","type":1,"isActive":false},{"label":"Konfigurieren Sie die Milvus-Einstellungen für die GPU-Speichersteuerung","href":"Configure-Milvus-settings-for-GPU-memory-control","type":2,"isActive":false},{"label":"Einen Index erstellen","href":"Build-an-index","type":2,"isActive":false},{"label":"Suche","href":"Search","type":2,"isActive":false},{"label":"Grenzen","href":"Limits","type":2,"isActive":false},{"label":"FAQ","href":"FAQ","type":2,"isActive":false}]}
    \ No newline at end of file
    diff --git a/localization/v2.4.x/site/de/userGuide/manage-indexes/index-with-gpu.md b/localization/v2.4.x/site/de/userGuide/manage-indexes/index-with-gpu.md
    index 193aa900f..b837eeace 100644
    --- a/localization/v2.4.x/site/de/userGuide/manage-indexes/index-with-gpu.md
    +++ b/localization/v2.4.x/site/de/userGuide/manage-indexes/index-with-gpu.md
    @@ -203,7 +203,7 @@ collection.search(
           
         

    Bei der Verwendung von GPU-Indizes müssen bestimmte Einschränkungen beachtet werden:

    - Für GPU_IVF_FLAT beträgt der Höchstwert für Limit 256.
    + Für GPU_IVF_FLAT beträgt der Höchstwert für limit 1024.

    • Für GPU_IVF_PQ und GPU_CAGRA ist der Höchstwert für limit 1024.

    • Für GPU_BRUTE_FORCE gibt es zwar keinen festen Grenzwert, es wird jedoch empfohlen, 4096 nicht zu überschreiten, um mögliche Leistungsprobleme zu vermeiden.

    • Derzeit unterstützen GPU-Indizes keinen COSINE-Abstand. Wenn der COSINE-Abstand erforderlich ist, sollten die Daten zuerst normalisiert werden, und dann kann der Innenproduktabstand (IP) als Ersatz verwendet werden.

    • @@ -227,7 +227,7 @@ collection.search(
      • Wann ist es sinnvoll, einen GPU-Index zu verwenden?

    - Ein GPU-Index ist besonders vorteilhaft in Situationen, die einen hohen Durchsatz oder eine hohe Wiederauffindung erfordern. Bei großen Stapeln kann der Durchsatz der GPU-Indizierung beispielsweise den der CPU-Indizierung um das 100-fache übertreffen. In Szenarien mit kleineren Stapeln übertrifft die GPU-Indizierung die CPU-Indizierung in Bezug auf die Leistung immer noch deutlich. Darüber hinaus kann der Einsatz eines Grafikprozessors den Prozess der Indexerstellung erheblich beschleunigen, wenn Daten schnell eingefügt werden müssen.
    + Ein GPU-Index ist besonders vorteilhaft in Situationen, die einen hohen Durchsatz oder eine hohe Wiederauffindung erfordern. Bei großen Stapeln kann der Durchsatz der GPU-Indizierung beispielsweise den der CPU-Indizierung um das 100-fache übertreffen. In Szenarien mit kleineren Stapeln übertrifft die GPU-Indizierung die CPU-Indizierung in Bezug auf die Leistung immer noch deutlich. Darüber hinaus kann die Einbindung einer GPU den Prozess der Indexerstellung erheblich beschleunigen, wenn Daten schnell eingefügt werden müssen.
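    Editor's note: as an illustration of the large-batch case described above (not part of the original page), the following minimal pymilvus sketch submits a batch of query vectors in a single search() call; the collection name, field name, and vector dimension are placeholders.

    # Hedged sketch (editorial illustration): batch search against a GPU index.
    # "YOUR_COLLECTION_NAME", the 128-dim vectors and the "vector" field are placeholders.
    import numpy as np
    from pymilvus import Collection

    collection = Collection("YOUR_COLLECTION_NAME")
    collection.load()

    # Large batches are where GPU indexes tend to show the biggest throughput gains.
    query_vectors = np.random.rand(1000, 128).astype("float32").tolist()

    results = collection.search(
        data=query_vectors,          # many queries in one call instead of one-by-one
        anns_field="vector",
        param={"metric_type": "L2", "params": {}},
        limit=100,
    )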

      • Für welche Szenarien sind GPU-Indizes wie CAGRA, GPU_IVF_PQ, GPU_IVF_FLAT und GPU_BRUTE_FORCE am besten geeignet?

    - CAGRA-Indizes sind ideal für Szenarien, die eine höhere Leistung erfordern, auch wenn dies mit einem höheren Speicherbedarf einhergeht. In Umgebungen, in denen Speicherplatzeinsparungen Priorität haben, kann der GPU_IVF_PQ-Index dazu beitragen, die Speicheranforderungen zu minimieren, auch wenn dies mit einem höheren Präzisionsverlust einhergeht. Der GPU_IVF_FLAT-Index stellt eine ausgewogene Option dar, die einen Kompromiss zwischen Leistung und Speicherbedarf bietet. Der GPU_BRUTE_FORCE-Index schließlich ist für erschöpfende Suchvorgänge konzipiert und garantiert durch die Durchführung von Traversalsuchen eine Abrufrate von 1.
    + CAGRA-Indizes sind ideal für Szenarien, die eine höhere Leistung erfordern, auch wenn dies mit einem höheren Speicherbedarf einhergeht. In Umgebungen, in denen Speicherplatzeinsparungen Priorität haben, kann der GPU_IVF_PQ-Index dazu beitragen, die Speicheranforderungen zu minimieren, auch wenn dies mit einem höheren Verlust an Präzision einhergeht. Der GPU_IVF_FLAT-Index stellt eine ausgewogene Option dar, die einen Kompromiss zwischen Leistung und Speicherbedarf bietet. Der GPU_BRUTE_FORCE-Index schließlich ist für erschöpfende Suchvorgänge konzipiert und garantiert durch die Durchführung von Traversalsuchen eine Abrufrate von 1.
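    Editor's note: to make this trade-off concrete, here is a hedged sketch of the index_params one might pass to create_index() for each GPU index type; the parameter values mirror the examples shown elsewhere on this page and should be tuned for your own data.

    # Hedged sketch (editorial illustration): index_params for the GPU index types discussed above.
    gpu_cagra_params = {                      # performance-first, higher memory footprint
        "metric_type": "L2",
        "index_type": "GPU_CAGRA",
        "params": {"intermediate_graph_degree": 64, "graph_degree": 32},
    }
    gpu_ivf_pq_params = {                     # memory-saving, at the cost of some precision
        "metric_type": "L2",
        "index_type": "GPU_IVF_PQ",
        "params": {"nlist": 1024},
    }
    gpu_brute_force_params = {                # exhaustive search, recall of 1
        "metric_type": "L2",
        "index_type": "GPU_BRUTE_FORCE",
        "params": {},
    }
    # e.g. collection.create_index(field_name="vector", index_params=gpu_cagra_params)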

      diff --git a/localization/v2.4.x/site/en/getstarted/run-milvus-k8s/install_cluster-helm.json b/localization/v2.4.x/site/en/getstarted/run-milvus-k8s/install_cluster-helm.json index bcccdcf09..ad4edaa0b 100644 --- a/localization/v2.4.x/site/en/getstarted/run-milvus-k8s/install_cluster-helm.json +++ b/localization/v2.4.x/site/en/getstarted/run-milvus-k8s/install_cluster-helm.json @@ -1 +1 @@ -{"codeList":["$ kubectl get sc\n\nNAME PROVISIONER RECLAIMPOLICY VOLUMEBIINDINGMODE ALLOWVOLUMEEXPANSION AGE\nstandard (default) k8s.io/minikube-hostpath Delete Immediate false \n","$ helm repo add milvus https://github.com/zilliztech/milvus-helm\n","helm repo add zilliztech https://github.com/zilliztech/milvus-helm\nhelm repo update\n# upgrade existing helm release\nhelm upgrade my-release zilliztech/milvus\n","$ helm repo update\n","$ helm install my-release milvus/milvus\n","$ kubectl get pods\n","NAME READY STATUS RESTARTS AGE\nmy-release-etcd-0 1/1 Running 0 3m23s\nmy-release-etcd-1 1/1 Running 0 3m23s\nmy-release-etcd-2 1/1 Running 0 3m23s\nmy-release-milvus-datanode-68cb87dcbd-4khpm 1/1 Running 0 3m23s\nmy-release-milvus-indexnode-5c5f7b5bd9-l8hjg 1/1 Running 0 3m24s\nmy-release-milvus-mixcoord-7fb9488465-dmbbj 1/1 Running 0 3m23s\nmy-release-milvus-proxy-6bd7f5587-ds2xv 1/1 Running 0 3m24s\nmy-release-milvus-querynode-5cd8fff495-k6gtg 1/1 Running 0 3m24s\nmy-release-minio-0 1/1 Running 0 3m23s\nmy-release-minio-1 1/1 Running 0 3m23s\nmy-release-minio-2 1/1 Running 0 3m23s\nmy-release-minio-3 1/1 Running 0 3m23s\nmy-release-pulsar-autorecovery-86f5dbdf77-lchpc 1/1 Running 0 3m24s\nmy-release-pulsar-bookkeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-bookkeeper-1 1/1 Running 0 98s\nmy-release-pulsar-broker-556ff89d4c-2m29m 1/1 Running 0 3m23s\nmy-release-pulsar-proxy-6fbd75db75-nhg4v 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-metadata-98zbr 0/1 Completed 0 3m24s\n","$ kubectl get pod my-release-milvus-proxy-6bd7f5587-ds2xv --template\n='{{(index (index .spec.containers 0).ports 0).containerPort}}{{\"\\n\"}}'\n19530\n","$ kubectl port-forward service/my-release-milvus 27017:19530\nForwarding from 127.0.0.1:27017 -> 19530\n","$ kubectl port-forward --address 0.0.0.0 service/my-release-milvus 27017:19530\nForwarding from 0.0.0.0:27017 -> 19530\n","$ helm template my-release milvus/milvus > milvus_manifest.yaml\n","$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/requirements.txt\n$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/save_image.py\n","$ pip3 install -r requirements.txt\n$ python3 save_image.py --manifest milvus_manifest.yaml\n","$ for image in $(find . 
-type f -name \"*.tar.gz\") ; do gunzip -c $image | docker load; done\n","$ kubectl apply -f milvus_manifest.yaml\n","$ helm repo update\n$ helm upgrade my-release zilliztech/milvus\n","$ helm uninstall my-release\n"],"headingContent":"Run Milvus in Kubernetes with Helm","anchorList":[{"label":"Run Milvus in Kubernetes with Helm","href":"Run-Milvus-in-Kubernetes-with-Helm","type":1,"isActive":false},{"label":"Overview","href":"Overview","type":2,"isActive":false},{"label":"Prerequisites","href":"Prerequisites","type":2,"isActive":false},{"label":"Install Milvus Helm Chart","href":"Install-Milvus-Helm-Chart","type":2,"isActive":false},{"label":"Online install","href":"Online-install","type":2,"isActive":false},{"label":"Offline install","href":"Offline-install","type":2,"isActive":false},{"label":"Upgrade running Milvus cluster","href":"Upgrade-running-Milvus-cluster","type":2,"isActive":false},{"label":"Uninstall Milvus","href":"Uninstall-Milvus","type":2,"isActive":false},{"label":"What's next","href":"Whats-next","type":2,"isActive":false}]} \ No newline at end of file +{"codeList":["$ kubectl get sc\n\nNAME PROVISIONER RECLAIMPOLICY VOLUMEBIINDINGMODE ALLOWVOLUMEEXPANSION AGE\nstandard (default) k8s.io/minikube-hostpath Delete Immediate false \n","$ helm repo add milvus https://zilliztech.github.io/milvus-helm/\n","helm repo add zilliztech https://zilliztech.github.io/milvus-helm/\nhelm repo update\n# upgrade existing helm release\nhelm upgrade my-release zilliztech/milvus\n","$ helm repo update\n","$ helm install my-release milvus/milvus\n","$ kubectl get pods\n","NAME READY STATUS RESTARTS AGE\nmy-release-etcd-0 1/1 Running 0 3m23s\nmy-release-etcd-1 1/1 Running 0 3m23s\nmy-release-etcd-2 1/1 Running 0 3m23s\nmy-release-milvus-datanode-68cb87dcbd-4khpm 1/1 Running 0 3m23s\nmy-release-milvus-indexnode-5c5f7b5bd9-l8hjg 1/1 Running 0 3m24s\nmy-release-milvus-mixcoord-7fb9488465-dmbbj 1/1 Running 0 3m23s\nmy-release-milvus-proxy-6bd7f5587-ds2xv 1/1 Running 0 3m24s\nmy-release-milvus-querynode-5cd8fff495-k6gtg 1/1 Running 0 3m24s\nmy-release-minio-0 1/1 Running 0 3m23s\nmy-release-minio-1 1/1 Running 0 3m23s\nmy-release-minio-2 1/1 Running 0 3m23s\nmy-release-minio-3 1/1 Running 0 3m23s\nmy-release-pulsar-autorecovery-86f5dbdf77-lchpc 1/1 Running 0 3m24s\nmy-release-pulsar-bookkeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-bookkeeper-1 1/1 Running 0 98s\nmy-release-pulsar-broker-556ff89d4c-2m29m 1/1 Running 0 3m23s\nmy-release-pulsar-proxy-6fbd75db75-nhg4v 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-metadata-98zbr 0/1 Completed 0 3m24s\n","$ kubectl get pod my-release-milvus-proxy-6bd7f5587-ds2xv --template\n='{{(index (index .spec.containers 0).ports 0).containerPort}}{{\"\\n\"}}'\n19530\n","$ kubectl port-forward service/my-release-milvus 27017:19530\nForwarding from 127.0.0.1:27017 -> 19530\n","$ kubectl port-forward --address 0.0.0.0 service/my-release-milvus 27017:19530\nForwarding from 0.0.0.0:27017 -> 19530\n","$ helm template my-release milvus/milvus > milvus_manifest.yaml\n","$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/requirements.txt\n$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/save_image.py\n","$ pip3 install -r requirements.txt\n$ python3 save_image.py --manifest milvus_manifest.yaml\n","$ for image in $(find . 
-type f -name \"*.tar.gz\") ; do gunzip -c $image | docker load; done\n","$ kubectl apply -f milvus_manifest.yaml\n","$ helm repo update\n$ helm upgrade my-release zilliztech/milvus\n","$ helm uninstall my-release\n"],"headingContent":"Run Milvus in Kubernetes with Helm","anchorList":[{"label":"Run Milvus in Kubernetes with Helm","href":"Run-Milvus-in-Kubernetes-with-Helm","type":1,"isActive":false},{"label":"Overview","href":"Overview","type":2,"isActive":false},{"label":"Prerequisites","href":"Prerequisites","type":2,"isActive":false},{"label":"Install Milvus Helm Chart","href":"Install-Milvus-Helm-Chart","type":2,"isActive":false},{"label":"Online install","href":"Online-install","type":2,"isActive":false},{"label":"Offline install","href":"Offline-install","type":2,"isActive":false},{"label":"Upgrade running Milvus cluster","href":"Upgrade-running-Milvus-cluster","type":2,"isActive":false},{"label":"Uninstall Milvus","href":"Uninstall-Milvus","type":2,"isActive":false},{"label":"What's next","href":"Whats-next","type":2,"isActive":false}]} \ No newline at end of file diff --git a/localization/v2.4.x/site/en/getstarted/run-milvus-k8s/install_cluster-helm.md b/localization/v2.4.x/site/en/getstarted/run-milvus-k8s/install_cluster-helm.md index 5d222b9e7..b8072cee9 100644 --- a/localization/v2.4.x/site/en/getstarted/run-milvus-k8s/install_cluster-helm.md +++ b/localization/v2.4.x/site/en/getstarted/run-milvus-k8s/install_cluster-helm.md @@ -83,11 +83,11 @@ NAME PROVISIONER RECLAIMPOLICY VOLUMEBIINDI >

      Before installing Milvus Helm Charts, you need to add Milvus Helm repository.

      - $ helm repo add milvus https://github.com/zilliztech/milvus-helm
      + $ helm repo add milvus https://zilliztech.github.io/milvus-helm/
       

      The Milvus Helm Charts repo at https://github.com/milvus-io/milvus-helm has been archived and you can get further updates from https://github.com/zilliztech/milvus-helm as follows:

      - helm repo add zilliztech https://github.com/zilliztech/milvus-helm
      + helm repo add zilliztech https://zilliztech.github.io/milvus-helm/
       helm repo update
       # upgrade existing helm release
       helm upgrade my-release zilliztech/milvus
      diff --git a/localization/v2.4.x/site/en/userGuide/manage-indexes/index-with-gpu.md b/localization/v2.4.x/site/en/userGuide/manage-indexes/index-with-gpu.md
      index 8be205920..e7cc992a1 100644
      --- a/localization/v2.4.x/site/en/userGuide/manage-indexes/index-with-gpu.md
      +++ b/localization/v2.4.x/site/en/userGuide/manage-indexes/index-with-gpu.md
      @@ -203,7 +203,7 @@ collection.search(
             
           

      When using GPU indexes, be aware of certain constraints:

      - For GPU_IVF_FLAT, the maximum value for limit is 256.
      + For GPU_IVF_FLAT, the maximum value for limit is 1024.

      • For GPU_IVF_PQ and GPU_CAGRA, the maximum value for limit is 1024.

      • While there is no set limit for limit on GPU_BRUTE_FORCE, it is recommended not to exceed 4096 to avoid potential performance issues.

      • Currently, GPU indexes do not support COSINE distance. If COSINE distance is required, data should be normalized first, and then inner product (IP) distance can be used as a substitute.
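      Editor's note: since GPU indexes do not support COSINE, the workaround sketched below (editorial illustration, not part of the original page) L2-normalizes the query vector and searches with IP, which is equivalent to cosine similarity on unit-length vectors, while keeping limit within the documented 1024 maximum. It assumes the GPU index was built with metric_type "IP" on normalized data; collection and field names and the 128-dim query are placeholders.

      # Hedged sketch (editorial illustration): COSINE workaround and limit bound for GPU indexes.
      import numpy as np
      from pymilvus import Collection

      collection = Collection("YOUR_COLLECTION_NAME")
      collection.load()

      query = np.random.rand(128).astype("float32")
      query /= np.linalg.norm(query)            # unit length: IP now behaves like cosine similarity

      results = collection.search(
          data=[query.tolist()],
          anns_field="vector",
          param={"metric_type": "IP", "params": {}},
          limit=1024,                           # documented maximum for GPU_IVF_FLAT / GPU_IVF_PQ / GPU_CAGRA
      )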

      • diff --git a/localization/v2.4.x/site/es/getstarted/run-milvus-k8s/install_cluster-helm.json b/localization/v2.4.x/site/es/getstarted/run-milvus-k8s/install_cluster-helm.json index ac7d6087d..d248b83ab 100644 --- a/localization/v2.4.x/site/es/getstarted/run-milvus-k8s/install_cluster-helm.json +++ b/localization/v2.4.x/site/es/getstarted/run-milvus-k8s/install_cluster-helm.json @@ -1 +1 @@ -{"codeList":["$ kubectl get sc\n\nNAME PROVISIONER RECLAIMPOLICY VOLUMEBIINDINGMODE ALLOWVOLUMEEXPANSION AGE\nstandard (default) k8s.io/minikube-hostpath Delete Immediate false \n","$ helm repo add milvus https://github.com/zilliztech/milvus-helm\n","helm repo add zilliztech https://github.com/zilliztech/milvus-helm\nhelm repo update\n# upgrade existing helm release\nhelm upgrade my-release zilliztech/milvus\n","$ helm repo update\n","$ helm install my-release milvus/milvus\n","$ kubectl get pods\n","NAME READY STATUS RESTARTS AGE\nmy-release-etcd-0 1/1 Running 0 3m23s\nmy-release-etcd-1 1/1 Running 0 3m23s\nmy-release-etcd-2 1/1 Running 0 3m23s\nmy-release-milvus-datanode-68cb87dcbd-4khpm 1/1 Running 0 3m23s\nmy-release-milvus-indexnode-5c5f7b5bd9-l8hjg 1/1 Running 0 3m24s\nmy-release-milvus-mixcoord-7fb9488465-dmbbj 1/1 Running 0 3m23s\nmy-release-milvus-proxy-6bd7f5587-ds2xv 1/1 Running 0 3m24s\nmy-release-milvus-querynode-5cd8fff495-k6gtg 1/1 Running 0 3m24s\nmy-release-minio-0 1/1 Running 0 3m23s\nmy-release-minio-1 1/1 Running 0 3m23s\nmy-release-minio-2 1/1 Running 0 3m23s\nmy-release-minio-3 1/1 Running 0 3m23s\nmy-release-pulsar-autorecovery-86f5dbdf77-lchpc 1/1 Running 0 3m24s\nmy-release-pulsar-bookkeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-bookkeeper-1 1/1 Running 0 98s\nmy-release-pulsar-broker-556ff89d4c-2m29m 1/1 Running 0 3m23s\nmy-release-pulsar-proxy-6fbd75db75-nhg4v 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-metadata-98zbr 0/1 Completed 0 3m24s\n","$ kubectl get pod my-release-milvus-proxy-6bd7f5587-ds2xv --template\n='{{(index (index .spec.containers 0).ports 0).containerPort}}{{\"\\n\"}}'\n19530\n","$ kubectl port-forward service/my-release-milvus 27017:19530\nForwarding from 127.0.0.1:27017 -> 19530\n","$ kubectl port-forward --address 0.0.0.0 service/my-release-milvus 27017:19530\nForwarding from 0.0.0.0:27017 -> 19530\n","$ helm template my-release milvus/milvus > milvus_manifest.yaml\n","$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/requirements.txt\n$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/save_image.py\n","$ pip3 install -r requirements.txt\n$ python3 save_image.py --manifest milvus_manifest.yaml\n","$ for image in $(find . 
-type f -name \"*.tar.gz\") ; do gunzip -c $image | docker load; done\n","$ kubectl apply -f milvus_manifest.yaml\n","$ helm repo update\n$ helm upgrade my-release zilliztech/milvus\n","$ helm uninstall my-release\n"],"headingContent":"Run Milvus in Kubernetes with Helm","anchorList":[{"label":"Ejecutar Milvus en Kubernetes con Helm","href":"Run-Milvus-in-Kubernetes-with-Helm","type":1,"isActive":false},{"label":"Visión general","href":"Overview","type":2,"isActive":false},{"label":"Requisitos previos","href":"Prerequisites","type":2,"isActive":false},{"label":"Instalar Milvus Helm Chart","href":"Install-Milvus-Helm-Chart","type":2,"isActive":false},{"label":"Instalación en línea","href":"Online-install","type":2,"isActive":false},{"label":"Instalación fuera de línea","href":"Offline-install","type":2,"isActive":false},{"label":"Actualice el cluster Milvus en ejecución","href":"Upgrade-running-Milvus-cluster","type":2,"isActive":false},{"label":"Desinstalar Milvus","href":"Uninstall-Milvus","type":2,"isActive":false},{"label":"Lo que sigue","href":"Whats-next","type":2,"isActive":false}]} \ No newline at end of file +{"codeList":["$ kubectl get sc\n\nNAME PROVISIONER RECLAIMPOLICY VOLUMEBIINDINGMODE ALLOWVOLUMEEXPANSION AGE\nstandard (default) k8s.io/minikube-hostpath Delete Immediate false \n","$ helm repo add milvus https://zilliztech.github.io/milvus-helm/\n","helm repo add zilliztech https://zilliztech.github.io/milvus-helm/\nhelm repo update\n# upgrade existing helm release\nhelm upgrade my-release zilliztech/milvus\n","$ helm repo update\n","$ helm install my-release milvus/milvus\n","$ kubectl get pods\n","NAME READY STATUS RESTARTS AGE\nmy-release-etcd-0 1/1 Running 0 3m23s\nmy-release-etcd-1 1/1 Running 0 3m23s\nmy-release-etcd-2 1/1 Running 0 3m23s\nmy-release-milvus-datanode-68cb87dcbd-4khpm 1/1 Running 0 3m23s\nmy-release-milvus-indexnode-5c5f7b5bd9-l8hjg 1/1 Running 0 3m24s\nmy-release-milvus-mixcoord-7fb9488465-dmbbj 1/1 Running 0 3m23s\nmy-release-milvus-proxy-6bd7f5587-ds2xv 1/1 Running 0 3m24s\nmy-release-milvus-querynode-5cd8fff495-k6gtg 1/1 Running 0 3m24s\nmy-release-minio-0 1/1 Running 0 3m23s\nmy-release-minio-1 1/1 Running 0 3m23s\nmy-release-minio-2 1/1 Running 0 3m23s\nmy-release-minio-3 1/1 Running 0 3m23s\nmy-release-pulsar-autorecovery-86f5dbdf77-lchpc 1/1 Running 0 3m24s\nmy-release-pulsar-bookkeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-bookkeeper-1 1/1 Running 0 98s\nmy-release-pulsar-broker-556ff89d4c-2m29m 1/1 Running 0 3m23s\nmy-release-pulsar-proxy-6fbd75db75-nhg4v 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-metadata-98zbr 0/1 Completed 0 3m24s\n","$ kubectl get pod my-release-milvus-proxy-6bd7f5587-ds2xv --template\n='{{(index (index .spec.containers 0).ports 0).containerPort}}{{\"\\n\"}}'\n19530\n","$ kubectl port-forward service/my-release-milvus 27017:19530\nForwarding from 127.0.0.1:27017 -> 19530\n","$ kubectl port-forward --address 0.0.0.0 service/my-release-milvus 27017:19530\nForwarding from 0.0.0.0:27017 -> 19530\n","$ helm template my-release milvus/milvus > milvus_manifest.yaml\n","$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/requirements.txt\n$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/save_image.py\n","$ pip3 install -r requirements.txt\n$ python3 save_image.py --manifest milvus_manifest.yaml\n","$ for image in $(find . 
-type f -name \"*.tar.gz\") ; do gunzip -c $image | docker load; done\n","$ kubectl apply -f milvus_manifest.yaml\n","$ helm repo update\n$ helm upgrade my-release zilliztech/milvus\n","$ helm uninstall my-release\n"],"headingContent":"Run Milvus in Kubernetes with Helm","anchorList":[{"label":"Ejecutar Milvus en Kubernetes con Helm","href":"Run-Milvus-in-Kubernetes-with-Helm","type":1,"isActive":false},{"label":"Visión general","href":"Overview","type":2,"isActive":false},{"label":"Requisitos previos","href":"Prerequisites","type":2,"isActive":false},{"label":"Instalar Milvus Helm Chart","href":"Install-Milvus-Helm-Chart","type":2,"isActive":false},{"label":"Instalación en línea","href":"Online-install","type":2,"isActive":false},{"label":"Instalación fuera de línea","href":"Offline-install","type":2,"isActive":false},{"label":"Actualice el cluster Milvus en ejecución","href":"Upgrade-running-Milvus-cluster","type":2,"isActive":false},{"label":"Desinstalar Milvus","href":"Uninstall-Milvus","type":2,"isActive":false},{"label":"Lo que sigue","href":"Whats-next","type":2,"isActive":false}]} \ No newline at end of file diff --git a/localization/v2.4.x/site/es/getstarted/run-milvus-k8s/install_cluster-helm.md b/localization/v2.4.x/site/es/getstarted/run-milvus-k8s/install_cluster-helm.md index 45a676d9b..aa4ade3b9 100644 --- a/localization/v2.4.x/site/es/getstarted/run-milvus-k8s/install_cluster-helm.md +++ b/localization/v2.4.x/site/es/getstarted/run-milvus-k8s/install_cluster-helm.md @@ -83,11 +83,11 @@ NAME PROVISIONER RECLAIMPOLICY VOLUMEBIINDI >

        Antes de instalar Milvus Helm Charts, necesita añadir el repositorio Milvus Helm.

        - $ helm repo add milvus https://github.com/zilliztech/milvus-helm
        + $ helm repo add milvus https://zilliztech.github.io/milvus-helm/
         

        El repositorio de Milvus Helm Charts en https://github.com/milvus-io/milvus-helm ha sido archivado y puede obtener más actualizaciones en https://github.com/zilliztech/milvus-helm como se indica a continuación:

        - helm repo add zilliztech https://github.com/zilliztech/milvus-helm
        + helm repo add zilliztech https://zilliztech.github.io/milvus-helm/
         helm repo update
         # upgrade existing helm release
         helm upgrade my-release zilliztech/milvus
        diff --git a/localization/v2.4.x/site/es/userGuide/manage-indexes/index-with-gpu.json b/localization/v2.4.x/site/es/userGuide/manage-indexes/index-with-gpu.json
        index 60d0ce9c4..09fb20f93 100644
        --- a/localization/v2.4.x/site/es/userGuide/manage-indexes/index-with-gpu.json
        +++ b/localization/v2.4.x/site/es/userGuide/manage-indexes/index-with-gpu.json
        @@ -1 +1 @@
        -{"codeList":["gpu:\n  initMemSize: 0 #set the initial memory pool size.\n  maxMemSize: 0 #maxMemSize sets the maximum memory usage limit. When the memory usage exceed initMemSize, Milvus will attempt to expand the memory pool. \n","gpu:\n  initMemSize: 2048 #set the initial memory pool size.\n  maxMemSize: 4096 #maxMemSize sets the maximum memory usage limit. When the memory usage exceed initMemSize, Milvus will attempt to expand the memory pool. \n","index_params = {\n    \"metric_type\": \"L2\",\n    \"index_type\": \"GPU_CAGRA\",\n    \"params\": {\n        'intermediate_graph_degree': 64,\n        'graph_degree': 32\n    }\n}\n","index_params = {\n    \"metric_type\": \"L2\",\n    \"index_type\": \"GPU_IVF_FLAT\", # Or GPU_IVF_PQ\n    \"params\": {\n        \"nlist\": 1024\n    }\n}\n","index_params = {\n    'index_type': 'GPU_BRUTE_FORCE',\n    'metric_type': 'L2',\n    'params': {}\n}\n","# Get an existing collection\ncollection = Collection(\"YOUR_COLLECTION_NAME\")\n\ncollection.create_index(\n    field_name=\"vector\", # Name of the vector field on which an index is built\n    index_params=index_params\n)\n","search_params = {\n    \"metric_type\": \"L2\",\n    \"params\": {}\n}\n","search_params = {\n    \"metric_type\": \"L2\",\n    \"params\": {\n        \"itopk_size\": 128,\n        \"search_width\": 4,\n        \"min_iterations\": 0,\n        \"max_iterations\": 0,\n        \"team_size\": 0\n    }\n}\n","search_params = {\n    \"metric_type\": \"L2\", \n    \"params\": {\"nprobe\": 10}\n}\n","# Load data into memory\ncollection.load()\n\ncollection.search(\n    data=[[query_vector]], # Your query vector\n    anns_field=\"vector\", # Name of the vector field\n    param=search_params,\n    limit=100 # Number of the results to return\n)\n"],"headingContent":"","anchorList":[{"label":"Índice con GPU","href":"Index-with-GPU","type":1,"isActive":false},{"label":"Configurar los ajustes de Milvus para el control de la memoria de la GPU","href":"Configure-Milvus-settings-for-GPU-memory-control","type":2,"isActive":false},{"label":"Construir un índice","href":"Build-an-index","type":2,"isActive":false},{"label":"Buscar en","href":"Search","type":2,"isActive":false},{"label":"Límites","href":"Limits","type":2,"isActive":false},{"label":"PREGUNTAS FRECUENTES","href":"FAQ","type":2,"isActive":false}]}
        \ No newline at end of file
        +{"codeList":["gpu:\n  initMemSize: 0 #set the initial memory pool size.\n  maxMemSize: 0 #maxMemSize sets the maximum memory usage limit. When the memory usage exceed initMemSize, Milvus will attempt to expand the memory pool. \n","gpu:\n  initMemSize: 2048 #set the initial memory pool size.\n  maxMemSize: 4096 #maxMemSize sets the maximum memory usage limit. When the memory usage exceed initMemSize, Milvus will attempt to expand the memory pool. \n","index_params = {\n    \"metric_type\": \"L2\",\n    \"index_type\": \"GPU_CAGRA\",\n    \"params\": {\n        'intermediate_graph_degree': 64,\n        'graph_degree': 32\n    }\n}\n","index_params = {\n    \"metric_type\": \"L2\",\n    \"index_type\": \"GPU_IVF_FLAT\", # Or GPU_IVF_PQ\n    \"params\": {\n        \"nlist\": 1024\n    }\n}\n","index_params = {\n    'index_type': 'GPU_BRUTE_FORCE',\n    'metric_type': 'L2',\n    'params': {}\n}\n","# Get an existing collection\ncollection = Collection(\"YOUR_COLLECTION_NAME\")\n\ncollection.create_index(\n    field_name=\"vector\", # Name of the vector field on which an index is built\n    index_params=index_params\n)\n","search_params = {\n    \"metric_type\": \"L2\",\n    \"params\": {}\n}\n","search_params = {\n    \"metric_type\": \"L2\",\n    \"params\": {\n        \"itopk_size\": 128,\n        \"search_width\": 4,\n        \"min_iterations\": 0,\n        \"max_iterations\": 0,\n        \"team_size\": 0\n    }\n}\n","search_params = {\n    \"metric_type\": \"L2\", \n    \"params\": {\"nprobe\": 10}\n}\n","# Load data into memory\ncollection.load()\n\ncollection.search(\n    data=[[query_vector]], # Your query vector\n    anns_field=\"vector\", # Name of the vector field\n    param=search_params,\n    limit=100 # Number of the results to return\n)\n"],"headingContent":"Index with GPU","anchorList":[{"label":"Índice con GPU","href":"Index-with-GPU","type":1,"isActive":false},{"label":"Configure los ajustes de Milvus para el control de la memoria de la GPU","href":"Configure-Milvus-settings-for-GPU-memory-control","type":2,"isActive":false},{"label":"Construir un índice","href":"Build-an-index","type":2,"isActive":false},{"label":"Buscar en","href":"Search","type":2,"isActive":false},{"label":"Límites","href":"Limits","type":2,"isActive":false},{"label":"PREGUNTAS FRECUENTES","href":"FAQ","type":2,"isActive":false}]}
        \ No newline at end of file
        diff --git a/localization/v2.4.x/site/es/userGuide/manage-indexes/index-with-gpu.md b/localization/v2.4.x/site/es/userGuide/manage-indexes/index-with-gpu.md
        index 6fa0dedc8..d3d7b969a 100644
        --- a/localization/v2.4.x/site/es/userGuide/manage-indexes/index-with-gpu.md
        +++ b/localization/v2.4.x/site/es/userGuide/manage-indexes/index-with-gpu.md
        @@ -22,7 +22,7 @@ title: Índice con GPU
                 >
               
             

        Esta guía describe los pasos para construir un índice con soporte GPU en Milvus, que puede mejorar significativamente el rendimiento de la búsqueda en escenarios de alto rendimiento y alta recuperación. Para más detalles sobre los tipos de índices GPU soportados por Milvus, consulte Índice GPU.

        -

        Configurar los ajustes de Milvus para el control de la memoria de la GPU

        - Los parámetros de búsqueda para estos dos tipos de índice son similares a los utilizados en IVF_FLAT e IVF_PQ. Para más información, consulte Realizar una búsqueda de similitud vectorial.
        + Los parámetros de búsqueda para estos dos tipos de índice son similares a los utilizados en IVF_FLAT e IVF_PQ. Para obtener más información, consulte Realizar una búsqueda de similitud vectorial.

      Utilice el método search() para realizar una búsqueda de similitud vectorial en el índice GPU.

      # Load data into memory
      @@ -203,7 +203,7 @@ collection.search(
             
           

      Cuando utilice índices GPU, tenga en cuenta ciertas restricciones:

      - Para GPU_IVF_FLAT, el valor máximo para limit es 256.
      + Para GPU_IVF_FLAT, el valor máximo de límite es 1024.

      • Para GPU_IVF_PQ y GPU_CAGRA, el valor máximo de limit es 1024.

      • Aunque no hay un límite establecido para GPU_BRUTE_FORCE, se recomienda no superar los 4096 para evitar posibles problemas de rendimiento.

      • Actualmente, los índices GPU no soportan la distancia COSINE. Si se requiere la distancia COSINE, los datos deben ser normalizados en primer lugar, y luego la distancia de producto interno (IP) se puede utilizar como sustituto.
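
Regarding the COSINE workaround in the last bullet: cosine similarity equals inner product once vectors are L2-normalized, so normalizing both the stored vectors and the queries lets an IP-metric GPU index stand in for COSINE. A minimal sketch (not part of the original page), assuming numpy and the collection / query_vector naming used elsewhere on this page:

import numpy as np

def l2_normalize(vecs):
    # Scale each vector to unit length so that inner product equals cosine similarity.
    vecs = np.asarray(vecs, dtype=np.float32)
    norms = np.linalg.norm(vecs, axis=1, keepdims=True)
    return vecs / np.clip(norms, 1e-12, None)

# Normalize the query the same way the inserted vectors were normalized,
# then search with the IP metric instead of COSINE.
normalized_query = l2_normalize([query_vector])[0].tolist()

collection.search(
    data=[normalized_query],
    anns_field="vector",
    param={"metric_type": "IP", "params": {"nprobe": 10}},
    limit=100
)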

      • diff --git a/localization/v2.4.x/site/fr/getstarted/run-milvus-k8s/install_cluster-helm.json b/localization/v2.4.x/site/fr/getstarted/run-milvus-k8s/install_cluster-helm.json index 53e35dd73..446a00bdb 100644 --- a/localization/v2.4.x/site/fr/getstarted/run-milvus-k8s/install_cluster-helm.json +++ b/localization/v2.4.x/site/fr/getstarted/run-milvus-k8s/install_cluster-helm.json @@ -1 +1 @@ -{"codeList":["$ kubectl get sc\n\nNAME PROVISIONER RECLAIMPOLICY VOLUMEBIINDINGMODE ALLOWVOLUMEEXPANSION AGE\nstandard (default) k8s.io/minikube-hostpath Delete Immediate false \n","$ helm repo add milvus https://github.com/zilliztech/milvus-helm\n","helm repo add zilliztech https://github.com/zilliztech/milvus-helm\nhelm repo update\n# upgrade existing helm release\nhelm upgrade my-release zilliztech/milvus\n","$ helm repo update\n","$ helm install my-release milvus/milvus\n","$ kubectl get pods\n","NAME READY STATUS RESTARTS AGE\nmy-release-etcd-0 1/1 Running 0 3m23s\nmy-release-etcd-1 1/1 Running 0 3m23s\nmy-release-etcd-2 1/1 Running 0 3m23s\nmy-release-milvus-datanode-68cb87dcbd-4khpm 1/1 Running 0 3m23s\nmy-release-milvus-indexnode-5c5f7b5bd9-l8hjg 1/1 Running 0 3m24s\nmy-release-milvus-mixcoord-7fb9488465-dmbbj 1/1 Running 0 3m23s\nmy-release-milvus-proxy-6bd7f5587-ds2xv 1/1 Running 0 3m24s\nmy-release-milvus-querynode-5cd8fff495-k6gtg 1/1 Running 0 3m24s\nmy-release-minio-0 1/1 Running 0 3m23s\nmy-release-minio-1 1/1 Running 0 3m23s\nmy-release-minio-2 1/1 Running 0 3m23s\nmy-release-minio-3 1/1 Running 0 3m23s\nmy-release-pulsar-autorecovery-86f5dbdf77-lchpc 1/1 Running 0 3m24s\nmy-release-pulsar-bookkeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-bookkeeper-1 1/1 Running 0 98s\nmy-release-pulsar-broker-556ff89d4c-2m29m 1/1 Running 0 3m23s\nmy-release-pulsar-proxy-6fbd75db75-nhg4v 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-metadata-98zbr 0/1 Completed 0 3m24s\n","$ kubectl get pod my-release-milvus-proxy-6bd7f5587-ds2xv --template\n='{{(index (index .spec.containers 0).ports 0).containerPort}}{{\"\\n\"}}'\n19530\n","$ kubectl port-forward service/my-release-milvus 27017:19530\nForwarding from 127.0.0.1:27017 -> 19530\n","$ kubectl port-forward --address 0.0.0.0 service/my-release-milvus 27017:19530\nForwarding from 0.0.0.0:27017 -> 19530\n","$ helm template my-release milvus/milvus > milvus_manifest.yaml\n","$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/requirements.txt\n$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/save_image.py\n","$ pip3 install -r requirements.txt\n$ python3 save_image.py --manifest milvus_manifest.yaml\n","$ for image in $(find . 
-type f -name \"*.tar.gz\") ; do gunzip -c $image | docker load; done\n","$ kubectl apply -f milvus_manifest.yaml\n","$ helm repo update\n$ helm upgrade my-release zilliztech/milvus\n","$ helm uninstall my-release\n"],"headingContent":"Run Milvus in Kubernetes with Helm","anchorList":[{"label":"Exécuter Milvus dans Kubernetes avec Helm","href":"Run-Milvus-in-Kubernetes-with-Helm","type":1,"isActive":false},{"label":"Vue d'ensemble","href":"Overview","type":2,"isActive":false},{"label":"Conditions préalables","href":"Prerequisites","type":2,"isActive":false},{"label":"Installer Milvus Helm Chart","href":"Install-Milvus-Helm-Chart","type":2,"isActive":false},{"label":"Installation en ligne","href":"Online-install","type":2,"isActive":false},{"label":"Installation hors ligne","href":"Offline-install","type":2,"isActive":false},{"label":"Mise à niveau du cluster Milvus en cours d'exécution","href":"Upgrade-running-Milvus-cluster","type":2,"isActive":false},{"label":"Désinstaller Milvus","href":"Uninstall-Milvus","type":2,"isActive":false},{"label":"Prochaines étapes","href":"Whats-next","type":2,"isActive":false}]} \ No newline at end of file +{"codeList":["$ kubectl get sc\n\nNAME PROVISIONER RECLAIMPOLICY VOLUMEBIINDINGMODE ALLOWVOLUMEEXPANSION AGE\nstandard (default) k8s.io/minikube-hostpath Delete Immediate false \n","$ helm repo add milvus https://zilliztech.github.io/milvus-helm/\n","helm repo add zilliztech https://zilliztech.github.io/milvus-helm/\nhelm repo update\n# upgrade existing helm release\nhelm upgrade my-release zilliztech/milvus\n","$ helm repo update\n","$ helm install my-release milvus/milvus\n","$ kubectl get pods\n","NAME READY STATUS RESTARTS AGE\nmy-release-etcd-0 1/1 Running 0 3m23s\nmy-release-etcd-1 1/1 Running 0 3m23s\nmy-release-etcd-2 1/1 Running 0 3m23s\nmy-release-milvus-datanode-68cb87dcbd-4khpm 1/1 Running 0 3m23s\nmy-release-milvus-indexnode-5c5f7b5bd9-l8hjg 1/1 Running 0 3m24s\nmy-release-milvus-mixcoord-7fb9488465-dmbbj 1/1 Running 0 3m23s\nmy-release-milvus-proxy-6bd7f5587-ds2xv 1/1 Running 0 3m24s\nmy-release-milvus-querynode-5cd8fff495-k6gtg 1/1 Running 0 3m24s\nmy-release-minio-0 1/1 Running 0 3m23s\nmy-release-minio-1 1/1 Running 0 3m23s\nmy-release-minio-2 1/1 Running 0 3m23s\nmy-release-minio-3 1/1 Running 0 3m23s\nmy-release-pulsar-autorecovery-86f5dbdf77-lchpc 1/1 Running 0 3m24s\nmy-release-pulsar-bookkeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-bookkeeper-1 1/1 Running 0 98s\nmy-release-pulsar-broker-556ff89d4c-2m29m 1/1 Running 0 3m23s\nmy-release-pulsar-proxy-6fbd75db75-nhg4v 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-metadata-98zbr 0/1 Completed 0 3m24s\n","$ kubectl get pod my-release-milvus-proxy-6bd7f5587-ds2xv --template\n='{{(index (index .spec.containers 0).ports 0).containerPort}}{{\"\\n\"}}'\n19530\n","$ kubectl port-forward service/my-release-milvus 27017:19530\nForwarding from 127.0.0.1:27017 -> 19530\n","$ kubectl port-forward --address 0.0.0.0 service/my-release-milvus 27017:19530\nForwarding from 0.0.0.0:27017 -> 19530\n","$ helm template my-release milvus/milvus > milvus_manifest.yaml\n","$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/requirements.txt\n$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/save_image.py\n","$ pip3 install -r requirements.txt\n$ python3 save_image.py --manifest milvus_manifest.yaml\n","$ for image in $(find . 
-type f -name \"*.tar.gz\") ; do gunzip -c $image | docker load; done\n","$ kubectl apply -f milvus_manifest.yaml\n","$ helm repo update\n$ helm upgrade my-release zilliztech/milvus\n","$ helm uninstall my-release\n"],"headingContent":"Run Milvus in Kubernetes with Helm","anchorList":[{"label":"Exécuter Milvus dans Kubernetes avec Helm","href":"Run-Milvus-in-Kubernetes-with-Helm","type":1,"isActive":false},{"label":"Vue d'ensemble","href":"Overview","type":2,"isActive":false},{"label":"Conditions préalables","href":"Prerequisites","type":2,"isActive":false},{"label":"Installation de Milvus Helm Chart","href":"Install-Milvus-Helm-Chart","type":2,"isActive":false},{"label":"Installation en ligne","href":"Online-install","type":2,"isActive":false},{"label":"Installation hors ligne","href":"Offline-install","type":2,"isActive":false},{"label":"Mise à niveau du cluster Milvus en cours d'exécution","href":"Upgrade-running-Milvus-cluster","type":2,"isActive":false},{"label":"Désinstaller Milvus","href":"Uninstall-Milvus","type":2,"isActive":false},{"label":"Prochaines étapes","href":"Whats-next","type":2,"isActive":false}]} \ No newline at end of file diff --git a/localization/v2.4.x/site/fr/getstarted/run-milvus-k8s/install_cluster-helm.md b/localization/v2.4.x/site/fr/getstarted/run-milvus-k8s/install_cluster-helm.md index d14b9103e..c6ec1343a 100644 --- a/localization/v2.4.x/site/fr/getstarted/run-milvus-k8s/install_cluster-helm.md +++ b/localization/v2.4.x/site/fr/getstarted/run-milvus-k8s/install_cluster-helm.md @@ -67,7 +67,7 @@ NAME PROVISIONER RECLAIMPOLICY VOLUMEBIINDI

        Si vous rencontrez des problèmes en tirant l'image, contactez-nous à l'adresse community@zilliz.com en décrivant le problème et nous vous fournirons l'assistance nécessaire.

        -

        Installer Milvus Helm Chart

        Avant d'installer Milvus Helm Charts, vous devez ajouter le référentiel Milvus Helm.

-$ helm repo add milvus https://github.com/zilliztech/milvus-helm
+$ helm repo add milvus https://zilliztech.github.io/milvus-helm/
         

        Le dépôt Milvus Helm Charts à l'adresse https://github.com/milvus-io/milvus-helm a été archivé et vous pouvez obtenir des mises à jour supplémentaires à l'adresse https://github.com/zilliztech/milvus-helm comme suit :

-helm repo add zilliztech https://github.com/zilliztech/milvus-helm
+helm repo add zilliztech https://zilliztech.github.io/milvus-helm/
         helm repo update
         # upgrade existing helm release
         helm upgrade my-release zilliztech/milvus
        @@ -160,7 +160,7 @@ my-release-pulsar-zookeeper-metadata-98zbr       0/1   Completed  0        3m24s
         

        Si vous avez déployé Milvus en mode autonome, modifiez le nom du pod de my-release-milvus-proxy-xxxxxxxxxx-xxxxx à my-release-milvus-xxxxxxxxxx-xxxxx.

-Ensuite, exécutez la commande suivante pour transférer un port local vers le port auquel Milvus sert.
+Ensuite, exécutez la commande suivante pour transférer un port local vers le port sur lequel Milvus fonctionne.

        $ kubectl port-forward service/my-release-milvus 27017:19530
         Forwarding from 127.0.0.1:27017 -> 19530
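
Once the port is forwarded, a client on the local machine can reach the cluster through it. A minimal pymilvus sketch (hypothetical, not part of the original page), using the local port 27017 from the command above:

from pymilvus import connections, utility

# Connect through the locally forwarded port (27017 -> 19530 inside the cluster).
connections.connect(alias="default", host="127.0.0.1", port="27017")
print(utility.get_server_version())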
         
        @@ -184,7 +184,7 @@ my-release-pulsar-zookeeper-metadata-98zbr 0/1 Completed 0 3m24s d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z" > -

-Si vous vous trouvez dans un environnement où le réseau est limité, suivez la procédure de cette section pour démarrer un cluster Milvus.
+Si vous vous trouvez dans un environnement où le réseau est restreint, suivez la procédure de cette section pour démarrer un cluster Milvus.

        1. Obtenir le manifeste Milvus

        Exécutez la commande suivante pour obtenir le manifeste Milvus.

        $ helm template my-release milvus/milvus > milvus_manifest.yaml
         
        diff --git a/localization/v2.4.x/site/fr/userGuide/manage-indexes/index-with-gpu.json b/localization/v2.4.x/site/fr/userGuide/manage-indexes/index-with-gpu.json index 08350a0b4..8461a764f 100644 --- a/localization/v2.4.x/site/fr/userGuide/manage-indexes/index-with-gpu.json +++ b/localization/v2.4.x/site/fr/userGuide/manage-indexes/index-with-gpu.json @@ -1 +1 @@ -{"codeList":["gpu:\n initMemSize: 0 #set the initial memory pool size.\n maxMemSize: 0 #maxMemSize sets the maximum memory usage limit. When the memory usage exceed initMemSize, Milvus will attempt to expand the memory pool. \n","gpu:\n initMemSize: 2048 #set the initial memory pool size.\n maxMemSize: 4096 #maxMemSize sets the maximum memory usage limit. When the memory usage exceed initMemSize, Milvus will attempt to expand the memory pool. \n","index_params = {\n \"metric_type\": \"L2\",\n \"index_type\": \"GPU_CAGRA\",\n \"params\": {\n 'intermediate_graph_degree': 64,\n 'graph_degree': 32\n }\n}\n","index_params = {\n \"metric_type\": \"L2\",\n \"index_type\": \"GPU_IVF_FLAT\", # Or GPU_IVF_PQ\n \"params\": {\n \"nlist\": 1024\n }\n}\n","index_params = {\n 'index_type': 'GPU_BRUTE_FORCE',\n 'metric_type': 'L2',\n 'params': {}\n}\n","# Get an existing collection\ncollection = Collection(\"YOUR_COLLECTION_NAME\")\n\ncollection.create_index(\n field_name=\"vector\", # Name of the vector field on which an index is built\n index_params=index_params\n)\n","search_params = {\n \"metric_type\": \"L2\",\n \"params\": {}\n}\n","search_params = {\n \"metric_type\": \"L2\",\n \"params\": {\n \"itopk_size\": 128,\n \"search_width\": 4,\n \"min_iterations\": 0,\n \"max_iterations\": 0,\n \"team_size\": 0\n }\n}\n","search_params = {\n \"metric_type\": \"L2\", \n \"params\": {\"nprobe\": 10}\n}\n","# Load data into memory\ncollection.load()\n\ncollection.search(\n data=[[query_vector]], # Your query vector\n anns_field=\"vector\", # Name of the vector field\n param=search_params,\n limit=100 # Number of the results to return\n)\n"],"headingContent":"","anchorList":[{"label":"Index avec GPU","href":"Index-with-GPU","type":1,"isActive":false},{"label":"Configurer les paramètres Milvus pour le contrôle de la mémoire GPU","href":"Configure-Milvus-settings-for-GPU-memory-control","type":2,"isActive":false},{"label":"Création d'un index","href":"Build-an-index","type":2,"isActive":false},{"label":"Recherche","href":"Search","type":2,"isActive":false},{"label":"Limites","href":"Limits","type":2,"isActive":false},{"label":"FAQ","href":"FAQ","type":2,"isActive":false}]} \ No newline at end of file +{"codeList":["gpu:\n initMemSize: 0 #set the initial memory pool size.\n maxMemSize: 0 #maxMemSize sets the maximum memory usage limit. When the memory usage exceed initMemSize, Milvus will attempt to expand the memory pool. \n","gpu:\n initMemSize: 2048 #set the initial memory pool size.\n maxMemSize: 4096 #maxMemSize sets the maximum memory usage limit. When the memory usage exceed initMemSize, Milvus will attempt to expand the memory pool. 
\n","index_params = {\n \"metric_type\": \"L2\",\n \"index_type\": \"GPU_CAGRA\",\n \"params\": {\n 'intermediate_graph_degree': 64,\n 'graph_degree': 32\n }\n}\n","index_params = {\n \"metric_type\": \"L2\",\n \"index_type\": \"GPU_IVF_FLAT\", # Or GPU_IVF_PQ\n \"params\": {\n \"nlist\": 1024\n }\n}\n","index_params = {\n 'index_type': 'GPU_BRUTE_FORCE',\n 'metric_type': 'L2',\n 'params': {}\n}\n","# Get an existing collection\ncollection = Collection(\"YOUR_COLLECTION_NAME\")\n\ncollection.create_index(\n field_name=\"vector\", # Name of the vector field on which an index is built\n index_params=index_params\n)\n","search_params = {\n \"metric_type\": \"L2\",\n \"params\": {}\n}\n","search_params = {\n \"metric_type\": \"L2\",\n \"params\": {\n \"itopk_size\": 128,\n \"search_width\": 4,\n \"min_iterations\": 0,\n \"max_iterations\": 0,\n \"team_size\": 0\n }\n}\n","search_params = {\n \"metric_type\": \"L2\", \n \"params\": {\"nprobe\": 10}\n}\n","# Load data into memory\ncollection.load()\n\ncollection.search(\n data=[[query_vector]], # Your query vector\n anns_field=\"vector\", # Name of the vector field\n param=search_params,\n limit=100 # Number of the results to return\n)\n"],"headingContent":"Index with GPU","anchorList":[{"label":"Index avec GPU","href":"Index-with-GPU","type":1,"isActive":false},{"label":"Configurer les paramètres Milvus pour le contrôle de la mémoire GPU","href":"Configure-Milvus-settings-for-GPU-memory-control","type":2,"isActive":false},{"label":"Création d'un index","href":"Build-an-index","type":2,"isActive":false},{"label":"Recherche","href":"Search","type":2,"isActive":false},{"label":"Limites","href":"Limits","type":2,"isActive":false},{"label":"FAQ","href":"FAQ","type":2,"isActive":false}]} \ No newline at end of file diff --git a/localization/v2.4.x/site/fr/userGuide/manage-indexes/index-with-gpu.md b/localization/v2.4.x/site/fr/userGuide/manage-indexes/index-with-gpu.md index ba3e7443e..7c6d24f1d 100644 --- a/localization/v2.4.x/site/fr/userGuide/manage-indexes/index-with-gpu.md +++ b/localization/v2.4.x/site/fr/userGuide/manage-indexes/index-with-gpu.md @@ -87,7 +87,7 @@ title: Index avec GPU

        Les options possibles pour params sont les suivantes

        • intermediate_graph_degree(int) : Affecte le rappel et le temps de construction en déterminant le degré du graphe avant l'élagage. Les valeurs recommandées sont 32 ou 64.

-• graph_degree(int): Affecte les performances de recherche et le rappel en déterminant le degré du graphe après l'élagage. En règle générale, il est égal à la moitié du degré intermédiaire du graphe. Une plus grande différence entre ces deux degrés se traduit par un temps de construction plus long. Sa valeur doit être inférieure à la valeur de intermediate_graph_degree.
+• graph_degree(int) : Affecte les performances de recherche et le rappel en déterminant le degré du graphe après l'élagage. En règle générale, il est égal à la moitié du degré intermédiaire du graphe. Une plus grande différence entre ces deux degrés se traduit par un temps de construction plus long. Sa valeur doit être inférieure à la valeur de intermediate_graph_degree.

        • build_algo(chaîne) : Sélectionne l'algorithme de génération de graphe avant l'élagage. Options possibles :

          • IVF_PQ: offre une meilleure qualité mais un temps de construction plus lent.
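
Putting the CAGRA build parameters described above together, a hedged example of a complete index_params dict (the values are the illustrative ones given on this page, not tuned recommendations; NN_DESCENT is the alternative build algorithm mentioned in the corresponding sections of this patch):

index_params = {
    "metric_type": "L2",
    "index_type": "GPU_CAGRA",
    "params": {
        "intermediate_graph_degree": 64,  # graph degree before pruning
        "graph_degree": 32,               # graph degree after pruning, typically half of the value above
        "build_algo": "IVF_PQ"            # or "NN_DESCENT" for faster builds with possibly lower recall
    }
}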

          • @@ -203,9 +203,9 @@ collection.search(

            Lorsque vous utilisez des index GPU, vous devez tenir compte de certaines contraintes :

-• Pour GPU_IVF_FLAT, la valeur maximale de la limite est 256.
+• Pour GPU_IVF_FLAT, la valeur maximale de la limite est de 1024.

            • Pour GPU_IVF_PQ et GPU_CAGRA, la valeur maximale de la limite est de 1024.

-• Bien qu'il n'y ait pas de limite définie pour GPU_BRUTE_FORCE, il est recommandé de ne pas dépasser 4096 pour éviter les problèmes de performance.
+• Bien qu'il n'y ait pas de limite fixée pour GPU_BRUTE_FORCE, il est recommandé de ne pas dépasser 4096 pour éviter les problèmes de performance.

            • Actuellement, les index GPU ne prennent pas en charge la distance COSINE. Si la distance COSINE est requise, les données doivent d'abord être normalisées, puis la distance du produit intérieur (IP) peut être utilisée comme substitut.

            • La protection OOM du chargement pour les index GPU n'est pas entièrement prise en charge, une trop grande quantité de données peut entraîner le blocage du QueryNode.

            • Les index GPU ne prennent pas en charge les fonctions de recherche telles que la recherche par plage et la recherche par groupement.

            • diff --git a/localization/v2.4.x/site/it/getstarted/run-milvus-k8s/install_cluster-helm.json b/localization/v2.4.x/site/it/getstarted/run-milvus-k8s/install_cluster-helm.json index 1b7706ce0..32355e59c 100644 --- a/localization/v2.4.x/site/it/getstarted/run-milvus-k8s/install_cluster-helm.json +++ b/localization/v2.4.x/site/it/getstarted/run-milvus-k8s/install_cluster-helm.json @@ -1 +1 @@ -{"codeList":["$ kubectl get sc\n\nNAME PROVISIONER RECLAIMPOLICY VOLUMEBIINDINGMODE ALLOWVOLUMEEXPANSION AGE\nstandard (default) k8s.io/minikube-hostpath Delete Immediate false \n","$ helm repo add milvus https://github.com/zilliztech/milvus-helm\n","helm repo add zilliztech https://github.com/zilliztech/milvus-helm\nhelm repo update\n# upgrade existing helm release\nhelm upgrade my-release zilliztech/milvus\n","$ helm repo update\n","$ helm install my-release milvus/milvus\n","$ kubectl get pods\n","NAME READY STATUS RESTARTS AGE\nmy-release-etcd-0 1/1 Running 0 3m23s\nmy-release-etcd-1 1/1 Running 0 3m23s\nmy-release-etcd-2 1/1 Running 0 3m23s\nmy-release-milvus-datanode-68cb87dcbd-4khpm 1/1 Running 0 3m23s\nmy-release-milvus-indexnode-5c5f7b5bd9-l8hjg 1/1 Running 0 3m24s\nmy-release-milvus-mixcoord-7fb9488465-dmbbj 1/1 Running 0 3m23s\nmy-release-milvus-proxy-6bd7f5587-ds2xv 1/1 Running 0 3m24s\nmy-release-milvus-querynode-5cd8fff495-k6gtg 1/1 Running 0 3m24s\nmy-release-minio-0 1/1 Running 0 3m23s\nmy-release-minio-1 1/1 Running 0 3m23s\nmy-release-minio-2 1/1 Running 0 3m23s\nmy-release-minio-3 1/1 Running 0 3m23s\nmy-release-pulsar-autorecovery-86f5dbdf77-lchpc 1/1 Running 0 3m24s\nmy-release-pulsar-bookkeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-bookkeeper-1 1/1 Running 0 98s\nmy-release-pulsar-broker-556ff89d4c-2m29m 1/1 Running 0 3m23s\nmy-release-pulsar-proxy-6fbd75db75-nhg4v 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-metadata-98zbr 0/1 Completed 0 3m24s\n","$ kubectl get pod my-release-milvus-proxy-6bd7f5587-ds2xv --template\n='{{(index (index .spec.containers 0).ports 0).containerPort}}{{\"\\n\"}}'\n19530\n","$ kubectl port-forward service/my-release-milvus 27017:19530\nForwarding from 127.0.0.1:27017 -> 19530\n","$ kubectl port-forward --address 0.0.0.0 service/my-release-milvus 27017:19530\nForwarding from 0.0.0.0:27017 -> 19530\n","$ helm template my-release milvus/milvus > milvus_manifest.yaml\n","$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/requirements.txt\n$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/save_image.py\n","$ pip3 install -r requirements.txt\n$ python3 save_image.py --manifest milvus_manifest.yaml\n","$ for image in $(find . 
-type f -name \"*.tar.gz\") ; do gunzip -c $image | docker load; done\n","$ kubectl apply -f milvus_manifest.yaml\n","$ helm repo update\n$ helm upgrade my-release zilliztech/milvus\n","$ helm uninstall my-release\n"],"headingContent":"Run Milvus in Kubernetes with Helm","anchorList":[{"label":"Eseguire Milvus in Kubernetes con Helm","href":"Run-Milvus-in-Kubernetes-with-Helm","type":1,"isActive":false},{"label":"Panoramica","href":"Overview","type":2,"isActive":false},{"label":"Prerequisiti","href":"Prerequisites","type":2,"isActive":false},{"label":"Installare Milvus Helm Chart","href":"Install-Milvus-Helm-Chart","type":2,"isActive":false},{"label":"Installazione online","href":"Online-install","type":2,"isActive":false},{"label":"Installazione offline","href":"Offline-install","type":2,"isActive":false},{"label":"Aggiornamento del cluster Milvus in esecuzione","href":"Upgrade-running-Milvus-cluster","type":2,"isActive":false},{"label":"Disinstallare Milvus","href":"Uninstall-Milvus","type":2,"isActive":false},{"label":"Cosa succede dopo","href":"Whats-next","type":2,"isActive":false}]} \ No newline at end of file +{"codeList":["$ kubectl get sc\n\nNAME PROVISIONER RECLAIMPOLICY VOLUMEBIINDINGMODE ALLOWVOLUMEEXPANSION AGE\nstandard (default) k8s.io/minikube-hostpath Delete Immediate false \n","$ helm repo add milvus https://zilliztech.github.io/milvus-helm/\n","helm repo add zilliztech https://zilliztech.github.io/milvus-helm/\nhelm repo update\n# upgrade existing helm release\nhelm upgrade my-release zilliztech/milvus\n","$ helm repo update\n","$ helm install my-release milvus/milvus\n","$ kubectl get pods\n","NAME READY STATUS RESTARTS AGE\nmy-release-etcd-0 1/1 Running 0 3m23s\nmy-release-etcd-1 1/1 Running 0 3m23s\nmy-release-etcd-2 1/1 Running 0 3m23s\nmy-release-milvus-datanode-68cb87dcbd-4khpm 1/1 Running 0 3m23s\nmy-release-milvus-indexnode-5c5f7b5bd9-l8hjg 1/1 Running 0 3m24s\nmy-release-milvus-mixcoord-7fb9488465-dmbbj 1/1 Running 0 3m23s\nmy-release-milvus-proxy-6bd7f5587-ds2xv 1/1 Running 0 3m24s\nmy-release-milvus-querynode-5cd8fff495-k6gtg 1/1 Running 0 3m24s\nmy-release-minio-0 1/1 Running 0 3m23s\nmy-release-minio-1 1/1 Running 0 3m23s\nmy-release-minio-2 1/1 Running 0 3m23s\nmy-release-minio-3 1/1 Running 0 3m23s\nmy-release-pulsar-autorecovery-86f5dbdf77-lchpc 1/1 Running 0 3m24s\nmy-release-pulsar-bookkeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-bookkeeper-1 1/1 Running 0 98s\nmy-release-pulsar-broker-556ff89d4c-2m29m 1/1 Running 0 3m23s\nmy-release-pulsar-proxy-6fbd75db75-nhg4v 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-metadata-98zbr 0/1 Completed 0 3m24s\n","$ kubectl get pod my-release-milvus-proxy-6bd7f5587-ds2xv --template\n='{{(index (index .spec.containers 0).ports 0).containerPort}}{{\"\\n\"}}'\n19530\n","$ kubectl port-forward service/my-release-milvus 27017:19530\nForwarding from 127.0.0.1:27017 -> 19530\n","$ kubectl port-forward --address 0.0.0.0 service/my-release-milvus 27017:19530\nForwarding from 0.0.0.0:27017 -> 19530\n","$ helm template my-release milvus/milvus > milvus_manifest.yaml\n","$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/requirements.txt\n$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/save_image.py\n","$ pip3 install -r requirements.txt\n$ python3 save_image.py --manifest milvus_manifest.yaml\n","$ for image in $(find . 
-type f -name \"*.tar.gz\") ; do gunzip -c $image | docker load; done\n","$ kubectl apply -f milvus_manifest.yaml\n","$ helm repo update\n$ helm upgrade my-release zilliztech/milvus\n","$ helm uninstall my-release\n"],"headingContent":"Run Milvus in Kubernetes with Helm","anchorList":[{"label":"Eseguire Milvus in Kubernetes con Helm","href":"Run-Milvus-in-Kubernetes-with-Helm","type":1,"isActive":false},{"label":"Panoramica","href":"Overview","type":2,"isActive":false},{"label":"Prerequisiti","href":"Prerequisites","type":2,"isActive":false},{"label":"Installare Milvus Helm Chart","href":"Install-Milvus-Helm-Chart","type":2,"isActive":false},{"label":"Installazione online","href":"Online-install","type":2,"isActive":false},{"label":"Installazione offline","href":"Offline-install","type":2,"isActive":false},{"label":"Aggiornamento del cluster Milvus in esecuzione","href":"Upgrade-running-Milvus-cluster","type":2,"isActive":false},{"label":"Disinstallare Milvus","href":"Uninstall-Milvus","type":2,"isActive":false},{"label":"Cosa succede dopo","href":"Whats-next","type":2,"isActive":false}]} \ No newline at end of file diff --git a/localization/v2.4.x/site/it/getstarted/run-milvus-k8s/install_cluster-helm.md b/localization/v2.4.x/site/it/getstarted/run-milvus-k8s/install_cluster-helm.md index f92618c86..c6cd8831f 100644 --- a/localization/v2.4.x/site/it/getstarted/run-milvus-k8s/install_cluster-helm.md +++ b/localization/v2.4.x/site/it/getstarted/run-milvus-k8s/install_cluster-helm.md @@ -83,16 +83,16 @@ NAME PROVISIONER RECLAIMPOLICY VOLUMEBIINDI >

              Prima di installare Milvus Helm Charts, è necessario aggiungere il repository Milvus Helm.

-$ helm repo add milvus https://github.com/zilliztech/milvus-helm
+$ helm repo add milvus https://zilliztech.github.io/milvus-helm/
               

              Il repository di Milvus Helm Charts all'indirizzo https://github.com/milvus-io/milvus-helm è stato archiviato ed è possibile ottenere ulteriori aggiornamenti da https://github.com/zilliztech/milvus-helm come segue:

-helm repo add zilliztech https://github.com/zilliztech/milvus-helm
+helm repo add zilliztech https://zilliztech.github.io/milvus-helm/
               helm repo update
               # upgrade existing helm release
               helm upgrade my-release zilliztech/milvus
               
-Il repository archiviato è ancora disponibile per i grafici fino alla versione 4.0.31. Per le versioni successive, utilizzare invece il nuovo repo.
+Il repo archiviato è ancora disponibile per i grafici fino alla versione 4.0.31. Per le versioni successive, utilizzare invece il nuovo repo.

              Quindi prelevare i grafici di Milvus dal repository come segue:

              $ helm repo update
              diff --git a/localization/v2.4.x/site/it/userGuide/manage-indexes/index-with-gpu.json b/localization/v2.4.x/site/it/userGuide/manage-indexes/index-with-gpu.json
              index e7aac2780..e5143eb99 100644
              --- a/localization/v2.4.x/site/it/userGuide/manage-indexes/index-with-gpu.json
              +++ b/localization/v2.4.x/site/it/userGuide/manage-indexes/index-with-gpu.json
              @@ -1 +1 @@
              -{"codeList":["gpu:\n  initMemSize: 0 #set the initial memory pool size.\n  maxMemSize: 0 #maxMemSize sets the maximum memory usage limit. When the memory usage exceed initMemSize, Milvus will attempt to expand the memory pool. \n","gpu:\n  initMemSize: 2048 #set the initial memory pool size.\n  maxMemSize: 4096 #maxMemSize sets the maximum memory usage limit. When the memory usage exceed initMemSize, Milvus will attempt to expand the memory pool. \n","index_params = {\n    \"metric_type\": \"L2\",\n    \"index_type\": \"GPU_CAGRA\",\n    \"params\": {\n        'intermediate_graph_degree': 64,\n        'graph_degree': 32\n    }\n}\n","index_params = {\n    \"metric_type\": \"L2\",\n    \"index_type\": \"GPU_IVF_FLAT\", # Or GPU_IVF_PQ\n    \"params\": {\n        \"nlist\": 1024\n    }\n}\n","index_params = {\n    'index_type': 'GPU_BRUTE_FORCE',\n    'metric_type': 'L2',\n    'params': {}\n}\n","# Get an existing collection\ncollection = Collection(\"YOUR_COLLECTION_NAME\")\n\ncollection.create_index(\n    field_name=\"vector\", # Name of the vector field on which an index is built\n    index_params=index_params\n)\n","search_params = {\n    \"metric_type\": \"L2\",\n    \"params\": {}\n}\n","search_params = {\n    \"metric_type\": \"L2\",\n    \"params\": {\n        \"itopk_size\": 128,\n        \"search_width\": 4,\n        \"min_iterations\": 0,\n        \"max_iterations\": 0,\n        \"team_size\": 0\n    }\n}\n","search_params = {\n    \"metric_type\": \"L2\", \n    \"params\": {\"nprobe\": 10}\n}\n","# Load data into memory\ncollection.load()\n\ncollection.search(\n    data=[[query_vector]], # Your query vector\n    anns_field=\"vector\", # Name of the vector field\n    param=search_params,\n    limit=100 # Number of the results to return\n)\n"],"headingContent":"","anchorList":[{"label":"Indice con GPU","href":"Index-with-GPU","type":1,"isActive":false},{"label":"Configurare le impostazioni di Milvus per il controllo della memoria della GPU","href":"Configure-Milvus-settings-for-GPU-memory-control","type":2,"isActive":false},{"label":"Creare un indice","href":"Build-an-index","type":2,"isActive":false},{"label":"Ricerca","href":"Search","type":2,"isActive":false},{"label":"Limiti","href":"Limits","type":2,"isActive":false},{"label":"DOMANDE FREQUENTI","href":"FAQ","type":2,"isActive":false}]}
              \ No newline at end of file
              +{"codeList":["gpu:\n  initMemSize: 0 #set the initial memory pool size.\n  maxMemSize: 0 #maxMemSize sets the maximum memory usage limit. When the memory usage exceed initMemSize, Milvus will attempt to expand the memory pool. \n","gpu:\n  initMemSize: 2048 #set the initial memory pool size.\n  maxMemSize: 4096 #maxMemSize sets the maximum memory usage limit. When the memory usage exceed initMemSize, Milvus will attempt to expand the memory pool. \n","index_params = {\n    \"metric_type\": \"L2\",\n    \"index_type\": \"GPU_CAGRA\",\n    \"params\": {\n        'intermediate_graph_degree': 64,\n        'graph_degree': 32\n    }\n}\n","index_params = {\n    \"metric_type\": \"L2\",\n    \"index_type\": \"GPU_IVF_FLAT\", # Or GPU_IVF_PQ\n    \"params\": {\n        \"nlist\": 1024\n    }\n}\n","index_params = {\n    'index_type': 'GPU_BRUTE_FORCE',\n    'metric_type': 'L2',\n    'params': {}\n}\n","# Get an existing collection\ncollection = Collection(\"YOUR_COLLECTION_NAME\")\n\ncollection.create_index(\n    field_name=\"vector\", # Name of the vector field on which an index is built\n    index_params=index_params\n)\n","search_params = {\n    \"metric_type\": \"L2\",\n    \"params\": {}\n}\n","search_params = {\n    \"metric_type\": \"L2\",\n    \"params\": {\n        \"itopk_size\": 128,\n        \"search_width\": 4,\n        \"min_iterations\": 0,\n        \"max_iterations\": 0,\n        \"team_size\": 0\n    }\n}\n","search_params = {\n    \"metric_type\": \"L2\", \n    \"params\": {\"nprobe\": 10}\n}\n","# Load data into memory\ncollection.load()\n\ncollection.search(\n    data=[[query_vector]], # Your query vector\n    anns_field=\"vector\", # Name of the vector field\n    param=search_params,\n    limit=100 # Number of the results to return\n)\n"],"headingContent":"Index with GPU","anchorList":[{"label":"Indice con GPU","href":"Index-with-GPU","type":1,"isActive":false},{"label":"Configurare le impostazioni di Milvus per il controllo della memoria della GPU","href":"Configure-Milvus-settings-for-GPU-memory-control","type":2,"isActive":false},{"label":"Creare un indice","href":"Build-an-index","type":2,"isActive":false},{"label":"Ricerca","href":"Search","type":2,"isActive":false},{"label":"Limiti","href":"Limits","type":2,"isActive":false},{"label":"DOMANDE FREQUENTI","href":"FAQ","type":2,"isActive":false}]}
              \ No newline at end of file
              diff --git a/localization/v2.4.x/site/it/userGuide/manage-indexes/index-with-gpu.md b/localization/v2.4.x/site/it/userGuide/manage-indexes/index-with-gpu.md
              index 12da24839..320ed0b82 100644
              --- a/localization/v2.4.x/site/it/userGuide/manage-indexes/index-with-gpu.md
              +++ b/localization/v2.4.x/site/it/userGuide/manage-indexes/index-with-gpu.md
              @@ -40,7 +40,7 @@ title: Indice con GPU
                   

              Milvus utilizza un pool di memoria grafica globale per allocare la memoria della GPU.

              Supporta due parametri initMemSize e maxMemSize nel file di configurazione di Milvus. La dimensione del pool è inizialmente impostata su initMemSize e verrà automaticamente espansa a maxMemSize dopo aver superato questo limite.

              Il valore predefinito di initMemSize è pari a 1/2 della memoria della GPU disponibile all'avvio di Milvus, mentre il valore predefinito di maxMemSize è pari a tutta la memoria della GPU disponibile.

-Fino a Milvus 2.4.1 (compresa la versione 2.4.1), Milvus utilizzava un pool di memoria GPU unificato. Per le versioni precedenti alla 2.4.1 (compresa la versione 2.4.1), si raccomandava di impostare entrambi i valori a 0.
+Fino a Milvus 2.4.1 (compresa la versione 2.4.1), Milvus utilizzava un pool di memoria GPU unificato. Per le versioni precedenti alla 2.4.1 (inclusa la versione 2.4.1), si raccomandava di impostare entrambi i valori a 0.

              gpu:
                 initMemSize: 0 #set the initial memory pool size.
                 maxMemSize: 0 #maxMemSize sets the maximum memory usage limit. When the memory usage exceed initMemSize, Milvus will attempt to expand the memory pool. 
              @@ -93,7 +93,7 @@ title: Indice con GPU
               
            • IVF_PQ: Offre una qualità superiore ma un tempo di costruzione più lento.

            • NN_DESCENT: Fornisce una costruzione più rapida con un richiamo potenzialmente inferiore.

-• cache_dataset_on_device(stringa, "true" | "false"): Decide se mettere in cache il dataset originale nella memoria della GPU. Impostando "true" si migliora il richiamo raffinando i risultati della ricerca, mentre impostando "false" si conserva la memoria della GPU.
+• cache_dataset_on_device(stringa, "true" | "false"): Decide se mettere in cache il dataset originale nella memoria della GPU. Impostando "true" si migliora il richiamo affinando i risultati della ricerca, mentre impostando "false" si conserva la memoria della GPU.

        • IndiceGPU_IVF_FLAT o GPU_IVF_PQ

          index_params = {
          @@ -164,8 +164,8 @@ collection.create_index(
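
The index_params block above is truncated at the hunk boundary; the page's code list shows the full form, which looks like this (nlist value as given there):

index_params = {
    "metric_type": "L2",
    "index_type": "GPU_IVF_FLAT",  # Or GPU_IVF_PQ
    "params": {
        "nlist": 1024
    }
}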
           
          • itopk_size: Determina la dimensione dei risultati intermedi conservati durante la ricerca. Un valore maggiore può migliorare la ricerca a scapito delle prestazioni. Deve essere almeno uguale al valore finale top-k(limite) ed è tipicamente una potenza di 2 (ad esempio, 16, 32, 64, 128).

          • search_width: Specifica il numero di punti di ingresso nel grafo CAGRA durante la ricerca. L'aumento di questo valore può migliorare il richiamo, ma può influire sulle prestazioni della ricerca.

-• min_iterations / max_iterations: Questi parametri controllano il processo di iterazione della ricerca. Per impostazione predefinita, sono impostati su 0 e CAGRA determina automaticamente il numero di iterazioni in base a itopk_size e search_width. La regolazione manuale di questi valori può aiutare a bilanciare prestazioni e precisione.
-• team_size: Specifica il numero di thread CUDA utilizzati per calcolare la distanza metrica sulla GPU. I valori comuni sono una potenza di 2 fino a 32 (ad esempio, 2, 4, 8, 16, 32). Ha un impatto minimo sulle prestazioni della ricerca. Il valore predefinito è 0, dove Milvus seleziona automaticamente il team_size in base alla dimensione del vettore.
+• min_iterations / max_iterations: Questi parametri controllano il processo di iterazione della ricerca. Per impostazione predefinita, sono impostati su 0 e CAGRA determina automaticamente il numero di iterazioni in base a itopk_size e search_width. La regolazione manuale di questi valori può aiutare a bilanciare prestazioni e accuratezza.
+• team_size: Specifica il numero di thread CUDA utilizzati per calcolare la distanza metrica sulla GPU. I valori più comuni sono una potenza di 2 fino a 32 (ad esempio, 2, 4, 8, 16, 32). Ha un impatto minimo sulle prestazioni della ricerca. Il valore predefinito è 0, dove Milvus seleziona automaticamente il team_size in base alla dimensione del vettore.

        • IndiceGPU_IVF_FLAT o GPU_IVF_PQ

          search_params = {
          @@ -203,11 +203,11 @@ collection.search(
                 
               

          Quando si usano gli indici GPU, occorre tenere conto di alcuni vincoli:

-• Per GPU_IVF_FLAT, il valore massimo di limit è 256.
+• Per GPU_IVF_FLAT, il valore massimo per il limite è 1024.

          • Per GPU_IVF_PQ e GPU_CAGRA, il valore massimo di limit è 1024.

          • Sebbene non sia stato fissato un limite per GPU_BRUTE_FORCE, si consiglia di non superare i 4096 per evitare potenziali problemi di prestazioni.

          • Attualmente, gli indici GPU non supportano la distanza COSINE. Se è necessaria la distanza COSINE, i dati devono essere prima normalizzati e poi si può usare la distanza del prodotto interno (IP) come sostituto.

          • -
          • Il caricamento della protezione OOM per gli indici GPU non è pienamente supportato; una quantità eccessiva di dati potrebbe causare l'arresto anomalo del QueryNode.

          • +
          • Il caricamento della protezione OOM per gli indici GPU non è pienamente supportato, una quantità eccessiva di dati potrebbe causare l'arresto anomalo del QueryNode.

          • Gli indici GPU non supportano funzioni di ricerca come la ricerca per intervallo e la ricerca per raggruppamento.

          DOMANDE FREQUENTI

          Milvus Helm Chartsをインストールする前に、Milvus Helmリポジトリを追加する必要があります。

-$ helm repo add milvus https://github.com/zilliztech/milvus-helm
+$ helm repo add milvus https://zilliztech.github.io/milvus-helm/
           

          https://github.com/milvus-io/milvus-helm にある Milvus Helm Charts リポジトリはアーカイブされており、https://github.com/zilliztech/milvus-helm から以下のようにアップデートを入手することができます:

-helm repo add zilliztech https://github.com/zilliztech/milvus-helm
+helm repo add zilliztech https://zilliztech.github.io/milvus-helm/
           helm repo update
           # upgrade existing helm release
           helm upgrade my-release zilliztech/milvus
          @@ -123,7 +123,7 @@ helm upgrade my-release zilliztech/milvus
               
        • リリース名にはアルファベット、数字、ダッシュのみを使用してください。リリース名にはドットは使用できません。
        • MilvusをHelmと共にインストールする場合、デフォルトのコマンドラインはMilvusのクラスタバージョンをインストールします。Milvusをスタンドアロンでインストールする場合は、さらなる設定が必要です。
        • Kubernetesのdeprecated API migration guideによると、PodDisruptionBudgetのpolicy/v1beta1APIバージョンはv1.25で提供されなくなった。代わりにpolicy/v1APIバージョンを使用するようにマニフェストとAPIクライアントを移行することが推奨されます。
          Kubernetes v1.25以降でPodDisruptionBudgetのpolicy/v1beta1APIバージョンをまだ使用しているユーザのための回避策として、代わりに以下のコマンドを実行してmilvusをインストールすることができます:
          helm install my-release milvus/milvus --set pulsar.bookkeeper.pdb.usePolicy=false,pulsar.broker.pdb.usePolicy=false,pulsar.proxy.pdb.usePolicy=false,pulsar.zookeeper.pdb.usePolicy=false
-• 詳細はMilvus Helm Chartと Helmを参照してください。
+• 詳細については、Milvus Helm ChartおよびHelmを参照してください。

        2.Milvusクラスタのステータスの確認

        以下のコマンドを実行し、Milvusクラスタ内のすべてのPodのステータスを確認します。

        @@ -188,7 +188,7 @@ my-release-pulsar-zookeeper-metadata-98zbr 0/1 Completed 0 3m24s

        1.Milvusマニフェストの取得

        以下のコマンドを実行してMilvusマニフェストを取得します。

        $ helm template my-release milvus/milvus > milvus_manifest.yaml
         
-上記のコマンドはMilvusクラスタのチャートテンプレートをレンダリングし、その出力をmilvus_manifest.yaml という名前のマニフェストファイルに保存します。このマニフェストを使用すると、コンポーネントと依存関係を持つMilvusクラスタを個別のポッドにインストールできます。
+上記のコマンドはMilvusクラスタのチャートテンプレートをレンダリングし、その出力をmilvus_manifest.yaml という名前のマニフェストファイルに保存します。このマニフェストを使用して、コンポーネントと依存関係を持つMilvusクラスタを個別のポッドにインストールすることができます。

        • すべてのMilvusコンポーネントが単一のポッドに含まれるスタンドアロンモードでMilvusインスタンスをインストールするには、代わりにhelm template my-release --set cluster.enabled=false --set etcd.replicaCount=1 --set minio.mode=standalone --set pulsar.enabled=false milvus/milvus > milvus_manifest.yaml を実行して、スタンドアロンモードのMilvusインスタンス用のチャートテンプレートをレンダリングする必要があります。
        • @@ -284,6 +284,6 @@ $ helm upgrade my-release zilliztech/milvus
      • MilvusのデータバックアップのためのオープンソースツールであるMilvus Backupをご紹介します。

      • Milvusのデバッグとダイナミックコンフィギュレーションアップデートのためのオープンソースツール、Birdwatcherのご紹介。

-• Milvusを直感的に管理するオープンソースのGUIツールAttuをご紹介します。
+• Milvusを直感的に管理するオープンソースのGUIツールAttuをご覧ください。

      • PrometheusでMilvusを監視する

      diff --git a/localization/v2.4.x/site/ja/userGuide/manage-indexes/index-with-gpu.json b/localization/v2.4.x/site/ja/userGuide/manage-indexes/index-with-gpu.json index 7fd1f79f6..99af3d7d0 100644 --- a/localization/v2.4.x/site/ja/userGuide/manage-indexes/index-with-gpu.json +++ b/localization/v2.4.x/site/ja/userGuide/manage-indexes/index-with-gpu.json @@ -1 +1 @@ -{"codeList":["gpu:\n initMemSize: 0 #set the initial memory pool size.\n maxMemSize: 0 #maxMemSize sets the maximum memory usage limit. When the memory usage exceed initMemSize, Milvus will attempt to expand the memory pool. \n","gpu:\n initMemSize: 2048 #set the initial memory pool size.\n maxMemSize: 4096 #maxMemSize sets the maximum memory usage limit. When the memory usage exceed initMemSize, Milvus will attempt to expand the memory pool. \n","index_params = {\n \"metric_type\": \"L2\",\n \"index_type\": \"GPU_CAGRA\",\n \"params\": {\n 'intermediate_graph_degree': 64,\n 'graph_degree': 32\n }\n}\n","index_params = {\n \"metric_type\": \"L2\",\n \"index_type\": \"GPU_IVF_FLAT\", # Or GPU_IVF_PQ\n \"params\": {\n \"nlist\": 1024\n }\n}\n","index_params = {\n 'index_type': 'GPU_BRUTE_FORCE',\n 'metric_type': 'L2',\n 'params': {}\n}\n","# Get an existing collection\ncollection = Collection(\"YOUR_COLLECTION_NAME\")\n\ncollection.create_index(\n field_name=\"vector\", # Name of the vector field on which an index is built\n index_params=index_params\n)\n","search_params = {\n \"metric_type\": \"L2\",\n \"params\": {}\n}\n","search_params = {\n \"metric_type\": \"L2\",\n \"params\": {\n \"itopk_size\": 128,\n \"search_width\": 4,\n \"min_iterations\": 0,\n \"max_iterations\": 0,\n \"team_size\": 0\n }\n}\n","search_params = {\n \"metric_type\": \"L2\", \n \"params\": {\"nprobe\": 10}\n}\n","# Load data into memory\ncollection.load()\n\ncollection.search(\n data=[[query_vector]], # Your query vector\n anns_field=\"vector\", # Name of the vector field\n param=search_params,\n limit=100 # Number of the results to return\n)\n"],"headingContent":"","anchorList":[{"label":"GPUによるインデックス","href":"Index-with-GPU","type":1,"isActive":false},{"label":"GPUメモリ制御のためのMilvus設定の構成","href":"Configure-Milvus-settings-for-GPU-memory-control","type":2,"isActive":false},{"label":"インデックスの構築","href":"Build-an-index","type":2,"isActive":false},{"label":"検索","href":"Search","type":2,"isActive":false},{"label":"制限","href":"Limits","type":2,"isActive":false},{"label":"よくある質問","href":"FAQ","type":2,"isActive":false}]} \ No newline at end of file +{"codeList":["gpu:\n initMemSize: 0 #set the initial memory pool size.\n maxMemSize: 0 #maxMemSize sets the maximum memory usage limit. When the memory usage exceed initMemSize, Milvus will attempt to expand the memory pool. \n","gpu:\n initMemSize: 2048 #set the initial memory pool size.\n maxMemSize: 4096 #maxMemSize sets the maximum memory usage limit. When the memory usage exceed initMemSize, Milvus will attempt to expand the memory pool. 
\n","index_params = {\n \"metric_type\": \"L2\",\n \"index_type\": \"GPU_CAGRA\",\n \"params\": {\n 'intermediate_graph_degree': 64,\n 'graph_degree': 32\n }\n}\n","index_params = {\n \"metric_type\": \"L2\",\n \"index_type\": \"GPU_IVF_FLAT\", # Or GPU_IVF_PQ\n \"params\": {\n \"nlist\": 1024\n }\n}\n","index_params = {\n 'index_type': 'GPU_BRUTE_FORCE',\n 'metric_type': 'L2',\n 'params': {}\n}\n","# Get an existing collection\ncollection = Collection(\"YOUR_COLLECTION_NAME\")\n\ncollection.create_index(\n field_name=\"vector\", # Name of the vector field on which an index is built\n index_params=index_params\n)\n","search_params = {\n \"metric_type\": \"L2\",\n \"params\": {}\n}\n","search_params = {\n \"metric_type\": \"L2\",\n \"params\": {\n \"itopk_size\": 128,\n \"search_width\": 4,\n \"min_iterations\": 0,\n \"max_iterations\": 0,\n \"team_size\": 0\n }\n}\n","search_params = {\n \"metric_type\": \"L2\", \n \"params\": {\"nprobe\": 10}\n}\n","# Load data into memory\ncollection.load()\n\ncollection.search(\n data=[[query_vector]], # Your query vector\n anns_field=\"vector\", # Name of the vector field\n param=search_params,\n limit=100 # Number of the results to return\n)\n"],"headingContent":"Index with GPU","anchorList":[{"label":"GPUによるインデックス","href":"Index-with-GPU","type":1,"isActive":false},{"label":"GPUメモリ制御のためのMilvus設定の構成","href":"Configure-Milvus-settings-for-GPU-memory-control","type":2,"isActive":false},{"label":"インデックスの構築","href":"Build-an-index","type":2,"isActive":false},{"label":"検索","href":"Search","type":2,"isActive":false},{"label":"制限","href":"Limits","type":2,"isActive":false},{"label":"よくある質問","href":"FAQ","type":2,"isActive":false}]} \ No newline at end of file diff --git a/localization/v2.4.x/site/ja/userGuide/manage-indexes/index-with-gpu.md b/localization/v2.4.x/site/ja/userGuide/manage-indexes/index-with-gpu.md index 6c11e935c..6c199e5d1 100644 --- a/localization/v2.4.x/site/ja/userGuide/manage-indexes/index-with-gpu.md +++ b/localization/v2.4.x/site/ja/userGuide/manage-indexes/index-with-gpu.md @@ -19,7 +19,7 @@ title: GPUによるインデックス d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z" > -

-このガイドでは、MilvusでGPUをサポートしたインデックスを構築する手順を概説します。これにより、高スループットおよび高リコールシナリオにおける検索パフォーマンスを大幅に向上させることができます。MilvusでサポートされるGPUインデックスのタイプの詳細については、GPUインデックスを参照してください。
+このガイドでは、MilvusでGPUをサポートしたインデックスを構築する手順の概要を説明します。MilvusがサポートするGPUインデックスの種類については、GPUインデックスをご参照ください。

      GPUメモリ制御のためのMilvus設定の構成

      MilvusはGPUメモリを割り当てるためにグローバルグラフィックメモリプールを使用します。

      Milvus設定ファイルでinitMemSizemaxMemSize の2つのパラメータをサポートしています。プールサイズは最初はinitMemSize に設定され、この制限を超えると自動的にmaxMemSize に拡張されます。

      デフォルトのinitMemSize は Milvus 起動時に利用可能な GPU メモリの 1/2 で、デフォルトのmaxMemSize は利用可能なすべての GPU メモリと等しくなります。

-Milvus2.4.1(バージョン2.4.1を含む)までは、Milvusは統一されたGPUメモリプールを使用していました。2.4.1以前のバージョン(バージョン2.4.1を含む)では、両方の値を0に設定することが推奨されていました。
+Milvus 2.4.1(バージョン2.4.1を含む)までは、Milvusは統一されたGPUメモリプールを使用していました。2.4.1以前のバージョン(バージョン2.4.1を含む)では、両方の値を0に設定することが推奨されていました。

      gpu:
         initMemSize: 0 #set the initial memory pool size.
         maxMemSize: 0 #maxMemSize sets the maximum memory usage limit. When the memory usage exceed initMemSize, Milvus will attempt to expand the memory pool. 
      @@ -67,7 +67,7 @@ title: GPUによるインデックス
       

      インデックスパラメータの準備

      GPU インデックスパラメータを設定する際に、index_typemetric_typeparams を定義します:

      • index_type(文字列):index_type (string): ベクトル探索を加速するために使用するインデックスのタイプ。有効なオプションはGPU_CAGRAGPU_IVF_FLATGPU_IVF_PQGPU_BRUTE_FORCEです。

-• metric_type(文字列):ベクトルの類似度を測定するために使用されるメトリクスのタイプ。有効なオプションはIPとL2 です。
+• metric_type(文字列):ベクトルの類似度を測定するために使用するメトリクスのタイプ。有効なオプションはIPとL2 です。

      • params(dict):インデックス固有の構築パラメータ。このパラメータに有効なオプションは、インデックスの種類に依存します。

      以下は、異なるインデックス・タイプの構成例です:
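
One of those configurations, taken from the page's own code list, is the GPU_BRUTE_FORCE index, which needs no build parameters beyond the metric:

index_params = {
    "index_type": "GPU_BRUTE_FORCE",
    "metric_type": "L2",
    "params": {}  # no additional build parameters are required
}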

      @@ -85,7 +85,7 @@ title: GPUによるインデックス

      paramsに指定できるオプションは以下の通りです:

      • intermediate_graph_degree(int):プルーニングの前にグラフの次数を決定することで、リコールと構築時間に影響します。推奨値は32または64

-• graph_degree(int):プルーニング後のグラフ次数を設定することで、検索パフォーマンスとリコールに影響する。通常、intermediate_graph_degreeの半分である。この2つの次数の差が大きいと、構築時間が長くなる。この値はintermediate_graph_degree の値より小さくなければならない。
+• graph_degree(int):プルーニング後のグラフ次数を設定することで、検索パフォーマンスとリコールに影響する。通常、intermediate_graph_degreeの半分である。この2つの次数の差が大きいと、構築時間が長くなる。この値は、intermediate_graph_degreeの値より小さくなければならない。

      • build_algo(文字列):プルーニング前のグラフ生成アルゴリズムを選択する。可能なオプション:

        • IVF_PQ: 高品質を提供するが、構築時間がかかる。

        • @@ -163,7 +163,7 @@ collection.create_index(
        • itopk_size:検索中に保持される中間結果のサイズを決定します。この値を大きくすると、検索パフォーマンスを犠牲にして再現率が向上する可能性があります。少なくとも最終的なtop-k(限界)値と等しくなければならず、通常は2のべき乗(例:16、32、64、128)である。

        • search_width: 検索中に CAGRA グラフに入る点の数を指定する。この値を大きくすると想起率が向上するが、検索パフォーマンスに影響する可能性がある。

        • min_iterations/max_iterations:これらのパラメータは検索の反復処理を制御する。デフォルトでは0 に設定されており、CAGRA はitopk_sizesearch_width に基づいて自動的に反復回数を決定する。これらの値を手動で調整することで、性能と精度のバランスをとることができます。

-• team_size:GPU上のメトリック距離計算に使用するCUDAスレッド数を指定します。一般的な値は2のべき乗から32までです(例:2、4、8、16、32)。これは検索性能に軽微な影響を与えます。デフォルト値は0で、Milvusはベクトル次元に基づいて自動的にteam_sizeを選択します。
+• team_size:GPU上のメトリック距離計算に使用するCUDAスレッド数を指定します。一般的な値は2のべき乗から32までです(例:2、4、8、16、32)。これは検索性能に軽微な影響を与えます。デフォルト値は0で、milvusはベクトル次元に基づいて自動的にteam_sizeを選択します。

      • GPU_IVF_FLATまたはGPU_IVF_PQインデックス

        search_params = {
        @@ -201,7 +201,7 @@ collection.search(
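
This snippet is also cut off by the hunk boundary; per the page's code list, the full search_params for GPU_IVF_FLAT / GPU_IVF_PQ is simply:

search_params = {
    "metric_type": "L2",
    "params": {"nprobe": 10}  # number of IVF clusters to probe
}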
               
             

        GPU インデックスを使用する場合、特定の制約に注意してください:

-• GPU_IVF_FLAT の場合、limitの最大値は 256 です。
+• GPU_IVF_FLAT の場合、limitの最大値は 1024 です。

        • GPU_IVF_PQGPU_CAGRA の場合、limitの最大値は 1024 です。

        • GPU_BRUTE_FORCE にはlimitの上限は設定されていませんが、潜在的なパ フォーマンスの問題を避けるために 4096 を超えないことが推奨されます。

        • 現在、GPUインデックスはCOSINE距離をサポートしていません。COSINE 距離が必要な場合は、まずデータを正規化し、それから内積 (IP) 距離で代用することができます。

        • @@ -225,7 +225,7 @@ collection.search(
          • GPUインデックスはどのような場合に利用するのが適切ですか?

            -

            GPUインデックスは、高スループットや高リコールが要求される状況で特に有益です。例えば、大きなバッチを扱う場合、GPUインデックスのスループットはCPUインデックスのそれを100倍も上回ることができます。より小さなバッチを扱うシナリオでは、GPUインデックスがCPUインデックスを性能面で大きく上回る。さらに、迅速なデータ挿入が必要な場合、GPUを組み込むことで、インデックスの構築プロセスを大幅にスピードアップすることができます。

          • +

            GPUインデックスは、高スループットや高リコールが要求される状況で特に有益です。例えば、大きなバッチを扱う場合、GPUインデックスのスループットはCPUインデックスのそれを100倍も上回ることができます。より小さなバッチを扱うシナリオでは、GPUインデックスが性能の点でCPUインデックスを大きく上回ることに変わりはありません。さらに、迅速なデータ挿入が必要な場合、GPUを組み込むことで、インデックスの構築プロセスを大幅にスピードアップすることができます。

          • CAGRA、GPU_IVF_PQ、GPU_IVF_FLAT、GPU_BRUTE_FORCE などの GPU インデックスは、どのようなシナリオに最適ですか?

            CAGRA インデックスは、より多くのメモリを消費する代償はあるものの、より高いパフォーマンスを要求するシナリオに最適です。メモリの節約が優先される環境では、GPU_IVF_PQインデックスはストレージ要件を最小化するのに役立ちますが、これは精度の高い損失を伴います。GPU_IVF_FLATインデックスはバランスの取れたオプションとして機能し、性能とメモリ使用量の妥協点を提供します。最後に、GPU_BRUTE_FORCEインデックスは、網羅的検索操作のために設計されており、トラバーサル検索を実行することで、1の再現率を保証します。

          diff --git a/localization/v2.4.x/site/ko/getstarted/run-milvus-k8s/install_cluster-helm.json b/localization/v2.4.x/site/ko/getstarted/run-milvus-k8s/install_cluster-helm.json index a7cc00102..aff122121 100644 --- a/localization/v2.4.x/site/ko/getstarted/run-milvus-k8s/install_cluster-helm.json +++ b/localization/v2.4.x/site/ko/getstarted/run-milvus-k8s/install_cluster-helm.json @@ -1 +1 @@ -{"codeList":["$ kubectl get sc\n\nNAME PROVISIONER RECLAIMPOLICY VOLUMEBIINDINGMODE ALLOWVOLUMEEXPANSION AGE\nstandard (default) k8s.io/minikube-hostpath Delete Immediate false \n","$ helm repo add milvus https://github.com/zilliztech/milvus-helm\n","helm repo add zilliztech https://github.com/zilliztech/milvus-helm\nhelm repo update\n# upgrade existing helm release\nhelm upgrade my-release zilliztech/milvus\n","$ helm repo update\n","$ helm install my-release milvus/milvus\n","$ kubectl get pods\n","NAME READY STATUS RESTARTS AGE\nmy-release-etcd-0 1/1 Running 0 3m23s\nmy-release-etcd-1 1/1 Running 0 3m23s\nmy-release-etcd-2 1/1 Running 0 3m23s\nmy-release-milvus-datanode-68cb87dcbd-4khpm 1/1 Running 0 3m23s\nmy-release-milvus-indexnode-5c5f7b5bd9-l8hjg 1/1 Running 0 3m24s\nmy-release-milvus-mixcoord-7fb9488465-dmbbj 1/1 Running 0 3m23s\nmy-release-milvus-proxy-6bd7f5587-ds2xv 1/1 Running 0 3m24s\nmy-release-milvus-querynode-5cd8fff495-k6gtg 1/1 Running 0 3m24s\nmy-release-minio-0 1/1 Running 0 3m23s\nmy-release-minio-1 1/1 Running 0 3m23s\nmy-release-minio-2 1/1 Running 0 3m23s\nmy-release-minio-3 1/1 Running 0 3m23s\nmy-release-pulsar-autorecovery-86f5dbdf77-lchpc 1/1 Running 0 3m24s\nmy-release-pulsar-bookkeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-bookkeeper-1 1/1 Running 0 98s\nmy-release-pulsar-broker-556ff89d4c-2m29m 1/1 Running 0 3m23s\nmy-release-pulsar-proxy-6fbd75db75-nhg4v 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-metadata-98zbr 0/1 Completed 0 3m24s\n","$ kubectl get pod my-release-milvus-proxy-6bd7f5587-ds2xv --template\n='{{(index (index .spec.containers 0).ports 0).containerPort}}{{\"\\n\"}}'\n19530\n","$ kubectl port-forward service/my-release-milvus 27017:19530\nForwarding from 127.0.0.1:27017 -> 19530\n","$ kubectl port-forward --address 0.0.0.0 service/my-release-milvus 27017:19530\nForwarding from 0.0.0.0:27017 -> 19530\n","$ helm template my-release milvus/milvus > milvus_manifest.yaml\n","$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/requirements.txt\n$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/save_image.py\n","$ pip3 install -r requirements.txt\n$ python3 save_image.py --manifest milvus_manifest.yaml\n","$ for image in $(find . 
-type f -name \"*.tar.gz\") ; do gunzip -c $image | docker load; done\n","$ kubectl apply -f milvus_manifest.yaml\n","$ helm repo update\n$ helm upgrade my-release zilliztech/milvus\n","$ helm uninstall my-release\n"],"headingContent":"Run Milvus in Kubernetes with Helm","anchorList":[{"label":"헬름으로 쿠버네티스에서 밀버스 실행하기","href":"Run-Milvus-in-Kubernetes-with-Helm","type":1,"isActive":false},{"label":"개요","href":"Overview","type":2,"isActive":false},{"label":"전제 조건","href":"Prerequisites","type":2,"isActive":false},{"label":"Milvus 헬름 차트 설치","href":"Install-Milvus-Helm-Chart","type":2,"isActive":false},{"label":"온라인 설치","href":"Online-install","type":2,"isActive":false},{"label":"오프라인 설치","href":"Offline-install","type":2,"isActive":false},{"label":"실행 중인 Milvus 클러스터 업그레이드","href":"Upgrade-running-Milvus-cluster","type":2,"isActive":false},{"label":"Milvus 제거","href":"Uninstall-Milvus","type":2,"isActive":false},{"label":"다음 단계","href":"Whats-next","type":2,"isActive":false}]} \ No newline at end of file +{"codeList":["$ kubectl get sc\n\nNAME PROVISIONER RECLAIMPOLICY VOLUMEBIINDINGMODE ALLOWVOLUMEEXPANSION AGE\nstandard (default) k8s.io/minikube-hostpath Delete Immediate false \n","$ helm repo add milvus https://zilliztech.github.io/milvus-helm/\n","helm repo add zilliztech https://zilliztech.github.io/milvus-helm/\nhelm repo update\n# upgrade existing helm release\nhelm upgrade my-release zilliztech/milvus\n","$ helm repo update\n","$ helm install my-release milvus/milvus\n","$ kubectl get pods\n","NAME READY STATUS RESTARTS AGE\nmy-release-etcd-0 1/1 Running 0 3m23s\nmy-release-etcd-1 1/1 Running 0 3m23s\nmy-release-etcd-2 1/1 Running 0 3m23s\nmy-release-milvus-datanode-68cb87dcbd-4khpm 1/1 Running 0 3m23s\nmy-release-milvus-indexnode-5c5f7b5bd9-l8hjg 1/1 Running 0 3m24s\nmy-release-milvus-mixcoord-7fb9488465-dmbbj 1/1 Running 0 3m23s\nmy-release-milvus-proxy-6bd7f5587-ds2xv 1/1 Running 0 3m24s\nmy-release-milvus-querynode-5cd8fff495-k6gtg 1/1 Running 0 3m24s\nmy-release-minio-0 1/1 Running 0 3m23s\nmy-release-minio-1 1/1 Running 0 3m23s\nmy-release-minio-2 1/1 Running 0 3m23s\nmy-release-minio-3 1/1 Running 0 3m23s\nmy-release-pulsar-autorecovery-86f5dbdf77-lchpc 1/1 Running 0 3m24s\nmy-release-pulsar-bookkeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-bookkeeper-1 1/1 Running 0 98s\nmy-release-pulsar-broker-556ff89d4c-2m29m 1/1 Running 0 3m23s\nmy-release-pulsar-proxy-6fbd75db75-nhg4v 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-metadata-98zbr 0/1 Completed 0 3m24s\n","$ kubectl get pod my-release-milvus-proxy-6bd7f5587-ds2xv --template\n='{{(index (index .spec.containers 0).ports 0).containerPort}}{{\"\\n\"}}'\n19530\n","$ kubectl port-forward service/my-release-milvus 27017:19530\nForwarding from 127.0.0.1:27017 -> 19530\n","$ kubectl port-forward --address 0.0.0.0 service/my-release-milvus 27017:19530\nForwarding from 0.0.0.0:27017 -> 19530\n","$ helm template my-release milvus/milvus > milvus_manifest.yaml\n","$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/requirements.txt\n$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/save_image.py\n","$ pip3 install -r requirements.txt\n$ python3 save_image.py --manifest milvus_manifest.yaml\n","$ for image in $(find . 
-type f -name \"*.tar.gz\") ; do gunzip -c $image | docker load; done\n","$ kubectl apply -f milvus_manifest.yaml\n","$ helm repo update\n$ helm upgrade my-release zilliztech/milvus\n","$ helm uninstall my-release\n"],"headingContent":"Run Milvus in Kubernetes with Helm","anchorList":[{"label":"헬름으로 쿠버네티스에서 밀버스 실행하기","href":"Run-Milvus-in-Kubernetes-with-Helm","type":1,"isActive":false},{"label":"개요","href":"Overview","type":2,"isActive":false},{"label":"전제 조건","href":"Prerequisites","type":2,"isActive":false},{"label":"Milvus 헬름 차트 설치","href":"Install-Milvus-Helm-Chart","type":2,"isActive":false},{"label":"온라인 설치","href":"Online-install","type":2,"isActive":false},{"label":"오프라인 설치","href":"Offline-install","type":2,"isActive":false},{"label":"실행 중인 Milvus 클러스터 업그레이드","href":"Upgrade-running-Milvus-cluster","type":2,"isActive":false},{"label":"Milvus 제거","href":"Uninstall-Milvus","type":2,"isActive":false},{"label":"다음 단계","href":"Whats-next","type":2,"isActive":false}]} \ No newline at end of file diff --git a/localization/v2.4.x/site/ko/getstarted/run-milvus-k8s/install_cluster-helm.md b/localization/v2.4.x/site/ko/getstarted/run-milvus-k8s/install_cluster-helm.md index 44777eb93..e5f0673e7 100644 --- a/localization/v2.4.x/site/ko/getstarted/run-milvus-k8s/install_cluster-helm.md +++ b/localization/v2.4.x/site/ko/getstarted/run-milvus-k8s/install_cluster-helm.md @@ -83,11 +83,11 @@ NAME PROVISIONER RECLAIMPOLICY VOLUMEBIINDI >

Before installing the Milvus Helm chart, you need to add the Milvus Helm repository.

          -
          $ helm repo add milvus https://github.com/zilliztech/milvus-helm
          +
          $ helm repo add milvus https://zilliztech.github.io/milvus-helm/
           

The Milvus Helm Charts repository at https://github.com/milvus-io/milvus-helm has been archived, and you can get further updates from https://github.com/zilliztech/milvus-helm as follows:

          -
          helm repo add zilliztech https://github.com/zilliztech/milvus-helm
          +
          helm repo add zilliztech https://zilliztech.github.io/milvus-helm/
           helm repo update
           # upgrade existing helm release
           helm upgrade my-release zilliztech/milvus
          @@ -122,7 +122,7 @@ helm upgrade my-release zilliztech/milvus
             
• The release name should contain only letters, numbers, and dashes. Dots are not allowed in the release name.
• The default command line installs the cluster version of Milvus while installing Milvus with Helm. Further settings are needed when installing Milvus standalone.
• -
• According to the deprecated API migration guide of Kubernetes, the policy/v1beta1 API version of PodDisruptionBudget is no longer served as of v1.25. You are suggested to migrate manifests and API clients to use the policy/v1 API version instead.
  As a workaround for users who still use the policy/v1beta1 API version of PodDisruptionBudget on Kubernetes v1.25 and above, you can run the following command to install Milvus:
  helm install my-release milvus/milvus --set pulsar.bookkeeper.pdb.usePolicy=false,pulsar.broker.pdb.usePolicy=false,pulsar.proxy.pdb.usePolicy=false,pulsar.zookeeper.pdb.usePolicy=false
• +
• According to the deprecated API migration guide of Kubernetes, starting from version v1.25 the policy/v1beta1 API version of PodDisruptionBudget is no longer served. You are suggested to migrate manifests and API clients to use the policy/v1 API version instead.
  As a workaround for users still using the policy/v1beta1 API version of PodDisruptionBudget on Kubernetes v1.25 and above, you can instead run the following command to install Milvus:
  helm install my-release milvus/milvus --set pulsar.bookkeeper.pdb.usePolicy=false,pulsar.broker.pdb.usePolicy=false,pulsar.proxy.pdb.usePolicy=false,pulsar.zookeeper.pdb.usePolicy=false
• See Milvus Helm Chart and Helm for more information.
          @@ -188,7 +188,7 @@ my-release-pulsar-zookeeper-metadata-98zbr 0/1 Completed 0 3m24s

1. Get the Milvus manifest

Run the following command to get the Milvus manifest.

          $ helm template my-release milvus/milvus > milvus_manifest.yaml
           
          -

The above command renders the chart templates for the Milvus cluster and saves its output to a manifest file named milvus_manifest.yaml. Using this manifest, you can install a Milvus cluster with its components and dependencies in separate pods.

          +

The above command renders the chart templates for the Milvus cluster and saves the output to a manifest file named milvus_manifest.yaml. Using this manifest, you can install a Milvus cluster with its components and dependencies in separate pods.

• To install a Milvus instance in standalone mode, where all Milvus components are contained within a single pod, you should instead run helm template my-release --set cluster.enabled=false --set etcd.replicaCount=1 --set minio.mode=standalone --set pulsar.enabled=false milvus/milvus > milvus_manifest.yaml to render the chart templates for a Milvus instance in standalone mode (a condensed sketch combining the render and apply steps follows).
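For reference, a condensed sketch that combines the standalone render step quoted above with the apply step used later in this guide (the same commands that appear on this page, shown together):

# Render the chart templates for a standalone Milvus instance
helm template my-release --set cluster.enabled=false --set etcd.replicaCount=1 --set minio.mode=standalone --set pulsar.enabled=false milvus/milvus > milvus_manifest.yaml
# Apply the rendered manifest to the Kubernetes cluster
kubectl apply -f milvus_manifest.yaml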
          • diff --git a/localization/v2.4.x/site/ko/userGuide/manage-indexes/index-with-gpu.json b/localization/v2.4.x/site/ko/userGuide/manage-indexes/index-with-gpu.json index 8f0267800..9c8406896 100644 --- a/localization/v2.4.x/site/ko/userGuide/manage-indexes/index-with-gpu.json +++ b/localization/v2.4.x/site/ko/userGuide/manage-indexes/index-with-gpu.json @@ -1 +1 @@ -{"codeList":["gpu:\n initMemSize: 0 #set the initial memory pool size.\n maxMemSize: 0 #maxMemSize sets the maximum memory usage limit. When the memory usage exceed initMemSize, Milvus will attempt to expand the memory pool. \n","gpu:\n initMemSize: 2048 #set the initial memory pool size.\n maxMemSize: 4096 #maxMemSize sets the maximum memory usage limit. When the memory usage exceed initMemSize, Milvus will attempt to expand the memory pool. \n","index_params = {\n \"metric_type\": \"L2\",\n \"index_type\": \"GPU_CAGRA\",\n \"params\": {\n 'intermediate_graph_degree': 64,\n 'graph_degree': 32\n }\n}\n","index_params = {\n \"metric_type\": \"L2\",\n \"index_type\": \"GPU_IVF_FLAT\", # Or GPU_IVF_PQ\n \"params\": {\n \"nlist\": 1024\n }\n}\n","index_params = {\n 'index_type': 'GPU_BRUTE_FORCE',\n 'metric_type': 'L2',\n 'params': {}\n}\n","# Get an existing collection\ncollection = Collection(\"YOUR_COLLECTION_NAME\")\n\ncollection.create_index(\n field_name=\"vector\", # Name of the vector field on which an index is built\n index_params=index_params\n)\n","search_params = {\n \"metric_type\": \"L2\",\n \"params\": {}\n}\n","search_params = {\n \"metric_type\": \"L2\",\n \"params\": {\n \"itopk_size\": 128,\n \"search_width\": 4,\n \"min_iterations\": 0,\n \"max_iterations\": 0,\n \"team_size\": 0\n }\n}\n","search_params = {\n \"metric_type\": \"L2\", \n \"params\": {\"nprobe\": 10}\n}\n","# Load data into memory\ncollection.load()\n\ncollection.search(\n data=[[query_vector]], # Your query vector\n anns_field=\"vector\", # Name of the vector field\n param=search_params,\n limit=100 # Number of the results to return\n)\n"],"headingContent":"","anchorList":[{"label":"GPU를 사용한 색인","href":"Index-with-GPU","type":1,"isActive":false},{"label":"GPU 메모리 제어를 위한 Milvus 설정 구성하기","href":"Configure-Milvus-settings-for-GPU-memory-control","type":2,"isActive":false},{"label":"인덱스 구축","href":"Build-an-index","type":2,"isActive":false},{"label":"검색","href":"Search","type":2,"isActive":false},{"label":"제한 사항","href":"Limits","type":2,"isActive":false},{"label":"FAQ","href":"FAQ","type":2,"isActive":false}]} \ No newline at end of file +{"codeList":["gpu:\n initMemSize: 0 #set the initial memory pool size.\n maxMemSize: 0 #maxMemSize sets the maximum memory usage limit. When the memory usage exceed initMemSize, Milvus will attempt to expand the memory pool. \n","gpu:\n initMemSize: 2048 #set the initial memory pool size.\n maxMemSize: 4096 #maxMemSize sets the maximum memory usage limit. When the memory usage exceed initMemSize, Milvus will attempt to expand the memory pool. 
\n","index_params = {\n \"metric_type\": \"L2\",\n \"index_type\": \"GPU_CAGRA\",\n \"params\": {\n 'intermediate_graph_degree': 64,\n 'graph_degree': 32\n }\n}\n","index_params = {\n \"metric_type\": \"L2\",\n \"index_type\": \"GPU_IVF_FLAT\", # Or GPU_IVF_PQ\n \"params\": {\n \"nlist\": 1024\n }\n}\n","index_params = {\n 'index_type': 'GPU_BRUTE_FORCE',\n 'metric_type': 'L2',\n 'params': {}\n}\n","# Get an existing collection\ncollection = Collection(\"YOUR_COLLECTION_NAME\")\n\ncollection.create_index(\n field_name=\"vector\", # Name of the vector field on which an index is built\n index_params=index_params\n)\n","search_params = {\n \"metric_type\": \"L2\",\n \"params\": {}\n}\n","search_params = {\n \"metric_type\": \"L2\",\n \"params\": {\n \"itopk_size\": 128,\n \"search_width\": 4,\n \"min_iterations\": 0,\n \"max_iterations\": 0,\n \"team_size\": 0\n }\n}\n","search_params = {\n \"metric_type\": \"L2\", \n \"params\": {\"nprobe\": 10}\n}\n","# Load data into memory\ncollection.load()\n\ncollection.search(\n data=[[query_vector]], # Your query vector\n anns_field=\"vector\", # Name of the vector field\n param=search_params,\n limit=100 # Number of the results to return\n)\n"],"headingContent":"Index with GPU","anchorList":[{"label":"GPU를 사용한 색인","href":"Index-with-GPU","type":1,"isActive":false},{"label":"GPU 메모리 제어를 위한 Milvus 설정 구성하기","href":"Configure-Milvus-settings-for-GPU-memory-control","type":2,"isActive":false},{"label":"인덱스 구축","href":"Build-an-index","type":2,"isActive":false},{"label":"검색","href":"Search","type":2,"isActive":false},{"label":"제한 사항","href":"Limits","type":2,"isActive":false},{"label":"FAQ","href":"FAQ","type":2,"isActive":false}]} \ No newline at end of file diff --git a/localization/v2.4.x/site/ko/userGuide/manage-indexes/index-with-gpu.md b/localization/v2.4.x/site/ko/userGuide/manage-indexes/index-with-gpu.md index 34ebbc03a..001788a27 100644 --- a/localization/v2.4.x/site/ko/userGuide/manage-indexes/index-with-gpu.md +++ b/localization/v2.4.x/site/ko/userGuide/manage-indexes/index-with-gpu.md @@ -19,7 +19,7 @@ title: GPU를 사용한 색인 d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z" > -

This guide outlines the steps to build an index with GPU support in Milvus, which can significantly improve search performance in high-throughput and high-recall scenarios. For details on the types of GPU indexes supported by Milvus, refer to GPU Index.

            +

This guide outlines the steps to build an index with GPU support in Milvus, which can significantly improve search performance in high-throughput and high-recall scenarios. For details on the types of GPU indexes supported by Milvus, refer to GPU Index.

Configure Milvus settings for GPU memory control

Milvus uses a global graphics memory pool to allocate GPU memory.

It supports two parameters, initMemSize and maxMemSize, in the Milvus config file. The pool size is initially set to initMemSize and automatically expands to maxMemSize once this limit is exceeded.

The default initMemSize is 1/2 of the available GPU memory when Milvus starts, and the default maxMemSize is equal to all available GPU memory.

            -

Up until Milvus 2.4.1 (version 2.4.1 included), Milvus used a unified GPU memory pool. For versions prior to 2.4.1 (version 2.4.1 included), it is recommended to set both values to 0.

            +

Up until Milvus 2.4.1 (version 2.4.1 included), Milvus used a unified GPU memory pool. For versions prior to 2.4.1 (version 2.4.1 included), it was recommended to set both values to 0.

            gpu:
               initMemSize: 0 #set the initial memory pool size.
               maxMemSize: 0 #maxMemSize sets the maximum memory usage limit. When the memory usage exceed initMemSize, Milvus will attempt to expand the memory pool. 
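For Milvus versions later than 2.4.1, the pool can instead be given explicit bounds. The values below mirror the second configuration example shipped on this page and are illustrative only:

gpu:
   initMemSize: 2048 #set the initial memory pool size.
   maxMemSize: 4096 #maxMemSize sets the maximum memory usage limit. When the memory usage exceeds initMemSize, Milvus will attempt to expand the memory pool.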
            @@ -112,7 +112,7 @@ title: GPU를 사용한 색인
             

No extra parameter configuration is needed.

          -

Build the index

After configuring the index parameters in index_params, call the create_index() method to build the index.

          +

Build the index

After configuring the index parameters in index_params, call the create_index() method to build the index.

          # Get an existing collection
           collection = Collection("YOUR_COLLECTION_NAME")
           
          @@ -160,9 +160,9 @@ collection.create_index(
           

The key search parameters include:

            -
• itopk_size: Determines the size of the intermediate results kept during the search. A larger value may improve recall at the expense of search performance. It should be at least equal to the final top-k (limit) value and is typically a power of 2 (e.g., 16, 32, 64, 128).

• -
• search_width: Specifies the number of entry points into the CAGRA graph during the search. Increasing this value can improve recall, but may impact search performance.

• -
• min_iterations / max_iterations: These parameters control the search iteration process. By default, the values are set to 0, and CAGRA automatically determines the number of iterations based on itopk_size and search_width. Adjusting these values manually can help balance performance and accuracy.

• +
• itopk_size: Determines the size of the intermediate results kept during the search. A larger value may improve recall at the expense of search performance. It should be at least equal to the final top-k (limit) value and is typically a power of 2 (e.g., 16, 32, 64, 128).

• +
• search_width: Specifies the number of entry points into the CAGRA graph during the search. Increasing this value can improve recall, but may affect search performance.

• +
• min_iterations / max_iterations: These parameters control the search iteration process. By default, these values are set to 0, and CAGRA automatically determines the number of iterations based on itopk_size and search_width. Adjusting these values manually can help balance performance and accuracy.

• team_size: Specifies the number of CUDA threads used to calculate the metric distance on the GPU. Common values are powers of 2 up to 32 (e.g., 2, 4, 8, 16, 32). It has a minor impact on search performance. The default value is 0, in which case Milvus automatically selects team_size based on the vector dimension.

• GPU_IVF_FLAT or GPU_IVF_PQ index

          @@ -201,10 +201,10 @@ collection.search(

Keep in mind certain constraints when using GPU indexes:

            -
• For GPU_IVF_FLAT, the maximum value for limit is 256.

• +
• For GPU_IVF_FLAT, the maximum value for limit is 1024.

• For GPU_IVF_PQ and GPU_CAGRA, the maximum value for limit is 1024.

• Although no limit is set for GPU_BRUTE_FORCE, it is recommended not to exceed 4096 to avoid potential performance issues.

• -
• Currently, GPU indexes do not support COSINE distance. If COSINE distance is required, the data can be normalized first and then inner product (IP) distance can be used as a substitute.

• +
• Currently, GPU indexes do not support COSINE distance. If COSINE distance is required, normalize the data first and then use inner product (IP) distance as a substitute; a minimal sketch of this workaround follows this list.

• Load OOM protection for GPU indexes is not fully supported; loading too much data may cause the QueryNode to crash.

• GPU indexes do not support search functions such as range search and grouping search.
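As mentioned in the COSINE note above, here is a minimal Python sketch of that workaround, assuming the stored vectors are unit-normalized and using placeholder collection and field names:

import numpy as np
from pymilvus import Collection

def normalize(vectors: np.ndarray) -> np.ndarray:
    # Scale each row to unit L2 norm so that inner product equals cosine similarity
    norms = np.linalg.norm(vectors, axis=1, keepdims=True)
    return vectors / np.clip(norms, 1e-12, None)

# Note: the vectors stored in the collection must be normalized the same way.
collection = Collection("YOUR_COLLECTION_NAME")  # placeholder collection name
query = normalize(np.random.rand(1, 128).astype(np.float32))  # placeholder query vector

collection.search(
    data=query.tolist(),
    anns_field="vector",                       # placeholder vector field name
    param={"metric_type": "IP", "params": {}}, # IP on normalized vectors ~ COSINE
    limit=10,
)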

          @@ -227,5 +227,5 @@ collection.search(
• When is it appropriate to use a GPU index?

  A GPU index is especially useful in situations that demand high throughput or high recall. For example, when processing large batches, the throughput of GPU indexing can exceed that of CPU indexing by as much as 100 times. In scenarios with smaller batches, GPU indexing still significantly outperforms CPU indexing. Furthermore, when fast data insertion is required, incorporating the GPU can substantially speed up the index-building process.

• For which scenarios are GPU indexes such as CAGRA, GPU_IVF_PQ, GPU_IVF_FLAT, and GPU_BRUTE_FORCE best suited?

          -

The CAGRA index is ideal for scenarios that demand enhanced performance at the cost of consuming more memory. For environments where memory conservation is a priority, the GPU_IVF_PQ index can help minimize storage requirements, although it comes with a higher loss of precision. The GPU_IVF_FLAT index is a balanced option that offers a compromise between performance and memory usage. Finally, the GPU_BRUTE_FORCE index is designed for exhaustive search operations, guaranteeing a recall rate of 1 by performing traversal searches.

        • +

The CAGRA index is ideal for scenarios that demand enhanced performance at the cost of consuming more memory. For environments where memory conservation is a priority, the GPU_IVF_PQ index can help minimize storage requirements, although it comes with a higher loss of precision. The GPU_IVF_FLAT index serves as a balanced option, offering a compromise between performance and memory usage. Finally, the GPU_BRUTE_FORCE index is designed for exhaustive search operations, guaranteeing a recall rate of 1 by performing traversal searches.

        diff --git a/localization/v2.4.x/site/pt/getstarted/run-milvus-k8s/install_cluster-helm.json b/localization/v2.4.x/site/pt/getstarted/run-milvus-k8s/install_cluster-helm.json index 65ce6cfec..efea2ba53 100644 --- a/localization/v2.4.x/site/pt/getstarted/run-milvus-k8s/install_cluster-helm.json +++ b/localization/v2.4.x/site/pt/getstarted/run-milvus-k8s/install_cluster-helm.json @@ -1 +1 @@ -{"codeList":["$ kubectl get sc\n\nNAME PROVISIONER RECLAIMPOLICY VOLUMEBIINDINGMODE ALLOWVOLUMEEXPANSION AGE\nstandard (default) k8s.io/minikube-hostpath Delete Immediate false \n","$ helm repo add milvus https://github.com/zilliztech/milvus-helm\n","helm repo add zilliztech https://github.com/zilliztech/milvus-helm\nhelm repo update\n# upgrade existing helm release\nhelm upgrade my-release zilliztech/milvus\n","$ helm repo update\n","$ helm install my-release milvus/milvus\n","$ kubectl get pods\n","NAME READY STATUS RESTARTS AGE\nmy-release-etcd-0 1/1 Running 0 3m23s\nmy-release-etcd-1 1/1 Running 0 3m23s\nmy-release-etcd-2 1/1 Running 0 3m23s\nmy-release-milvus-datanode-68cb87dcbd-4khpm 1/1 Running 0 3m23s\nmy-release-milvus-indexnode-5c5f7b5bd9-l8hjg 1/1 Running 0 3m24s\nmy-release-milvus-mixcoord-7fb9488465-dmbbj 1/1 Running 0 3m23s\nmy-release-milvus-proxy-6bd7f5587-ds2xv 1/1 Running 0 3m24s\nmy-release-milvus-querynode-5cd8fff495-k6gtg 1/1 Running 0 3m24s\nmy-release-minio-0 1/1 Running 0 3m23s\nmy-release-minio-1 1/1 Running 0 3m23s\nmy-release-minio-2 1/1 Running 0 3m23s\nmy-release-minio-3 1/1 Running 0 3m23s\nmy-release-pulsar-autorecovery-86f5dbdf77-lchpc 1/1 Running 0 3m24s\nmy-release-pulsar-bookkeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-bookkeeper-1 1/1 Running 0 98s\nmy-release-pulsar-broker-556ff89d4c-2m29m 1/1 Running 0 3m23s\nmy-release-pulsar-proxy-6fbd75db75-nhg4v 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-metadata-98zbr 0/1 Completed 0 3m24s\n","$ kubectl get pod my-release-milvus-proxy-6bd7f5587-ds2xv --template\n='{{(index (index .spec.containers 0).ports 0).containerPort}}{{\"\\n\"}}'\n19530\n","$ kubectl port-forward service/my-release-milvus 27017:19530\nForwarding from 127.0.0.1:27017 -> 19530\n","$ kubectl port-forward --address 0.0.0.0 service/my-release-milvus 27017:19530\nForwarding from 0.0.0.0:27017 -> 19530\n","$ helm template my-release milvus/milvus > milvus_manifest.yaml\n","$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/requirements.txt\n$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/save_image.py\n","$ pip3 install -r requirements.txt\n$ python3 save_image.py --manifest milvus_manifest.yaml\n","$ for image in $(find . 
-type f -name \"*.tar.gz\") ; do gunzip -c $image | docker load; done\n","$ kubectl apply -f milvus_manifest.yaml\n","$ helm repo update\n$ helm upgrade my-release zilliztech/milvus\n","$ helm uninstall my-release\n"],"headingContent":"Run Milvus in Kubernetes with Helm","anchorList":[{"label":"Executar o Milvus no Kubernetes com o Helm","href":"Run-Milvus-in-Kubernetes-with-Helm","type":1,"isActive":false},{"label":"Visão geral","href":"Overview","type":2,"isActive":false},{"label":"Pré-requisitos","href":"Prerequisites","type":2,"isActive":false},{"label":"Instalar o Milvus Helm Chart","href":"Install-Milvus-Helm-Chart","type":2,"isActive":false},{"label":"Instalação online","href":"Online-install","type":2,"isActive":false},{"label":"Instalação offline","href":"Offline-install","type":2,"isActive":false},{"label":"Atualizar o cluster do Milvus em execução","href":"Upgrade-running-Milvus-cluster","type":2,"isActive":false},{"label":"Desinstalar o Milvus","href":"Uninstall-Milvus","type":2,"isActive":false},{"label":"O que vem a seguir","href":"Whats-next","type":2,"isActive":false}]} \ No newline at end of file +{"codeList":["$ kubectl get sc\n\nNAME PROVISIONER RECLAIMPOLICY VOLUMEBIINDINGMODE ALLOWVOLUMEEXPANSION AGE\nstandard (default) k8s.io/minikube-hostpath Delete Immediate false \n","$ helm repo add milvus https://zilliztech.github.io/milvus-helm/\n","helm repo add zilliztech https://zilliztech.github.io/milvus-helm/\nhelm repo update\n# upgrade existing helm release\nhelm upgrade my-release zilliztech/milvus\n","$ helm repo update\n","$ helm install my-release milvus/milvus\n","$ kubectl get pods\n","NAME READY STATUS RESTARTS AGE\nmy-release-etcd-0 1/1 Running 0 3m23s\nmy-release-etcd-1 1/1 Running 0 3m23s\nmy-release-etcd-2 1/1 Running 0 3m23s\nmy-release-milvus-datanode-68cb87dcbd-4khpm 1/1 Running 0 3m23s\nmy-release-milvus-indexnode-5c5f7b5bd9-l8hjg 1/1 Running 0 3m24s\nmy-release-milvus-mixcoord-7fb9488465-dmbbj 1/1 Running 0 3m23s\nmy-release-milvus-proxy-6bd7f5587-ds2xv 1/1 Running 0 3m24s\nmy-release-milvus-querynode-5cd8fff495-k6gtg 1/1 Running 0 3m24s\nmy-release-minio-0 1/1 Running 0 3m23s\nmy-release-minio-1 1/1 Running 0 3m23s\nmy-release-minio-2 1/1 Running 0 3m23s\nmy-release-minio-3 1/1 Running 0 3m23s\nmy-release-pulsar-autorecovery-86f5dbdf77-lchpc 1/1 Running 0 3m24s\nmy-release-pulsar-bookkeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-bookkeeper-1 1/1 Running 0 98s\nmy-release-pulsar-broker-556ff89d4c-2m29m 1/1 Running 0 3m23s\nmy-release-pulsar-proxy-6fbd75db75-nhg4v 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-metadata-98zbr 0/1 Completed 0 3m24s\n","$ kubectl get pod my-release-milvus-proxy-6bd7f5587-ds2xv --template\n='{{(index (index .spec.containers 0).ports 0).containerPort}}{{\"\\n\"}}'\n19530\n","$ kubectl port-forward service/my-release-milvus 27017:19530\nForwarding from 127.0.0.1:27017 -> 19530\n","$ kubectl port-forward --address 0.0.0.0 service/my-release-milvus 27017:19530\nForwarding from 0.0.0.0:27017 -> 19530\n","$ helm template my-release milvus/milvus > milvus_manifest.yaml\n","$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/requirements.txt\n$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/save_image.py\n","$ pip3 install -r requirements.txt\n$ python3 save_image.py --manifest milvus_manifest.yaml\n","$ for image in $(find . 
-type f -name \"*.tar.gz\") ; do gunzip -c $image | docker load; done\n","$ kubectl apply -f milvus_manifest.yaml\n","$ helm repo update\n$ helm upgrade my-release zilliztech/milvus\n","$ helm uninstall my-release\n"],"headingContent":"Run Milvus in Kubernetes with Helm","anchorList":[{"label":"Executar o Milvus no Kubernetes com o Helm","href":"Run-Milvus-in-Kubernetes-with-Helm","type":1,"isActive":false},{"label":"Visão geral","href":"Overview","type":2,"isActive":false},{"label":"Pré-requisitos","href":"Prerequisites","type":2,"isActive":false},{"label":"Instalar o Milvus Helm Chart","href":"Install-Milvus-Helm-Chart","type":2,"isActive":false},{"label":"Instalação online","href":"Online-install","type":2,"isActive":false},{"label":"Instalação offline","href":"Offline-install","type":2,"isActive":false},{"label":"Atualizar o cluster do Milvus em execução","href":"Upgrade-running-Milvus-cluster","type":2,"isActive":false},{"label":"Desinstalar o Milvus","href":"Uninstall-Milvus","type":2,"isActive":false},{"label":"O que vem a seguir","href":"Whats-next","type":2,"isActive":false}]} \ No newline at end of file diff --git a/localization/v2.4.x/site/pt/getstarted/run-milvus-k8s/install_cluster-helm.md b/localization/v2.4.x/site/pt/getstarted/run-milvus-k8s/install_cluster-helm.md index 3bf77f479..d91a561e0 100644 --- a/localization/v2.4.x/site/pt/getstarted/run-milvus-k8s/install_cluster-helm.md +++ b/localization/v2.4.x/site/pt/getstarted/run-milvus-k8s/install_cluster-helm.md @@ -62,7 +62,7 @@ NAME PROVISIONER RECLAIMPOLICY VOLUMEBIINDI standard (default) k8s.io/minikube-hostpath Delete Immediate false
• Check the hardware and software requirements before installation.

      • -
• Before installing Milvus, it is recommended to use the Milvus Sizing Tool to estimate the hardware requirements based on the size of your data. This helps ensure optimal performance and resource allocation for your Milvus installation.

      • +
• Before installing Milvus, it is recommended to use the Milvus Sizing Tool to estimate the hardware requirements based on the data size. This helps ensure optimal performance and resource allocation for the Milvus installation.

If you encounter any issues pulling the image, contact us at community@zilliz.com with details about the problem, and we will provide you with the necessary support.

      @@ -83,11 +83,11 @@ NAME PROVISIONER RECLAIMPOLICY VOLUMEBIINDI >

Before installing the Milvus Helm Charts, you need to add the Milvus Helm repository.

      -
      $ helm repo add milvus https://github.com/zilliztech/milvus-helm
      +
      $ helm repo add milvus https://zilliztech.github.io/milvus-helm/
       

The Milvus Helm Charts repository at https://github.com/milvus-io/milvus-helm has been archived, and you can get further updates from https://github.com/zilliztech/milvus-helm as follows:

      -
      helm repo add zilliztech https://github.com/zilliztech/milvus-helm
      +
      helm repo add zilliztech https://zilliztech.github.io/milvus-helm/
       helm repo update
       # upgrade existing helm release
       helm upgrade my-release zilliztech/milvus
      @@ -275,7 +275,7 @@ $ helm upgrade my-release zilliztech/milvus
       
• Hybrid search
• Upgrade Milvus using the Helm Chart.

• -
• Scale your Milvus cluster.

• +
• Scale the Milvus cluster.

• Deploy your Milvus cluster on clouds:

    • Amazon EKS
    • diff --git a/localization/v2.4.x/site/pt/userGuide/manage-indexes/index-with-gpu.json b/localization/v2.4.x/site/pt/userGuide/manage-indexes/index-with-gpu.json index 2de4c1515..02e9c20c0 100644 --- a/localization/v2.4.x/site/pt/userGuide/manage-indexes/index-with-gpu.json +++ b/localization/v2.4.x/site/pt/userGuide/manage-indexes/index-with-gpu.json @@ -1 +1 @@ -{"codeList":["gpu:\n initMemSize: 0 #set the initial memory pool size.\n maxMemSize: 0 #maxMemSize sets the maximum memory usage limit. When the memory usage exceed initMemSize, Milvus will attempt to expand the memory pool. \n","gpu:\n initMemSize: 2048 #set the initial memory pool size.\n maxMemSize: 4096 #maxMemSize sets the maximum memory usage limit. When the memory usage exceed initMemSize, Milvus will attempt to expand the memory pool. \n","index_params = {\n \"metric_type\": \"L2\",\n \"index_type\": \"GPU_CAGRA\",\n \"params\": {\n 'intermediate_graph_degree': 64,\n 'graph_degree': 32\n }\n}\n","index_params = {\n \"metric_type\": \"L2\",\n \"index_type\": \"GPU_IVF_FLAT\", # Or GPU_IVF_PQ\n \"params\": {\n \"nlist\": 1024\n }\n}\n","index_params = {\n 'index_type': 'GPU_BRUTE_FORCE',\n 'metric_type': 'L2',\n 'params': {}\n}\n","# Get an existing collection\ncollection = Collection(\"YOUR_COLLECTION_NAME\")\n\ncollection.create_index(\n field_name=\"vector\", # Name of the vector field on which an index is built\n index_params=index_params\n)\n","search_params = {\n \"metric_type\": \"L2\",\n \"params\": {}\n}\n","search_params = {\n \"metric_type\": \"L2\",\n \"params\": {\n \"itopk_size\": 128,\n \"search_width\": 4,\n \"min_iterations\": 0,\n \"max_iterations\": 0,\n \"team_size\": 0\n }\n}\n","search_params = {\n \"metric_type\": \"L2\", \n \"params\": {\"nprobe\": 10}\n}\n","# Load data into memory\ncollection.load()\n\ncollection.search(\n data=[[query_vector]], # Your query vector\n anns_field=\"vector\", # Name of the vector field\n param=search_params,\n limit=100 # Number of the results to return\n)\n"],"headingContent":"","anchorList":[{"label":"Índice com GPU","href":"Index-with-GPU","type":1,"isActive":false},{"label":"Configurar as definições do Milvus para o controlo da memória da GPU","href":"Configure-Milvus-settings-for-GPU-memory-control","type":2,"isActive":false},{"label":"Criar um índice","href":"Build-an-index","type":2,"isActive":false},{"label":"Pesquisar","href":"Search","type":2,"isActive":false},{"label":"Limites","href":"Limits","type":2,"isActive":false},{"label":"PERGUNTAS FREQUENTES","href":"FAQ","type":2,"isActive":false}]} \ No newline at end of file +{"codeList":["gpu:\n initMemSize: 0 #set the initial memory pool size.\n maxMemSize: 0 #maxMemSize sets the maximum memory usage limit. When the memory usage exceed initMemSize, Milvus will attempt to expand the memory pool. \n","gpu:\n initMemSize: 2048 #set the initial memory pool size.\n maxMemSize: 4096 #maxMemSize sets the maximum memory usage limit. When the memory usage exceed initMemSize, Milvus will attempt to expand the memory pool. 
\n","index_params = {\n \"metric_type\": \"L2\",\n \"index_type\": \"GPU_CAGRA\",\n \"params\": {\n 'intermediate_graph_degree': 64,\n 'graph_degree': 32\n }\n}\n","index_params = {\n \"metric_type\": \"L2\",\n \"index_type\": \"GPU_IVF_FLAT\", # Or GPU_IVF_PQ\n \"params\": {\n \"nlist\": 1024\n }\n}\n","index_params = {\n 'index_type': 'GPU_BRUTE_FORCE',\n 'metric_type': 'L2',\n 'params': {}\n}\n","# Get an existing collection\ncollection = Collection(\"YOUR_COLLECTION_NAME\")\n\ncollection.create_index(\n field_name=\"vector\", # Name of the vector field on which an index is built\n index_params=index_params\n)\n","search_params = {\n \"metric_type\": \"L2\",\n \"params\": {}\n}\n","search_params = {\n \"metric_type\": \"L2\",\n \"params\": {\n \"itopk_size\": 128,\n \"search_width\": 4,\n \"min_iterations\": 0,\n \"max_iterations\": 0,\n \"team_size\": 0\n }\n}\n","search_params = {\n \"metric_type\": \"L2\", \n \"params\": {\"nprobe\": 10}\n}\n","# Load data into memory\ncollection.load()\n\ncollection.search(\n data=[[query_vector]], # Your query vector\n anns_field=\"vector\", # Name of the vector field\n param=search_params,\n limit=100 # Number of the results to return\n)\n"],"headingContent":"Index with GPU","anchorList":[{"label":"Índice com GPU","href":"Index-with-GPU","type":1,"isActive":false},{"label":"Configurar as definições do Milvus para o controlo da memória da GPU","href":"Configure-Milvus-settings-for-GPU-memory-control","type":2,"isActive":false},{"label":"Criar um índice","href":"Build-an-index","type":2,"isActive":false},{"label":"Pesquisar","href":"Search","type":2,"isActive":false},{"label":"Limites","href":"Limits","type":2,"isActive":false},{"label":"PERGUNTAS FREQUENTES","href":"FAQ","type":2,"isActive":false}]} \ No newline at end of file diff --git a/localization/v2.4.x/site/pt/userGuide/manage-indexes/index-with-gpu.md b/localization/v2.4.x/site/pt/userGuide/manage-indexes/index-with-gpu.md index b5d8d15f1..3dbdf4542 100644 --- a/localization/v2.4.x/site/pt/userGuide/manage-indexes/index-with-gpu.md +++ b/localization/v2.4.x/site/pt/userGuide/manage-indexes/index-with-gpu.md @@ -203,7 +203,7 @@ collection.search(

When using GPU indexes, be aware of certain restrictions:

        -
• For GPU_IVF_FLAT, the maximum value for limit is 256.

• +
• For GPU_IVF_FLAT, the maximum value for limit is 1024.

• For GPU_IVF_PQ and GPU_CAGRA, the maximum value for limit is 1024.

• Although there is no set cap on limit for GPU_BRUTE_FORCE, it is recommended not to exceed 4096 to avoid potential performance issues.

• Currently, GPU indexes do not support COSINE distance. If COSINE distance is needed, the data should be normalized first, and then inner product (IP) distance can be used as a substitute.

      • diff --git a/localization/v2.4.x/site/zh/getstarted/run-milvus-k8s/install_cluster-helm.json b/localization/v2.4.x/site/zh/getstarted/run-milvus-k8s/install_cluster-helm.json index c26ab5ee3..f07b3539c 100644 --- a/localization/v2.4.x/site/zh/getstarted/run-milvus-k8s/install_cluster-helm.json +++ b/localization/v2.4.x/site/zh/getstarted/run-milvus-k8s/install_cluster-helm.json @@ -1 +1 @@ -{"codeList":["$ kubectl get sc\n\nNAME PROVISIONER RECLAIMPOLICY VOLUMEBIINDINGMODE ALLOWVOLUMEEXPANSION AGE\nstandard (default) k8s.io/minikube-hostpath Delete Immediate false \n","$ helm repo add milvus https://github.com/zilliztech/milvus-helm\n","helm repo add zilliztech https://github.com/zilliztech/milvus-helm\nhelm repo update\n# upgrade existing helm release\nhelm upgrade my-release zilliztech/milvus\n","$ helm repo update\n","$ helm install my-release milvus/milvus\n","$ kubectl get pods\n","NAME READY STATUS RESTARTS AGE\nmy-release-etcd-0 1/1 Running 0 3m23s\nmy-release-etcd-1 1/1 Running 0 3m23s\nmy-release-etcd-2 1/1 Running 0 3m23s\nmy-release-milvus-datanode-68cb87dcbd-4khpm 1/1 Running 0 3m23s\nmy-release-milvus-indexnode-5c5f7b5bd9-l8hjg 1/1 Running 0 3m24s\nmy-release-milvus-mixcoord-7fb9488465-dmbbj 1/1 Running 0 3m23s\nmy-release-milvus-proxy-6bd7f5587-ds2xv 1/1 Running 0 3m24s\nmy-release-milvus-querynode-5cd8fff495-k6gtg 1/1 Running 0 3m24s\nmy-release-minio-0 1/1 Running 0 3m23s\nmy-release-minio-1 1/1 Running 0 3m23s\nmy-release-minio-2 1/1 Running 0 3m23s\nmy-release-minio-3 1/1 Running 0 3m23s\nmy-release-pulsar-autorecovery-86f5dbdf77-lchpc 1/1 Running 0 3m24s\nmy-release-pulsar-bookkeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-bookkeeper-1 1/1 Running 0 98s\nmy-release-pulsar-broker-556ff89d4c-2m29m 1/1 Running 0 3m23s\nmy-release-pulsar-proxy-6fbd75db75-nhg4v 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-metadata-98zbr 0/1 Completed 0 3m24s\n","$ kubectl get pod my-release-milvus-proxy-6bd7f5587-ds2xv --template\n='{{(index (index .spec.containers 0).ports 0).containerPort}}{{\"\\n\"}}'\n19530\n","$ kubectl port-forward service/my-release-milvus 27017:19530\nForwarding from 127.0.0.1:27017 -> 19530\n","$ kubectl port-forward --address 0.0.0.0 service/my-release-milvus 27017:19530\nForwarding from 0.0.0.0:27017 -> 19530\n","$ helm template my-release milvus/milvus > milvus_manifest.yaml\n","$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/requirements.txt\n$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/save_image.py\n","$ pip3 install -r requirements.txt\n$ python3 save_image.py --manifest milvus_manifest.yaml\n","$ for image in $(find . 
-type f -name \"*.tar.gz\") ; do gunzip -c $image | docker load; done\n","$ kubectl apply -f milvus_manifest.yaml\n","$ helm repo update\n$ helm upgrade my-release zilliztech/milvus\n","$ helm uninstall my-release\n"],"headingContent":"Run Milvus in Kubernetes with Helm","anchorList":[{"label":"使用 Helm 在 Kubernetes 中运行 Milvus","href":"Run-Milvus-in-Kubernetes-with-Helm","type":1,"isActive":false},{"label":"概述","href":"Overview","type":2,"isActive":false},{"label":"前提条件","href":"Prerequisites","type":2,"isActive":false},{"label":"安装 Milvus Helm 图表","href":"Install-Milvus-Helm-Chart","type":2,"isActive":false},{"label":"在线安装","href":"Online-install","type":2,"isActive":false},{"label":"离线安装","href":"Offline-install","type":2,"isActive":false},{"label":"升级运行中的 Milvus 群集","href":"Upgrade-running-Milvus-cluster","type":2,"isActive":false},{"label":"卸载 Milvus","href":"Uninstall-Milvus","type":2,"isActive":false},{"label":"下一步","href":"Whats-next","type":2,"isActive":false}]} \ No newline at end of file +{"codeList":["$ kubectl get sc\n\nNAME PROVISIONER RECLAIMPOLICY VOLUMEBIINDINGMODE ALLOWVOLUMEEXPANSION AGE\nstandard (default) k8s.io/minikube-hostpath Delete Immediate false \n","$ helm repo add milvus https://zilliztech.github.io/milvus-helm/\n","helm repo add zilliztech https://zilliztech.github.io/milvus-helm/\nhelm repo update\n# upgrade existing helm release\nhelm upgrade my-release zilliztech/milvus\n","$ helm repo update\n","$ helm install my-release milvus/milvus\n","$ kubectl get pods\n","NAME READY STATUS RESTARTS AGE\nmy-release-etcd-0 1/1 Running 0 3m23s\nmy-release-etcd-1 1/1 Running 0 3m23s\nmy-release-etcd-2 1/1 Running 0 3m23s\nmy-release-milvus-datanode-68cb87dcbd-4khpm 1/1 Running 0 3m23s\nmy-release-milvus-indexnode-5c5f7b5bd9-l8hjg 1/1 Running 0 3m24s\nmy-release-milvus-mixcoord-7fb9488465-dmbbj 1/1 Running 0 3m23s\nmy-release-milvus-proxy-6bd7f5587-ds2xv 1/1 Running 0 3m24s\nmy-release-milvus-querynode-5cd8fff495-k6gtg 1/1 Running 0 3m24s\nmy-release-minio-0 1/1 Running 0 3m23s\nmy-release-minio-1 1/1 Running 0 3m23s\nmy-release-minio-2 1/1 Running 0 3m23s\nmy-release-minio-3 1/1 Running 0 3m23s\nmy-release-pulsar-autorecovery-86f5dbdf77-lchpc 1/1 Running 0 3m24s\nmy-release-pulsar-bookkeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-bookkeeper-1 1/1 Running 0 98s\nmy-release-pulsar-broker-556ff89d4c-2m29m 1/1 Running 0 3m23s\nmy-release-pulsar-proxy-6fbd75db75-nhg4v 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-metadata-98zbr 0/1 Completed 0 3m24s\n","$ kubectl get pod my-release-milvus-proxy-6bd7f5587-ds2xv --template\n='{{(index (index .spec.containers 0).ports 0).containerPort}}{{\"\\n\"}}'\n19530\n","$ kubectl port-forward service/my-release-milvus 27017:19530\nForwarding from 127.0.0.1:27017 -> 19530\n","$ kubectl port-forward --address 0.0.0.0 service/my-release-milvus 27017:19530\nForwarding from 0.0.0.0:27017 -> 19530\n","$ helm template my-release milvus/milvus > milvus_manifest.yaml\n","$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/requirements.txt\n$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/save_image.py\n","$ pip3 install -r requirements.txt\n$ python3 save_image.py --manifest milvus_manifest.yaml\n","$ for image in $(find . 
-type f -name \"*.tar.gz\") ; do gunzip -c $image | docker load; done\n","$ kubectl apply -f milvus_manifest.yaml\n","$ helm repo update\n$ helm upgrade my-release zilliztech/milvus\n","$ helm uninstall my-release\n"],"headingContent":"Run Milvus in Kubernetes with Helm","anchorList":[{"label":"使用 Helm 在 Kubernetes 中运行 Milvus","href":"Run-Milvus-in-Kubernetes-with-Helm","type":1,"isActive":false},{"label":"概述","href":"Overview","type":2,"isActive":false},{"label":"前提条件","href":"Prerequisites","type":2,"isActive":false},{"label":"安装 Milvus Helm 图表","href":"Install-Milvus-Helm-Chart","type":2,"isActive":false},{"label":"在线安装","href":"Online-install","type":2,"isActive":false},{"label":"离线安装","href":"Offline-install","type":2,"isActive":false},{"label":"升级运行中的 Milvus 群集","href":"Upgrade-running-Milvus-cluster","type":2,"isActive":false},{"label":"卸载 Milvus","href":"Uninstall-Milvus","type":2,"isActive":false},{"label":"下一步","href":"Whats-next","type":2,"isActive":false}]} \ No newline at end of file diff --git a/localization/v2.4.x/site/zh/getstarted/run-milvus-k8s/install_cluster-helm.md b/localization/v2.4.x/site/zh/getstarted/run-milvus-k8s/install_cluster-helm.md index 1ed271553..d21f76f76 100644 --- a/localization/v2.4.x/site/zh/getstarted/run-milvus-k8s/install_cluster-helm.md +++ b/localization/v2.4.x/site/zh/getstarted/run-milvus-k8s/install_cluster-helm.md @@ -83,11 +83,11 @@ NAME PROVISIONER RECLAIMPOLICY VOLUMEBIINDI >

Before installing the Milvus Helm chart, you need to add the Milvus Helm repository.

        -
        $ helm repo add milvus https://github.com/zilliztech/milvus-helm
        +
        $ helm repo add milvus https://zilliztech.github.io/milvus-helm/
         

The Milvus Helm Charts repository at https://github.com/milvus-io/milvus-helm has been archived, and you can get further updates from https://github.com/zilliztech/milvus-helm as follows:

        -
        helm repo add zilliztech https://github.com/zilliztech/milvus-helm
        +
        helm repo add zilliztech https://zilliztech.github.io/milvus-helm/
         helm repo update
         # upgrade existing helm release
         helm upgrade my-release zilliztech/milvus
        @@ -117,12 +117,12 @@ helm upgrade my-release zilliztech/milvus
         
        $ helm install my-release milvus/milvus
         

In the above command, my-release is the release name, and milvus/milvus is the locally installed chart repository. To use a different name, replace my-release with whatever you see fit.

        -

The above command deploys a Milvus cluster with its components and dependencies using the default configuration. To customize these settings, we recommend using the Milvus Sizing Tool to adjust the configuration based on your actual data size, and then downloading the corresponding YAML file. To learn more about the configuration parameters, refer to the Milvus System Configuration Checklist.

        +

The above command deploys a Milvus cluster with its components and dependencies using the default configuration. To customize these settings, we recommend using the Milvus Sizing Tool to adjust the configuration based on your actual data size, and then downloading the corresponding YAML file. To learn more about the configuration parameters, refer to the Milvus System Configuration Checklist.
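As a sketch of that workflow, assuming the sizing tool produced a values.yaml file with your overrides, you could pass it to Helm at install time (the file name is illustrative):

# Install Milvus with configuration overrides from the sizing tool
helm install my-release milvus/milvus -f values.yaml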

• The release name can only contain letters, numbers, and dashes. Dots are not allowed in the release name.
• When installing Milvus with Helm, the default command line installs the cluster version of Milvus. Further settings are needed when installing Milvus standalone.
• -
• According to the deprecated API migration guide of Kubernetes, from v1.25 on, the policy/v1beta1 API version of PodDisruptionBudget is no longer served. You are suggested to migrate manifests and API clients to use the policy/v1 API version instead.
  As a workaround for users who still use the policy/v1beta1 API version of PodDisruptionBudget on Kubernetes v1.25 and above, you can run the following command to install Milvus:
  helm install my-release milvus/milvus --set pulsar.bookkeeper.pdb.usePolicy=false,pulsar.broker.pdb.usePolicy=false,pulsar.proxy.pdb.usePolicy=false,pulsar.zookeeper.pdb.usePolicy=false
• +
• According to the deprecated API migration guide of Kubernetes, since v1.25, the policy/v1beta1 API version of PodDisruptionBudget is no longer served. You are suggested to migrate manifests and API clients to use the policy/v1 API version instead.
  As a workaround for users who still use the policy/v1beta1 API version of PodDisruptionBudget on Kubernetes v1.25 and above, you can run the following command to install Milvus:
  helm install my-release milvus/milvus --set pulsar.bookkeeper.pdb.usePolicy=false,pulsar.broker.pdb.usePolicy=false,pulsar.proxy.pdb.usePolicy=false,pulsar.zookeeper.pdb.usePolicy=false
• See Milvus Helm Chart and Helm for more information.
        @@ -195,7 +195,7 @@ my-release-pulsar-zookeeper-metadata-98zbr 0/1 Completed 0 3m24s
• To change the Milvus configuration, download the values.yaml template, put your desired settings in it, and then use helm template -f values.yaml my-release milvus/milvus > milvus_manifest.yaml to render the corresponding manifest.
      -

2. Download the image-pulling script

The image-pulling script is developed in Python. You should download the script along with its dependencies in the requirement.txt file.

      +

2. Download the image-pulling script

The image-pulling script is developed in Python. You should download the script along with its dependencies in the requirement.txt file.

      $ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/requirements.txt
       $ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/save_image.py
       
      diff --git a/localization/v2.4.x/site/zh/userGuide/manage-indexes/index-with-gpu.json b/localization/v2.4.x/site/zh/userGuide/manage-indexes/index-with-gpu.json index 6bbaac675..368d5381d 100644 --- a/localization/v2.4.x/site/zh/userGuide/manage-indexes/index-with-gpu.json +++ b/localization/v2.4.x/site/zh/userGuide/manage-indexes/index-with-gpu.json @@ -1 +1 @@ -{"codeList":["gpu:\n initMemSize: 0 #set the initial memory pool size.\n maxMemSize: 0 #maxMemSize sets the maximum memory usage limit. When the memory usage exceed initMemSize, Milvus will attempt to expand the memory pool. \n","gpu:\n initMemSize: 2048 #set the initial memory pool size.\n maxMemSize: 4096 #maxMemSize sets the maximum memory usage limit. When the memory usage exceed initMemSize, Milvus will attempt to expand the memory pool. \n","index_params = {\n \"metric_type\": \"L2\",\n \"index_type\": \"GPU_CAGRA\",\n \"params\": {\n 'intermediate_graph_degree': 64,\n 'graph_degree': 32\n }\n}\n","index_params = {\n \"metric_type\": \"L2\",\n \"index_type\": \"GPU_IVF_FLAT\", # Or GPU_IVF_PQ\n \"params\": {\n \"nlist\": 1024\n }\n}\n","index_params = {\n 'index_type': 'GPU_BRUTE_FORCE',\n 'metric_type': 'L2',\n 'params': {}\n}\n","# Get an existing collection\ncollection = Collection(\"YOUR_COLLECTION_NAME\")\n\ncollection.create_index(\n field_name=\"vector\", # Name of the vector field on which an index is built\n index_params=index_params\n)\n","search_params = {\n \"metric_type\": \"L2\",\n \"params\": {}\n}\n","search_params = {\n \"metric_type\": \"L2\",\n \"params\": {\n \"itopk_size\": 128,\n \"search_width\": 4,\n \"min_iterations\": 0,\n \"max_iterations\": 0,\n \"team_size\": 0\n }\n}\n","search_params = {\n \"metric_type\": \"L2\", \n \"params\": {\"nprobe\": 10}\n}\n","# Load data into memory\ncollection.load()\n\ncollection.search(\n data=[[query_vector]], # Your query vector\n anns_field=\"vector\", # Name of the vector field\n param=search_params,\n limit=100 # Number of the results to return\n)\n"],"headingContent":"","anchorList":[{"label":"使用 GPU 建立索引","href":"Index-with-GPU","type":1,"isActive":false},{"label":"为 GPU 内存控制配置 Milvus 设置","href":"Configure-Milvus-settings-for-GPU-memory-control","type":2,"isActive":false},{"label":"建立索引","href":"Build-an-index","type":2,"isActive":false},{"label":"搜索","href":"Search","type":2,"isActive":false},{"label":"限制","href":"Limits","type":2,"isActive":false},{"label":"常见问题","href":"FAQ","type":2,"isActive":false}]} \ No newline at end of file +{"codeList":["gpu:\n initMemSize: 0 #set the initial memory pool size.\n maxMemSize: 0 #maxMemSize sets the maximum memory usage limit. When the memory usage exceed initMemSize, Milvus will attempt to expand the memory pool. \n","gpu:\n initMemSize: 2048 #set the initial memory pool size.\n maxMemSize: 4096 #maxMemSize sets the maximum memory usage limit. When the memory usage exceed initMemSize, Milvus will attempt to expand the memory pool. 
\n","index_params = {\n \"metric_type\": \"L2\",\n \"index_type\": \"GPU_CAGRA\",\n \"params\": {\n 'intermediate_graph_degree': 64,\n 'graph_degree': 32\n }\n}\n","index_params = {\n \"metric_type\": \"L2\",\n \"index_type\": \"GPU_IVF_FLAT\", # Or GPU_IVF_PQ\n \"params\": {\n \"nlist\": 1024\n }\n}\n","index_params = {\n 'index_type': 'GPU_BRUTE_FORCE',\n 'metric_type': 'L2',\n 'params': {}\n}\n","# Get an existing collection\ncollection = Collection(\"YOUR_COLLECTION_NAME\")\n\ncollection.create_index(\n field_name=\"vector\", # Name of the vector field on which an index is built\n index_params=index_params\n)\n","search_params = {\n \"metric_type\": \"L2\",\n \"params\": {}\n}\n","search_params = {\n \"metric_type\": \"L2\",\n \"params\": {\n \"itopk_size\": 128,\n \"search_width\": 4,\n \"min_iterations\": 0,\n \"max_iterations\": 0,\n \"team_size\": 0\n }\n}\n","search_params = {\n \"metric_type\": \"L2\", \n \"params\": {\"nprobe\": 10}\n}\n","# Load data into memory\ncollection.load()\n\ncollection.search(\n data=[[query_vector]], # Your query vector\n anns_field=\"vector\", # Name of the vector field\n param=search_params,\n limit=100 # Number of the results to return\n)\n"],"headingContent":"Index with GPU","anchorList":[{"label":"使用 GPU 建立索引","href":"Index-with-GPU","type":1,"isActive":false},{"label":"为 GPU 内存控制配置 Milvus 设置","href":"Configure-Milvus-settings-for-GPU-memory-control","type":2,"isActive":false},{"label":"建立索引","href":"Build-an-index","type":2,"isActive":false},{"label":"搜索","href":"Search","type":2,"isActive":false},{"label":"限制","href":"Limits","type":2,"isActive":false},{"label":"常见问题","href":"FAQ","type":2,"isActive":false}]} \ No newline at end of file diff --git a/localization/v2.4.x/site/zh/userGuide/manage-indexes/index-with-gpu.md b/localization/v2.4.x/site/zh/userGuide/manage-indexes/index-with-gpu.md index a477980d4..35cf0dcea 100644 --- a/localization/v2.4.x/site/zh/userGuide/manage-indexes/index-with-gpu.md +++ b/localization/v2.4.x/site/zh/userGuide/manage-indexes/index-with-gpu.md @@ -2,7 +2,7 @@ id: index-with-gpu.md order: 3 summary: 本指南介绍如何在 Milvus 中建立支持 GPU 的索引,以提高搜索性能。 -title: 使用 GPU 的索引 +title: 使用 GPU 建立索引 ---

Index with GPU

Milvus uses a global graphics memory pool to allocate GPU memory.

It supports two parameters, initMemSize and maxMemSize, in the Milvus config file. The memory pool size is initially set to initMemSize and automatically expands to maxMemSize once this limit is exceeded.

      -

When Milvus starts, the default initMemSize is 1/2 of the available GPU memory, and the default maxMemSize is equal to all of the available GPU memory.

      +

When Milvus starts, the default initMemSize is 1/2 of the available GPU memory, and the default maxMemSize is equal to all available GPU memory.

Up until Milvus 2.4.1 (version 2.4.1 included), Milvus used a unified GPU memory pool. For versions prior to 2.4.1 (version 2.4.1 included), it is recommended to set both values to 0.

      gpu:
         initMemSize: 0 #set the initial memory pool size.
      @@ -64,9 +64,9 @@ title: 使用 GPU 的索引
               >
             
           

The following examples demonstrate how to build GPU indexes of different types.

      -

Prepare index parameters

When setting up GPU index parameters, define index_type, metric_ type, and params.

      +

Prepare index parameters

When setting up GPU index parameters, define index_type, metric_type, and params.

        -
• index_type (string): The type of index used to accelerate vector search. Valid options include GPU_CAGRA, GPU_IVF_FLAT, GPU _ IVF_PQ, and GPU_BRUTE_FORCE.

      • +
• index_type (string): The type of index used to accelerate vector search. Valid options include GPU_CAGRA, GPU_IVF_FLAT, GPU_IVF_PQ, and GPU_BRUTE_FORCE.

• metric_type (string): The type of metric used to measure vector similarity. Valid options are IP and L2.

• params (dict): The index-specific building parameters. The valid options for this parameter depend on the index type (the three fields are combined in the sketch below).
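For reference, the three fields come together in an index_params dictionary such as the GPU_CAGRA example used later on this page:

index_params = {
    "metric_type": "L2",            # similarity metric: IP or L2
    "index_type": "GPU_CAGRA",      # index type used to accelerate vector search
    "params": {
        "intermediate_graph_degree": 64,
        "graph_degree": 32,
    },
}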

      @@ -91,7 +91,7 @@ title: 使用 GPU 的索引
• IVF_PQ: Offers higher quality but results in a slower build time.

• NN_DESCENT: Offers a faster build but may result in lower recall.

• -
• cache_dataset_on_device (string, "true " | "false"): Decides whether to cache the original dataset in GPU memory. Setting it to "true " improves recall by refining search results, while setting it to "false " saves GPU memory.

• +
• cache_dataset_on_device (string, "true" | "false"): Decides whether to cache the original dataset in GPU memory. Setting it to "true" improves recall by refining search results, while setting it to "false" saves GPU memory.

  • GPU_IVF_FLATGPU_IVF_PQ索引

    index_params = {
    @@ -163,7 +163,7 @@ collection.create_index(
     
• itopk_size: Determines the size of the intermediate results kept during the search. A larger value may improve recall at the expense of search performance. It should be at least equal to the final top-k (limit) value and is typically a power of 2 (e.g., 16, 32, 64, 128).

• search_width: Specifies the number of entry points into the CAGRA graph during the search. Increasing this value can improve recall, but may impact search performance.

• min_iterations / max_iterations: These parameters control the search iteration process. By default, they are set to 0, and CAGRA automatically determines the number of iterations based on itopk_size and search_width. Adjusting these values manually can help balance performance and accuracy.

• -
• team_size: Specifies the number of CUDA threads used to calculate the metric distance on the GPU. Common values are powers of 2 up to 32 (e.g., 2, 4, 8, 16, 32). It has a minor impact on search performance. The default value is 0, in which case Milvus automatically selects team_size based on the vector dimension.

• +
• team_size: Specifies the number of CUDA threads used to calculate the metric distance on the GPU. Common values are powers of 2, up to a maximum of 32 (for example 2, 4, 8, 16, 32). It has a minor impact on search performance. The default value is 0, in which case Milvus automatically selects team_size based on the vector dimension.

• GPU_IVF_FLAT or GPU_IVF_PQ index

    search_params = {
    @@ -201,7 +201,7 @@ collection.search(
           
         

Keep in mind certain constraints when using GPU indexes:

      -
• For GPU_IVF_FLAT, the maximum value for limit is 256.

• +
• For GPU_IVF_FLAT, the maximum value for limit is 1024.

• For GPU_IVF_PQ and GPU_CAGRA, the maximum value for limit is 1024 (see the sketch after this list for keeping limit within this cap).

• Although no cap is set for GPU_BRUTE_FORCE, it is recommended not to exceed 4096 to avoid potential performance issues.

• Currently, GPU indexes do not support COSINE distance. If COSINE distance is needed, normalize the data first and then use inner product (IP) distance as a substitute.
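A small sketch of keeping limit within the 1024 cap noted above when searching with a GPU index; collection and query_vector are assumed to come from the earlier steps on this page:

MAX_GPU_LIMIT = 1024  # cap for GPU_IVF_FLAT, GPU_IVF_PQ, and GPU_CAGRA, per the list above

requested_top_k = 2000
limit = min(requested_top_k, MAX_GPU_LIMIT)  # clamp to the supported maximum

collection.search(
    data=[query_vector],          # query vector, as in this page's search example
    anns_field="vector",
    param={"metric_type": "L2", "params": {"nprobe": 10}},
    limit=limit,
)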

    • diff --git a/localization/v2.5.x/site/de/adminGuide/upgrade-pulsar-v3.json b/localization/v2.5.x/site/de/adminGuide/upgrade-pulsar-v3.json index 7132f7d21..caa903b0e 100644 --- a/localization/v2.5.x/site/de/adminGuide/upgrade-pulsar-v3.json +++ b/localization/v2.5.x/site/de/adminGuide/upgrade-pulsar-v3.json @@ -1 +1 @@ -{"codeList":["kubectl -n default port-forward deploy/my-release-milvus-proxy 9091:9091 &​\n","[1] 8116​\nForwarding from 127.0.0.1:9091 -> 9091​\n\n","pid=8116​\n\n","curl 127.0.0.1:9091/api/v1/collections \\​\n|curl 127.0.0.1:9091/api/v1/persist -d @/dev/stdin\\​\n|jq '.flush_coll_segIDs'| jq '[.[] | .data[]]' | jq '{segmentIDs: (.)}' \\​\n> flushing_segments.json​\ncat flushing_segments.json​\n\n","{​\n\"segmentIDs\": [​\n 454097953998181000,​\n 454097953999383600,​\n 454097953998180800​\n]​\n}​\n\n","cat flushing_segments.json| curl -X GET 127.0.0.1:9091/api/v1/persist/state -d @/dev/stdin ​\n\n","{\"status\":{},\"flushed\":true}​\n\n","kill $pid​\n\n","[1] + 8116 terminated kubectl -n default port-forward deploy/my-release-milvus-proxy 9091:9091 ​\n\n","helm -n default get values my-release -o yaml > values.yaml​\ncat values.yaml​\n\n","helm -n default uninstall my-release​\n\n","These resources were kept due to the resource policy:​\n[PersistentVolumeClaim] my-release-minio​\n​\nrelease \"my-release\" uninstalled​\n\n","kubectl -n default get pvc -lapp=pulsar,release=my-release |grep -v NAME |awk '{print $1}' > pulsar-pvcs.txt​\nkubectl -n default get pvc -lapp=pulsar,release=my-release -o custom-columns=VOL:.spec.volumeName|grep -v VOL > pulsar-pvs.txt​\necho \"Volume Claims:\"​\ncat pulsar-pvcs.txt​\necho \"Volumes:\"​\ncat pulsar-pvs.txt​\n\n","Volume Claims:​\nmy-release-pulsar-bookie-journal-my-release-pulsar-bookie-0​\nmy-release-pulsar-bookie-journal-my-release-pulsar-bookie-1​\nmy-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-0​\nmy-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-1​\nmy-release-pulsar-zookeeper-data-my-release-pulsar-zookeeper-0​\nVolumes:​\npvc-f590a4de-df31-4ca8-a424-007eac3c619a​\npvc-17b0e215-3e14-4d14-901e-1a1dda9ff5a3​\npvc-72f83c25-6ea1-45ee-9559-0b783f2c530b​\npvc-60dcb6e4-760d-46c7-af1a-d1fc153b0caf​\npvc-2da33f64-c053-42b9-bb72-c5d50779aa0a​\n\n","cat pulsar-pvcs.txt |xargs -I {} kubectl -n default delete pvc {} --wait=false​\n\n","persistentvolumeclaim \"my-release-pulsar-bookie-journal-my-release-pulsar-bookie-0\" deleted​\npersistentvolumeclaim \"my-release-pulsar-bookie-journal-my-release-pulsar-bookie-1\" deleted​\npersistentvolumeclaim \"my-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-0\" deleted​\npersistentvolumeclaim \"my-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-1\" deleted​\npersistentvolumeclaim \"my-release-pulsar-zookeeper-data-my-release-pulsar-zookeeper-0\" deleted​\n\n","cat pulsar-pvs.txt |xargs -I {} kubectl -n default delete pvc {} --wait=false​\n\n","Error from server (NotFound): persistentvolumeclaims \"pvc-f590a4de-df31-4ca8-a424-007eac3c619a\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-17b0e215-3e14-4d14-901e-1a1dda9ff5a3\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-72f83c25-6ea1-45ee-9559-0b783f2c530b\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-60dcb6e4-760d-46c7-af1a-d1fc153b0caf\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-2da33f64-c053-42b9-bb72-c5d50779aa0a\" not found​\n\n","kubectl -n default get milvus my-release -o yaml > milvus.yaml​\nhead 
milvus.yaml -n 20​\n\n","apiVersion: milvus.io/v1beta1​\nkind: Milvus​\nmetadata:​\nannotations:​\n milvus.io/dependency-values-merged: \"true\"​\n milvus.io/pod-service-label-added: \"true\"​\n milvus.io/querynode-current-group-id: \"0\"​\ncreationTimestamp: \"2024-11-22T08:06:59Z\"​\nfinalizers:​\n- milvus.milvus.io/finalizer​\ngeneration: 3​\nlabels:​\n app: milvus​\n milvus.io/operator-version: 1.1.2​\nname: my-release​\nnamespace: default​\nresourceVersion: \"692217324\"​\nuid: 7a469ed0-9df1-494e-bd9a-340fac4305b5​\nspec:​\ncomponents:​\n\n","# a patch to retain etcd & storage data and delete pulsar data while delete milvus​\nspec:​\ndependencies:​\n etcd:​\n inCluster:​\n deletionPolicy: Retain​\n pvcDeletion: false​\n storage:​\n inCluster:​\n deletionPolicy: Retain​\n pvcDeletion: false​\n pulsar:​\n inCluster:​\n deletionPolicy: Delete​\n pvcDeletion: true​\n\n","kubectl -n default patch milvus my-release --patch-file patch.yaml --type=merge​\n\n","milvus.milvus.io/my-release patched​\n\n","kubectl -n default delete milvus my-release --wait=false​\nkubectl -n default get milvus my-release​\nkubectl -n default delete milvus my-release --wait=true​\n\n","milvus.milvus.io \"my-release\" deleted​\nNAME MODE STATUS UPDATED AGE​\nmy-release cluster Deleting True 41m​\nmilvus.milvus.io \"my-release\" deleted​\n\n","kubectl -n default get milvus my-release​\n\n","No resources found in default namespace.​\n\n","# change the following:​\npulsar:​\nenabled: false # set to false​\n# you may also clean up rest fields under pulsar field​\n# it's ok to keep them though.​\npulsarv3:​\nenabled: true​\n# append other values for pulsar v3 chart if needs​\n\n","helm repo add zilliztech https://zilliztech.github.io/milvus-helm​\nhelm repo update zilliztech​\n\n","\"zilliztech\" already exists with the same configuration, skipping​\nHang tight while we grab the latest from your chart repositories...​\n...Successfully got an update from the \"zilliztech\" chart repository​\nUpdate Complete. 
⎈Happy Helming!⎈​\n\n","helm -n default install my-release zilliztech/milvus --reset-values -f values.yaml​\n\n","NAME: my-release​\nLAST DEPLOYED: Fri Nov 22 15:31:27 2024​\nNAMESPACE: default​\nSTATUS: deployed​\nREVISION: 1​\nTEST SUITE: None​\n\n","NAME READY STATUS RESTARTS AGE​\nmy-release-etcd-0 1/1 Running 0 4m3s​\nmy-release-milvus-datanode-56487bc4bc-s6mbd 1/1 Running 0 4m5s​\nmy-release-milvus-indexnode-6476894d6-rv85d 1/1 Running 0 4m5s​\nmy-release-milvus-mixcoord-6d8875cb9c-67fcq 1/1 Running 0 4m4s​\nmy-release-milvus-proxy-7bc45d57c5-2qf8m 1/1 Running 0 4m4s​\nmy-release-milvus-querynode-77465747b-kt7f4 1/1 Running 0 4m4s​\nmy-release-minio-684ff4f5df-pnc97 1/1 Running 0 4m5s​\nmy-release-pulsarv3-bookie-0 1/1 Running 0 4m3s​\nmy-release-pulsarv3-bookie-1 1/1 Running 0 4m3s​\nmy-release-pulsarv3-bookie-2 1/1 Running 0 4m3s​\nmy-release-pulsarv3-bookie-init-6z4tk 0/1 Completed 0 4m1s​\nmy-release-pulsarv3-broker-0 1/1 Running 0 4m2s​\nmy-release-pulsarv3-broker-1 1/1 Running 0 4m2s​\nmy-release-pulsarv3-proxy-0 1/1 Running 0 4m2s​\nmy-release-pulsarv3-proxy-1 1/1 Running 0 4m2s​\nmy-release-pulsarv3-pulsar-init-wvqpc 0/1 Completed 0 4m1s​\nmy-release-pulsarv3-recovery-0 1/1 Running 0 4m3s​\nmy-release-pulsarv3-zookeeper-0 1/1 Running 0 4m2s​\nmy-release-pulsarv3-zookeeper-1 1/1 Running 0 4m2s​\nmy-release-pulsarv3-zookeeper-2 1/1 Running 0 4m2s​\n\n","# change the followings fields:​\napiVersion: milvus.io/v1beta1​\nkind: Milvus​\nmetadata:​\nannotations: null # this field should be removed or set to null​\nresourceVersion: null # this field should be removed or set to null​\nuid: null # this field should be removed or set to null​\nspec:​\ndependencies:​\n pulsar:​\n inCluster:​\n chartVersion: pulsar-v3​\n # delete all previous values for pulsar v2 and set it to null.​\n # you may add additional values here for pulsar v3 if you're sure about it.​\n values: null​\n\n","helm repo add milvus-operator https://zilliztech.github.io/milvus-operator​\nhelm repo update milvus-operator​\nhelm -n milvus-operator upgrade milvus-operator milvus-operator/milvus-operator​\n\n","kubectl create -f milvus.yaml​\n\n","milvus.milvus.io/my-release created​\n\n","NAME READY STATUS RESTARTS AGE​\nmy-release-etcd-0 1/1 Running 0 65m​\nmy-release-milvus-datanode-57fd59ff58-5mdrk 1/1 Running 0 93s​\nmy-release-milvus-indexnode-67867c6b9b-4wsbw 1/1 Running 0 93s​\nmy-release-milvus-mixcoord-797849f9bb-sf8z5 1/1 Running 0 93s​\nmy-release-milvus-proxy-5d5bf98445-c55m6 1/1 Running 0 93s​\nmy-release-milvus-querynode-0-64797f5c9-lw4rh 1/1 Running 0 92s​\nmy-release-minio-79476ccb49-zvt2h 1/1 Running 0 65m​\nmy-release-pulsar-bookie-0 1/1 Running 0 5m10s​\nmy-release-pulsar-bookie-1 1/1 Running 0 5m10s​\nmy-release-pulsar-bookie-2 1/1 Running 0 5m10s​\nmy-release-pulsar-bookie-init-v8fdj 0/1 Completed 0 5m11s​\nmy-release-pulsar-broker-0 1/1 Running 0 5m11s​\nmy-release-pulsar-broker-1 1/1 Running 0 5m10s​\nmy-release-pulsar-proxy-0 1/1 Running 0 5m11s​\nmy-release-pulsar-proxy-1 1/1 Running 0 5m10s​\nmy-release-pulsar-pulsar-init-5lhx7 0/1 Completed 0 5m11s​\nmy-release-pulsar-recovery-0 1/1 Running 0 5m11s​\nmy-release-pulsar-zookeeper-0 1/1 Running 0 5m11s​\nmy-release-pulsar-zookeeper-1 1/1 Running 0 5m10s​\nmy-release-pulsar-zookeeper-2 1/1 Running 0 5m10s​\n\n"],"headingContent":"Upgrading Pulsar ​","anchorList":[{"label":"Aufrüstung von 
Pulsar","href":"Upgrading-Pulsar-​","type":1,"isActive":false},{"label":"Fahrplan","href":"Roadmap","type":2,"isActive":false},{"label":"Prozeduren","href":"Procedures","type":2,"isActive":false}]} \ No newline at end of file +{"codeList":["kubectl -n default port-forward deploy/my-release-milvus-proxy 9091:9091 &​\n","[1] 8116​\nForwarding from 127.0.0.1:9091 -> 9091​\n\n","pid=8116​\n\n","curl 127.0.0.1:9091/api/v1/collections \\​\n|curl 127.0.0.1:9091/api/v1/persist -d @/dev/stdin\\​\n|jq '.flush_coll_segIDs'| jq '[.[] | .data[]]' | jq '{segmentIDs: (.)}' \\​\n> flushing_segments.json​\ncat flushing_segments.json​\n\n","{​\n \"segmentIDs\": [​\n 454097953998181000,​\n 454097953999383600,​\n 454097953998180800​\n ]​\n}​\n\n","cat flushing_segments.json| curl -X GET 127.0.0.1:9091/api/v1/persist/state -d @/dev/stdin ​\n\n","{\"status\":{},\"flushed\":true}​\n\n","kill $pid​\n\n","[1] + 8116 terminated kubectl -n default port-forward deploy/my-release-milvus-proxy 9091:9091 ​\n\n","helm -n default get values my-release -o yaml > values.yaml​\ncat values.yaml​\n\n","helm -n default uninstall my-release​\n\n","These resources were kept due to the resource policy:​\n[PersistentVolumeClaim] my-release-minio​\n​\nrelease \"my-release\" uninstalled​\n\n","kubectl -n default get pvc -lapp=pulsar,release=my-release |grep -v NAME |awk '{print $1}' > pulsar-pvcs.txt​\nkubectl -n default get pvc -lapp=pulsar,release=my-release -o custom-columns=VOL:.spec.volumeName|grep -v VOL > pulsar-pvs.txt​\necho \"Volume Claims:\"​\ncat pulsar-pvcs.txt​\necho \"Volumes:\"​\ncat pulsar-pvs.txt​\n\n","Volume Claims:​\nmy-release-pulsar-bookie-journal-my-release-pulsar-bookie-0​\nmy-release-pulsar-bookie-journal-my-release-pulsar-bookie-1​\nmy-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-0​\nmy-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-1​\nmy-release-pulsar-zookeeper-data-my-release-pulsar-zookeeper-0​\nVolumes:​\npvc-f590a4de-df31-4ca8-a424-007eac3c619a​\npvc-17b0e215-3e14-4d14-901e-1a1dda9ff5a3​\npvc-72f83c25-6ea1-45ee-9559-0b783f2c530b​\npvc-60dcb6e4-760d-46c7-af1a-d1fc153b0caf​\npvc-2da33f64-c053-42b9-bb72-c5d50779aa0a​\n\n","cat pulsar-pvcs.txt |xargs -I {} kubectl -n default delete pvc {} --wait=false​\n\n","persistentvolumeclaim \"my-release-pulsar-bookie-journal-my-release-pulsar-bookie-0\" deleted​\npersistentvolumeclaim \"my-release-pulsar-bookie-journal-my-release-pulsar-bookie-1\" deleted​\npersistentvolumeclaim \"my-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-0\" deleted​\npersistentvolumeclaim \"my-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-1\" deleted​\npersistentvolumeclaim \"my-release-pulsar-zookeeper-data-my-release-pulsar-zookeeper-0\" deleted​\n\n","cat pulsar-pvs.txt |xargs -I {} kubectl -n default delete pvc {} --wait=false​\n\n","Error from server (NotFound): persistentvolumeclaims \"pvc-f590a4de-df31-4ca8-a424-007eac3c619a\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-17b0e215-3e14-4d14-901e-1a1dda9ff5a3\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-72f83c25-6ea1-45ee-9559-0b783f2c530b\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-60dcb6e4-760d-46c7-af1a-d1fc153b0caf\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-2da33f64-c053-42b9-bb72-c5d50779aa0a\" not found​\n\n","kubectl -n default get milvus my-release -o yaml > milvus.yaml​\nhead milvus.yaml -n 20​\n\n","apiVersion: milvus.io/v1beta1​\nkind: Milvus​\nmetadata:​\n annotations:​\n 
milvus.io/dependency-values-merged: \"true\"​\n milvus.io/pod-service-label-added: \"true\"​\n milvus.io/querynode-current-group-id: \"0\"​\n creationTimestamp: \"2024-11-22T08:06:59Z\"​\n finalizers:​\n - milvus.milvus.io/finalizer​\n generation: 3​\n labels:​\n app: milvus​\n milvus.io/operator-version: 1.1.2​\nname: my-release​\nnamespace: default​\nresourceVersion: \"692217324\"​\nuid: 7a469ed0-9df1-494e-bd9a-340fac4305b5​\nspec:​\n components:​\n\n","# a patch to retain etcd & storage data and delete pulsar data while delete milvus​\nspec:​\n dependencies:​\n etcd:​\n inCluster:​\n deletionPolicy: Retain​\n pvcDeletion: false​\n storage:​\n inCluster:​\n deletionPolicy: Retain​\n pvcDeletion: false​\n pulsar:​\n inCluster:​\n deletionPolicy: Delete​\n pvcDeletion: true​\n\n","kubectl -n default patch milvus my-release --patch-file patch.yaml --type=merge​\n\n","milvus.milvus.io/my-release patched​\n\n","kubectl -n default delete milvus my-release --wait=false​\nkubectl -n default get milvus my-release​\nkubectl -n default delete milvus my-release --wait=true​\n\n","milvus.milvus.io \"my-release\" deleted​\nNAME MODE STATUS UPDATED AGE​\nmy-release cluster Deleting True 41m​\nmilvus.milvus.io \"my-release\" deleted​\n\n","kubectl -n default get milvus my-release​\n\n","No resources found in default namespace.​\n\n","# change the following:​\npulsar:​\n enabled: false # set to false​\n # you may also clean up rest fields under pulsar field​\n # it's ok to keep them though.​\npulsarv3:​\n enabled: true​\n # append other values for pulsar v3 chart if needs​\n\n","helm repo add zilliztech https://zilliztech.github.io/milvus-helm​\nhelm repo update zilliztech​\n\n","\"zilliztech\" already exists with the same configuration, skipping​\nHang tight while we grab the latest from your chart repositories...​\n...Successfully got an update from the \"zilliztech\" chart repository​\nUpdate Complete. 
⎈Happy Helming!⎈​\n\n","helm -n default install my-release zilliztech/milvus --reset-values -f values.yaml​\n\n","NAME: my-release​\nLAST DEPLOYED: Fri Nov 22 15:31:27 2024​\nNAMESPACE: default​\nSTATUS: deployed​\nREVISION: 1​\nTEST SUITE: None​\n\n","NAME READY STATUS RESTARTS AGE​\nmy-release-etcd-0 1/1 Running 0 4m3s​\nmy-release-milvus-datanode-56487bc4bc-s6mbd 1/1 Running 0 4m5s​\nmy-release-milvus-indexnode-6476894d6-rv85d 1/1 Running 0 4m5s​\nmy-release-milvus-mixcoord-6d8875cb9c-67fcq 1/1 Running 0 4m4s​\nmy-release-milvus-proxy-7bc45d57c5-2qf8m 1/1 Running 0 4m4s​\nmy-release-milvus-querynode-77465747b-kt7f4 1/1 Running 0 4m4s​\nmy-release-minio-684ff4f5df-pnc97 1/1 Running 0 4m5s​\nmy-release-pulsarv3-bookie-0 1/1 Running 0 4m3s​\nmy-release-pulsarv3-bookie-1 1/1 Running 0 4m3s​\nmy-release-pulsarv3-bookie-2 1/1 Running 0 4m3s​\nmy-release-pulsarv3-bookie-init-6z4tk 0/1 Completed 0 4m1s​\nmy-release-pulsarv3-broker-0 1/1 Running 0 4m2s​\nmy-release-pulsarv3-broker-1 1/1 Running 0 4m2s​\nmy-release-pulsarv3-proxy-0 1/1 Running 0 4m2s​\nmy-release-pulsarv3-proxy-1 1/1 Running 0 4m2s​\nmy-release-pulsarv3-pulsar-init-wvqpc 0/1 Completed 0 4m1s​\nmy-release-pulsarv3-recovery-0 1/1 Running 0 4m3s​\nmy-release-pulsarv3-zookeeper-0 1/1 Running 0 4m2s​\nmy-release-pulsarv3-zookeeper-1 1/1 Running 0 4m2s​\nmy-release-pulsarv3-zookeeper-2 1/1 Running 0 4m2s​\n\n","# change the followings fields:​\napiVersion: milvus.io/v1beta1​\nkind: Milvus​\nmetadata:​\n annotations: null # this field should be removed or set to null​\n resourceVersion: null # this field should be removed or set to null​\n uid: null # this field should be removed or set to null​\nspec:​\n dependencies:​\n pulsar:​\n inCluster:​\n chartVersion: pulsar-v3​\n # delete all previous values for pulsar v2 and set it to null.​\n # you may add additional values here for pulsar v3 if you're sure about it.​\n values: null​\n\n","helm repo add milvus-operator https://zilliztech.github.io/milvus-operator​\nhelm repo update milvus-operator​\nhelm -n milvus-operator upgrade milvus-operator milvus-operator/milvus-operator​\n\n","kubectl create -f milvus.yaml​\n\n","milvus.milvus.io/my-release created​\n\n","NAME READY STATUS RESTARTS AGE​\nmy-release-etcd-0 1/1 Running 0 65m​\nmy-release-milvus-datanode-57fd59ff58-5mdrk 1/1 Running 0 93s​\nmy-release-milvus-indexnode-67867c6b9b-4wsbw 1/1 Running 0 93s​\nmy-release-milvus-mixcoord-797849f9bb-sf8z5 1/1 Running 0 93s​\nmy-release-milvus-proxy-5d5bf98445-c55m6 1/1 Running 0 93s​\nmy-release-milvus-querynode-0-64797f5c9-lw4rh 1/1 Running 0 92s​\nmy-release-minio-79476ccb49-zvt2h 1/1 Running 0 65m​\nmy-release-pulsar-bookie-0 1/1 Running 0 5m10s​\nmy-release-pulsar-bookie-1 1/1 Running 0 5m10s​\nmy-release-pulsar-bookie-2 1/1 Running 0 5m10s​\nmy-release-pulsar-bookie-init-v8fdj 0/1 Completed 0 5m11s​\nmy-release-pulsar-broker-0 1/1 Running 0 5m11s​\nmy-release-pulsar-broker-1 1/1 Running 0 5m10s​\nmy-release-pulsar-proxy-0 1/1 Running 0 5m11s​\nmy-release-pulsar-proxy-1 1/1 Running 0 5m10s​\nmy-release-pulsar-pulsar-init-5lhx7 0/1 Completed 0 5m11s​\nmy-release-pulsar-recovery-0 1/1 Running 0 5m11s​\nmy-release-pulsar-zookeeper-0 1/1 Running 0 5m11s​\nmy-release-pulsar-zookeeper-1 1/1 Running 0 5m10s​\nmy-release-pulsar-zookeeper-2 1/1 Running 0 5m10s​\n\n"],"headingContent":"Upgrading Pulsar ​","anchorList":[{"label":"Aufrüstung von 
Pulsar","href":"Upgrading-Pulsar-​","type":1,"isActive":false},{"label":"Fahrplan","href":"Roadmap","type":2,"isActive":false},{"label":"Prozeduren","href":"Procedures","type":2,"isActive":false}]} \ No newline at end of file diff --git a/localization/v2.5.x/site/de/adminGuide/upgrade-pulsar-v3.md b/localization/v2.5.x/site/de/adminGuide/upgrade-pulsar-v3.md index 64d61bebc..45d2d0aa2 100644 --- a/localization/v2.5.x/site/de/adminGuide/upgrade-pulsar-v3.md +++ b/localization/v2.5.x/site/de/adminGuide/upgrade-pulsar-v3.md @@ -113,11 +113,11 @@ Forwarding from 127.

    Ausgabe.

    {​
    -"segmentIDs": [​
    +  "segmentIDs": [​
         454097953998181000,​
         454097953999383600,​
         454097953998180800​
    -]​
    +  ]​
     }​
     
     
  • @@ -223,15 +223,15 @@ head milvus.yaml -n 20
    apiVersion: milvus.io/v1beta1​
     kind: Milvus​
     metadata:​
    -annotations:​
    +  annotations:​
         milvus.io/dependency-values-merged: "true"​
         milvus.io/pod-service-label-added: "true"​
         milvus.io/querynode-current-group-id: "0"​
    -creationTimestamp: "2024-11-22T08:06:59Z"​
    -finalizers:​
    -- milvus.milvus.io/finalizer​
    -generation: 3​
    -labels:​
    +  creationTimestamp: "2024-11-22T08:06:59Z"​
    +  finalizers:​
    +  - milvus.milvus.io/finalizer​
    +  generation: 3​
    +  labels:​
         app: milvus​
         milvus.io/operator-version: 1.1.2​
     name: my-release​
    @@ -239,23 +239,23 @@ namespace: default​
     resourceVersion: "692217324"​
     uid: 7a469ed0-9df1-494e-bd9a-340fac4305b5​
     spec:​
    -components:​
    +  components:​
     
     
  • Erstellen Sie eine patch.yaml Datei mit folgendem Inhalt.

    # a patch to retain etcd & storage data and delete pulsar data while delete milvus​
     spec:​
    -dependencies:​
    +  dependencies:​
         etcd:​
    -    inCluster:​
    +      inCluster:​
             deletionPolicy: Retain​
             pvcDeletion: false​
         storage:​
    -    inCluster:​
    +      inCluster:​
             deletionPolicy: Retain​
             pvcDeletion: false​
         pulsar:​
    -    inCluster:​
    +      inCluster:​
             deletionPolicy: Delete​
             pvcDeletion: true​
     
    @@ -302,12 +302,12 @@ milvus.milvus.io "my-release" deleted
     
  • Bearbeiten Sie die im vorherigen Schritt gespeicherte values.yaml.

    # change the following:​
     pulsar:​
    -enabled: false # set to false​
    -# you may also clean up rest fields under pulsar field​
    -# it's ok to keep them though.​
    +  enabled: false # set to false​
    +  # you may also clean up rest fields under pulsar field​
    +  # it's ok to keep them though.​
     pulsarv3:​
    -enabled: true​
    -# append other values for pulsar v3 chart if needs​
    +  enabled: true​
    +  # append other values for pulsar v3 chart if needs​
     
     
  • Aktualisieren Sie Ihr lokales Helm-Repositorium

    @@ -368,13 +368,13 @@ my-release-pulsarv3-zookeeper-2 < apiVersion: milvus.io/v1beta1​ kind: Milvus​ metadata:​ -annotations: null # this field should be removed or set to null​ -resourceVersion: null # this field should be removed or set to null​ -uid: null # this field should be removed or set to null​ + annotations: null # this field should be removed or set to null​ + resourceVersion: null # this field should be removed or set to null​ + uid: null # this field should be removed or set to null​ spec:​ -dependencies:​ + dependencies:​ pulsar:​ - inCluster:​ + inCluster:​ chartVersion: pulsar-v3​ # delete all previous values for pulsar v2 and set it to null.​ # you may add additional values here for pulsar v3 if you're sure about it.​ diff --git a/localization/v2.5.x/site/de/getstarted/run-milvus-k8s/install_cluster-helm.json b/localization/v2.5.x/site/de/getstarted/run-milvus-k8s/install_cluster-helm.json index cc7954da6..836c8dc88 100644 --- a/localization/v2.5.x/site/de/getstarted/run-milvus-k8s/install_cluster-helm.json +++ b/localization/v2.5.x/site/de/getstarted/run-milvus-k8s/install_cluster-helm.json @@ -1 +1 @@ -{"codeList":["$ kubectl get sc\n\nNAME PROVISIONER RECLAIMPOLICY VOLUMEBIINDINGMODE ALLOWVOLUMEEXPANSION AGE\nstandard (default) k8s.io/minikube-hostpath Delete Immediate false \n","$ helm repo add milvus https://github.com/zilliztech/milvus-helm\n","helm repo add zilliztech https://github.com/zilliztech/milvus-helm\nhelm repo update\n# upgrade existing helm release\nhelm upgrade my-release zilliztech/milvus\n","$ helm repo update\n","$ helm install my-release milvus/milvus\n","$ kubectl get pods\n","NAME READY STATUS RESTARTS AGE\nmy-release-etcd-0 1/1 Running 0 3m23s\nmy-release-etcd-1 1/1 Running 0 3m23s\nmy-release-etcd-2 1/1 Running 0 3m23s\nmy-release-milvus-datanode-68cb87dcbd-4khpm 1/1 Running 0 3m23s\nmy-release-milvus-indexnode-5c5f7b5bd9-l8hjg 1/1 Running 0 3m24s\nmy-release-milvus-mixcoord-7fb9488465-dmbbj 1/1 Running 0 3m23s\nmy-release-milvus-proxy-6bd7f5587-ds2xv 1/1 Running 0 3m24s\nmy-release-milvus-querynode-5cd8fff495-k6gtg 1/1 Running 0 3m24s\nmy-release-minio-0 1/1 Running 0 3m23s\nmy-release-minio-1 1/1 Running 0 3m23s\nmy-release-minio-2 1/1 Running 0 3m23s\nmy-release-minio-3 1/1 Running 0 3m23s\nmy-release-pulsar-autorecovery-86f5dbdf77-lchpc 1/1 Running 0 3m24s\nmy-release-pulsar-bookkeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-bookkeeper-1 1/1 Running 0 98s\nmy-release-pulsar-broker-556ff89d4c-2m29m 1/1 Running 0 3m23s\nmy-release-pulsar-proxy-6fbd75db75-nhg4v 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-metadata-98zbr 0/1 Completed 0 3m24s\n","$ kubectl get pod my-release-milvus-proxy-6bd7f5587-ds2xv --template\n='{{(index (index .spec.containers 0).ports 0).containerPort}}{{\"\\n\"}}'\n19530\n","$ kubectl port-forward service/my-release-milvus 27017:19530\nForwarding from 127.0.0.1:27017 -> 19530\n","$ kubectl port-forward --address 0.0.0.0 service/my-release-milvus 27017:19530\nForwarding from 0.0.0.0:27017 -> 19530\n","$ helm template my-release milvus/milvus > milvus_manifest.yaml\n","$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/requirements.txt\n$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/save_image.py\n","$ pip3 install -r requirements.txt\n$ python3 save_image.py --manifest milvus_manifest.yaml\n","$ for image in $(find . 
-type f -name \"*.tar.gz\") ; do gunzip -c $image | docker load; done\n","$ kubectl apply -f milvus_manifest.yaml\n","$ helm repo update\n$ helm upgrade my-release zilliztech/milvus\n","$ helm uninstall my-release\n"],"headingContent":"Run Milvus in Kubernetes with Helm","anchorList":[{"label":"Milvus in Kubernetes mit Helm starten","href":"Run-Milvus-in-Kubernetes-with-Helm","type":1,"isActive":false},{"label":"Übersicht","href":"Overview","type":2,"isActive":false},{"label":"Voraussetzungen","href":"Prerequisites","type":2,"isActive":false},{"label":"Milvus Helm Chart installieren","href":"Install-Milvus-Helm-Chart","type":2,"isActive":false},{"label":"Online-Installation","href":"Online-install","type":2,"isActive":false},{"label":"Offline-Installation","href":"Offline-install","type":2,"isActive":false},{"label":"Upgrade des laufenden Milvus-Clusters","href":"Upgrade-running-Milvus-cluster","type":2,"isActive":false},{"label":"Milvus deinstallieren","href":"Uninstall-Milvus","type":2,"isActive":false},{"label":"Wie geht es weiter?","href":"Whats-next","type":2,"isActive":false}]} \ No newline at end of file +{"codeList":["$ kubectl get sc\n\nNAME PROVISIONER RECLAIMPOLICY VOLUMEBIINDINGMODE ALLOWVOLUMEEXPANSION AGE\nstandard (default) k8s.io/minikube-hostpath Delete Immediate false \n","$ helm repo add milvus https://zilliztech.github.io/milvus-helm/\n","helm repo add zilliztech https://zilliztech.github.io/milvus-helm/\nhelm repo update\n# upgrade existing helm release\nhelm upgrade my-release zilliztech/milvus\n","$ helm repo update\n","$ helm install my-release milvus/milvus\n","$ kubectl get pods\n","NAME READY STATUS RESTARTS AGE\nmy-release-etcd-0 1/1 Running 0 3m23s\nmy-release-etcd-1 1/1 Running 0 3m23s\nmy-release-etcd-2 1/1 Running 0 3m23s\nmy-release-milvus-datanode-68cb87dcbd-4khpm 1/1 Running 0 3m23s\nmy-release-milvus-indexnode-5c5f7b5bd9-l8hjg 1/1 Running 0 3m24s\nmy-release-milvus-mixcoord-7fb9488465-dmbbj 1/1 Running 0 3m23s\nmy-release-milvus-proxy-6bd7f5587-ds2xv 1/1 Running 0 3m24s\nmy-release-milvus-querynode-5cd8fff495-k6gtg 1/1 Running 0 3m24s\nmy-release-minio-0 1/1 Running 0 3m23s\nmy-release-minio-1 1/1 Running 0 3m23s\nmy-release-minio-2 1/1 Running 0 3m23s\nmy-release-minio-3 1/1 Running 0 3m23s\nmy-release-pulsar-autorecovery-86f5dbdf77-lchpc 1/1 Running 0 3m24s\nmy-release-pulsar-bookkeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-bookkeeper-1 1/1 Running 0 98s\nmy-release-pulsar-broker-556ff89d4c-2m29m 1/1 Running 0 3m23s\nmy-release-pulsar-proxy-6fbd75db75-nhg4v 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-metadata-98zbr 0/1 Completed 0 3m24s\n","$ kubectl get pod my-release-milvus-proxy-6bd7f5587-ds2xv --template\n='{{(index (index .spec.containers 0).ports 0).containerPort}}{{\"\\n\"}}'\n19530\n","$ kubectl port-forward service/my-release-milvus 27017:19530\nForwarding from 127.0.0.1:27017 -> 19530\n","$ kubectl port-forward --address 0.0.0.0 service/my-release-milvus 27017:19530\nForwarding from 0.0.0.0:27017 -> 19530\n","$ helm template my-release milvus/milvus > milvus_manifest.yaml\n","$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/requirements.txt\n$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/save_image.py\n","$ pip3 install -r requirements.txt\n$ python3 save_image.py --manifest milvus_manifest.yaml\n","$ for image in $(find . 
-type f -name \"*.tar.gz\") ; do gunzip -c $image | docker load; done\n","$ kubectl apply -f milvus_manifest.yaml\n","$ helm repo update\n$ helm upgrade my-release zilliztech/milvus\n","$ helm uninstall my-release\n"],"headingContent":"Run Milvus in Kubernetes with Helm","anchorList":[{"label":"Milvus in Kubernetes mit Helm starten","href":"Run-Milvus-in-Kubernetes-with-Helm","type":1,"isActive":false},{"label":"Übersicht","href":"Overview","type":2,"isActive":false},{"label":"Voraussetzungen","href":"Prerequisites","type":2,"isActive":false},{"label":"Milvus Helm Chart installieren","href":"Install-Milvus-Helm-Chart","type":2,"isActive":false},{"label":"Online-Installation","href":"Online-install","type":2,"isActive":false},{"label":"Offline-Installation","href":"Offline-install","type":2,"isActive":false},{"label":"Upgrade des laufenden Milvus-Clusters","href":"Upgrade-running-Milvus-cluster","type":2,"isActive":false},{"label":"Milvus deinstallieren","href":"Uninstall-Milvus","type":2,"isActive":false},{"label":"Wie geht es weiter?","href":"Whats-next","type":2,"isActive":false}]} \ No newline at end of file diff --git a/localization/v2.5.x/site/de/getstarted/run-milvus-k8s/install_cluster-helm.md b/localization/v2.5.x/site/de/getstarted/run-milvus-k8s/install_cluster-helm.md index ecfbcb76e..ed64adbe1 100644 --- a/localization/v2.5.x/site/de/getstarted/run-milvus-k8s/install_cluster-helm.md +++ b/localization/v2.5.x/site/de/getstarted/run-milvus-k8s/install_cluster-helm.md @@ -83,11 +83,11 @@ NAME PROVISIONER RECLAIMPOLICY VOLUMEBIINDI >

    Bevor Sie Milvus Helm Charts installieren, müssen Sie das Milvus Helm Repository hinzufügen.

    -
    $ helm repo add milvus https://github.com/zilliztech/milvus-helm
    +
    $ helm repo add milvus https://zilliztech.github.io/milvus-helm/
     

    Das Milvus Helm Charts-Repository unter https://github.com/milvus-io/milvus-helm wurde archiviert und Sie können weitere Aktualisierungen wie folgt von https://github.com/zilliztech/milvus-helm erhalten:

    -
    helm repo add zilliztech https://github.com/zilliztech/milvus-helm
    +
    helm repo add zilliztech https://zilliztech.github.io/milvus-helm/
     helm repo update
     # upgrade existing helm release
     helm upgrade my-release zilliztech/milvus
    @@ -120,9 +120,9 @@ helm upgrade my-release zilliztech/milvus
     

    Mit dem obigen Befehl wird ein Milvus-Cluster mit seinen Komponenten und Abhängigkeiten unter Verwendung von Standardkonfigurationen bereitgestellt. Um diese Einstellungen anzupassen, empfehlen wir Ihnen, das Milvus Sizing Tool zu verwenden, um die Konfigurationen basierend auf Ihrer tatsächlichen Datengröße anzupassen und dann die entsprechende YAML-Datei herunterzuladen. Weitere Informationen zu den Konfigurationsparametern finden Sie in der Milvus System Configurations Checklist.

      -
    • Der Versionsname sollte nur Buchstaben, Zahlen und Bindestriche enthalten. Punkte sind im Versionsnamen nicht erlaubt.
    • +
    • Der Release-Name sollte nur Buchstaben, Zahlen und Bindestriche enthalten. Punkte sind im Release-Namen nicht erlaubt.
    • Die Standard-Befehlszeile installiert die Cluster-Version von Milvus bei der Installation von Milvus mit Helm. Bei der Installation von Milvus als Einzelplatzversion sind weitere Einstellungen erforderlich.
    • -
    • Gemäß dem veralteten API-Migrationsleitfaden von Kubernetes wird die API-Version policy/v1beta1 von PodDisruptionBudget ab v1.25 nicht mehr unterstützt. Es wird empfohlen, Manifeste und API-Clients zu migrieren, um stattdessen die policy/v1-API-Version zu verwenden.
      Als Abhilfe für Benutzer, die noch die API-Version policy/v1beta1 von PodDisruptionBudget auf Kubernetes v1.25 und später verwenden, können Sie stattdessen den folgenden Befehl ausführen, um Milvus zu installieren:
      helm install my-release milvus/milvus --set pulsar.bookkeeper.pdb.usePolicy=false,pulsar.broker.pdb.usePolicy=false,pulsar.proxy.pdb.usePolicy=false,pulsar.zookeeper.pdb.usePolicy=false
    • +
    • Gemäß dem veralteten API-Migrationsleitfaden von Kubernetes wird die API-Version policy/v1beta1 von PodDisruptionBudget ab v1.25 nicht mehr unterstützt. Es wird empfohlen, Manifeste und API-Clients zu migrieren, um stattdessen die policy/v1-API-Version zu verwenden.
      Als Workaround können Benutzer, die noch die API-Version policy/v1beta1 von PodDisruptionBudget auf Kubernetes v1.25 und später verwenden, stattdessen den folgenden Befehl ausführen, um Milvus zu installieren:
      helm install my-release milvus/milvus --set pulsar.bookkeeper.pdb.usePolicy=false,pulsar.broker.pdb.usePolicy=false,pulsar.proxy.pdb.usePolicy=false,pulsar.zookeeper.pdb.usePolicy=false
    • Siehe Milvus Helm Chart und Helm für weitere Informationen.
    @@ -204,7 +204,7 @@ $ wget https://raw.git $ python3 save_image.py --manifest milvus_manifest.yaml

    Die Bilder werden in einem Unterordner namens images im aktuellen Verzeichnis gespeichert.

    -

    4. Bilder laden

    Sie können die Bilder nun wie folgt auf die Hosts in der netzbeschränkten Umgebung laden:

    +

    4. Bilder laden

    Sie können nun die Bilder wie folgt auf die Hosts in der netzbeschränkten Umgebung laden:

    $ for image in $(find . -type f -name "*.tar.gz") ; do gunzip -c $image | docker load; done
     

    5. Milvus bereitstellen

    $ kubectl apply -f milvus_manifest.yaml
    @@ -283,7 +283,7 @@ $ helm upgrade my-release zilliztech/milvus
     
  • Microsoft Azure
  • Erkunden Sie Milvus Backup, ein Open-Source-Tool für Milvus-Datensicherungen.

  • -
  • Birdwatcher, ein Open-Source-Tool zur Fehlersuche in Milvus und für dynamische Konfigurations-Updates.

  • +
  • Birdwatcher, ein Open-Source-Tool zum Debuggen von Milvus und dynamischen Konfigurations-Updates.

  • Entdecken Sie Attu, ein Open-Source-GUI-Tool für die intuitive Milvus-Verwaltung.

  • Überwachen Sie Milvus mit Prometheus.

  • diff --git a/localization/v2.5.x/site/de/home/home.md b/localization/v2.5.x/site/de/home/home.md index 90075f8e4..4726451df 100644 --- a/localization/v2.5.x/site/de/home/home.md +++ b/localization/v2.5.x/site/de/home/home.md @@ -114,7 +114,7 @@ Hier erfahren Sie, was Milvus ist und wie Sie Milvus installieren, verwenden und

    Nov 2024 - Veröffentlichung von Milvus 2.5.0

    • Anleitung für die Durchführung einer Volltextsuche hinzugefügt.
    • -
    • Es wurde eine Anleitung hinzugefügt, wie man einen Schlüsselwortabgleich durchführt.
    • +
    • Anleitung zur Durchführung eines Textabgleichs hinzugefügt.
    • Anleitung hinzugefügt, wie man nullbare und Standardwerte aktiviert.
    • Beschreibungen der Analyzer wurden hinzugefügt.
    • Beschreibungen von Bitmap-Indizes hinzugefügt.
    • diff --git a/localization/v2.5.x/site/de/menuStructure/de.json b/localization/v2.5.x/site/de/menuStructure/de.json index 379f8ff81..92d6762f3 100644 --- a/localization/v2.5.x/site/de/menuStructure/de.json +++ b/localization/v2.5.x/site/de/menuStructure/de.json @@ -286,6 +286,18 @@ "order": 8, "children": [] }, + { + "label": "Metrische Typen", + "id": "metric.md", + "order": 9, + "children": [] + }, + { + "label": "Konsistenzstufe", + "id": "consistency.md", + "order": 10, + "children": [] + }, { "label": "In-Memory-Replik", "id": "replica.md", @@ -602,31 +614,39 @@ ] }, { - "label": "Indizes verwalten", + "label": "Indizes", "id": "manage_indexes", "order": 4, "isMenu": true, "children": [ { - "label": "Index-Vektorfelder", + "label": "Vektor-Indizes", "id": "index-vector-fields.md", "order": 0, "children": [] }, { - "label": "Index Skalarfelder", - "id": "index-scalar-fields.md", + "label": "Skalare Indizes", + "id": "scalar-index", "order": 1, - "children": [] - }, - { - "label": "BITMAP-Index", - "id": "bitmap.md", - "order": 2, - "children": [] + "isMenu": true, + "children": [ + { + "label": "Index Skalarfelder", + "id": "index-scalar-fields.md", + "order": 1, + "children": [] + }, + { + "label": "Bitmap-Index", + "id": "bitmap.md", + "order": 2, + "children": [] + } + ] }, { - "label": "Index mit GPU", + "label": "GPU-fähige Indizes", "id": "index-with-gpu.md", "order": 3, "children": [] @@ -682,7 +702,7 @@ "children": [] }, { - "label": "Schlüsselwort-Match", + "label": "Textübereinstimmung", "id": "keyword-match.md", "order": 7, "children": [] @@ -699,30 +719,6 @@ "order": 9, "children": [] }, - { - "label": "mmap verwenden", - "id": "mmap.md", - "order": 10, - "children": [] - }, - { - "label": "Clustering Verdichtung", - "id": "clustering-compaction.md", - "order": 11, - "children": [] - }, - { - "label": "Konsistenzstufe", - "id": "consistency.md", - "order": 12, - "children": [] - }, - { - "label": "Metrische Typen", - "id": "metric.md", - "order": 13, - "children": [] - }, { "label": "Filterung von Metadaten", "id": "boolean.md", @@ -736,26 +732,6 @@ "children": [] } ] - }, - { - "label": "Datenimport", - "id": "data_import", - "order": 6, - "isMenu": true, - "children": [ - { - "label": "Quelldaten vorbereiten", - "id": "prepare-source-data.md", - "order": 0, - "children": [] - }, - { - "label": "Daten importieren", - "id": "import-data.md", - "order": 1, - "children": [] - } - ] } ] }, @@ -897,11 +873,31 @@ } ] }, + { + "label": "Datenimport", + "id": "data_import", + "order": 5, + "isMenu": true, + "children": [ + { + "label": "Quelldaten vorbereiten", + "id": "prepare-source-data.md", + "order": 0, + "children": [] + }, + { + "label": "Daten importieren", + "id": "import-data.md", + "order": 1, + "children": [] + } + ] + }, { "label": "Milvus-Migration", "id": "milvus_migration", "isMenu": true, - "order": 5, + "order": 6, "children": [ { "label": "Übersicht", @@ -1321,10 +1317,30 @@ } ] }, + { + "label": "Optimierung der Speicherung", + "id": "storage_optimization", + "order": 10, + "isMenu": true, + "children": [ + { + "label": "mmap verwenden", + "id": "mmap.md", + "order": 0, + "children": [] + }, + { + "label": "Clustering Verdichtung", + "id": "clustering-compaction.md", + "order": 1, + "children": [] + } + ] + }, { "label": "Sicherheit", "id": "security", - "order": 10, + "order": 11, "isMenu": true, "children": [ { diff --git a/localization/v2.5.x/site/de/release_notes.md b/localization/v2.5.x/site/de/release_notes.md index ab48739bc..a8fa4d16b 100644 --- 
a/localization/v2.5.x/site/de/release_notes.md +++ b/localization/v2.5.x/site/de/release_notes.md @@ -49,8 +49,8 @@ title: Hinweise zur Veröffentlichung

      Einzelheiten finden Sie unter Volltextsuche.

      Cluster Management WebUI (Beta)

      Um massive Datenmengen und umfangreiche Funktionen besser zu unterstützen, umfasst das ausgeklügelte Design von Milvus verschiedene Abhängigkeiten, zahlreiche Knotenrollen, komplexe Datenstrukturen und mehr. Diese Aspekte können eine Herausforderung für die Nutzung und Wartung darstellen.

      Milvus 2.5 führt eine integrierte Cluster Management WebUI ein, die die Schwierigkeiten bei der Systemwartung reduziert, indem sie die komplexen Informationen der Milvus-Laufzeitumgebung visualisiert. Dazu gehören Details zu Datenbanken und Sammlungen, Segmenten, Kanälen, Abhängigkeiten, Knotenstatus, Aufgabeninformationen, langsame Abfragen und vieles mehr.

      -

      Textabgleich

      Milvus 2.5 nutzt die Analyse- und Indizierungsfunktionen von Tantivy für die Textvorverarbeitung und den Aufbau von Indizes und unterstützt den präzisen Abgleich von Textdaten in natürlicher Sprache auf der Grundlage bestimmter Begriffe. Diese Funktion wird in erster Linie für die gefilterte Suche nach bestimmten Bedingungen verwendet und kann skalare Filterung zur Verfeinerung von Abfrageergebnissen einbeziehen, was Ähnlichkeitssuchen innerhalb von Vektoren ermöglicht, die skalare Kriterien erfüllen.

      -

      Weitere Informationen finden Sie unter Schlüsselwortabgleich.

      +

      Textabgleich

      Milvus 2.5 nutzt die Analyse- und Indizierungsfunktionen von Tantivy für die Textvorverarbeitung und den Aufbau von Indizes und unterstützt den präzisen Abgleich von Textdaten in natürlicher Sprache auf der Grundlage bestimmter Begriffe. Diese Funktion wird in erster Linie für die gefilterte Suche nach bestimmten Bedingungen verwendet und kann skalare Filter zur Verfeinerung der Abfrageergebnisse einbeziehen, so dass Ähnlichkeitssuchen innerhalb von Vektoren, die skalare Kriterien erfüllen, möglich sind.

      +

      Einzelheiten finden Sie unter Textabgleich.
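
      Zur Veranschaulichung eine minimale, rein hypothetische Skizze, wie ein solcher Textabgleich mit pymilvus als Filterausdruck genutzt werden könnte; der Sammlungsname demo und das VARCHAR-Feld text (angelegt mit enable_analyzer=True und enable_match=True) sind frei gewählte Annahmen und nicht Teil dieses Patches:

      # Illustrative Skizze: Entitäten filtern, deren Feld "text" den Begriff "vector" enthält
      from pymilvus import MilvusClient

      client = MilvusClient(uri="http://localhost:19530")

      res = client.query(
          collection_name="demo",                 # angenommene Sammlung
          filter="TEXT_MATCH(text, 'vector')",    # Textabgleich als Filterausdruck
          output_fields=["id", "text"],
          limit=10,
      )
      print(res)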

      Bitmap-Index

      Ein neuer skalarer Datenindex wurde der Milvus-Familie hinzugefügt. Der BitMap-Index verwendet ein Array von Bits, dessen Länge der Anzahl der Zeilen entspricht, um die Existenz von Werten darzustellen und die Suche zu beschleunigen.

      Bitmap-Indizes haben sich traditionell bei Feldern mit niedriger Kardinalität bewährt, die nur eine geringe Anzahl unterschiedlicher Werte haben, z. B. eine Spalte mit Geschlechtsinformationen, die nur zwei mögliche Werte enthält: männlich und weiblich.

      Weitere Informationen finden Sie unter Bitmap-Index.
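
      Als grobe Orientierung eine hypothetische Skizze, wie ein BITMAP-Index auf einem Skalarfeld mit niedriger Kardinalität angelegt werden könnte; Sammlungs- und Feldnamen sind Annahmen:

      # Illustrative Skizze: BITMAP-Index auf einem Feld mit wenigen unterschiedlichen Werten
      from pymilvus import MilvusClient

      client = MilvusClient(uri="http://localhost:19530")

      index_params = client.prepare_index_params()
      index_params.add_index(
          field_name="gender",          # Skalarfeld mit niedriger Kardinalität (Annahme)
          index_type="BITMAP",
          index_name="gender_bitmap",
      )
      client.create_index(collection_name="demo", index_params=index_params)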

      diff --git a/localization/v2.5.x/site/de/tutorials/hybrid_search_with_milvus.md b/localization/v2.5.x/site/de/tutorials/hybrid_search_with_milvus.md index e5306629f..b6bb40468 100644 --- a/localization/v2.5.x/site/de/tutorials/hybrid_search_with_milvus.md +++ b/localization/v2.5.x/site/de/tutorials/hybrid_search_with_milvus.md @@ -25,8 +25,8 @@ title: Hybride Suche mit Milvus

      Milvus unterstützt dichte, spärliche und hybride Retrievalmethoden:

      • Dichtes Retrieval: Nutzt den semantischen Kontext, um die Bedeutung hinter den Abfragen zu verstehen.
      • -
      • Sparse Retrieval: Konzentriert sich auf den Abgleich von Schlüsselwörtern, um Ergebnisse auf der Grundlage bestimmter Begriffe zu finden, was einer Volltextsuche entspricht.
      • -
      • Hybrides Retrieval: Kombiniert sowohl Dense- als auch Sparse-Ansätze und erfasst den vollständigen Kontext und spezifische Schlüsselwörter für umfassende Suchergebnisse.
      • +
      • Sparse Retrieval: Konzentriert sich auf den Textabgleich, um Ergebnisse auf der Grundlage bestimmter Begriffe zu finden, was einer Volltextsuche entspricht.
      • +
      • Hybrides Retrieval: Kombiniert sowohl Dense- als auch Sparse-Ansätze und erfasst den gesamten Kontext und spezifische Schlüsselwörter für umfassende Suchergebnisse.

      Durch die Integration dieser Methoden gleicht die Milvus-Hybridsuche semantische und lexikalische Ähnlichkeiten aus und verbessert so die Gesamtrelevanz der Suchergebnisse. Dieses Notebook führt durch den Prozess der Einrichtung und Verwendung dieser Suchstrategien und zeigt ihre Effektivität in verschiedenen Suchszenarien auf.
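
      Eine stark verkürzte, hypothetische Skizze dieses Ablaufs (Sammlungs- und Feldnamen demo, dense, sparse sowie die Beispielvektoren sind Annahmen; das vollständige Vorgehen folgt weiter unten im Tutorial):

      # Skizze: dichte und spärliche Anfrage kombinieren und per RRF zusammenführen
      from pymilvus import MilvusClient, AnnSearchRequest, RRFRanker

      client = MilvusClient(uri="http://localhost:19530")

      dense_req = AnnSearchRequest(
          data=[[0.1, 0.2, 0.3]],              # dichter Anfragevektor (Beispielwerte)
          anns_field="dense",
          param={"metric_type": "IP"},
          limit=10,
      )
      sparse_req = AnnSearchRequest(
          data=[{1: 0.5, 100: 0.3}],           # spärlicher Anfragevektor (Beispielwerte)
          anns_field="sparse",
          param={"metric_type": "IP"},
          limit=10,
      )

      res = client.hybrid_search(
          collection_name="demo",
          reqs=[dense_req, sparse_req],
          ranker=RRFRanker(),                  # Reciprocal Rank Fusion
          limit=5,
      )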

      Abhängigkeiten und Umgebung

      $ pip install --upgrade pymilvus "pymilvus[model]"
      @@ -308,6 +308,6 @@ formatted_results = doc_text_formatting(ef, query, hybrid_results)
       

      Wie erstelle ich eine neue Shell in einem neuen Terminal mit C-Programmierung (Linux-Terminal)?

      Welches Unternehmen ist in Hyderabad besser zu gründen?

      Welches Unternehmen ist ein guter Start in Hyderabad?

      -

      Welche Mathematik braucht ein absoluter Neuling, um Algorithmen für die Computerprogrammierung zu verstehen? Welche Bücher über Algorithmen sind für einen absoluten Anfänger geeignet?

      +

      Welche Mathematik braucht ein völliger Neuling, um Algorithmen für die Computerprogrammierung zu verstehen? Welche Bücher über Algorithmen sind für einen absoluten Anfänger geeignet?

      Wie kann man das Leben so gestalten, dass es zu einem passt, und wie kann man verhindern, dass das Leben einen geistig und emotional missbraucht?

      Schnell einsatzbereit

      Um zu erfahren, wie man mit diesem Tutorial eine Online-Demo startet, sehen Sie sich bitte die Beispielanwendung an.

      diff --git a/localization/v2.5.x/site/de/userGuide/collections/manage-collections.md b/localization/v2.5.x/site/de/userGuide/collections/manage-collections.md index 2e42a0f28..0f179683d 100644 --- a/localization/v2.5.x/site/de/userGuide/collections/manage-collections.md +++ b/localization/v2.5.x/site/de/userGuide/collections/manage-collections.md @@ -96,7 +96,7 @@ title: Sammlung erklärt d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z" > -

      Die Erstellung von Indizes für bestimmte Felder verbessert die Sucheffizienz. Wir empfehlen Ihnen, Indizes für alle Felder zu erstellen, auf die Ihr Dienst angewiesen ist, wobei Indizes für Vektorfelder obligatorisch sind.

      +

      Die Erstellung von Indizes für bestimmte Felder verbessert die Sucheffizienz. Es wird empfohlen, Indizes für alle Felder zu erstellen, auf die Ihr Dienst angewiesen ist, wobei Indizes für Vektorfelder obligatorisch sind.

      Entität

      Entitäten sind Datensätze, die in einer Sammlung denselben Satz von Feldern verwenden. Die Werte in allen Feldern derselben Zeile bilden eine Entität.

      +

      Entitäten sind Datensätze, die in einer Sammlung denselben Satz von Feldern verwenden. Die Werte in allen Feldern der gleichen Zeile bilden eine Entität.

      Sie können so viele Entitäten in eine Sammlung einfügen, wie Sie benötigen. Mit der Anzahl der Entitäten steigt jedoch auch die benötigte Speichergröße, was die Suchleistung beeinträchtigt.

      Weitere Informationen finden Sie unter Schema erklärt.
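
      Zur Verdeutlichung eine kleine, hypothetische Skizze: Jedes Dictionary entspricht einer Entität (einer Zeile), und alle Entitäten einer Sammlung teilen sich dieselben Felder. Namen, Werte und Dimensionen sind frei gewählt:

      from pymilvus import MilvusClient

      client = MilvusClient(uri="http://localhost:19530")

      entities = [
          {"id": 1, "vector": [0.1, 0.2, 0.3], "color": "red"},   # eine Entität
          {"id": 2, "vector": [0.4, 0.5, 0.6], "color": "blue"},  # eine weitere Entität
      ]
      client.insert(collection_name="demo", data=entities)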

      Laden und Freigeben

    Darüber hinaus bietet Milvus auch Erweiterungen zur Verbesserung der Suchleistung und -effizienz. Diese sind standardmäßig deaktiviert, und Sie können sie je nach Ihren Serviceanforderungen aktivieren und verwenden. Diese sind

      diff --git a/localization/v2.5.x/site/de/userGuide/manage-indexes/index-with-gpu.md b/localization/v2.5.x/site/de/userGuide/manage-indexes/index-with-gpu.md index a4d82df16..6176b2219 100644 --- a/localization/v2.5.x/site/de/userGuide/manage-indexes/index-with-gpu.md +++ b/localization/v2.5.x/site/de/userGuide/manage-indexes/index-with-gpu.md @@ -68,7 +68,7 @@ title: Index mit GPU

      Die folgenden Beispiele zeigen, wie man GPU-Indizes verschiedener Typen erstellt.

      Vorbereiten der Indexparameter

      Beim Einrichten von GPU-Indexparametern müssen index_type, metric_type und params definiert werden:

        -
      • index_type(string): Der Typ des Indexes, der zur Beschleunigung der Vektorsuche verwendet wird. Gültige Optionen sind GPU_CAGRA, GPU_IVF_FLAT, GPU_IVF_PQ und GPU_BRUTE_FORCE.

      • +
      • index_type(string): Der Typ des Index, der zur Beschleunigung der Vektorsuche verwendet wird. Gültige Optionen sind GPU_CAGRA, GPU_IVF_FLAT, GPU_IVF_PQ und GPU_BRUTE_FORCE.

      • metric_type(Zeichenfolge): Der Typ der Metrik, die zur Messung der Ähnlichkeit von Vektoren verwendet wird. Gültige Optionen sind IP und L2.

      • params(dict): Die indexspezifischen Bauparameter. Die gültigen Optionen für diesen Parameter hängen vom Indextyp ab.
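
      Zur Orientierung eine hypothetische Skizze dieser drei Bestandteile am Beispiel von GPU_CAGRA; Sammlungs- und Feldname sowie die konkreten Bauparameter sind reine Beispielwerte:

      # Skizze: Indexparameter für einen GPU_CAGRA-Index vorbereiten und anwenden
      from pymilvus import connections, Collection

      connections.connect(uri="http://localhost:19530")

      index_params = {
          "index_type": "GPU_CAGRA",             # alternativ: GPU_IVF_FLAT, GPU_IVF_PQ, GPU_BRUTE_FORCE
          "metric_type": "L2",                   # oder "IP"
          "params": {
              "intermediate_graph_degree": 64,   # Beispielwerte für den CAGRA-Aufbau
              "graph_degree": 32,
          },
      }

      collection = Collection("demo")            # angenommene, bereits vorhandene Sammlung
      collection.create_index(field_name="vector", index_params=index_params)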

      @@ -203,7 +203,7 @@ collection.search(

      Bei der Verwendung von GPU-Indizes müssen bestimmte Einschränkungen beachtet werden:

        -
      • Für GPU_IVF_FLAT beträgt der Höchstwert für Limit 256.

      • +
      • Für GPU_IVF_FLAT beträgt der Höchstwert für limit 1024.

      • Für GPU_IVF_PQ und GPU_CAGRA ist der Höchstwert für limit 1024.

      • Für GPU_BRUTE_FORCE gibt es zwar keinen festen Grenzwert, es wird jedoch empfohlen, 4096 nicht zu überschreiten, um mögliche Leistungsprobleme zu vermeiden.

      • Derzeit unterstützen GPU-Indizes keinen COSINE-Abstand. Wenn der COSINE-Abstand erforderlich ist, sollten die Daten zuerst normalisiert werden, und dann kann der Innenproduktabstand (IP) als Ersatz verwendet werden.

      • @@ -229,5 +229,5 @@ collection.search(
      • Wann ist es sinnvoll, einen GPU-Index zu verwenden?

        Ein GPU-Index ist besonders vorteilhaft in Situationen, die einen hohen Durchsatz oder eine hohe Wiederauffindung erfordern. Bei großen Stapeln kann der Durchsatz der GPU-Indizierung beispielsweise den der CPU-Indizierung um das 100-fache übertreffen. In Szenarien mit kleineren Stapeln übertrifft die GPU-Indizierung die CPU-Indizierung in Bezug auf die Leistung immer noch deutlich. Darüber hinaus kann der Einsatz eines Grafikprozessors den Prozess der Indexerstellung erheblich beschleunigen, wenn Daten schnell eingefügt werden müssen.

      • Für welche Szenarien sind GPU-Indizes wie CAGRA, GPU_IVF_PQ, GPU_IVF_FLAT und GPU_BRUTE_FORCE am besten geeignet?

        -

        CAGRA-Indizes sind ideal für Szenarien, die eine höhere Leistung erfordern, auch wenn dies mit einem höheren Speicherbedarf einhergeht. In Umgebungen, in denen Speicherplatzeinsparungen Priorität haben, kann der GPU_IVF_PQ-Index dazu beitragen, die Speicheranforderungen zu minimieren, auch wenn dies mit einem höheren Präzisionsverlust einhergeht. Der GPU_IVF_FLAT-Index stellt eine ausgewogene Option dar, die einen Kompromiss zwischen Leistung und Speicherbedarf bietet. Der GPU_BRUTE_FORCE-Index schließlich ist für erschöpfende Suchvorgänge konzipiert und garantiert durch die Durchführung von Traversalsuchen eine Abrufrate von 1.

      • +

        CAGRA-Indizes sind ideal für Szenarien, die eine höhere Leistung erfordern, auch wenn dies mit einem höheren Speicherbedarf einhergeht. In Umgebungen, in denen Speicherplatzeinsparung Priorität hat, kann der GPU_IVF_PQ-Index dazu beitragen, die Speicheranforderungen zu minimieren, auch wenn dies mit einem höheren Präzisionsverlust einhergeht. Der GPU_IVF_FLAT-Index stellt eine ausgewogene Option dar, die einen Kompromiss zwischen Leistung und Speicherbedarf bietet. Der GPU_BRUTE_FORCE-Index schließlich ist für erschöpfende Suchvorgänge konzipiert und garantiert durch die Durchführung von Traversalsuchen eine Abrufrate von 1.

      diff --git a/localization/v2.5.x/site/de/userGuide/schema/analyzer/analyzer-overview.json b/localization/v2.5.x/site/de/userGuide/schema/analyzer/analyzer-overview.json index 408baeec4..8bcac9221 100644 --- a/localization/v2.5.x/site/de/userGuide/schema/analyzer/analyzer-overview.json +++ b/localization/v2.5.x/site/de/userGuide/schema/analyzer/analyzer-overview.json @@ -1 +1 @@ -{"codeList":["analyzer_params = {​\n \"type\": \"standard\", # Uses the standard built-in analyzer​\n \"stop_words\": [\"a\", \"an\", \"for\"] # Defines a list of common words (stop words) to exclude from tokenization​\n}​\n\n","analyzer_params = {​\n \"tokenizer\": \"standard\",​\n \"filter\": [​\n \"lowercase\",​\n {​\n \"type\": \"stop\",​\n \"stop_words\": [\"a\", \"an\", \"for\"]​\n }​\n ]​\n}​\n\n","[\"Vector\", \"Database\", \"Built\", \"for\", \"Scale\"]​\n","analyzer_params = {​\n \"tokenizer\": \"whitespace\",​\n}​\n","[\"vector\", \"database\", \"built\", \"for\", \"scale\"]​\n","analyzer_params = {​\n \"tokenizer\": \"standard\", # Mandatory: Specifies tokenizer​\n \"filter\": [\"lowercase\"], # Optional: Built-in filter that converts text to lowercase​\n}​\n","analyzer_params = {​\n \"tokenizer\": \"standard\", # Mandatory: Specifies tokenizer​\n \"filter\": [​\n {​\n \"type\": \"stop\", # Specifies 'stop' as the filter type​\n \"stop_words\": [\"of\", \"to\"], # Customizes stop words for this filter type​\n }​\n ]​\n}​\n\n","from pymilvus import MilvusClient, DataType​\n​\n# Set up a Milvus client​\nclient = MilvusClient(​\n uri=\"http://localhost:19530\"​\n)​\n​\n# Create schema​\nschema = client.create_schema(auto_id=True, enable_dynamic_field=False)​\n​\n# Add fields to schema​\n​\n# Use a built-in analyzer​\nanalyzer_params_built_in = {​\n \"type\": \"english\"​\n}​\n​\n# Add VARCHAR field `title_en`​\nschema.add_field(​\n field_name='title_en', ​\n datatype=DataType.VARCHAR, ​\n max_length=1000, ​\n enable_analyzer=True,​\n analyzer_params=analyzer_params_built_in,​\n enable_match=True, ​\n)​\n​\n# Configure a custom analyzer​\nanalyzer_params_custom = {​\n \"tokenizer\": \"standard\",​\n \"filter\": [​\n \"lowercase\", # Built-in filter​\n {​\n \"type\": \"length\", # Custom filter​\n \"max\": 40​\n },​\n {​\n \"type\": \"stop\", # Custom filter​\n \"stop_words\": [\"of\", \"to\"]​\n }​\n ]​\n}​\n​\n# Add VARCHAR field `title`​\nschema.add_field(​\n field_name='title', ​\n datatype=DataType.VARCHAR, ​\n max_length=1000, ​\n enable_analyzer=True,​\n analyzer_params=analyzer_params_custom,​\n enable_match=True, ​\n)​\n​\n# Add vector field​\nschema.add_field(field_name=\"embedding\", datatype=DataType.FLOAT_VECTOR, dim=3)​\n# Add primary field​\nschema.add_field(field_name=\"id\", datatype=DataType.INT64, is_primary=True)​\n​\n# Set up index params for vector field​\nindex_params = client.prepare_index_params()​\nindex_params.add_index(field_name=\"embedding\", metric_type=\"COSINE\", index_type=\"AUTOINDEX\")​\n​\n# Create collection with defined schema​\nclient.create_collection(​\n collection_name=\"YOUR_COLLECTION_NAME\",​\n schema=schema,​\n index_params=index_params​\n)​\n"],"headingContent":"Analyzer Overview​","anchorList":[{"label":"Analyzer Überblick","href":"Analyzer-Overview​","type":1,"isActive":false},{"label":"Anatomie eines Analyzers","href":"Anatomy-of-an-analyzer​","type":2,"isActive":false},{"label":"Analyzer-Typen","href":"Analyzer-types​","type":2,"isActive":false},{"label":"Beispiel für die Verwendung","href":"Example-use​","type":2,"isActive":false}]} \ No newline at 
end of file +{"codeList":["analyzer_params = {​\n \"type\": \"standard\", # Uses the standard built-in analyzer​\n \"stop_words\": [\"a\", \"an\", \"for\"] # Defines a list of common words (stop words) to exclude from tokenization​\n}​\n\n","analyzer_params = {​\n \"tokenizer\": \"standard\",​\n \"filter\": [​\n \"lowercase\",​\n {​\n \"type\": \"stop\",​\n \"stop_words\": [\"a\", \"an\", \"for\"]​\n }​\n ]​\n}​\n\n","[\"Vector\", \"Database\", \"Built\", \"for\", \"Scale\"]​\n","analyzer_params = {​\n \"tokenizer\": \"whitespace\",​\n}​\n","[\"vector\", \"database\", \"built\", \"for\", \"scale\"]​\n","analyzer_params = {​\n \"tokenizer\": \"standard\", # Mandatory: Specifies tokenizer​\n \"filter\": [\"lowercase\"], # Optional: Built-in filter that converts text to lowercase​\n}​\n","analyzer_params = {​\n \"tokenizer\": \"standard\", # Mandatory: Specifies tokenizer​\n \"filter\": [​\n {​\n \"type\": \"stop\", # Specifies 'stop' as the filter type​\n \"stop_words\": [\"of\", \"to\"], # Customizes stop words for this filter type​\n }​\n ]​\n}​\n\n","from pymilvus import MilvusClient, DataType​\n​\n# Set up a Milvus client​\nclient = MilvusClient(​\n uri=\"http://localhost:19530\"​\n)​\n​\n# Create schema​\nschema = client.create_schema(auto_id=True, enable_dynamic_field=False)​\n​\n# Add fields to schema​\n​\n# Use a built-in analyzer​\nanalyzer_params_built_in = {​\n \"type\": \"english\"​\n}​\n​\n# Add VARCHAR field `title_en`​\nschema.add_field(​\n field_name='title_en', ​\n datatype=DataType.VARCHAR, ​\n max_length=1000, ​\n enable_analyzer=True,​\n analyzer_params=analyzer_params_built_in,​\n enable_match=True, ​\n)​\n​\n# Configure a custom analyzer​\nanalyzer_params_custom = {​\n \"tokenizer\": \"standard\",​\n \"filter\": [​\n \"lowercase\", # Built-in filter​\n {​\n \"type\": \"length\", # Custom filter​\n \"max\": 40​\n },​\n {​\n \"type\": \"stop\", # Custom filter​\n \"stop_words\": [\"of\", \"to\"]​\n }​\n ]​\n}​\n​\n# Add VARCHAR field `title`​\nschema.add_field(​\n field_name='title', ​\n datatype=DataType.VARCHAR, ​\n max_length=1000, ​\n enable_analyzer=True,​\n analyzer_params=analyzer_params_custom,​\n enable_match=True, ​\n)​\n​\n# Add vector field​\nschema.add_field(field_name=\"embedding\", datatype=DataType.FLOAT_VECTOR, dim=3)​\n# Add primary field​\nschema.add_field(field_name=\"id\", datatype=DataType.INT64, is_primary=True)​\n​\n# Set up index params for vector field​\nindex_params = client.prepare_index_params()​\nindex_params.add_index(field_name=\"embedding\", metric_type=\"COSINE\", index_type=\"AUTOINDEX\")​\n​\n# Create collection with defined schema​\nclient.create_collection(​\n collection_name=\"YOUR_COLLECTION_NAME\",​\n schema=schema,​\n index_params=index_params​\n)​\n"],"headingContent":"Analyzer Overview​","anchorList":[{"label":"Analyzer Überblick","href":"Analyzer-Overview​","type":1,"isActive":false},{"label":"Anatomie eines Analysators","href":"Anatomy-of-an-analyzer​","type":2,"isActive":false},{"label":"Analyzer-Typen","href":"Analyzer-types​","type":2,"isActive":false},{"label":"Beispiel für die Verwendung","href":"Example-use​","type":2,"isActive":false}]} \ No newline at end of file diff --git a/localization/v2.5.x/site/de/userGuide/schema/analyzer/analyzer-overview.md b/localization/v2.5.x/site/de/userGuide/schema/analyzer/analyzer-overview.md index a01c673e4..7b38906fa 100644 --- a/localization/v2.5.x/site/de/userGuide/schema/analyzer/analyzer-overview.md +++ b/localization/v2.5.x/site/de/userGuide/schema/analyzer/analyzer-overview.md 
@@ -24,15 +24,15 @@ summary: >- >

      In der Textverarbeitung ist ein Analyzer eine entscheidende Komponente, die Rohtext in ein strukturiertes, durchsuchbares Format umwandelt. Jeder Analyzer besteht in der Regel aus zwei Kernelementen: Tokenizer und Filter. Gemeinsam wandeln sie den Eingabetext in Token um, verfeinern diese Token und bereiten sie für eine effiziente Indizierung und Suche vor.

      -

      In Milvus werden die Analyzer während der Erstellung der Sammlung konfiguriert, wenn Sie VARCHAR Felder zum Schema der Sammlung hinzufügen. Die von einem Analyzer erzeugten Token können zum Aufbau eines Indexes für den Schlüsselwortabgleich verwendet oder in Sparse Embeddings für die Volltextsuche konvertiert werden. Weitere Informationen finden Sie unter Schlüsselwortabgleich oder Volltextsuche.

      +

      In Milvus werden die Analyzer während der Erstellung der Sammlung konfiguriert, wenn Sie VARCHAR Felder zum Schema der Sammlung hinzufügen. Von einem Analyzer erzeugte Token können zum Aufbau eines Index für den Textabgleich verwendet oder in Sparse Embeddings für die Volltextsuche konvertiert werden. Weitere Informationen finden Sie unter Textabgleich oder Volltextsuche.
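
      Eine minimale, hypothetische Skizze dazu (Feldname und Analyzer-Parameter sind Beispielwerte; Primärschlüssel- und Vektorfeld des Schemas sind hier weggelassen):

      # Skizze: Analyzer beim Anlegen eines VARCHAR-Feldes aktivieren
      from pymilvus import MilvusClient, DataType

      client = MilvusClient(uri="http://localhost:19530")
      schema = client.create_schema(auto_id=True)

      schema.add_field(
          field_name="text",
          datatype=DataType.VARCHAR,
          max_length=1000,
          enable_analyzer=True,                    # Text dieses Feldes wird tokenisiert
          analyzer_params={"type": "english"},     # eingebauter englischer Analyzer
          enable_match=True,                       # erlaubt TEXT_MATCH-Filter auf dem Feld
      )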

      Die Verwendung von Analyzern kann die Leistung beeinträchtigen.

      • Volltextsuche: Bei der Volltextsuche verbrauchen die DataNode- und QueryNode-Channels die Daten langsamer, da sie auf den Abschluss der Tokenisierung warten müssen. Infolgedessen dauert es länger, bis neu eingegebene Daten für die Suche verfügbar sind.

      • -
      • Schlüsselwortabgleich: Beim Stichwortabgleich ist die Indexerstellung ebenfalls langsamer, da die Tokenisierung abgeschlossen werden muss, bevor ein Index erstellt werden kann.

      • +
      • Textabgleich: Beim Textabgleich ist die Indexerstellung ebenfalls langsamer, da die Tokenisierung erst abgeschlossen werden muss, bevor ein Index erstellt werden kann.

      -

      Anatomie eines Analyzers

    -

    Filter

    Filter sind optionale Komponenten, die mit den vom Tokenizer erzeugten Token arbeiten und sie nach Bedarf umwandeln oder verfeinern. Nach Anwendung eines lowercase -Filters auf die tokenisierten Begriffe ["Vector", "Database", "Built", "for", "Scale"] könnte das Ergebnis zum Beispiel so aussehen.

    +

    Filter

    Filter sind optionale Komponenten, die mit den vom Tokenizer erzeugten Token arbeiten und sie je nach Bedarf transformieren oder verfeinern. Nach Anwendung eines lowercase -Filters auf die tokenisierten Begriffe ["Vector", "Database", "Built", "for", "Scale"] könnte das Ergebnis zum Beispiel so aussehen.

    ["vector", "database", "built", "for", "scale"]​
     

    Filter in einem benutzerdefinierten Analyzer können entweder eingebaut oder benutzerdefiniert sein, je nach Konfigurationsbedarf.
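
      Eine solche Kombination aus eingebautem und konfiguriertem Filter könnte beispielsweise so aussehen; Tokenizer und Stoppwörter sind hier frei gewählte Beispielwerte:

      analyzer_params = {
          "tokenizer": "standard",
          "filter": [
              "lowercase",                                   # eingebauter Filter
              {"type": "stop", "stop_words": ["for", "a"]},  # konfigurierter Filter
          ],
      }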

    diff --git a/localization/v2.5.x/site/de/userGuide/schema/sparse_vector.json b/localization/v2.5.x/site/de/userGuide/schema/sparse_vector.json index 6ce11abc7..c8258d2bc 100644 --- a/localization/v2.5.x/site/de/userGuide/schema/sparse_vector.json +++ b/localization/v2.5.x/site/de/userGuide/schema/sparse_vector.json @@ -1 +1 @@ -{"codeList":["from scipy.sparse import csr_matrix​\n​\n# Create a sparse matrix​\nrow = [0, 0, 1, 2, 2, 2]​\ncol = [0, 2, 2, 0, 1, 2]​\ndata = [1, 2, 3, 4, 5, 6]​\nsparse_matrix = csr_matrix((data, (row, col)), shape=(3, 3))​\n​\n# Represent sparse vector using the sparse matrix​\nsparse_vector = sparse_matrix.getrow(0)​\n\n","# Represent sparse vector using a dictionary​\nsparse_vector = [{1: 0.5, 100: 0.3, 500: 0.8, 1024: 0.2, 5000: 0.6}]​\n\n","SortedMap sparseVector = new TreeMap<>();​\nsparseVector.put(1L, 0.5f);​\nsparseVector.put(100L, 0.3f);​\nsparseVector.put(500L, 0.8f);​\nsparseVector.put(1024L, 0.2f);​\nsparseVector.put(5000L, 0.6f);​\n\n","# Represent sparse vector using a list of tuples​\nsparse_vector = [[(1, 0.5), (100, 0.3), (500, 0.8), (1024, 0.2), (5000, 0.6)]]​\n\n","from pymilvus import MilvusClient, DataType​\n​\nclient = MilvusClient(uri=\"http://localhost:19530\")​\n​\nclient.drop_collection(collection_name=\"my_sparse_collection\")​\n​\nschema = client.create_schema(​\n auto_id=True,​\n enable_dynamic_fields=True,​\n)​\n​\nschema.add_field(field_name=\"pk\", datatype=DataType.VARCHAR, is_primary=True, max_length=100)​\nschema.add_field(field_name=\"sparse_vector\", datatype=DataType.SPARSE_FLOAT_VECTOR)​\n\n","import io.milvus.v2.client.ConnectConfig;​\nimport io.milvus.v2.client.MilvusClientV2;​\n​\nimport io.milvus.v2.common.DataType;​\nimport io.milvus.v2.service.collection.request.AddFieldReq;​\nimport io.milvus.v2.service.collection.request.CreateCollectionReq;​\n​\nMilvusClientV2 client = new MilvusClientV2(ConnectConfig.builder()​\n .uri(\"http://localhost:19530\")​\n .build());​\n ​\nCreateCollectionReq.CollectionSchema schema = client.createSchema();​\nschema.setEnableDynamicField(true);​\nschema.addField(AddFieldReq.builder()​\n .fieldName(\"pk\")​\n .dataType(DataType.VarChar)​\n .isPrimaryKey(true)​\n .autoID(true)​\n .maxLength(100)​\n .build());​\n​\nschema.addField(AddFieldReq.builder()​\n .fieldName(\"sparse_vector\")​\n .dataType(DataType.SparseFloatVector)​\n .build());​\n\n","import { DataType } from \"@zilliz/milvus2-sdk-node\";​\n​\nconst schema = [​\n {​\n name: \"metadata\",​\n data_type: DataType.JSON,​\n },​\n {​\n name: \"pk\",​\n data_type: DataType.Int64,​\n is_primary_key: true,​\n },​\n {​\n name: \"sparse_vector\",​\n data_type: DataType.SparseFloatVector,​\n }​\n];​\n​\n\n","export primaryField='{​\n \"fieldName\": \"pk\",​\n \"dataType\": \"VarChar\",​\n \"isPrimary\": true,​\n \"elementTypeParams\": {​\n \"max_length\": 100​\n }​\n}'​\n​\nexport vectorField='{​\n \"fieldName\": \"sparse_vector\",​\n \"dataType\": \"SparseFloatVector\"​\n}'​\n​\nexport schema=\"{​\n \\\"autoID\\\": true,​\n \\\"fields\\\": [​\n $primaryField,​\n $vectorField​\n ]​\n}\"​\n\n","index_params = client.prepare_index_params()​\n​\nindex_params.add_index(​\n field_name=\"sparse_vector\",​\n index_name=\"sparse_inverted_index\",​\n index_type=\"SPARSE_INVERTED_INDEX\",​\n metric_type=\"IP\",​\n params={\"drop_ratio_build\": 0.2},​\n)​\n\n","import io.milvus.v2.common.IndexParam;​\nimport java.util.*;​\n​\nList indexes = new ArrayList<>();​\nMap extraParams = new HashMap<>();​\nextraParams.put(\"drop_ratio_build\", 
0.2);​\nindexes.add(IndexParam.builder()​\n .fieldName(\"sparse_vector\")​\n .indexName(\"sparse_inverted_index\")​\n .indexType(IndexParam.IndexType.SPARSE_INVERTED_INDEX)​\n .metricType(IndexParam.MetricType.IP)​\n .extraParams(extraParams)​\n .build());​\n\n","const indexParams = await client.createIndex({​\n index_name: 'sparse_inverted_index',​\n field_name: 'sparse_vector',​\n metric_type: MetricType.IP,​\n index_type: IndexType.SPARSE_WAND,​\n params: {​\n drop_ratio_build: 0.2,​\n },​\n});​\n\n","export indexParams='[​\n {​\n \"fieldName\": \"sparse_vector\",​\n \"metricType\": \"IP\",​\n \"indexName\": \"sparse_inverted_index\",​\n \"indexType\": \"SPARSE_INVERTED_INDEX\",​\n \"params\":{\"drop_ratio_build\": 0.2}​\n }​\n ]'​\n\n","client.create_collection(​\n collection_name=\"my_sparse_collection\",​\n schema=schema,​\n index_params=index_params​\n)​\n\n","import io.milvus.v2.client.ConnectConfig;​\nimport io.milvus.v2.client.MilvusClientV2;​\n​\nMilvusClientV2 client = new MilvusClientV2(ConnectConfig.builder()​\n .uri(\"http://localhost:19530\")​\n .build());​\n ​\nCreateCollectionReq requestCreate = CreateCollectionReq.builder()​\n .collectionName(\"my_sparse_collection\")​\n .collectionSchema(schema)​\n .indexParams(indexes)​\n .build();​\nclient.createCollection(requestCreate);​\n\n","import { MilvusClient } from \"@zilliz/milvus2-sdk-node\";​\n​\nconst client = new MilvusClient({​\n address: 'http://localhost:19530'​\n});​\n​\nawait client.createCollection({​\n collection_name: 'my_sparse_collection',​\n schema: schema,​\n index_params: indexParams​\n});​\n\n","curl --request POST \\​\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/collections/create\" \\​\n--header \"Authorization: Bearer ${TOKEN}\" \\​\n--header \"Content-Type: application/json\" \\​\n-d \"{​\n \\\"collectionName\\\": \\\"my_sparse_collection\\\",​\n \\\"schema\\\": $schema,​\n \\\"indexParams\\\": $indexParams​\n}\"​\n\n","sparse_vectors = [​\n {\"sparse_vector\": {1: 0.5, 100: 0.3, 500: 0.8}},​\n {\"sparse_vector\": {10: 0.1, 200: 0.7, 1000: 0.9}},​\n]​\n​\nclient.insert(​\n collection_name=\"my_sparse_collection\",​\n data=sparse_vectors​\n)​\n\n","import com.google.gson.Gson;​\nimport com.google.gson.JsonObject;​\nimport io.milvus.v2.service.vector.request.InsertReq;​\nimport io.milvus.v2.service.vector.response.InsertResp;​\n​\nList rows = new ArrayList<>();​\nGson gson = new Gson();​\n{​\n JsonObject row = new JsonObject();​\n SortedMap sparse = new TreeMap<>();​\n sparse.put(1L, 0.5f);​\n sparse.put(100L, 0.3f);​\n sparse.put(500L, 0.8f);​\n row.add(\"sparse_vector\", gson.toJsonTree(sparse));​\n rows.add(row);​\n}​\n{​\n JsonObject row = new JsonObject();​\n SortedMap sparse = new TreeMap<>();​\n sparse.put(10L, 0.1f);​\n sparse.put(200L, 0.7f);​\n sparse.put(1000L, 0.9f);​\n row.add(\"sparse_vector\", gson.toJsonTree(sparse));​\n rows.add(row);​\n}​\n​\nInsertResp insertR = client.insert(InsertReq.builder()​\n .collectionName(\"my_sparse_collection\")​\n .data(rows)​\n .build());​\n\n","const data = [​\n { sparse_vector: { \"1\": 0.5, \"100\": 0.3, \"500\": 0.8 } },​\n { sparse_vector: { \"10\": 0.1, \"200\": 0.7, \"1000\": 0.9 } },​\n];​\nclient.insert({​\n collection_name: \"my_sparse_collection\",​\n data: data,​\n});​\n​\n\n","curl --request POST \\​\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/entities/insert\" \\​\n--header \"Authorization: Bearer ${TOKEN}\" \\​\n--header \"Content-Type: application/json\" \\​\n-d '{​\n \"data\": [​\n {\"sparse_vector\": {\"1\": 0.5, \"100\": 0.3, \"500\": 0.8}},​\n 
{\"sparse_vector\": {\"10\": 0.1, \"200\": 0.7, \"1000\": 0.9}} ​\n ],​\n \"collectionName\": \"my_sparse_collection\"​\n}'​\n​\n## {\"code\":0,\"cost\":0,\"data\":{\"insertCount\":2,\"insertIds\":[\"453577185629572534\",\"453577185629572535\"]}}​\n\n","# Prepare search parameters​\nsearch_params = {​\n \"params\": {\"drop_ratio_search\": 0.2}, # Additional optional search parameters​\n}​\n​\n# Prepare the query vector​\nquery_vector = [{1: 0.2, 50: 0.4, 1000: 0.7}]​\n\n","res = client.search(​\n collection_name=\"my_sparse_collection\",​\n data=query_vector,​\n limit=3,​\n output_fields=[\"pk\"],​\n search_params=search_params,​\n)​\n​\nprint(res)​\n​\n# Output​\n# data: [\"[{'id': '453718927992172266', 'distance': 0.6299999952316284, 'entity': {'pk': '453718927992172266'}}, {'id': '453718927992172265', 'distance': 0.10000000149011612, 'entity': {'pk': '453718927992172265'}}]\"]​\n\n","import io.milvus.v2.service.vector.request.SearchReq;​\nimport io.milvus.v2.service.vector.request.data.SparseFloatVec;​\nimport io.milvus.v2.service.vector.response.SearchResp;​\n​\nMap searchParams = new HashMap<>();​\nsearchParams.put(\"drop_ratio_search\", 0.2);​\n​\nSortedMap sparse = new TreeMap<>();​\nsparse.put(10L, 0.1f);​\nsparse.put(200L, 0.7f);​\nsparse.put(1000L, 0.9f);​\n​\nSparseFloatVec queryVector = new SparseFloatVec(sparse);​\n​\nSearchResp searchR = client.search(SearchReq.builder()​\n .collectionName(\"my_sparse_collection\")​\n .data(Collections.singletonList(queryVector))​\n .annsField(\"sparse_vector\")​\n .searchParams(searchParams)​\n .topK(3)​\n .outputFields(Collections.singletonList(\"pk\"))​\n .build());​\n ​\nSystem.out.println(searchR.getSearchResults());​\n​\n// Output​\n//​\n// [[SearchResp.SearchResult(entity={pk=453444327741536759}, score=1.31, id=453444327741536759), SearchResp.SearchResult(entity={pk=453444327741536756}, score=1.31, id=453444327741536756), SearchResp.SearchResult(entity={pk=453444327741536753}, score=1.31, id=453444327741536753)]]​\n\n","client.search({​\n collection_name: 'my_sparse_collection',​\n data: {1: 0.2, 50: 0.4, 1000: 0.7},​\n limit: 3,​\n output_fields: ['pk'],​\n params: {​\n drop_ratio_search: 0.2​\n }​\n});​\n\n","curl --request POST \\​\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/entities/search\" \\​\n--header \"Authorization: Bearer ${TOKEN}\" \\​\n--header \"Content-Type: application/json\" \\​\n-d '{​\n \"collectionName\": \"my_sparse_collection\",​\n \"data\": [​\n {\"1\": 0.2, \"50\": 0.4, \"1000\": 0.7}​\n ],​\n \"annsField\": \"sparse_vector\",​\n \"limit\": 3,​\n \"searchParams\":{​\n \"params\":{\"drop_ratio_search\": 0.2}​\n },​\n \"outputFields\": [\"pk\"]​\n}'​\n​\n## {\"code\":0,\"cost\":0,\"data\":[{\"distance\":0.63,\"id\":\"453577185629572535\",\"pk\":\"453577185629572535\"},{\"distance\":0.1,\"id\":\"453577185629572534\",\"pk\":\"453577185629572534\"}]}​\n\n"],"headingContent":"Sparse Vector​","anchorList":[{"label":"Sparsamer Vektor","href":"Sparse-Vector​","type":1,"isActive":false},{"label":"Überblick","href":"Overview​","type":2,"isActive":false},{"label":"Spärliche Vektoren in Milvus verwenden","href":"Use-sparse-vectors-in-Milvus​","type":2,"isActive":false}]} \ No newline at end of file +{"codeList":["from scipy.sparse import csr_matrix​\n​\n# Create a sparse matrix​\nrow = [0, 0, 1, 2, 2, 2]​\ncol = [0, 2, 2, 0, 1, 2]​\ndata = [1, 2, 3, 4, 5, 6]​\nsparse_matrix = csr_matrix((data, (row, col)), shape=(3, 3))​\n​\n# Represent sparse vector using the sparse matrix​\nsparse_vector = sparse_matrix.getrow(0)​\n\n","# 
Represent sparse vector using a dictionary​\nsparse_vector = [{1: 0.5, 100: 0.3, 500: 0.8, 1024: 0.2, 5000: 0.6}]​\n\n","SortedMap sparseVector = new TreeMap<>();​\nsparseVector.put(1L, 0.5f);​\nsparseVector.put(100L, 0.3f);​\nsparseVector.put(500L, 0.8f);​\nsparseVector.put(1024L, 0.2f);​\nsparseVector.put(5000L, 0.6f);​\n\n","# Represent sparse vector using a list of tuples​\nsparse_vector = [[(1, 0.5), (100, 0.3), (500, 0.8), (1024, 0.2), (5000, 0.6)]]​\n\n","from pymilvus import MilvusClient, DataType​\n​\nclient = MilvusClient(uri=\"http://localhost:19530\")​\n​\nclient.drop_collection(collection_name=\"my_sparse_collection\")​\n​\nschema = client.create_schema(​\n auto_id=True,​\n enable_dynamic_fields=True,​\n)​\n​\nschema.add_field(field_name=\"pk\", datatype=DataType.VARCHAR, is_primary=True, max_length=100)​\nschema.add_field(field_name=\"sparse_vector\", datatype=DataType.SPARSE_FLOAT_VECTOR)​\n\n","import io.milvus.v2.client.ConnectConfig;​\nimport io.milvus.v2.client.MilvusClientV2;​\n​\nimport io.milvus.v2.common.DataType;​\nimport io.milvus.v2.service.collection.request.AddFieldReq;​\nimport io.milvus.v2.service.collection.request.CreateCollectionReq;​\n​\nMilvusClientV2 client = new MilvusClientV2(ConnectConfig.builder()​\n .uri(\"http://localhost:19530\")​\n .build());​\n ​\nCreateCollectionReq.CollectionSchema schema = client.createSchema();​\nschema.setEnableDynamicField(true);​\nschema.addField(AddFieldReq.builder()​\n .fieldName(\"pk\")​\n .dataType(DataType.VarChar)​\n .isPrimaryKey(true)​\n .autoID(true)​\n .maxLength(100)​\n .build());​\n​\nschema.addField(AddFieldReq.builder()​\n .fieldName(\"sparse_vector\")​\n .dataType(DataType.SparseFloatVector)​\n .build());​\n\n","import { DataType } from \"@zilliz/milvus2-sdk-node\";​\n​\nconst schema = [​\n {​\n name: \"metadata\",​\n data_type: DataType.JSON,​\n },​\n {​\n name: \"pk\",​\n data_type: DataType.Int64,​\n is_primary_key: true,​\n },​\n {​\n name: \"sparse_vector\",​\n data_type: DataType.SparseFloatVector,​\n }​\n];​\n​\n\n","export primaryField='{​\n \"fieldName\": \"pk\",​\n \"dataType\": \"VarChar\",​\n \"isPrimary\": true,​\n \"elementTypeParams\": {​\n \"max_length\": 100​\n }​\n}'​\n​\nexport vectorField='{​\n \"fieldName\": \"sparse_vector\",​\n \"dataType\": \"SparseFloatVector\"​\n}'​\n​\nexport schema=\"{​\n \\\"autoID\\\": true,​\n \\\"fields\\\": [​\n $primaryField,​\n $vectorField​\n ]​\n}\"​\n\n","index_params = client.prepare_index_params()​\n​\nindex_params.add_index(​\n field_name=\"sparse_vector\",​\n index_name=\"sparse_inverted_index\",​\n index_type=\"SPARSE_INVERTED_INDEX\",​\n metric_type=\"IP\",​\n params={\"drop_ratio_build\": 0.2},​\n)​\n\n","import io.milvus.v2.common.IndexParam;​\nimport java.util.*;​\n​\nList indexes = new ArrayList<>();​\nMap extraParams = new HashMap<>();​\nextraParams.put(\"drop_ratio_build\", 0.2);​\nindexes.add(IndexParam.builder()​\n .fieldName(\"sparse_vector\")​\n .indexName(\"sparse_inverted_index\")​\n .indexType(IndexParam.IndexType.SPARSE_INVERTED_INDEX)​\n .metricType(IndexParam.MetricType.IP)​\n .extraParams(extraParams)​\n .build());​\n\n","const indexParams = await client.createIndex({​\n index_name: 'sparse_inverted_index',​\n field_name: 'sparse_vector',​\n metric_type: MetricType.IP,​\n index_type: IndexType.SPARSE_WAND,​\n params: {​\n drop_ratio_build: 0.2,​\n },​\n});​\n\n","export indexParams='[​\n {​\n \"fieldName\": \"sparse_vector\",​\n \"metricType\": \"IP\",​\n \"indexName\": \"sparse_inverted_index\",​\n \"indexType\": 
\"SPARSE_INVERTED_INDEX\",​\n \"params\":{\"drop_ratio_build\": 0.2}​\n }​\n ]'​\n\n","client.create_collection(​\n collection_name=\"my_sparse_collection\",​\n schema=schema,​\n index_params=index_params​\n)​\n\n","import io.milvus.v2.client.ConnectConfig;​\nimport io.milvus.v2.client.MilvusClientV2;​\n​\nMilvusClientV2 client = new MilvusClientV2(ConnectConfig.builder()​\n .uri(\"http://localhost:19530\")​\n .build());​\n ​\nCreateCollectionReq requestCreate = CreateCollectionReq.builder()​\n .collectionName(\"my_sparse_collection\")​\n .collectionSchema(schema)​\n .indexParams(indexes)​\n .build();​\nclient.createCollection(requestCreate);​\n\n","import { MilvusClient } from \"@zilliz/milvus2-sdk-node\";​\n​\nconst client = new MilvusClient({​\n address: 'http://localhost:19530'​\n});​\n​\nawait client.createCollection({​\n collection_name: 'my_sparse_collection',​\n schema: schema,​\n index_params: indexParams​\n});​\n\n","curl --request POST \\​\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/collections/create\" \\​\n--header \"Authorization: Bearer ${TOKEN}\" \\​\n--header \"Content-Type: application/json\" \\​\n-d \"{​\n \\\"collectionName\\\": \\\"my_sparse_collection\\\",​\n \\\"schema\\\": $schema,​\n \\\"indexParams\\\": $indexParams​\n}\"​\n\n","sparse_vectors = [​\n {\"sparse_vector\": {1: 0.5, 100: 0.3, 500: 0.8}},​\n {\"sparse_vector\": {10: 0.1, 200: 0.7, 1000: 0.9}},​\n]​\n​\nclient.insert(​\n collection_name=\"my_sparse_collection\",​\n data=sparse_vectors​\n)​\n\n","import com.google.gson.Gson;​\nimport com.google.gson.JsonObject;​\nimport io.milvus.v2.service.vector.request.InsertReq;​\nimport io.milvus.v2.service.vector.response.InsertResp;​\n​\nList rows = new ArrayList<>();​\nGson gson = new Gson();​\n{​\n JsonObject row = new JsonObject();​\n SortedMap sparse = new TreeMap<>();​\n sparse.put(1L, 0.5f);​\n sparse.put(100L, 0.3f);​\n sparse.put(500L, 0.8f);​\n row.add(\"sparse_vector\", gson.toJsonTree(sparse));​\n rows.add(row);​\n}​\n{​\n JsonObject row = new JsonObject();​\n SortedMap sparse = new TreeMap<>();​\n sparse.put(10L, 0.1f);​\n sparse.put(200L, 0.7f);​\n sparse.put(1000L, 0.9f);​\n row.add(\"sparse_vector\", gson.toJsonTree(sparse));​\n rows.add(row);​\n}​\n​\nInsertResp insertR = client.insert(InsertReq.builder()​\n .collectionName(\"my_sparse_collection\")​\n .data(rows)​\n .build());​\n\n","const data = [​\n { sparse_vector: { \"1\": 0.5, \"100\": 0.3, \"500\": 0.8 } },​\n { sparse_vector: { \"10\": 0.1, \"200\": 0.7, \"1000\": 0.9 } },​\n];​\nclient.insert({​\n collection_name: \"my_sparse_collection\",​\n data: data,​\n});​\n​\n\n","curl --request POST \\​\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/entities/insert\" \\​\n--header \"Authorization: Bearer ${TOKEN}\" \\​\n--header \"Content-Type: application/json\" \\​\n-d '{​\n \"data\": [​\n {\"sparse_vector\": {\"1\": 0.5, \"100\": 0.3, \"500\": 0.8}},​\n {\"sparse_vector\": {\"10\": 0.1, \"200\": 0.7, \"1000\": 0.9}} ​\n ],​\n \"collectionName\": \"my_sparse_collection\"​\n}'​\n​\n## {\"code\":0,\"cost\":0,\"data\":{\"insertCount\":2,\"insertIds\":[\"453577185629572534\",\"453577185629572535\"]}}​\n\n","# Prepare search parameters​\nsearch_params = {​\n \"params\": {\"drop_ratio_search\": 0.2}, # Additional optional search parameters​\n}​\n​\n# Prepare the query vector​\nquery_vector = [{1: 0.2, 50: 0.4, 1000: 0.7}]​\n\n","res = client.search(​\n collection_name=\"my_sparse_collection\",​\n data=query_vector,​\n limit=3,​\n output_fields=[\"pk\"],​\n search_params=search_params,​\n)​\n​\nprint(res)​\n​\n# 
Output​\n# data: [\"[{'id': '453718927992172266', 'distance': 0.6299999952316284, 'entity': {'pk': '453718927992172266'}}, {'id': '453718927992172265', 'distance': 0.10000000149011612, 'entity': {'pk': '453718927992172265'}}]\"]​\n\n","import io.milvus.v2.service.vector.request.SearchReq;​\nimport io.milvus.v2.service.vector.request.data.SparseFloatVec;​\nimport io.milvus.v2.service.vector.response.SearchResp;​\n​\nMap searchParams = new HashMap<>();​\nsearchParams.put(\"drop_ratio_search\", 0.2);​\n​\nSortedMap sparse = new TreeMap<>();​\nsparse.put(10L, 0.1f);​\nsparse.put(200L, 0.7f);​\nsparse.put(1000L, 0.9f);​\n​\nSparseFloatVec queryVector = new SparseFloatVec(sparse);​\n​\nSearchResp searchR = client.search(SearchReq.builder()​\n .collectionName(\"my_sparse_collection\")​\n .data(Collections.singletonList(queryVector))​\n .annsField(\"sparse_vector\")​\n .searchParams(searchParams)​\n .topK(3)​\n .outputFields(Collections.singletonList(\"pk\"))​\n .build());​\n ​\nSystem.out.println(searchR.getSearchResults());​\n​\n// Output​\n//​\n// [[SearchResp.SearchResult(entity={pk=453444327741536759}, score=1.31, id=453444327741536759), SearchResp.SearchResult(entity={pk=453444327741536756}, score=1.31, id=453444327741536756), SearchResp.SearchResult(entity={pk=453444327741536753}, score=1.31, id=453444327741536753)]]​\n\n","client.search({​\n collection_name: 'my_sparse_collection',​\n data: {1: 0.2, 50: 0.4, 1000: 0.7},​\n limit: 3,​\n output_fields: ['pk'],​\n params: {​\n drop_ratio_search: 0.2​\n }​\n});​\n\n","curl --request POST \\​\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/entities/search\" \\​\n--header \"Authorization: Bearer ${TOKEN}\" \\​\n--header \"Content-Type: application/json\" \\​\n-d '{​\n \"collectionName\": \"my_sparse_collection\",​\n \"data\": [​\n {\"1\": 0.2, \"50\": 0.4, \"1000\": 0.7}​\n ],​\n \"annsField\": \"sparse_vector\",​\n \"limit\": 3,​\n \"searchParams\":{​\n \"params\":{\"drop_ratio_search\": 0.2}​\n },​\n \"outputFields\": [\"pk\"]​\n}'​\n​\n## {\"code\":0,\"cost\":0,\"data\":[{\"distance\":0.63,\"id\":\"453577185629572535\",\"pk\":\"453577185629572535\"},{\"distance\":0.1,\"id\":\"453577185629572534\",\"pk\":\"453577185629572534\"}]}​\n\n"],"headingContent":"Sparse Vector​","anchorList":[{"label":"Sparsamer Vektor","href":"Sparse-Vector​","type":1,"isActive":false},{"label":"Überblick","href":"Overview​","type":2,"isActive":false},{"label":"Spärliche Vektoren in Milvus verwenden","href":"Use-sparse-vectors-in-Milvus​","type":2,"isActive":false},{"label":"Begrenzungen","href":"Limits","type":2,"isActive":false},{"label":"FAQ","href":"FAQ","type":2,"isActive":false}]} \ No newline at end of file diff --git a/localization/v2.5.x/site/de/userGuide/schema/sparse_vector.md b/localization/v2.5.x/site/de/userGuide/schema/sparse_vector.md index e145ebcca..15916a7b8 100644 --- a/localization/v2.5.x/site/de/userGuide/schema/sparse_vector.md +++ b/localization/v2.5.x/site/de/userGuide/schema/sparse_vector.md @@ -4,9 +4,9 @@ title: Spärlicher Vektor summary: >- Dünne Vektoren sind eine wichtige Methode der Datendarstellung bei der Informationsgewinnung und der Verarbeitung natürlicher Sprache. 
Während dichte - Vektoren wegen ihrer hervorragenden semantischen Verständnisfähigkeiten + Vektoren wegen ihrer ausgezeichneten semantischen Verständnisfähigkeiten beliebt sind, liefern spärliche Vektoren oft genauere Ergebnisse, wenn es um - Anwendungen geht, die eine präzise Übereinstimmung von Schlüsselwörtern oder + Anwendungen geht, die einen präzisen Abgleich von Schlüsselwörtern oder Phrasen erfordern. ---

    Sparsamer Vektor

  • Weitere Informationen zu den Parametern der Ähnlichkeitssuche finden Sie unter Grundlegende ANN-Suche.

    Begrenzungen

    Bei der Verwendung von spärlichen Vektoren in Milvus sind die folgenden Grenzen zu beachten:

    • Derzeit wird nur die IP-Distanzmetrik für spärliche Vektoren unterstützt. Die hohe Dimensionalität spärlicher Vektoren macht die L2- und die Kosinus-Distanz unpraktisch.

    • Für spärliche Vektorfelder werden nur die Indextypen SPARSE_INVERTED_INDEX und SPARSE_WAND unterstützt.

    • Die für spärliche Vektoren unterstützten Datentypen:

      • Der Dimensionsteil muss eine 32-Bit-Ganzzahl ohne Vorzeichen sein;

      • Der Wertteil kann eine nicht-negative 32-Bit-Gleitkommazahl sein.

    • Spärliche Vektoren müssen beim Einfügen und Suchen die folgenden Anforderungen erfüllen (siehe die Skizze unter dieser Liste):

      • Mindestens ein Wert im Vektor ist ungleich Null;

      • Vektorindizes sind nicht-negativ.
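    Eine minimale Skizze, die diese Anforderungen veranschaulicht (unter der Annahme der zuvor erstellten Sammlung my_sparse_collection und einer Milvus-Instanz unter localhost:19530):

```python
from pymilvus import MilvusClient

client = MilvusClient(uri="http://localhost:19530")

# Valid row: non-negative uint32 indices, non-negative float values,
# and at least one non-zero value.
valid_row = {"sparse_vector": {0: 0.1, 500: 0.8, 4096: 0.3}}

# Examples that violate the requirements listed above:
# {"sparse_vector": {1: 0.0, 2: 0.0}}   # all values are zero
# {"sparse_vector": {-1: 0.5}}          # negative index
# {"sparse_vector": {10: -0.5}}         # negative value

client.insert(collection_name="my_sparse_collection", data=[valid_row])
```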

    FAQ

    • Können Sie den Unterschied zwischen SPARSE_INVERTED_INDEX und SPARSE_WAND erklären, und wie wähle ich zwischen ihnen?

      SPARSE_INVERTED_INDEX ist ein traditioneller invertierter Index, während SPARSE_WAND den Weak-AND-Algorithmus verwendet, um die Anzahl der vollständigen IP-Abstandsauswertungen während der Suche zu reduzieren. SPARSE_WAND ist in der Regel schneller, aber seine Leistung kann mit zunehmender Vektordichte abnehmen. Um zwischen den beiden zu wählen, führen Sie Experimente und Benchmarks auf Basis Ihres spezifischen Datensatzes und Anwendungsfalls durch (eine kleine Skizze folgt am Ende dieses FAQ).

    • Wie sollte ich die Parameter drop_ratio_build und drop_ratio_search wählen?

      Die Wahl von drop_ratio_build und drop_ratio_search hängt von den Eigenschaften Ihrer Daten sowie von Ihren Anforderungen an Suchlatenz/Durchsatz und Genauigkeit ab.

    • Kann die Dimension einer Sparse-Einbettung ein beliebiger diskreter Wert innerhalb des uint32-Raums sein?

      Ja, mit einer Ausnahme. Die Dimension einer Sparse-Einbettung kann ein beliebiger Wert im Bereich [0, Maximum von uint32) sein. Das bedeutet, dass Sie den Maximalwert von uint32 nicht verwenden können.

    • Wird die Suche in wachsenden Segmenten über einen Index oder per Brute-Force durchgeführt?

      Die Suche in wachsenden Segmenten erfolgt über einen Index desselben Typs wie der Index des versiegelten Segments. Für neue wachsende Segmente, deren Index noch nicht aufgebaut ist, wird eine Brute-Force-Suche verwendet.

    • Ist es möglich, sowohl spärliche als auch dichte Vektoren in einer einzigen Sammlung zu haben?

      Ja, dank der Unterstützung mehrerer Vektortypen können Sie Sammlungen mit sowohl spärlichen als auch dichten Vektorspalten erstellen und hybride Suchen auf ihnen durchführen.
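    Als Ergänzung zu den ersten beiden Fragen eine kleine Skizze, wie ein SPARSE_WAND-Index mit drop_ratio_build definiert und drop_ratio_search zur Suchzeit übergeben werden könnte. Die Werte 0.2 sind reine Platzhalter, die Sie per Benchmark auf Ihren eigenen Daten bestimmen sollten; der Client stammt aus den obigen Beispielen.

```python
# Index definition with SPARSE_WAND instead of SPARSE_INVERTED_INDEX
index_params = client.prepare_index_params()
index_params.add_index(
    field_name="sparse_vector",
    index_name="sparse_wand_index",
    index_type="SPARSE_WAND",
    metric_type="IP",                  # IP is the only metric supported for sparse vectors
    params={"drop_ratio_build": 0.2},  # placeholder value, tune via benchmarks
)

# drop_ratio_search is passed per request at query time
res = client.search(
    collection_name="my_sparse_collection",
    data=[{1: 0.2, 50: 0.4, 1000: 0.7}],
    anns_field="sparse_vector",
    limit=3,
    search_params={"params": {"drop_ratio_search": 0.2}},  # likewise data-dependent
)
```

    Die so vorbereiteten Indexparameter würden anschließend, wie weiter oben gezeigt, an create_collection bzw. create_index übergeben.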
    diff --git a/localization/v2.5.x/site/de/userGuide/search-query-get/boolean.md b/localization/v2.5.x/site/de/userGuide/search-query-get/boolean.md index 61ee97880..a5d2a30de 100644 --- a/localization/v2.5.x/site/de/userGuide/search-query-get/boolean.md +++ b/localization/v2.5.x/site/de/userGuide/search-query-get/boolean.md @@ -701,9 +701,9 @@ curl --request POST \​

    Match operators​

    Match operators include:​

    • like: Match constants or prefixes (prefix%), infixes (%infix%), and suffixes (%suffix) within constants. It relies on a brute-force search mechanism using wildcards and does not involve text tokenization. While it can achieve exact matches, its query efficiency is relatively low, making it suitable for simple matching tasks or queries on smaller datasets.​

    • -
    • TEXT_MATCH: Match specific terms or keywords on VARCHAR fields, using tokenization and inverted index to enable efficient text search. Compared to like, TEXT_MATCH offers more advanced text tokenization and filtering capabilities. It is suited for large-scale datasets where higher query performance is required for complex text search scenarios.​ -

      -

      To use the TEXT_MATCH filter expression, you must enable text matching for the target VARCHAR field when creating the collection. For details, refer to ​Keyword Match.​

      +
    • TEXT_MATCH: Match specific terms or keywords on VARCHAR fields, using tokenization and inverted index to enable efficient text search. Compared to like, TEXT_MATCH offers more advanced text tokenization and filtering capabilities. It is suited for large-scale datasets where higher query performance is required for complex text search scenarios.​

      +

      +

      To use the TEXT_MATCH filter expression, you must enable text matching for the target VARCHAR field when creating the collection. For details, refer to Text Match.​
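    As a small illustration of the wildcard forms listed above, the corresponding filter expressions could look like the following sketch. The field name description is a placeholder, and TEXT_MATCH additionally requires enable_match=True on that field.

```python
# The three wildcard forms supported by `like` (brute-force, no tokenization)
prefix_filter = "description like 'wireless%'"         # starts with "wireless"
infix_filter = "description like '%fast charging%'"    # contains "fast charging"
suffix_filter = "description like '%adapter'"          # ends with "adapter"

# A tokenized term match on the same field via the inverted index
term_filter = "TEXT_MATCH(description, 'wireless adapter')"
```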

    Example 1: Apply filter on scalar field​

    The following example demonstrates how to filter products whose color is red. In this case, you can quickly filter all red products by matching the prefix 'red%'. Similarly, you can use the expression color in ['red_7025', 'red_4794', 'red_9392'] to filter all red products. However, when the data is more complex, we recommend using the like operator for more efficient filtering.​
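    A minimal sketch of such a filtered search, assuming a collection named my_collection with a scalar field color and a 5-dimensional vector field embeddings (all names and dimensions are placeholders):

```python
from pymilvus import MilvusClient

client = MilvusClient(uri="http://localhost:19530")
query_vector = [0.1, 0.2, 0.3, 0.4, 0.5]  # placeholder query vector

results = client.search(
    collection_name="my_collection",
    anns_field="embeddings",
    data=[query_vector],
    filter="color like 'red%'",  # or: "color in ['red_7025', 'red_4794', 'red_9392']"
    limit=10,
    output_fields=["id", "color"],
)
```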

    @@ -858,8 +858,8 @@ curl --request POST \​ ]​
    -

    Example 3: Keyword match on VARCHAR fields​

    The TEXT_MATCH expression is used for keyword match on VARCHAR fields. By default, it applies an OR logic, but you can combine it with other logical operators to create more complex query conditions. For details, refer to ​Keyword Match.​

    -

    The following example demonstrates how to use the TEXT_MATCH expression to filter products where the description field contains either the keyword "Apple" or "iPhone":​

    +

    Example 3: Text match on VARCHAR fields​

    The TEXT_MATCH expression is used for text match on VARCHAR fields. By default, it applies an OR logic, but you can combine it with other logical operators to create more complex query conditions. For details, refer to Text Match.​

    +

    The following example demonstrates how to use the TEXT_MATCH expression to filter products where the description field contains either the term "Apple" or "iPhone":​
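    A hedged sketch of that filter in a query call (the collection name is a placeholder; the description field must have text match enabled):

```python
from pymilvus import MilvusClient

client = MilvusClient(uri="http://localhost:19530")

# OR semantics: entities containing either "Apple" or "iPhone" match
filter = "TEXT_MATCH(description, 'Apple iPhone')"

results = client.query(
    collection_name="my_collection",
    filter=filter,
    output_fields=["id", "description"],
)
```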

    Python Java diff --git a/localization/v2.5.x/site/de/userGuide/search-query-get/full-text-search.json b/localization/v2.5.x/site/de/userGuide/search-query-get/full-text-search.json index a94358e1f..2d069bf7c 100644 --- a/localization/v2.5.x/site/de/userGuide/search-query-get/full-text-search.json +++ b/localization/v2.5.x/site/de/userGuide/search-query-get/full-text-search.json @@ -1 +1 @@ -{"codeList":["from pymilvus import MilvusClient, DataType, Function, FunctionType​\n​\nschema = MilvusClient.create_schema()​\n​\nschema.add_field(field_name=\"id\", datatype=DataType.INT64, is_primary=True, auto_id=True)​\nschema.add_field(field_name=\"text\", datatype=DataType.VARCHAR, max_length=1000, enable_analyzer=True)​\nschema.add_field(field_name=\"sparse\", datatype=DataType.SPARSE_FLOAT_VECTOR)​\n\n","bm25_function = Function(​\n name=\"text_bm25_emb\", # Function name​\n input_field_names=[\"text\"], # Name of the VARCHAR field containing raw text data​\n output_field_names=[\"sparse\"], # Name of the SPARSE_FLOAT_VECTOR field reserved to store generated embeddings​\n function_type=FunctionType.BM25,​\n)​\n​\nschema.add_function(bm25_function)​\n\n","index_params = MilvusClient.prepare_index_params()​\n​\nindex_params.add_index(​\n field_name=\"sparse\",​\n index_type=\"AUTOINDEX\", ​\n metric_type=\"BM25\"​\n)​\n\n","MilvusClient.create_collection(​\n collection_name='demo', ​\n schema=schema, ​\n index_params=index_params​\n)​\n\n","MilvusClient.insert('demo', [​\n {'text': 'Artificial intelligence was founded as an academic discipline in 1956.'},​\n {'text': 'Alan Turing was the first person to conduct substantial research in AI.'},​\n {'text': 'Born in Maida Vale, London, Turing was raised in southern England.'},​\n])​\n\n","search_params = {​\n 'params': {'drop_ratio_search': 0.6},​\n}​\n​\nMilvusClient.search(​\n collection_name='demo', ​\n data=['Who started AI research?'],​\n anns_field='sparse',​\n limit=3,​\n search_params=search_params​\n)​\n\n"],"headingContent":"Full Text Search​","anchorList":[{"label":"Volltextsuche","href":"Full-Text-Search​","type":1,"isActive":false},{"label":"Überblick","href":"Overview​","type":2,"isActive":false},{"label":"Erstellen Sie eine Sammlung für die Volltextsuche","href":"Create-a-collection-for-full-text-search​","type":2,"isActive":false},{"label":"Einfügen von Textdaten","href":"Insert-text-data","type":2,"isActive":false},{"label":"Volltextsuche durchführen","href":"Perform-full-text-search","type":2,"isActive":false}]} \ No newline at end of file +{"codeList":["from pymilvus import MilvusClient, DataType, Function, FunctionType​\n​\nschema = MilvusClient.create_schema()​\n​\nschema.add_field(field_name=\"id\", datatype=DataType.INT64, is_primary=True, auto_id=True)​\nschema.add_field(field_name=\"text\", datatype=DataType.VARCHAR, max_length=1000, enable_analyzer=True)​\nschema.add_field(field_name=\"sparse\", datatype=DataType.SPARSE_FLOAT_VECTOR)​\n\n","import io.milvus.v2.common.DataType;\nimport io.milvus.v2.service.collection.request.AddFieldReq;\nimport io.milvus.v2.service.collection.request.CreateCollectionReq;\n\nCreateCollectionReq.CollectionSchema schema = CreateCollectionReq.CollectionSchema.builder()\n .build();\nschema.addField(AddFieldReq.builder()\n .fieldName(\"id\")\n .dataType(DataType.Int64)\n .isPrimaryKey(true)\n .autoID(true)\n .build());\nschema.addField(AddFieldReq.builder()\n .fieldName(\"text\")\n .dataType(DataType.VarChar)\n .maxLength(1000)\n .enableAnalyzer(true)\n 
.build());\nschema.addField(AddFieldReq.builder()\n .fieldName(\"sparse\")\n .dataType(DataType.SparseFloatVector)\n .build());\n","import { MilvusClient, DataType } from \"@zilliz/milvus2-sdk-node\";\n\nconst address = \"http://localhost:19530\";\nconst token = \"root:Milvus\";\nconst client = new MilvusClient({address, token});\nconst schema = [\n {\n name: \"id\",\n data_type: DataType.Int64,\n is_primary_key: true,\n },\n {\n name: \"text\",\n data_type: \"VarChar\",\n enable_analyzer: true,\n enable_match: true,\n max_length: 1000,\n },\n {\n name: \"sparse\",\n data_type: DataType.SparseFloatVector,\n },\n];\n\n\nconsole.log(res.results)\n","export schema='{\n \"autoId\": true,\n \"enabledDynamicField\": false,\n \"fields\": [\n {\n \"fieldName\": \"id\",\n \"dataType\": \"Int64\",\n \"isPrimary\": true\n },\n {\n \"fieldName\": \"text\",\n \"dataType\": \"VarChar\",\n \"elementTypeParams\": {\n \"max_length\": 1000,\n \"enable_analyzer\": true\n }\n },\n {\n \"fieldName\": \"sparse\",\n \"dataType\": \"SparseFloatVector\"\n }\n ]\n }'\n","bm25_function = Function(​\n name=\"text_bm25_emb\", # Function name​\n input_field_names=[\"text\"], # Name of the VARCHAR field containing raw text data​\n output_field_names=[\"sparse\"], # Name of the SPARSE_FLOAT_VECTOR field reserved to store generated embeddings​\n function_type=FunctionType.BM25,​\n)​\n​\nschema.add_function(bm25_function)​\n\n","import io.milvus.common.clientenum.FunctionType;\nimport io.milvus.v2.service.collection.request.CreateCollectionReq.Function;\n\nimport java.util.*;\n\nschema.addFunction(Function.builder()\n .functionType(FunctionType.BM25)\n .name(\"text_bm25_emb\")\n .inputFieldNames(Collections.singletonList(\"text\"))\n .outputFieldNames(Collections.singletonList(\"vector\"))\n .build());\n","const functions = [\n {\n name: 'text_bm25_emb',\n description: 'bm25 function',\n type: FunctionType.BM25,\n input_field_names: ['text'],\n output_field_names: ['vector'],\n params: {},\n },\n];\n","export schema='{\n \"autoId\": true,\n \"enabledDynamicField\": false,\n \"fields\": [\n {\n \"fieldName\": \"id\",\n \"dataType\": \"Int64\",\n \"isPrimary\": true\n },\n {\n \"fieldName\": \"text\",\n \"dataType\": \"VarChar\",\n \"elementTypeParams\": {\n \"max_length\": 1000,\n \"enable_analyzer\": true\n }\n },\n {\n \"fieldName\": \"sparse\",\n \"dataType\": \"SparseFloatVector\"\n }\n ],\n \"functions\": [\n {\n \"name\": \"text_bm25_emb\",\n \"type\": \"BM25\",\n \"inputFieldNames\": [\"text\"],\n \"outputFieldNames\": [\"sparse\"],\n \"params\": {}\n }\n ]\n }'\n","index_params = MilvusClient.prepare_index_params()​\n​\nindex_params.add_index(​\n field_name=\"sparse\",​\n index_type=\"AUTOINDEX\", ​\n metric_type=\"BM25\"​\n)​\n\n","import io.milvus.v2.common.IndexParam;\n\nList indexes = new ArrayList<>();\nindexes.add(IndexParam.builder()\n .fieldName(\"sparse\")\n .indexType(IndexParam.IndexType.SPARSE_INVERTED_INDEX)\n .metricType(IndexParam.MetricType.BM25)\n .build());\n","const index_params = [\n {\n fieldName: \"sparse\",\n metricType: \"BM25\",\n indexType: \"AUTOINDEX\",\n },\n];\n","export indexParams='[\n {\n \"fieldName\": \"sparse\",\n \"metricType\": \"BM25\",\n \"indexType\": \"AUTOINDEX\"\n }\n ]'\n","MilvusClient.create_collection(​\n collection_name='demo', ​\n schema=schema, ​\n index_params=index_params​\n)​\n\n","import io.milvus.v2.service.collection.request.CreateCollectionReq;\n\nCreateCollectionReq requestCreate = CreateCollectionReq.builder()\n .collectionName(\"demo\")\n 
.collectionSchema(schema)\n .indexParams(indexes)\n .build();\nclient.createCollection(requestCreate);\n","await client.create_collection(\n collection_name: 'demo', \n schema: schema, \n index_params: index_params\n);\n","export CLUSTER_ENDPOINT=\"http://localhost:19530\"\nexport TOKEN=\"root:Milvus\"\n\ncurl --request POST \\\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/collections/create\" \\\n--header \"Authorization: Bearer ${TOKEN}\" \\\n--header \"Content-Type: application/json\" \\\n-d \"{\n \\\"collectionName\\\": \\\"demo\\\",\n \\\"schema\\\": $schema,\n \\\"indexParams\\\": $indexParams\n}\"\n","client.insert('demo', [\n {'text': 'information retrieval is a field of study.'},\n {'text': 'information retrieval focuses on finding relevant information in large datasets.'},\n {'text': 'data mining and information retrieval overlap in research.'},\n])\n\n","import com.google.gson.Gson;\nimport com.google.gson.JsonObject;\n\nimport io.milvus.v2.service.vector.request.InsertReq;\n\nGson gson = new Gson();\nList rows = Arrays.asList(\n gson.fromJson(\"{\\\"text\\\": \\\"information retrieval is a field of study.\\\"}\", JsonObject.class),\n gson.fromJson(\"{\\\"text\\\": \\\"information retrieval focuses on finding relevant information in large datasets.\\\"}\", JsonObject.class),\n gson.fromJson(\"{\\\"text\\\": \\\"data mining and information retrieval overlap in research.\\\"}\", JsonObject.class)\n);\n\nclient.insert(InsertReq.builder()\n .collectionName(\"demo\")\n .data(rows)\n .build());\n","await client.insert({\ncollection_name: 'demo', \ndata: [\n {'text': 'information retrieval is a field of study.'},\n {'text': 'information retrieval focuses on finding relevant information in large datasets.'},\n {'text': 'data mining and information retrieval overlap in research.'},\n]);\n","curl --request POST \\\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/entities/insert\" \\\n--header \"Authorization: Bearer ${TOKEN}\" \\\n--header \"Content-Type: application/json\" \\\n-d '{\n \"data\": [\n {\"text\": \"information retrieval is a field of study.\"},\n {\"text\": \"information retrieval focuses on finding relevant information in large datasets.\"},\n {\"text\": \"data mining and information retrieval overlap in research.\"} \n ],\n \"collectionName\": \"demo\"\n}'\n","search_params = {​\n 'params': {'drop_ratio_search': 0.6},​\n}​\n​\nMilvusClient.search(​\n collection_name='demo', ​\n data=['whats the focus of information retrieval?'],​\n anns_field='sparse',​\n limit=3,​\n search_params=search_params​\n)​\n\n","import io.milvus.v2.service.vector.request.SearchReq;\nimport io.milvus.v2.service.vector.request.data.EmbeddedText;\nimport io.milvus.v2.service.vector.response.SearchResp;\n\nMap searchParams = new HashMap<>();\nsearchParams.put(\"drop_ratio_search\", 0.6);\nSearchResp searchResp = client.search(SearchReq.builder()\n .collectionName(\"demo\")\n .data(Collections.singletonList(new EmbeddedText(\"whats the focus of information retrieval?\")))\n .annsField(\"sparse\")\n .topK(3)\n .searchParams(searchParams)\n .outputFields(Collections.singletonList(\"text\"))\n .build());\n","await client.search(\n collection_name: 'demo', \n data: ['whats the focus of information retrieval?'],\n anns_field: 'sparse',\n limit: 3,\n params: {'drop_ratio_search': 0.6},\n)\n","curl --request POST \\\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/entities/search\" \\\n--header \"Authorization: Bearer ${TOKEN}\" \\\n--header \"Content-Type: application/json\" \\\n--data-raw '{\n \"collectionName\": \"demo\",\n 
\"data\": [\n \"whats the focus of information retrieval?\"\n ],\n \"annsField\": \"sparse\",\n \"limit\": 3,\n \"outputFields\": [\n \"text\"\n ],\n \"searchParams\":{\n \"params\":{\n \"drop_ratio_search\":0.6\n }\n }\n}'\n"],"headingContent":"Full Text Search​","anchorList":[{"label":"Volltextsuche","href":"Full-Text-Search​","type":1,"isActive":false},{"label":"Überblick","href":"Overview​","type":2,"isActive":false},{"label":"Erstellen Sie eine Sammlung für die Volltextsuche","href":"Create-a-collection-for-full-text-search​","type":2,"isActive":false},{"label":"Textdaten einfügen","href":"Insert-text-data","type":2,"isActive":false},{"label":"Volltextsuche durchführen","href":"Perform-full-text-search","type":2,"isActive":false}]} \ No newline at end of file diff --git a/localization/v2.5.x/site/de/userGuide/search-query-get/full-text-search.md b/localization/v2.5.x/site/de/userGuide/search-query-get/full-text-search.md index e33d6f912..90f588ec6 100644 --- a/localization/v2.5.x/site/de/userGuide/search-query-get/full-text-search.md +++ b/localization/v2.5.x/site/de/userGuide/search-query-get/full-text-search.md @@ -47,7 +47,7 @@ summary: >-
  • Texteingabe: Sie fügen Rohtextdokumente ein oder stellen Abfragetext bereit, ohne dass eine manuelle Einbettung erforderlich ist.

  • Text-Analyse: Milvus verwendet einen Analysator, um den eingegebenen Text in einzelne, durchsuchbare Begriffe zu zerlegen.

  • Funktions-Verarbeitung: Die eingebaute Funktion empfängt tokenisierte Begriffe und wandelt sie in spärliche Vektordarstellungen um.

  • -
  • Sammlungsspeicher: Milvus speichert diese spärlichen Einbettungen in einer Sammlung, um sie effizient abrufen zu können.

  • +
  • Sammlungsspeicher: Milvus speichert diese spärlichen Einbettungen in einer Sammlung zum effizienten Abruf.

  • BM25-Bewertung: Während einer Suche wendet Milvus den BM25-Algorithmus an, um die Punktzahlen für die gespeicherten Dokumente zu berechnen und die übereinstimmenden Ergebnisse nach ihrer Relevanz für den Abfragetext zu ordnen.
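    Zur Einordnung des letzten Schritts: Die übliche Standardform der BM25-Bewertung lautet (hier nur als allgemeine Referenz; die in Milvus intern verwendeten Parameterwerte sind an dieser Stelle nicht spezifiziert):

$$
\mathrm{score}(D, Q) = \sum_{i=1}^{n} \mathrm{IDF}(q_i)\,\frac{f(q_i, D)\,(k_1 + 1)}{f(q_i, D) + k_1\left(1 - b + b\,\frac{|D|}{\mathrm{avgdl}}\right)}
$$

    Dabei ist f(q_i, D) die Häufigkeit des Begriffs q_i im Dokument D, |D| die Dokumentlänge, avgdl die durchschnittliche Dokumentlänge im Korpus, und k_1 sowie b sind die üblichen BM25-Hyperparameter.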

  • @@ -82,6 +82,8 @@ summary: >-

  • Ein SPARSE_FLOAT_VECTOR Feld, das für die Speicherung von Sparse Embeddings reserviert ist, die Milvus automatisch für das VARCHAR Feld generiert.

  • Definieren Sie das Sammlungsschema

    Erstellen Sie zunächst das Schema und fügen Sie die erforderlichen Felder hinzu.

    +
    from pymilvus import MilvusClient, DataType, Function, FunctionType​
     ​
     schema = MilvusClient.create_schema()​
    @@ -90,6 +92,80 @@ schema.add_field(field_name="id", dat
     schema.add_field(field_name="text", datatype=DataType.VARCHAR, max_length=1000, enable_analyzer=True)​
     schema.add_field(field_name="sparse", datatype=DataType.SPARSE_FLOAT_VECTOR)​
     
    +
    +
    import io.milvus.v2.common.DataType;
    +import io.milvus.v2.service.collection.request.AddFieldReq;
    +import io.milvus.v2.service.collection.request.CreateCollectionReq;
    +
    +CreateCollectionReq.CollectionSchema schema = CreateCollectionReq.CollectionSchema.builder()
    +        .build();
    +schema.addField(AddFieldReq.builder()
    +        .fieldName("id")
    +        .dataType(DataType.Int64)
    +        .isPrimaryKey(true)
    +        .autoID(true)
    +        .build());
    +schema.addField(AddFieldReq.builder()
    +        .fieldName("text")
    +        .dataType(DataType.VarChar)
    +        .maxLength(1000)
    +        .enableAnalyzer(true)
    +        .build());
    +schema.addField(AddFieldReq.builder()
    +        .fieldName("sparse")
    +        .dataType(DataType.SparseFloatVector)
    +        .build());
    +
    +
    import { MilvusClient, DataType } from "@zilliz/milvus2-sdk-node";
    +
    +const address = "http://localhost:19530";
    +const token = "root:Milvus";
    +const client = new MilvusClient({address, token});
    +const schema = [
    +  {
    +    name: "id",
    +    data_type: DataType.Int64,
    +    is_primary_key: true,
    +  },
    +  {
    +    name: "text",
    +    data_type: "VarChar",
    +    enable_analyzer: true,
    +    enable_match: true,
    +    max_length: 1000,
    +  },
    +  {
    +    name: "sparse",
    +    data_type: DataType.SparseFloatVector,
    +  },
    +];
    +
    +
    +console.log(res.results)
    +
    +
    export schema='{
    +        "autoId": true,
    +        "enabledDynamicField": false,
    +        "fields": [
    +            {
    +                "fieldName": "id",
    +                "dataType": "Int64",
    +                "isPrimary": true
    +            },
    +            {
    +                "fieldName": "text",
    +                "dataType": "VarChar",
    +                "elementTypeParams": {
    +                    "max_length": 1000,
    +                    "enable_analyzer": true
    +                }
    +            },
    +            {
    +                "fieldName": "sparse",
    +                "dataType": "SparseFloatVector"
    +            }
    +        ]
    +    }'
     

    In dieser Konfiguration:

      @@ -98,6 +174,8 @@ schema.add_field(field_name="sparse",
    • sparse: ein Vektorfeld, das für die Speicherung von intern generierten Sparse Embeddings für Volltextsuchoperationen reserviert ist. Der Datentyp muss SPARSE_FLOAT_VECTOR sein.

    Definieren Sie nun eine Funktion, die Ihren Text in Sparse-Vektor-Darstellungen umwandelt, und fügen Sie sie dann dem Schema hinzu.

    +
    bm25_function = Function(​
         name="text_bm25_emb", # Function name​
         input_field_names=["text"], # Name of the VARCHAR field containing raw text data​
    @@ -107,6 +185,62 @@ schema.add_field(field_name="sparse",
     ​
     schema.add_function(bm25_function)​
     
    +
    +
    import io.milvus.common.clientenum.FunctionType;
    +import io.milvus.v2.service.collection.request.CreateCollectionReq.Function;
    +
    +import java.util.*;
    +
    +schema.addFunction(Function.builder()
    +        .functionType(FunctionType.BM25)
    +        .name("text_bm25_emb")
    +        .inputFieldNames(Collections.singletonList("text"))
    +        .outputFieldNames(Collections.singletonList("vector"))
    +        .build());
    +
    +
    const functions = [
    +    {
    +      name: 'text_bm25_emb',
    +      description: 'bm25 function',
    +      type: FunctionType.BM25,
    +      input_field_names: ['text'],
    +      output_field_names: ['vector'],
    +      params: {},
    +    },
    +];
    +
    +
    export schema='{
    +        "autoId": true,
    +        "enabledDynamicField": false,
    +        "fields": [
    +            {
    +                "fieldName": "id",
    +                "dataType": "Int64",
    +                "isPrimary": true
    +            },
    +            {
    +                "fieldName": "text",
    +                "dataType": "VarChar",
    +                "elementTypeParams": {
    +                    "max_length": 1000,
    +                    "enable_analyzer": true
    +                }
    +            },
    +            {
    +                "fieldName": "sparse",
    +                "dataType": "SparseFloatVector"
    +            }
    +        ],
    +        "functions": [
    +            {
    +                "name": "text_bm25_emb",
    +                "type": "BM25",
    +                "inputFieldNames": ["text"],
    +                "outputFieldNames": ["sparse"],
    +                "params": {}
    +            }
    +        ]
    +    }'
     

    Parameter

    Beschreibung

    @@ -121,9 +255,11 @@ schema.add_function(bm25_function)​

    Der Typ der zu verwendenden Funktion. Setzen Sie den Wert auf FunctionType.BM25.

    -

    Für Sammlungen mit mehreren VARCHAR Feldern, die eine Konvertierung von Text in Sparse Vectors erfordern, fügen Sie separate Funktionen zum Sammlungsschema hinzu und stellen sicher, dass jede Funktion einen eindeutigen Namen und output_field_names Wert hat.

    +

    Für Sammlungen mit mehreren VARCHAR Feldern, die eine Konvertierung von Text in Sparse Vectors erfordern, fügen Sie dem Sammlungsschema separate Funktionen hinzu und stellen sicher, dass jede Funktion einen eindeutigen Namen und output_field_names Wert hat.
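    Eine kleine Skizze, wie das aussehen könnte. Die Feldnamen title, content, title_sparse und content_sparse sind frei gewählte Platzhalter und setzen voraus, dass die entsprechenden VARCHAR- und SPARSE_FLOAT_VECTOR-Felder bereits im Schema definiert sind.

```python
from pymilvus import Function, FunctionType

# One BM25 function per VARCHAR field, each with a unique name and its own output field
schema.add_function(Function(
    name="title_bm25_emb",
    input_field_names=["title"],
    output_field_names=["title_sparse"],
    function_type=FunctionType.BM25,
))

schema.add_function(Function(
    name="content_bm25_emb",
    input_field_names=["content"],
    output_field_names=["content_sparse"],
    function_type=FunctionType.BM25,
))
```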

    -

    Konfigurieren Sie den Index

    Nachdem Sie das Schema mit den erforderlichen Feldern und der integrierten Funktion definiert haben, richten Sie den Index für Ihre Sammlung ein. Um diesen Prozess zu vereinfachen, verwenden Sie AUTOINDEX als index_type, eine Option, die es Milvus ermöglicht, den am besten geeigneten Indextyp auf der Grundlage der Struktur Ihrer Daten auszuwählen und zu konfigurieren.

    +

    Konfigurieren Sie den Index

    Nachdem Sie das Schema mit den erforderlichen Feldern und der integrierten Funktion definiert haben, richten Sie den Index für Ihre Sammlung ein. Um diesen Prozess zu vereinfachen, verwenden Sie AUTOINDEX als index_type, eine Option, die es Milvus ermöglicht, den am besten geeigneten Indextyp basierend auf der Struktur Ihrer Daten auszuwählen und zu konfigurieren.

    +
    index_params = MilvusClient.prepare_index_params()​
     ​
     index_params.add_index(​
    @@ -132,6 +268,31 @@ index_params.add_index(​
         metric_type="BM25"​
     )​
     
    +
    +
    import io.milvus.v2.common.IndexParam;
    +
    +List<IndexParam> indexes = new ArrayList<>();
    +indexes.add(IndexParam.builder()
    +        .fieldName("sparse")
    +        .indexType(IndexParam.IndexType.SPARSE_INVERTED_INDEX)
    +        .metricType(IndexParam.MetricType.BM25)
    +        .build());
    +
    +
    const index_params = [
    +  {
    +    fieldName: "sparse",
    +    metricType: "BM25",
    +    indexType: "AUTOINDEX",
    +  },
    +];
    +
    +
    export indexParams='[
    +        {
    +            "fieldName": "sparse",
    +            "metricType": "BM25",
    +            "indexType": "AUTOINDEX"
    +        }
    +    ]'
     

    Parameter

    Beschreibung

    @@ -143,6 +304,8 @@ index_params.add_index(​

    Der Wert für diesen Parameter muss speziell für die Volltextsuchfunktionalität auf BM25 gesetzt werden.

    Erstellen Sie die Sammlung

    Erstellen Sie nun die Sammlung unter Verwendung der definierten Schema- und Indexparameter.

    +
    MilvusClient.create_collection(​
         collection_name='demo', ​
         schema=schema, ​
    @@ -150,7 +313,35 @@ index_params.add_index(​
     )​
     
     
    -

    Einfügen von Textdaten +
    await client.create_collection(
    +    collection_name: 'demo', 
    +    schema: schema, 
    +    index_params: index_params
    +);
    +
    +
    export CLUSTER_ENDPOINT="http://localhost:19530"
    +export TOKEN="root:Milvus"
    +
    +curl --request POST \
    +--url "${CLUSTER_ENDPOINT}/v2/vectordb/collections/create" \
    +--header "Authorization: Bearer ${TOKEN}" \
    +--header "Content-Type: application/json" \
    +-d "{
    +    \"collectionName\": \"demo\",
    +    \"schema\": $schema,
    +    \"indexParams\": $indexParams
    +}"
    +
    +

    Textdaten einfügen

    Nachdem Sie Ihre Sammlung und Ihren Index eingerichtet haben, können Sie nun Textdaten einfügen. Bei diesem Vorgang müssen Sie nur den Rohtext bereitstellen. Die integrierte Funktion, die wir zuvor definiert haben, erzeugt automatisch den entsprechenden Sparse-Vektor für jeden Texteintrag.

    -
    MilvusClient.insert('demo', [​
    -    {'text': 'Artificial intelligence was founded as an academic discipline in 1956.'},​
    -    {'text': 'Alan Turing was the first person to conduct substantial research in AI.'},​
    -    {'text': 'Born in Maida Vale, London, Turing was raised in southern England.'},​
    -])​
    +
    +
    client.insert('demo', [
    +    {'text': 'information retrieval is a field of study.'},
    +    {'text': 'information retrieval focuses on finding relevant information in large datasets.'},
    +    {'text': 'data mining and information retrieval overlap in research.'},
    +])
     
    +
    +
    import com.google.gson.Gson;
    +import com.google.gson.JsonObject;
    +
    +import io.milvus.v2.service.vector.request.InsertReq;
    +
    +Gson gson = new Gson();
    +List<JsonObject> rows = Arrays.asList(
    +        gson.fromJson("{\"text\": \"information retrieval is a field of study.\"}", JsonObject.class),
    +        gson.fromJson("{\"text\": \"information retrieval focuses on finding relevant information in large datasets.\"}", JsonObject.class),
    +        gson.fromJson("{\"text\": \"data mining and information retrieval overlap in research.\"}", JsonObject.class)
    +);
    +
    +client.insert(InsertReq.builder()
    +        .collectionName("demo")
    +        .data(rows)
    +        .build());
    +
    +
    await client.insert({
    +collection_name: 'demo', 
    +data: [
    +    {'text': 'information retrieval is a field of study.'},
    +    {'text': 'information retrieval focuses on finding relevant information in large datasets.'},
    +    {'text': 'data mining and information retrieval overlap in research.'},
    +]);
    +
    +
    curl --request POST \
    +--url "${CLUSTER_ENDPOINT}/v2/vectordb/entities/insert" \
    +--header "Authorization: Bearer ${TOKEN}" \
    +--header "Content-Type: application/json" \
    +-d '{
    +    "data": [
    +        {"text": "information retrieval is a field of study."},
    +        {"text": "information retrieval focuses on finding relevant information in large datasets."},
    +        {"text": "data mining and information retrieval overlap in research."}       
    +    ],
    +    "collectionName": "demo"
    +}'
     

    Sobald Sie Daten in Ihre Sammlung eingefügt haben, können Sie eine Volltextsuche mit Rohtextabfragen durchführen. Milvus wandelt Ihre Abfrage automatisch in einen Sparse-Vektor um und ordnet die übereinstimmenden Suchergebnisse mit Hilfe des BM25-Algorithmus ein und gibt dann die TopK (limit) Ergebnisse zurück.

    +

    Sobald Sie Daten in Ihre Sammlung eingefügt haben, können Sie eine Volltextsuche mit Rohtextabfragen durchführen. Milvus konvertiert Ihre Abfrage automatisch in einen Sparse-Vektor und ordnet die übereinstimmenden Suchergebnisse mit dem BM25-Algorithmus ein und gibt dann die TopK (limit) Ergebnisse zurück.

    +
    search_params = {​
         'params': {'drop_ratio_search': 0.6},​
     }​
     ​
     MilvusClient.search(​
         collection_name='demo', ​
    -    data=['Who started AI research?'],​
    +    data=['whats the focus of information retrieval?'],​
         anns_field='sparse',​
         limit=3,​
         search_params=search_params​
     )​
     
    +
    +
    import io.milvus.v2.service.vector.request.SearchReq;
    +import io.milvus.v2.service.vector.request.data.EmbeddedText;
    +import io.milvus.v2.service.vector.response.SearchResp;
    +
    +Map<String,Object> searchParams = new HashMap<>();
    +searchParams.put("drop_ratio_search", 0.6);
    +SearchResp searchResp = client.search(SearchReq.builder()
    +        .collectionName("demo")
    +        .data(Collections.singletonList(new EmbeddedText("whats the focus of information retrieval?")))
    +        .annsField("sparse")
    +        .topK(3)
    +        .searchParams(searchParams)
    +        .outputFields(Collections.singletonList("text"))
    +        .build());
    +
    +
    await client.search(
    +    collection_name: 'demo', 
    +    data: ['whats the focus of information retrieval?'],
    +    anns_field: 'sparse',
    +    limit: 3,
    +    params: {'drop_ratio_search': 0.6},
    +)
    +
    +
    curl --request POST \
    +--url "${CLUSTER_ENDPOINT}/v2/vectordb/entities/search" \
    +--header "Authorization: Bearer ${TOKEN}" \
    +--header "Content-Type: application/json" \
    +--data-raw '{
    +    "collectionName": "demo",
    +    "data": [
    +        "whats the focus of information retrieval?"
    +    ],
    +    "annsField": "sparse",
    +    "limit": 3,
    +    "outputFields": [
    +        "text"
    +    ],
    +    "searchParams":{
    +        "params":{
    +            "drop_ratio_search":0.6
    +        }
    +    }
    +}'
     

    Parameter

    Beschreibung

    diff --git a/localization/v2.5.x/site/de/userGuide/search-query-get/keyword-match.json b/localization/v2.5.x/site/de/userGuide/search-query-get/keyword-match.json index be85ebefe..7d93b37e9 100644 --- a/localization/v2.5.x/site/de/userGuide/search-query-get/keyword-match.json +++ b/localization/v2.5.x/site/de/userGuide/search-query-get/keyword-match.json @@ -1 +1 @@ -{"codeList":["from pymilvus import MilvusClient, DataType​\n​\nschema = MilvusClient.create_schema(auto_id=True, enable_dynamic_field=False)​\n​\nschema.add_field(​\n field_name='text', ​\n datatype=DataType.VARCHAR, ​\n max_length=1000, ​\n enable_analyzer=True, # Whether to enable text analysis for this field​\n enable_match=True # Whether to enable text match​\n)​\n\n","analyzer_params={​\n \"type\": \"english\"​\n}​\n​\nschema.add_field(​\n field_name='text', ​\n datatype=DataType.VARCHAR, ​\n max_length=200, ​\n enable_analyzer=True,​\n analyzer_params=analyzer_params,​\n enable_match=True, ​\n)​\n\n","TEXT_MATCH(field_name, text)​\n\n","filter = \"TEXT_MATCH(text, 'machine deep')\"​\n\n","filter = \"TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')\"​\n\n","# Match entities with `keyword1` or `keyword2`​\nfilter = \"TEXT_MATCH(text, 'keyword1 keyword2')\"​\n​\n# Assuming 'embeddings' is the vector field and 'text' is the VARCHAR field​\nresult = MilvusClient.search(​\n collection_name=\"YOUR_COLLECTION_NAME\", # Your collection name​\n anns_field=\"embeddings\", # Vector field name​\n data=[query_vector], # Query vector​\n filter=filter,​\n search_params={\"params\": {\"nprobe\": 10}},​\n limit=10, # Max. number of results to return​\n output_fields=[\"id\", \"text\"] # Fields to return​\n)​\n\n","# Match entities with both `keyword1` and `keyword2`​\nfilter = \"TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')\"​\n​\nresult = MilvusClient.query(​\n collection_name=\"YOUR_COLLECTION_NAME\",​\n filter=filter, ​\n output_fields=[\"id\", \"text\"]​\n)​\n\n"],"headingContent":"Keyword Match​","anchorList":[{"label":"Schlüsselwort-Abgleich","href":"Keyword-Match​","type":1,"isActive":false},{"label":"Überblick","href":"Overview","type":2,"isActive":false},{"label":"Aktivieren der Stichwortsuche","href":"Enable-keyword-match","type":2,"isActive":false},{"label":"Schlüsselwortabgleich verwenden","href":"Use-keyword-match","type":2,"isActive":false},{"label":"Überlegungen","href":"Considerations","type":2,"isActive":false}]} \ No newline at end of file +{"codeList":["from pymilvus import MilvusClient, DataType​\n​\nschema = MilvusClient.create_schema(auto_id=True, enable_dynamic_field=False)​\n​\nschema.add_field(​\n field_name='text', ​\n datatype=DataType.VARCHAR, ​\n max_length=1000, ​\n enable_analyzer=True, # Whether to enable text analysis for this field​\n enable_match=True # Whether to enable text match​\n)​\n\n","import io.milvus.v2.common.DataType;\nimport io.milvus.v2.service.collection.request.AddFieldReq;\nimport io.milvus.v2.service.collection.request.CreateCollectionReq;\n\nCreateCollectionReq.CollectionSchema schema = CreateCollectionReq.CollectionSchema.builder()\n .enableDynamicField(false)\n .build();\n\nschema.addField(AddFieldReq.builder()\n .fieldName(\"text\")\n .dataType(DataType.VarChar)\n .maxLength(1000)\n .enableAnalyzer(true)\n .enableMatch(true)\n .build());\n\n","const schema = [\n {\n name: \"id\",\n data_type: DataType.Int64,\n is_primary_key: true,\n },\n {\n name: \"text\",\n data_type: \"VarChar\",\n enable_analyzer: true,\n enable_match: true,\n max_length: 1000,\n },\n 
{\n name: \"sparse\",\n data_type: DataType.SparseFloatVector,\n },\n];\n\n","export schema='{\n \"autoId\": true,\n \"enabledDynamicField\": false,\n \"fields\": [\n {\n \"fieldName\": \"id\",\n \"dataType\": \"Int64\",\n \"isPrimary\": true\n },\n {\n \"fieldName\": \"text\",\n \"dataType\": \"VarChar\",\n \"elementTypeParams\": {\n \"max_length\": 1000,\n \"enable_analyzer\": true,\n \"enable_match\": true\n }\n },\n {\n \"fieldName\": \"sparse\",\n \"dataType\": \"SparseFloatVector\"\n }\n ]\n }'\n\n","analyzer_params={​\n \"type\": \"english\"​\n}​\n​\nschema.add_field(​\n field_name='text', ​\n datatype=DataType.VARCHAR, ​\n max_length=200, ​\n enable_analyzer=True,​\n analyzer_params=analyzer_params,​\n enable_match=True, ​\n)​\n\n","Map analyzerParams = new HashMap<>();\nanalyzerParams.put(\"type\", \"english\");\nschema.addField(AddFieldReq.builder()\n .fieldName(\"text\")\n .dataType(DataType.VarChar)\n .maxLength(200)\n .enableAnalyzer(true)\n .analyzerParams(analyzerParams)\n .enableMatch(true)\n .build());\n\n","const schema = [\n {\n name: \"id\",\n data_type: DataType.Int64,\n is_primary_key: true,\n },\n {\n name: \"text\",\n data_type: \"VarChar\",\n enable_analyzer: true,\n enable_match: true,\n max_length: 1000,\n analyzer_params: { type: 'english' },\n },\n {\n name: \"sparse\",\n data_type: DataType.SparseFloatVector,\n },\n];\n\n","export schema='{\n \"autoId\": true,\n \"enabledDynamicField\": false,\n \"fields\": [\n {\n \"fieldName\": \"id\",\n \"dataType\": \"Int64\",\n \"isPrimary\": true\n },\n {\n \"fieldName\": \"text\",\n \"dataType\": \"VarChar\",\n \"elementTypeParams\": {\n \"max_length\": 200,\n \"enable_analyzer\": true,\n \"enable_match\": true,\n \"analyzer_params\": {\"type\": \"english\"}\n }\n },\n {\n \"fieldName\": \"my_vector\",\n \"dataType\": \"FloatVector\",\n \"elementTypeParams\": {\n \"dim\": \"5\"\n }\n }\n ]\n }'\n\n","TEXT_MATCH(field_name, text)​\n\n","filter = \"TEXT_MATCH(text, 'machine deep')\"​\n","String filter = \"TEXT_MATCH(text, 'machine deep')\";\n","const filter = \"TEXT_MATCH(text, 'machine deep')\";\n","export filter=\"\\\"TEXT_MATCH(text, 'machine deep')\\\"\"\n","filter = \"TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')\"​\n","String filter = \"TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')\";\n","const filter = \"TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')\"\n","export filter=\"\\\"TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')\\\"\"\n","# Match entities with `keyword1` or `keyword2`​\nfilter = \"TEXT_MATCH(text, 'keyword1 keyword2')\"​\n​\n# Assuming 'embeddings' is the vector field and 'text' is the VARCHAR field​\nresult = MilvusClient.search(​\n collection_name=\"YOUR_COLLECTION_NAME\", # Your collection name​\n anns_field=\"embeddings\", # Vector field name​\n data=[query_vector], # Query vector​\n filter=filter,​\n search_params={\"params\": {\"nprobe\": 10}},​\n limit=10, # Max. 
number of results to return​\n output_fields=[\"id\", \"text\"] # Fields to return​\n)​\n\n","String filter = \"TEXT_MATCH(text, 'keyword1 keyword2')\";\n\nSearchResp searchResp = client.search(SearchReq.builder()\n .collectionName(\"YOUR_COLLECTION_NAME\")\n .annsField(\"embeddings\")\n .data(Collections.singletonList(queryVector)))\n .filter(filter)\n .topK(10)\n .outputFields(Arrays.asList(\"id\", \"text\"))\n .build());\n","// Match entities with `keyword1` or `keyword2`\nconst filter = \"TEXT_MATCH(text, 'keyword1 keyword2')\";\n\n// Assuming 'embeddings' is the vector field and 'text' is the VARCHAR field\nconst result = await client.search(\n collection_name: \"YOUR_COLLECTION_NAME\", // Your collection name\n anns_field: \"embeddings\", // Vector field name\n data: [query_vector], // Query vector\n filter: filter,\n params: {\"nprobe\": 10},\n limit: 10, // Max. number of results to return\n output_fields: [\"id\", \"text\"] //Fields to return\n);\n","export filter=\"\\\"TEXT_MATCH(text, 'keyword1 keyword2')\\\"\"\n\nexport CLUSTER_ENDPOINT=\"http://localhost:19530\"\nexport TOKEN=\"root:Milvus\"\n\ncurl --request POST \\\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/entities/search\" \\\n--header \"Authorization: Bearer ${TOKEN}\" \\\n--header \"Content-Type: application/json\" \\\n-d '{\n \"collectionName\": \"demo2\",\n \"annsField\": \"my_vector\",\n \"data\": [[0.19886812562848388, 0.06023560599112088, 0.6976963061752597, 0.2614474506242501, 0.838729485096104]],\n \"filter\": '\"$filter\"',\n \"searchParams\": {\n \"params\": {\n \"nprobe\": 10\n }\n },\n \"limit\": 3,\n \"outputFields\": [\"text\",\"id\"]\n}'\n","# Match entities with both `keyword1` and `keyword2`​\nfilter = \"TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')\"​\n​\nresult = MilvusClient.query(​\n collection_name=\"YOUR_COLLECTION_NAME\",​\n filter=filter, ​\n output_fields=[\"id\", \"text\"]​\n)​\n\n","String filter = \"TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')\";\n\nQueryResp queryResp = client.query(QueryReq.builder()\n .collectionName(\"YOUR_COLLECTION_NAME\")\n .filter(filter)\n .outputFields(Arrays.asList(\"id\", \"text\"))\n .build()\n);\n","// Match entities with both `keyword1` and `keyword2`\nconst filter = \"TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')\";\n\nconst result = await client.query(\n collection_name: \"YOUR_COLLECTION_NAME\",\n filter: filter, \n output_fields: [\"id\", \"text\"]\n)\n","export filter=\"\\\"TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')\\\"\"\n\nexport CLUSTER_ENDPOINT=\"http://localhost:19530\"\nexport TOKEN=\"root:Milvus\"\n\ncurl --request POST \\\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/entities/query\" \\\n--header \"Authorization: Bearer ${TOKEN}\" \\\n--header \"Content-Type: application/json\" \\\n-d '{\n \"collectionName\": \"demo2\",\n \"filter\": '\"$filter\"',\n \"outputFields\": [\"id\", \"text\"]\n}'\n"],"headingContent":"Text Match​","anchorList":[{"label":"Text-Abgleich","href":"Text-Match​","type":1,"isActive":false},{"label":"Überblick","href":"Overview","type":2,"isActive":false},{"label":"Aktivieren des Textabgleichs","href":"Enable-text-match","type":2,"isActive":false},{"label":"Textabgleich verwenden","href":"Use-text-match","type":2,"isActive":false},{"label":"Überlegungen","href":"Considerations","type":2,"isActive":false}]} \ No newline at end of file diff --git a/localization/v2.5.x/site/de/userGuide/search-query-get/keyword-match.md 
b/localization/v2.5.x/site/de/userGuide/search-query-get/keyword-match.md index 703c1422d..06be4c158 100644 --- a/localization/v2.5.x/site/de/userGuide/search-query-get/keyword-match.md +++ b/localization/v2.5.x/site/de/userGuide/search-query-get/keyword-match.md @@ -1,15 +1,15 @@ --- id: keyword-match.md summary: >- - Der Schlüsselwortabgleich in Milvus ermöglicht das präzise Auffinden von - Dokumenten auf der Grundlage bestimmter Begriffe. Diese Funktion wird in - erster Linie für eine gefilterte Suche verwendet, um bestimmte Bedingungen zu - erfüllen, und kann eine skalare Filterung zur Verfeinerung der - Abfrageergebnisse beinhalten, die eine Ähnlichkeitssuche innerhalb von - Vektoren ermöglicht, die skalare Kriterien erfüllen. -title: Schlüsselwort-Abgleich + Der Textabgleich in Milvus ermöglicht die präzise Suche nach Dokumenten auf + der Grundlage bestimmter Begriffe. Diese Funktion wird in erster Linie für die + gefilterte Suche nach bestimmten Bedingungen verwendet und kann eine skalare + Filterung zur Verfeinerung der Abfrageergebnisse beinhalten, die eine + Ähnlichkeitssuche innerhalb von Vektoren ermöglicht, die skalare Kriterien + erfüllen. +title: Text-Abgleich --- -

    Schlüsselwort-Abgleich

    Der Schlüsselwortabgleich in Milvus ermöglicht die präzise Suche nach Dokumenten auf der Grundlage bestimmter Begriffe. Diese Funktion wird in erster Linie für eine gefilterte Suche verwendet, um bestimmte Bedingungen zu erfüllen, und kann eine skalare Filterung zur Verfeinerung der Abfrageergebnisse beinhalten, die eine Ähnlichkeitssuche innerhalb von Vektoren ermöglicht, die skalare Kriterien erfüllen.

    +

    Der Textabgleich in Milvus ermöglicht die präzise Suche nach Dokumenten auf der Grundlage bestimmter Begriffe. Diese Funktion wird in erster Linie für eine gefilterte Suche verwendet, um bestimmte Bedingungen zu erfüllen, und kann eine skalare Filterung zur Verfeinerung der Abfrageergebnisse beinhalten, die eine Ähnlichkeitssuche innerhalb von Vektoren ermöglicht, die skalare Kriterien erfüllen.

    -

    Der Schlüsselwortabgleich konzentriert sich auf die Suche nach exakten Vorkommen der Abfragebegriffe, ohne die Relevanz der übereinstimmenden Dokumente zu bewerten. Wenn Sie die relevantesten Dokumente auf der Grundlage der semantischen Bedeutung und Wichtigkeit der Suchbegriffe abrufen möchten, empfehlen wir Ihnen die Volltextsuche.

    +

    Der Textabgleich konzentriert sich auf die Suche nach exakten Vorkommen der Abfragebegriffe, ohne die Relevanz der übereinstimmenden Dokumente zu bewerten. Wenn Sie die relevantesten Dokumente auf der Grundlage der semantischen Bedeutung und Wichtigkeit der Abfragebegriffe abrufen möchten, empfehlen wir Ihnen die Volltextsuche.

    Überblick

    Milvus integriert Tantivy, um den zugrundeliegenden invertierten Index und die Stichwortsuche zu betreiben. Für jeden Texteintrag indiziert Milvus diesen nach folgendem Verfahren.

    +

    Milvus integriert Tantivy, um den zugrunde liegenden invertierten Index und die begriffsbasierte Textsuche zu betreiben. Für jeden Texteintrag indiziert Milvus diesen nach folgendem Verfahren.

      -
    1. Analyzer: Der Analyzer verarbeitet den eingegebenen Text, indem er ihn in einzelne Wörter (Token) zerlegt und dann je nach Bedarf Filter anwendet. So kann Milvus einen Index auf der Grundlage dieser Token erstellen.

    2. +
    3. Analyzer: Der Analyzer verarbeitet den eingegebenen Text, indem er ihn in einzelne Wörter (Token) zerlegt und dann nach Bedarf Filter anwendet. So kann Milvus einen Index auf der Grundlage dieser Token erstellen.

    4. Indizierung: Nach der Textanalyse erstellt Milvus einen invertierten Index, der jedes einzelne Token den Dokumenten zuordnet, die es enthalten.

    -

    Wenn ein Benutzer einen Schlüsselwortvergleich durchführt, wird der invertierte Index verwendet, um schnell alle Dokumente abzurufen, die die Schlüsselwörter enthalten. Dies ist wesentlich schneller als das Durchsuchen jedes einzelnen Dokuments.

    +

    Wenn ein Benutzer einen Textabgleich durchführt, wird der invertierte Index verwendet, um schnell alle Dokumente aufzufinden, die die Begriffe enthalten. Dies ist wesentlich schneller, als jedes Dokument einzeln zu durchsuchen.

    - Keyword Match - Schlüsselwortabgleich
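    Zur Veranschaulichung des Prinzips: Die folgende, rein konzeptionelle Python-Skizze (kein Milvus-interner Code) zeigt, wie ein invertierter Index Token auf Dokument-IDs abbildet und warum sich daraus ein schneller ODER-Abgleich ergibt; die Tokenisierung ist bewusst stark vereinfacht.

```python
# Rein konzeptionelle Skizze eines invertierten Index (kein Milvus-Code)
docs = {
    1: "machine learning with milvus",
    2: "deep learning models",
    3: "vector search in milvus",
}

inverted_index = {}
for doc_id, text in docs.items():
    for token in text.lower().split():  # stark vereinfachte Tokenisierung
        inverted_index.setdefault(token, set()).add(doc_id)

# ODER-Abgleich: alle Dokumente, die mindestens eines der Token enthalten
query_tokens = ["milvus", "deep"]
matches = set().union(*(inverted_index.get(t, set()) for t in query_tokens))
print(sorted(matches))  # [1, 2, 3]
```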

    -

    Aktivieren der Stichwortsuche

    Der Schlüsselwortabgleich funktioniert mit dem Feldtyp VARCHAR, der in Milvus im Wesentlichen ein String-Datentyp ist. Um den Schlüsselwortabgleich zu aktivieren, setzen Sie sowohl enable_analyzer als auch enable_match auf True und konfigurieren dann optional einen Analyzer für die Textanalyse, wenn Sie Ihr Sammlungsschema definieren.

    -

    Setzen Sie enable_analyzer und enable_match

    Um den Schlüsselwortabgleich für ein bestimmtes VARCHAR -Feld zu aktivieren, setzen Sie bei der Definition des Feldschemas die beiden Parameter enable_analyzer und enable_match auf True. Dadurch wird Milvus angewiesen, den Text zu tokenisieren und einen invertierten Index für das angegebene Feld zu erstellen, was schnelle und effiziente Schlüsselwortübereinstimmungen ermöglicht.

    +

    Der Textabgleich funktioniert mit dem Feldtyp VARCHAR, der in Milvus im Wesentlichen ein String-Datentyp ist. Um den Textabgleich zu aktivieren, setzen Sie sowohl enable_analyzer als auch enable_match auf True und konfigurieren dann optional einen Analyzer für die Textanalyse, wenn Sie Ihr Sammlungsschema definieren.

    +

    Setzen Sie enable_analyzer und enable_match

    Um den Textabgleich für ein bestimmtes VARCHAR -Feld zu aktivieren, setzen Sie bei der Definition des Feldschemas die beiden Parameter enable_analyzer und enable_match auf True. Dies weist Milvus an, Text zu tokenisieren und einen invertierten Index für das angegebene Feld zu erstellen, was schnelle und effiziente Textabgleiche ermöglicht.

    +
    from pymilvus import MilvusClient, DataType​
     ​
     schema = MilvusClient.create_schema(auto_id=True, enable_dynamic_field=False)​
    @@ -83,9 +85,74 @@ schema.add_field(​
     )​
     
     
    -

    Optional: Konfigurieren Sie einen Analysator

    Die Leistung und Genauigkeit des Schlüsselwortabgleichs hängt vom gewählten Analysator ab. Verschiedene Analysatoren sind auf verschiedene Sprachen und Textstrukturen zugeschnitten, so dass die Wahl des richtigen Analysators die Suchergebnisse für Ihren speziellen Anwendungsfall erheblich beeinflussen kann.

    -

    Standardmäßig verwendet Milvus den Analysator standard, der Text auf der Grundlage von Leerzeichen und Interpunktion in Token umwandelt, Token entfernt, die länger als 40 Zeichen sind, und Text in Kleinbuchstaben umwandelt. Zur Anwendung dieser Standardeinstellung sind keine zusätzlichen Parameter erforderlich. Weitere Informationen finden Sie unter Standard.

    +
    import io.milvus.v2.common.DataType;
    +import io.milvus.v2.service.collection.request.AddFieldReq;
    +import io.milvus.v2.service.collection.request.CreateCollectionReq;
    +
    +CreateCollectionReq.CollectionSchema schema = CreateCollectionReq.CollectionSchema.builder()
    +        .enableDynamicField(false)
    +        .build();
    +
    +schema.addField(AddFieldReq.builder()
    +        .fieldName("text")
    +        .dataType(DataType.VarChar)
    +        .maxLength(1000)
    +        .enableAnalyzer(true)
    +        .enableMatch(true)
    +        .build());
    +
    +
    +
    const schema = [
    +  {
    +    name: "id",
    +    data_type: DataType.Int64,
    +    is_primary_key: true,
    +  },
    +  {
    +    name: "text",
    +    data_type: "VarChar",
    +    enable_analyzer: true,
    +    enable_match: true,
    +    max_length: 1000,
    +  },
    +  {
    +    name: "sparse",
    +    data_type: DataType.SparseFloatVector,
    +  },
    +];
    +
    +
    +
    export schema='{
    +        "autoId": true,
    +        "enabledDynamicField": false,
    +        "fields": [
    +            {
    +                "fieldName": "id",
    +                "dataType": "Int64",
    +                "isPrimary": true
    +            },
    +            {
    +                "fieldName": "text",
    +                "dataType": "VarChar",
    +                "elementTypeParams": {
    +                    "max_length": 1000,
    +                    "enable_analyzer": true,
    +                    "enable_match": true
    +                }
    +            },
    +            {
    +                "fieldName": "sparse",
    +                "dataType": "SparseFloatVector"
    +            }
    +        ]
    +    }'
    +
    +
    +

    Optional: Konfigurieren Sie einen Analysator

    Die Leistung und Genauigkeit des Textabgleichs hängt vom ausgewählten Analysator ab. Verschiedene Analysatoren sind auf verschiedene Sprachen und Textstrukturen zugeschnitten, so dass die Wahl des richtigen Analysators die Suchergebnisse für Ihren speziellen Anwendungsfall erheblich beeinflussen kann.

    +

    Standardmäßig verwendet Milvus den Analysator standard, der Text auf der Grundlage von Leerzeichen und Interpunktion in Token umwandelt, Token mit mehr als 40 Zeichen entfernt und Text in Kleinbuchstaben konvertiert. Zur Anwendung dieser Standardeinstellung sind keine zusätzlichen Parameter erforderlich. Weitere Informationen finden Sie unter Standard.
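    Wer die Voreinstellung dennoch explizit festhalten möchte, kann den Analyzer-Typ standard auch direkt angeben. Eine minimale Skizze, unter der Annahme, dass das schema-Objekt aus dem vorherigen Python-Beispiel wiederverwendet wird und das Feld text dort noch nicht hinzugefügt wurde:

```python
# Skizze (Annahme): der Standard-Analyzer wird explizit gesetzt;
# das Verhalten entspricht der Voreinstellung ohne analyzer_params.
analyzer_params = {"type": "standard"}

schema.add_field(
    field_name="text",
    datatype=DataType.VARCHAR,
    max_length=1000,
    enable_analyzer=True,
    analyzer_params=analyzer_params,
    enable_match=True,
)
```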

    In Fällen, in denen ein anderer Analyzer erforderlich ist, können Sie diesen mit dem Parameter analyzer_params konfigurieren. Um beispielsweise den english-Analyzer für die Verarbeitung von englischem Text anzuwenden, konfigurieren Sie ihn wie folgt.

    +
    analyzer_params={​
         "type": "english"​
     }​
    @@ -100,8 +167,71 @@ schema.add_field(​
     )​
     
     
    -

    Milvus bietet auch verschiedene andere Analysatoren an, die für unterschiedliche Sprachen und Szenarien geeignet sind. Weitere Einzelheiten finden Sie unter Übersicht.

    -

    Schlüsselwortabgleich verwenden +
    const schema = [
    +  {
    +    name: "id",
    +    data_type: DataType.Int64,
    +    is_primary_key: true,
    +  },
    +  {
    +    name: "text",
    +    data_type: "VarChar",
    +    enable_analyzer: true,
    +    enable_match: true,
    +    max_length: 1000,
    +    analyzer_params: { type: 'english' },
    +  },
    +  {
    +    name: "sparse",
    +    data_type: DataType.SparseFloatVector,
    +  },
    +];
    +
    +
    +
    export schema='{
    +        "autoId": true,
    +        "enabledDynamicField": false,
    +        "fields": [
    +            {
    +                "fieldName": "id",
    +                "dataType": "Int64",
    +                "isPrimary": true
    +            },
    +            {
    +                "fieldName": "text",
    +                "dataType": "VarChar",
    +                "elementTypeParams": {
    +                    "max_length": 200,
    +                    "enable_analyzer": true,
    +                    "enable_match": true,
    +                    "analyzer_params": {"type": "english"}
    +                }
    +            },
    +            {
    +                "fieldName": "my_vector",
    +                "dataType": "FloatVector",
    +                "elementTypeParams": {
    +                    "dim": "5"
    +                }
    +            }
    +        ]
    +    }'
    +
    +
    +

    Milvus bietet auch verschiedene andere Analyzer, die für unterschiedliche Sprachen und Szenarien geeignet sind. Weitere Einzelheiten finden Sie unter Übersicht.

    +

    Textabgleich verwenden

    Sobald Sie die Schlüsselwortübereinstimmung für ein VARCHAR-Feld in Ihrem Sammelschema aktiviert haben, können Sie Schlüsselwortübereinstimmungen mit dem Ausdruck TEXT_MATCH durchführen.

    -

    Syntax des TEXT_MATCH-Ausdrucks

    Der Ausdruck TEXT_MATCH wird verwendet, um das Feld und die Schlüsselwörter anzugeben, nach denen gesucht werden soll. Seine Syntax lautet wie folgt.

    -
    TEXT_MATCH(field_name, text)​
    +    

    Sobald Sie die Textübereinstimmung für ein VARCHAR-Feld in Ihrem Sammlungsschema aktiviert haben, können Sie Textübereinstimmungen mit dem Ausdruck TEXT_MATCH durchführen.

    +

    Syntax des TEXT_MATCH-Ausdrucks

    Der Ausdruck TEXT_MATCH wird verwendet, um das Feld und die Begriffe anzugeben, nach denen gesucht werden soll. Seine Syntax lautet wie folgt.

    +
    TEXT_MATCH(field_name, text)​
     
     
    • field_name: Der Name des VARCHAR-Feldes, nach dem gesucht werden soll.

    • -
    • text: Die Schlüsselwörter, nach denen gesucht werden soll. Mehrere Schlüsselwörter können durch Leerzeichen oder andere geeignete Trennzeichen getrennt werden, je nach Sprache und konfiguriertem Analysator.

    • +
    • text: Die Begriffe, nach denen gesucht werden soll. Mehrere Begriffe können durch Leerzeichen oder andere geeignete Trennzeichen getrennt werden, je nach Sprache und konfiguriertem Analysator.

    -

    Standardmäßig verwendet TEXT_MATCH die Logik der ODER-Verknüpfung, d.h. es werden Dokumente zurückgegeben, die eines der angegebenen Schlüsselwörter enthalten. Um zum Beispiel nach Dokumenten zu suchen, die die Schlüsselwörter machine oder deep im Feld text enthalten, verwenden Sie den folgenden Ausdruck.

    +

    Standardmäßig verwendet TEXT_MATCH die Logik der ODER-Verknüpfung, d.h. es werden Dokumente zurückgegeben, die einen der angegebenen Begriffe enthalten. Um zum Beispiel nach Dokumenten zu suchen, die den Begriff machine oder deep im Feld text enthalten, verwenden Sie den folgenden Ausdruck.

    +
    filter = "TEXT_MATCH(text, 'machine deep')"​
    -
    +
    +
    String filter = "TEXT_MATCH(text, 'machine deep')";
    +
    +
    const filter = "TEXT_MATCH(text, 'machine deep')";
    +
    +
    export filter="\"TEXT_MATCH(text, 'machine deep')\""
     

    Sie können auch mehrere TEXT_MATCH Ausdrücke mit logischen Operatoren kombinieren, um einen UND-Abgleich durchzuführen. Um zum Beispiel nach Dokumenten zu suchen, die sowohl machine als auch deep im Feld text enthalten, verwenden Sie den folgenden Ausdruck.

    +
    filter = "TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')"​
    -
     
    -

    Suche mit Schlüsselwortübereinstimmung

    Die Schlüsselwortübereinstimmung kann in Kombination mit der Vektorähnlichkeitssuche verwendet werden, um den Suchbereich einzugrenzen und die Suchleistung zu verbessern. Indem Sie die Sammlung vor der vektoriellen Ähnlichkeitssuche mit einem Schlüsselwortabgleich filtern, können Sie die Anzahl der zu durchsuchenden Dokumente reduzieren, was zu schnelleren Abfragezeiten führt.

    -

    In diesem Beispiel filtert der Ausdruck filter die Suchergebnisse so, dass nur Dokumente enthalten sind, die mit den angegebenen Schlüsselwörtern keyword1 oder keyword2 übereinstimmen. Die Vektorähnlichkeitssuche wird dann mit dieser gefilterten Teilmenge von Dokumenten durchgeführt.

    +
    String filter = "TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')";
    +
    +
    const filter = "TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')"
    +
    +
    export filter="\"TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')\""
    +
    +
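    TEXT_MATCH lässt sich außerdem wie jeder andere Filterausdruck mit weiteren skalaren Bedingungen kombinieren. Die folgende Skizze ist eine Annahme auf Basis der üblichen Filtersyntax und nutzt das Primärschlüsselfeld id aus dem obigen Schema:

```python
# Skizze (Annahme): TEXT_MATCH kombiniert mit einer zusätzlichen skalaren Bedingung
filter = "TEXT_MATCH(text, 'machine deep') and id > 100"

result = MilvusClient.query(
    collection_name="YOUR_COLLECTION_NAME",
    filter=filter,
    output_fields=["id", "text"],
)
```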

    Suche mit Textabgleich

    Textabgleich kann in Kombination mit der Vektorähnlichkeitssuche verwendet werden, um den Suchbereich einzugrenzen und die Suchleistung zu verbessern. Indem Sie die Sammlung vor der Vektorähnlichkeitssuche mit einem Textabgleich filtern, können Sie die Anzahl der zu durchsuchenden Dokumente reduzieren, was zu schnelleren Abfragezeiten führt.

    +

    In diesem Beispiel filtert der Ausdruck filter die Suchergebnisse so, dass nur Dokumente enthalten sind, die mit dem angegebenen Begriff keyword1 oder keyword2 übereinstimmen. Die Vektorähnlichkeitssuche wird dann auf dieser gefilterten Teilmenge von Dokumenten durchgeführt.

    +
    # Match entities with `keyword1` or `keyword2`​
     filter = "TEXT_MATCH(text, 'keyword1 keyword2')"​
     ​
    @@ -150,8 +296,58 @@ result = MilvusClient.search(​
     )​
     
     
    -

    Abfrage mit Schlüsselwortübereinstimmung

    Die Schlüsselwortübereinstimmung kann auch für die skalare Filterung in Abfrageoperationen verwendet werden. Durch Angabe eines Ausdrucks TEXT_MATCH im Parameter expr der Methode query() können Sie Dokumente abrufen, die mit den angegebenen Schlüsselwörtern übereinstimmen.

    -

    Im folgenden Beispiel werden Dokumente abgerufen, bei denen das Feld text die beiden Schlüsselwörter keyword1 und keyword2 enthält.

    +
    String filter = "TEXT_MATCH(text, 'keyword1 keyword2')";
    +
    +SearchResp searchResp = client.search(SearchReq.builder()
    +        .collectionName("YOUR_COLLECTION_NAME")
    +        .annsField("embeddings")
    +        .data(Collections.singletonList(queryVector))
    +        .filter(filter)
    +        .topK(10)
    +        .outputFields(Arrays.asList("id", "text"))
    +        .build());
    +
    +
    // Match entities with `keyword1` or `keyword2`
    +const filter = "TEXT_MATCH(text, 'keyword1 keyword2')";
    +
    +// Assuming 'embeddings' is the vector field and 'text' is the VARCHAR field
    +const result = await client.search({
    +    collection_name: "YOUR_COLLECTION_NAME", // Your collection name
    +    anns_field: "embeddings", // Vector field name
    +    data: [query_vector], // Query vector
    +    filter: filter,
    +    params: {"nprobe": 10},
    +    limit: 10, // Max. number of results to return
    +    output_fields: ["id", "text"] //Fields to return
    +});
    +
    +
    export filter="\"TEXT_MATCH(text, 'keyword1 keyword2')\""
    +
    +export CLUSTER_ENDPOINT="http://localhost:19530"
    +export TOKEN="root:Milvus"
    +
    +curl --request POST \
    +--url "${CLUSTER_ENDPOINT}/v2/vectordb/entities/search" \
    +--header "Authorization: Bearer ${TOKEN}" \
    +--header "Content-Type: application/json" \
    +-d '{
    +    "collectionName": "demo2",
    +    "annsField": "my_vector",
    +    "data": [[0.19886812562848388, 0.06023560599112088, 0.6976963061752597, 0.2614474506242501, 0.838729485096104]],
    +    "filter": '"$filter"',
    +    "searchParams": {
    +        "params": {
    +            "nprobe": 10
    +        }
    +    },
    +    "limit": 3,
    +    "outputFields": ["text","id"]
    +}'
    +
    +

    Abfrage mit Textabgleich

    Die Textübereinstimmung kann auch für die skalare Filterung in Abfrageoperationen verwendet werden. Durch die Angabe eines TEXT_MATCH -Ausdrucks im expr -Parameter der query() -Methode können Sie Dokumente abrufen, die mit den angegebenen Begriffen übereinstimmen.

    +

    Im folgenden Beispiel werden Dokumente abgerufen, bei denen das Feld text die beiden Begriffe keyword1 und keyword2 enthält.

    +
    # Match entities with both `keyword1` and `keyword2`​
     filter = "TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')"​
     ​
    @@ -161,6 +357,39 @@ result = MilvusClient.query(​
         output_fields=["id", "text"]​
     )​
     
    +
    +
    String filter = "TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')";
    +
    +QueryResp queryResp = client.query(QueryReq.builder()
    +        .collectionName("YOUR_COLLECTION_NAME")
    +        .filter(filter)
    +        .outputFields(Arrays.asList("id", "text"))
    +        .build()
    +);
    +
    +
    // Match entities with both `keyword1` and `keyword2`
    +const filter = "TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')";
    +
    +const result = await client.query({
    +    collection_name: "YOUR_COLLECTION_NAME",
    +    filter: filter, 
    +    output_fields: ["id", "text"]
    +})
    +
    +
    export filter="\"TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')\""
    +
    +export CLUSTER_ENDPOINT="http://localhost:19530"
    +export TOKEN="root:Milvus"
    +
    +curl --request POST \
    +--url "${CLUSTER_ENDPOINT}/v2/vectordb/entities/query" \
    +--header "Authorization: Bearer ${TOKEN}" \
    +--header "Content-Type: application/json" \
    +-d '{
    +    "collectionName": "demo2",
    +    "filter": '"$filter"',
    +    "outputFields": ["id", "text"]
    +}'
     

    Überlegungen

      -
    • Das Aktivieren des Schlüsselwortabgleichs für ein Feld löst die Erstellung eines invertierten Indexes aus, der Speicherressourcen verbraucht. Berücksichtigen Sie die Auswirkungen auf den Speicherplatz, wenn Sie sich für die Aktivierung dieser Funktion entscheiden, da diese je nach Textgröße, eindeutigen Token und dem verwendeten Analysator variieren.

    • +
    • Die Aktivierung des Textabgleichs für ein Feld löst die Erstellung eines invertierten Indexes aus, der Speicherressourcen verbraucht. Berücksichtigen Sie die Auswirkungen auf den Speicherplatz, wenn Sie sich für die Aktivierung dieser Funktion entscheiden, da diese je nach Textgröße, eindeutigen Token und dem verwendeten Analysator variieren.

    • Sobald Sie einen Analyzer in Ihrem Schema definiert haben, werden seine Einstellungen für diese Sammlung dauerhaft. Wenn Sie entscheiden, dass ein anderes Analyseprogramm besser zu Ihren Anforderungen passt, können Sie die vorhandene Sammlung löschen und eine neue Sammlung mit der gewünschten Analysekonfiguration erstellen.
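    Eine minimale Skizze dazu (Annahmen: lokale Instanz unter http://localhost:19530, Sammlungsname und Felder wie in den obigen Beispielen), wie sich eine Sammlung mit einer anderen Analyzer-Konfiguration neu anlegen lässt:

```python
from pymilvus import MilvusClient, DataType

client = MilvusClient(uri="http://localhost:19530")  # Annahme: lokale Instanz

# Bestehende Sammlung mit der alten Analyzer-Konfiguration entfernen
client.drop_collection(collection_name="YOUR_COLLECTION_NAME")

# Neues Schema mit der gewünschten Analyzer-Konfiguration aufbauen
schema = MilvusClient.create_schema(auto_id=True, enable_dynamic_field=False)
schema.add_field(field_name="id", datatype=DataType.INT64, is_primary=True)
schema.add_field(
    field_name="text",
    datatype=DataType.VARCHAR,
    max_length=1000,
    enable_analyzer=True,
    analyzer_params={"type": "english"},
    enable_match=True,
)
schema.add_field(field_name="sparse", datatype=DataType.SPARSE_FLOAT_VECTOR)

client.create_collection(collection_name="YOUR_COLLECTION_NAME", schema=schema)
```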

    diff --git a/localization/v2.5.x/site/de/userGuide/search-query-get/multi-vector-search.md b/localization/v2.5.x/site/de/userGuide/search-query-get/multi-vector-search.md index 282d7dbee..017f872c8 100644 --- a/localization/v2.5.x/site/de/userGuide/search-query-get/multi-vector-search.md +++ b/localization/v2.5.x/site/de/userGuide/search-query-get/multi-vector-search.md @@ -22,7 +22,7 @@ title: Hybride Suche >

    Die hybride Suche bezieht sich auf eine Suchmethode, die mehrere ANN-Suchen gleichzeitig durchführt, mehrere Ergebnissätze aus diesen ANN-Suchen neu ordnet und schließlich einen einzigen Ergebnissatz liefert. Die Verwendung von Hybrid Search kann die Suchgenauigkeit verbessern. Zilliz unterstützt die Durchführung von Hybrid Search in einer Sammlung mit mehreren Vektorfeldern.

    -

    Die hybride Suche wird am häufigsten in Szenarien mit spärlichen und dichten Vektorsuchen und multimodalen Suchen verwendet. In diesem Leitfaden wird anhand eines konkreten Beispiels gezeigt, wie eine hybride Suche in Zilliz durchgeführt werden kann.

    +

    Die hybride Suche wird am häufigsten in Szenarien mit spärlichen und dichten Vektorsuchen und multimodalen Suchen verwendet. In diesem Leitfaden wird anhand eines konkreten Beispiels gezeigt, wie eine Hybrid-Suche in Zilliz durchgeführt werden kann.

    Szenarien

    Die hybride Suche eignet sich für die folgenden beiden Szenarien.

    Sparse-Dense-Vektorsuche

    Verschiedene Arten von Vektoren können unterschiedliche Informationen repräsentieren, und die Verwendung verschiedener Einbettungsmodelle kann verschiedene Merkmale und Aspekte der Daten umfassender darstellen. Zum Beispiel kann die Verwendung verschiedener Einbettungsmodelle für denselben Satz einen dichten Vektor zur Darstellung der semantischen Bedeutung und einen spärlichen Vektor zur Darstellung der Worthäufigkeit im Satz erzeugen.

      -
    • Spärliche Vektoren: Spärliche Vektoren zeichnen sich durch ihre hohe Vektordimensionalität und das Vorhandensein von wenigen Werten ungleich Null aus. Aufgrund dieser Struktur eignen sie sich besonders gut für traditionelle Information Retrieval-Anwendungen. In den meisten Fällen entspricht die Anzahl der Dimensionen, die in spärlichen Vektoren verwendet werden, den verschiedenen Token in einer oder mehreren Sprachen. Jeder Dimension wird ein Wert zugewiesen, der die relative Bedeutung dieses Tokens innerhalb des Dokuments angibt. Diese Anordnung erweist sich als vorteilhaft für Aufgaben, die den Abgleich von Schlüsselwörtern beinhalten.

    • -
    • Dichte Vektoren: Dichte Vektoren sind von neuronalen Netzen abgeleitete Einbettungen. Wenn sie in einem geordneten Array angeordnet sind, erfassen diese Vektoren das semantische Wesen des Eingabetextes. Dichte Vektoren sind nicht auf die Textverarbeitung beschränkt; sie werden auch häufig in der Computer Vision verwendet, um die Semantik von visuellen Daten darzustellen. Diese dichten Vektoren, die in der Regel durch Modelle zur Texteinbettung erzeugt werden, zeichnen sich dadurch aus, dass die meisten oder alle Elemente ungleich Null sind. Daher eignen sich dichte Vektoren besonders gut für semantische Suchanwendungen, da sie auf der Grundlage der Vektordistanz die ähnlichsten Ergebnisse liefern können, selbst wenn es keine exakten Schlüsselwortübereinstimmungen gibt. Diese Fähigkeit ermöglicht nuanciertere und kontextbezogene Suchergebnisse und erfasst oft Beziehungen zwischen Konzepten, die bei schlagwortbasierten Ansätzen übersehen werden könnten.

    • +
    • Spärliche Vektoren: Spärliche Vektoren zeichnen sich durch ihre hohe Vektordimensionalität und das Vorhandensein von wenigen Nicht-Null-Werten aus. Aufgrund dieser Struktur eignen sie sich besonders gut für traditionelle Information Retrieval-Anwendungen. In den meisten Fällen entspricht die Anzahl der Dimensionen, die in spärlichen Vektoren verwendet werden, den verschiedenen Token in einer oder mehreren Sprachen. Jeder Dimension wird ein Wert zugewiesen, der die relative Bedeutung dieses Tokens innerhalb des Dokuments angibt. Dieses Layout erweist sich als vorteilhaft für Aufgaben, die einen Textabgleich beinhalten.

    • +
    • Dichte Vektoren: Dichte Vektoren sind von neuronalen Netzen abgeleitete Einbettungen. Wenn sie in einem geordneten Array angeordnet sind, erfassen diese Vektoren das semantische Wesen des Eingabetextes. Dichte Vektoren sind nicht auf die Textverarbeitung beschränkt; sie werden auch häufig in der Computer Vision verwendet, um die Semantik von visuellen Daten darzustellen. Diese dichten Vektoren, die in der Regel durch Modelle zur Texteinbettung erzeugt werden, zeichnen sich dadurch aus, dass die meisten oder alle Elemente ungleich Null sind. Daher eignen sich dichte Vektoren besonders gut für semantische Suchanwendungen, da sie auf der Grundlage der Vektordistanz die ähnlichsten Ergebnisse liefern können, selbst wenn es keine exakten Textübereinstimmungen gibt. Diese Fähigkeit ermöglicht nuanciertere und kontextbezogene Suchergebnisse, die oft Beziehungen zwischen Konzepten erfassen, die bei schlagwortbasierten Ansätzen übersehen werden könnten.

    Weitere Einzelheiten finden Sie unter Sparse Vector und Dense Vector.
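    Zur Einordnung der beiden Darstellungsformen: Die folgende kurze Skizze zeigt, wie eine Entität mit einem spärlichen und einem dichten Vektor beim Einfügen mit pymilvus typischerweise aussieht; Feldnamen und Dimensionen sind frei gewählt.

```python
# Frei gewählte Feldnamen; Werte dienen nur der Veranschaulichung.
entity = {
    "text": "Milvus unterstützt hybride Suche",
    # Spärlicher Vektor: nur Dimensionen mit Werten ungleich Null werden angegeben
    "sparse_vector": {1735: 0.42, 20381: 0.17, 55002: 0.93},
    # Dichter Vektor: jede Dimension trägt einen Wert (hier Beispiel mit dim=8)
    "dense_vector": [0.12, -0.48, 0.33, 0.05, 0.91, -0.27, 0.66, 0.08],
}
```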

    -

    Multimodale Suche

    Multimodale Suche bezieht sich auf die Ähnlichkeitssuche von unstrukturierten Daten über mehrere Modalitäten hinweg (wie Bilder, Videos, Audio, Text usw.). Eine Person kann beispielsweise durch verschiedene Modalitäten von Daten wie Fingerabdrücke, Stimmabdrücke und Gesichtsmerkmale dargestellt werden. Die hybride Suche unterstützt mehrere Suchvorgänge gleichzeitig. Zum Beispiel die Suche nach einer Person mit ähnlichen Fingerabdrücken und Stimmbildern.

    +

    Multimodale Suche

    Multimodale Suche bezieht sich auf die Ähnlichkeitssuche von unstrukturierten Daten über mehrere Modalitäten hinweg (wie Bilder, Videos, Audio, Text usw.). Eine Person kann beispielsweise durch verschiedene Modalitäten von Daten wie Fingerabdrücke, Stimmabdrücke und Gesichtsmerkmale dargestellt werden. Die hybride Suche unterstützt mehrere Suchvorgänge gleichzeitig. Zum Beispiel die Suche nach einer Person mit ähnlichen Fingerabdrücken und Stimmabdrücken.
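    Als Skizze, wie ein solches Szenario mit pymilvus aussehen könnte (Annahmen: lokale Instanz, eine Sammlung persons mit den frei gewählten Vektorfeldern fingerprint_vector und voiceprint_vector sowie Platzhalter-Abfragevektoren):

```python
from pymilvus import MilvusClient, AnnSearchRequest, RRFRanker

client = MilvusClient(uri="http://localhost:19530")  # Annahme: lokale Instanz

fingerprint_query = [0.12, 0.34, 0.56, 0.78, 0.90]  # Platzhalter-Abfragevektor
voiceprint_query = [0.21, 0.43, 0.65, 0.87, 0.09]   # Platzhalter-Abfragevektor

reqs = [
    AnnSearchRequest(
        data=[fingerprint_query],
        anns_field="fingerprint_vector",
        param={"metric_type": "IP", "params": {"nprobe": 10}},
        limit=10,
    ),
    AnnSearchRequest(
        data=[voiceprint_query],
        anns_field="voiceprint_vector",
        param={"metric_type": "IP", "params": {"nprobe": 10}},
        limit=10,
    ),
]

res = client.hybrid_search(
    collection_name="persons",
    reqs=reqs,
    ranker=RRFRanker(),  # Ergebnismengen per Reciprocal Rank Fusion zusammenführen
    limit=5,
)
```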

    Arbeitsablauf

    Auf der Grundlage einer Indexdatei, in der die sortierte Reihenfolge der Vektoreinbettungen aufgezeichnet ist, findet die ANN-Suche (Approximate Nearest Neighbor) eine Untergruppe von Vektoreinbettungen auf der Grundlage des Abfragevektors in einer empfangenen Suchanfrage, vergleicht den Abfragevektor mit denen in der Untergruppe und liefert die ähnlichsten Ergebnisse. Mit der ANN-Suche bietet Milvus ein effizientes Sucherlebnis. Auf dieser Seite erfahren Sie, wie Sie grundlegende ANN-Suchen durchführen können.

    +

    Basierend auf einer Indexdatei, die die sortierte Reihenfolge der Vektoreinbettungen aufzeichnet, findet die ANN-Suche (Approximate Nearest Neighbor) eine Untergruppe von Vektoreinbettungen, die auf dem Abfragevektor in einer empfangenen Suchanfrage basiert, vergleicht den Abfragevektor mit denen in der Untergruppe und liefert die ähnlichsten Ergebnisse. Mit der ANN-Suche bietet Milvus ein effizientes Sucherlebnis. Auf dieser Seite erfahren Sie, wie Sie grundlegende ANN-Suchen durchführen können.

    Überblick

    Bei der ANN-Suche bezieht sich eine Ein-Vektor-Suche auf eine Suche, die nur einen Abfragevektor umfasst. Basierend auf dem vorgefertigten Index und dem metrischen Typ, der in der Suchanfrage enthalten ist, findet Milvus die Top-K Vektoren, die dem Abfragevektor am ähnlichsten sind.

    -

    In diesem Abschnitt erfahren Sie, wie Sie eine Ein-Vektor-Suche durchführen können. Das Code-Snippet geht davon aus, dass Sie eine Sammlung in einer Quick-Setup-Art erstellt haben. Die Suchanfrage enthält einen einzigen Abfragevektor und bittet Milvus, das Innere Produkt (IP) zu verwenden, um die Ähnlichkeit zwischen den Abfragevektoren und den Vektoren in der Sammlung zu berechnen und die drei ähnlichsten zurückzugeben.

    +

    In diesem Abschnitt erfahren Sie, wie Sie eine Ein-Vektor-Suche durchführen können. Das Code-Snippet geht davon aus, dass Sie eine Sammlung in einer Quick-Setup-Art erstellt haben. Die Suchanfrage enthält einen einzelnen Abfragevektor und bittet Milvus, das Innere Produkt (IP) zu verwenden, um die Ähnlichkeit zwischen den Abfragevektoren und den Vektoren in der Sammlung zu berechnen und die drei ähnlichsten zurückzugeben.

    from pymilvus import MilvusClient​
    @@ -775,7 +775,7 @@ curl --request POST \​
           
         

    Sie werden feststellen, dass der Parameter limit, der in den Suchanfragen enthalten ist, die Anzahl der Entitäten bestimmt, die in die Suchergebnisse aufgenommen werden. Dieser Parameter gibt die maximale Anzahl der Entitäten an, die in einer einzelnen Suche zurückgegeben werden sollen, und wird normalerweise als Top-K bezeichnet.

    Wenn Sie paginierte Suchanfragen durchführen möchten, können Sie eine Schleife verwenden, um mehrere Suchanfragen zu senden, wobei die Parameter Limit und Offset in jeder Suchanfrage enthalten sind. Insbesondere können Sie den Parameter Limit auf die Anzahl der Entitäten setzen, die Sie in die aktuellen Abfrageergebnisse aufnehmen möchten, und den Parameter Offset auf die Gesamtzahl der Entitäten, die bereits zurückgegeben wurden.

    -

    Die nachstehende Tabelle zeigt, wie Sie die Parameter Limit und Offset für paginierte Abfragen einstellen, wenn 100 Entitäten auf einmal zurückgegeben werden.

    +

    Die folgende Tabelle zeigt, wie Sie die Parameter Limit und Offset für paginierte Abfragen einstellen, wenn 100 Entitäten auf einmal zurückgegeben werden.

    Abfragen

    Zurückzugebende Entitäten pro Abfrage

    Bereits zurückgegebene Einträge insgesamt

    @@ -904,9 +904,9 @@ curl --request POST \​
  • Volltextsuche

    Die Volltextsuche ist eine Funktion, die Dokumente abruft, die bestimmte Begriffe oder Phrasen in Textdatensätzen enthalten, und dann die Ergebnisse nach Relevanz einstuft. Diese Funktion überwindet die Einschränkungen der semantischen Suche, bei der präzise Begriffe übersehen werden können, und stellt sicher, dass Sie die genauesten und kontextrelevanten Ergebnisse erhalten. Darüber hinaus vereinfacht sie die Vektorsuche, indem sie Rohtexteingaben akzeptiert und Ihre Textdaten automatisch in spärliche Einbettungen konvertiert, ohne dass Sie manuell Vektoreinbettungen erstellen müssen.

    Einzelheiten zur Volltextsuche finden Sie unter Volltextsuche.

  • -
  • Schlüsselwort-Abgleich

    -

    Der Schlüsselwortabgleich in Milvus ermöglicht die präzise Suche nach Dokumenten auf der Grundlage bestimmter Begriffe. Diese Funktion wird in erster Linie für die gefilterte Suche nach bestimmten Bedingungen verwendet und kann skalare Filter zur Verfeinerung der Abfrageergebnisse einbeziehen, so dass Ähnlichkeitssuchen innerhalb von Vektoren, die skalare Kriterien erfüllen, möglich sind.

    -

    Weitere Informationen zur Schlüsselwortsuche finden Sie unter Schlüsselwortsuche.

  • +
  • Textabgleich

    +

    Der Textabgleich in Milvus ermöglicht das präzise Auffinden von Dokumenten auf der Grundlage bestimmter Begriffe. Diese Funktion wird in erster Linie für die gefilterte Suche nach bestimmten Bedingungen verwendet und kann eine skalare Filterung zur Verfeinerung der Abfrageergebnisse beinhalten, die eine Ähnlichkeitssuche innerhalb von Vektoren ermöglicht, die skalare Kriterien erfüllen.

    +

    Einzelheiten zum Textabgleich finden Sie unter Textabgleich.

  • Partitionsschlüssel verwenden

    Die Einbeziehung mehrerer skalarer Felder in die Metadatenfilterung und die Verwendung einer recht komplizierten Filterbedingung können die Sucheffizienz beeinträchtigen. Wenn Sie ein skalares Feld als Partitionsschlüssel festlegen und eine Filterbedingung verwenden, die den Partitionsschlüssel in der Suchanfrage einbezieht, kann dies dazu beitragen, den Suchbereich auf die Partitionen zu beschränken, die den angegebenen Partitionsschlüsselwerten entsprechen.

    Einzelheiten zum Partitionsschlüssel finden Sie unter Partitionsschlüssel verwenden.

  • @@ -914,6 +914,6 @@ curl --request POST \​

    In Milvus ermöglichen memory-mapped Dateien die direkte Abbildung von Dateiinhalten in den Speicher. Diese Funktion verbessert die Speichereffizienz, insbesondere in Situationen, in denen der verfügbare Speicher knapp ist, aber ein vollständiges Laden der Daten nicht möglich ist. Dieser Optimierungsmechanismus kann die Datenkapazität erhöhen und gleichzeitig die Leistung bis zu einer bestimmten Grenze sicherstellen; wenn jedoch die Datenmenge den Speicherplatz zu sehr übersteigt, kann die Such- und Abfrageleistung ernsthaft beeinträchtigt werden, weshalb Sie diese Funktion je nach Bedarf ein- oder ausschalten sollten.

    Einzelheiten zu den mmap-Einstellungen finden Sie unter Verwendung von mmap.

  • Clustering-Verdichtung

    -

    Clustering Compaction wurde entwickelt, um die Suchleistung zu verbessern und die Kosten in großen Sammlungen zu reduzieren. Dieser Leitfaden hilft Ihnen, die Clustering-Verdichtung zu verstehen und wie diese Funktion die Suchleistung verbessern kann.

    +

    Clustering Compaction wurde entwickelt, um die Suchleistung zu verbessern und die Kosten in großen Sammlungen zu reduzieren. Dieses Handbuch hilft Ihnen, die Clustering-Verdichtung zu verstehen und nachzuvollziehen, wie diese Funktion die Suchleistung verbessern kann.

    Einzelheiten zur Clustering-Kompaktierung finden Sie unter Clustering-Kompaktierung.

  • diff --git a/localization/v2.5.x/site/es/adminGuide/upgrade-pulsar-v3.json b/localization/v2.5.x/site/es/adminGuide/upgrade-pulsar-v3.json index d1486ff67..1248f5188 100644 --- a/localization/v2.5.x/site/es/adminGuide/upgrade-pulsar-v3.json +++ b/localization/v2.5.x/site/es/adminGuide/upgrade-pulsar-v3.json @@ -1 +1 @@ -{"codeList":["kubectl -n default port-forward deploy/my-release-milvus-proxy 9091:9091 &​\n","[1] 8116​\nForwarding from 127.0.0.1:9091 -> 9091​\n\n","pid=8116​\n\n","curl 127.0.0.1:9091/api/v1/collections \\​\n|curl 127.0.0.1:9091/api/v1/persist -d @/dev/stdin\\​\n|jq '.flush_coll_segIDs'| jq '[.[] | .data[]]' | jq '{segmentIDs: (.)}' \\​\n> flushing_segments.json​\ncat flushing_segments.json​\n\n","{​\n\"segmentIDs\": [​\n 454097953998181000,​\n 454097953999383600,​\n 454097953998180800​\n]​\n}​\n\n","cat flushing_segments.json| curl -X GET 127.0.0.1:9091/api/v1/persist/state -d @/dev/stdin ​\n\n","{\"status\":{},\"flushed\":true}​\n\n","kill $pid​\n\n","[1] + 8116 terminated kubectl -n default port-forward deploy/my-release-milvus-proxy 9091:9091 ​\n\n","helm -n default get values my-release -o yaml > values.yaml​\ncat values.yaml​\n\n","helm -n default uninstall my-release​\n\n","These resources were kept due to the resource policy:​\n[PersistentVolumeClaim] my-release-minio​\n​\nrelease \"my-release\" uninstalled​\n\n","kubectl -n default get pvc -lapp=pulsar,release=my-release |grep -v NAME |awk '{print $1}' > pulsar-pvcs.txt​\nkubectl -n default get pvc -lapp=pulsar,release=my-release -o custom-columns=VOL:.spec.volumeName|grep -v VOL > pulsar-pvs.txt​\necho \"Volume Claims:\"​\ncat pulsar-pvcs.txt​\necho \"Volumes:\"​\ncat pulsar-pvs.txt​\n\n","Volume Claims:​\nmy-release-pulsar-bookie-journal-my-release-pulsar-bookie-0​\nmy-release-pulsar-bookie-journal-my-release-pulsar-bookie-1​\nmy-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-0​\nmy-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-1​\nmy-release-pulsar-zookeeper-data-my-release-pulsar-zookeeper-0​\nVolumes:​\npvc-f590a4de-df31-4ca8-a424-007eac3c619a​\npvc-17b0e215-3e14-4d14-901e-1a1dda9ff5a3​\npvc-72f83c25-6ea1-45ee-9559-0b783f2c530b​\npvc-60dcb6e4-760d-46c7-af1a-d1fc153b0caf​\npvc-2da33f64-c053-42b9-bb72-c5d50779aa0a​\n\n","cat pulsar-pvcs.txt |xargs -I {} kubectl -n default delete pvc {} --wait=false​\n\n","persistentvolumeclaim \"my-release-pulsar-bookie-journal-my-release-pulsar-bookie-0\" deleted​\npersistentvolumeclaim \"my-release-pulsar-bookie-journal-my-release-pulsar-bookie-1\" deleted​\npersistentvolumeclaim \"my-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-0\" deleted​\npersistentvolumeclaim \"my-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-1\" deleted​\npersistentvolumeclaim \"my-release-pulsar-zookeeper-data-my-release-pulsar-zookeeper-0\" deleted​\n\n","cat pulsar-pvs.txt |xargs -I {} kubectl -n default delete pvc {} --wait=false​\n\n","Error from server (NotFound): persistentvolumeclaims \"pvc-f590a4de-df31-4ca8-a424-007eac3c619a\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-17b0e215-3e14-4d14-901e-1a1dda9ff5a3\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-72f83c25-6ea1-45ee-9559-0b783f2c530b\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-60dcb6e4-760d-46c7-af1a-d1fc153b0caf\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-2da33f64-c053-42b9-bb72-c5d50779aa0a\" not found​\n\n","kubectl -n default get milvus my-release -o yaml > milvus.yaml​\nhead 
milvus.yaml -n 20​\n\n","apiVersion: milvus.io/v1beta1​\nkind: Milvus​\nmetadata:​\nannotations:​\n milvus.io/dependency-values-merged: \"true\"​\n milvus.io/pod-service-label-added: \"true\"​\n milvus.io/querynode-current-group-id: \"0\"​\ncreationTimestamp: \"2024-11-22T08:06:59Z\"​\nfinalizers:​\n- milvus.milvus.io/finalizer​\ngeneration: 3​\nlabels:​\n app: milvus​\n milvus.io/operator-version: 1.1.2​\nname: my-release​\nnamespace: default​\nresourceVersion: \"692217324\"​\nuid: 7a469ed0-9df1-494e-bd9a-340fac4305b5​\nspec:​\ncomponents:​\n\n","# a patch to retain etcd & storage data and delete pulsar data while delete milvus​\nspec:​\ndependencies:​\n etcd:​\n inCluster:​\n deletionPolicy: Retain​\n pvcDeletion: false​\n storage:​\n inCluster:​\n deletionPolicy: Retain​\n pvcDeletion: false​\n pulsar:​\n inCluster:​\n deletionPolicy: Delete​\n pvcDeletion: true​\n\n","kubectl -n default patch milvus my-release --patch-file patch.yaml --type=merge​\n\n","milvus.milvus.io/my-release patched​\n\n","kubectl -n default delete milvus my-release --wait=false​\nkubectl -n default get milvus my-release​\nkubectl -n default delete milvus my-release --wait=true​\n\n","milvus.milvus.io \"my-release\" deleted​\nNAME MODE STATUS UPDATED AGE​\nmy-release cluster Deleting True 41m​\nmilvus.milvus.io \"my-release\" deleted​\n\n","kubectl -n default get milvus my-release​\n\n","No resources found in default namespace.​\n\n","# change the following:​\npulsar:​\nenabled: false # set to false​\n# you may also clean up rest fields under pulsar field​\n# it's ok to keep them though.​\npulsarv3:​\nenabled: true​\n# append other values for pulsar v3 chart if needs​\n\n","helm repo add zilliztech https://zilliztech.github.io/milvus-helm​\nhelm repo update zilliztech​\n\n","\"zilliztech\" already exists with the same configuration, skipping​\nHang tight while we grab the latest from your chart repositories...​\n...Successfully got an update from the \"zilliztech\" chart repository​\nUpdate Complete. 
⎈Happy Helming!⎈​\n\n","helm -n default install my-release zilliztech/milvus --reset-values -f values.yaml​\n\n","NAME: my-release​\nLAST DEPLOYED: Fri Nov 22 15:31:27 2024​\nNAMESPACE: default​\nSTATUS: deployed​\nREVISION: 1​\nTEST SUITE: None​\n\n","NAME READY STATUS RESTARTS AGE​\nmy-release-etcd-0 1/1 Running 0 4m3s​\nmy-release-milvus-datanode-56487bc4bc-s6mbd 1/1 Running 0 4m5s​\nmy-release-milvus-indexnode-6476894d6-rv85d 1/1 Running 0 4m5s​\nmy-release-milvus-mixcoord-6d8875cb9c-67fcq 1/1 Running 0 4m4s​\nmy-release-milvus-proxy-7bc45d57c5-2qf8m 1/1 Running 0 4m4s​\nmy-release-milvus-querynode-77465747b-kt7f4 1/1 Running 0 4m4s​\nmy-release-minio-684ff4f5df-pnc97 1/1 Running 0 4m5s​\nmy-release-pulsarv3-bookie-0 1/1 Running 0 4m3s​\nmy-release-pulsarv3-bookie-1 1/1 Running 0 4m3s​\nmy-release-pulsarv3-bookie-2 1/1 Running 0 4m3s​\nmy-release-pulsarv3-bookie-init-6z4tk 0/1 Completed 0 4m1s​\nmy-release-pulsarv3-broker-0 1/1 Running 0 4m2s​\nmy-release-pulsarv3-broker-1 1/1 Running 0 4m2s​\nmy-release-pulsarv3-proxy-0 1/1 Running 0 4m2s​\nmy-release-pulsarv3-proxy-1 1/1 Running 0 4m2s​\nmy-release-pulsarv3-pulsar-init-wvqpc 0/1 Completed 0 4m1s​\nmy-release-pulsarv3-recovery-0 1/1 Running 0 4m3s​\nmy-release-pulsarv3-zookeeper-0 1/1 Running 0 4m2s​\nmy-release-pulsarv3-zookeeper-1 1/1 Running 0 4m2s​\nmy-release-pulsarv3-zookeeper-2 1/1 Running 0 4m2s​\n\n","# change the followings fields:​\napiVersion: milvus.io/v1beta1​\nkind: Milvus​\nmetadata:​\nannotations: null # this field should be removed or set to null​\nresourceVersion: null # this field should be removed or set to null​\nuid: null # this field should be removed or set to null​\nspec:​\ndependencies:​\n pulsar:​\n inCluster:​\n chartVersion: pulsar-v3​\n # delete all previous values for pulsar v2 and set it to null.​\n # you may add additional values here for pulsar v3 if you're sure about it.​\n values: null​\n\n","helm repo add milvus-operator https://zilliztech.github.io/milvus-operator​\nhelm repo update milvus-operator​\nhelm -n milvus-operator upgrade milvus-operator milvus-operator/milvus-operator​\n\n","kubectl create -f milvus.yaml​\n\n","milvus.milvus.io/my-release created​\n\n","NAME READY STATUS RESTARTS AGE​\nmy-release-etcd-0 1/1 Running 0 65m​\nmy-release-milvus-datanode-57fd59ff58-5mdrk 1/1 Running 0 93s​\nmy-release-milvus-indexnode-67867c6b9b-4wsbw 1/1 Running 0 93s​\nmy-release-milvus-mixcoord-797849f9bb-sf8z5 1/1 Running 0 93s​\nmy-release-milvus-proxy-5d5bf98445-c55m6 1/1 Running 0 93s​\nmy-release-milvus-querynode-0-64797f5c9-lw4rh 1/1 Running 0 92s​\nmy-release-minio-79476ccb49-zvt2h 1/1 Running 0 65m​\nmy-release-pulsar-bookie-0 1/1 Running 0 5m10s​\nmy-release-pulsar-bookie-1 1/1 Running 0 5m10s​\nmy-release-pulsar-bookie-2 1/1 Running 0 5m10s​\nmy-release-pulsar-bookie-init-v8fdj 0/1 Completed 0 5m11s​\nmy-release-pulsar-broker-0 1/1 Running 0 5m11s​\nmy-release-pulsar-broker-1 1/1 Running 0 5m10s​\nmy-release-pulsar-proxy-0 1/1 Running 0 5m11s​\nmy-release-pulsar-proxy-1 1/1 Running 0 5m10s​\nmy-release-pulsar-pulsar-init-5lhx7 0/1 Completed 0 5m11s​\nmy-release-pulsar-recovery-0 1/1 Running 0 5m11s​\nmy-release-pulsar-zookeeper-0 1/1 Running 0 5m11s​\nmy-release-pulsar-zookeeper-1 1/1 Running 0 5m10s​\nmy-release-pulsar-zookeeper-2 1/1 Running 0 5m10s​\n\n"],"headingContent":"Upgrading Pulsar ​","anchorList":[{"label":"Actualización de Pulsar","href":"Upgrading-Pulsar-​","type":1,"isActive":false},{"label":"Hoja de 
ruta","href":"Roadmap","type":2,"isActive":false},{"label":"Procedimientos","href":"Procedures","type":2,"isActive":false}]} \ No newline at end of file +{"codeList":["kubectl -n default port-forward deploy/my-release-milvus-proxy 9091:9091 &​\n","[1] 8116​\nForwarding from 127.0.0.1:9091 -> 9091​\n\n","pid=8116​\n\n","curl 127.0.0.1:9091/api/v1/collections \\​\n|curl 127.0.0.1:9091/api/v1/persist -d @/dev/stdin\\​\n|jq '.flush_coll_segIDs'| jq '[.[] | .data[]]' | jq '{segmentIDs: (.)}' \\​\n> flushing_segments.json​\ncat flushing_segments.json​\n\n","{​\n \"segmentIDs\": [​\n 454097953998181000,​\n 454097953999383600,​\n 454097953998180800​\n ]​\n}​\n\n","cat flushing_segments.json| curl -X GET 127.0.0.1:9091/api/v1/persist/state -d @/dev/stdin ​\n\n","{\"status\":{},\"flushed\":true}​\n\n","kill $pid​\n\n","[1] + 8116 terminated kubectl -n default port-forward deploy/my-release-milvus-proxy 9091:9091 ​\n\n","helm -n default get values my-release -o yaml > values.yaml​\ncat values.yaml​\n\n","helm -n default uninstall my-release​\n\n","These resources were kept due to the resource policy:​\n[PersistentVolumeClaim] my-release-minio​\n​\nrelease \"my-release\" uninstalled​\n\n","kubectl -n default get pvc -lapp=pulsar,release=my-release |grep -v NAME |awk '{print $1}' > pulsar-pvcs.txt​\nkubectl -n default get pvc -lapp=pulsar,release=my-release -o custom-columns=VOL:.spec.volumeName|grep -v VOL > pulsar-pvs.txt​\necho \"Volume Claims:\"​\ncat pulsar-pvcs.txt​\necho \"Volumes:\"​\ncat pulsar-pvs.txt​\n\n","Volume Claims:​\nmy-release-pulsar-bookie-journal-my-release-pulsar-bookie-0​\nmy-release-pulsar-bookie-journal-my-release-pulsar-bookie-1​\nmy-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-0​\nmy-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-1​\nmy-release-pulsar-zookeeper-data-my-release-pulsar-zookeeper-0​\nVolumes:​\npvc-f590a4de-df31-4ca8-a424-007eac3c619a​\npvc-17b0e215-3e14-4d14-901e-1a1dda9ff5a3​\npvc-72f83c25-6ea1-45ee-9559-0b783f2c530b​\npvc-60dcb6e4-760d-46c7-af1a-d1fc153b0caf​\npvc-2da33f64-c053-42b9-bb72-c5d50779aa0a​\n\n","cat pulsar-pvcs.txt |xargs -I {} kubectl -n default delete pvc {} --wait=false​\n\n","persistentvolumeclaim \"my-release-pulsar-bookie-journal-my-release-pulsar-bookie-0\" deleted​\npersistentvolumeclaim \"my-release-pulsar-bookie-journal-my-release-pulsar-bookie-1\" deleted​\npersistentvolumeclaim \"my-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-0\" deleted​\npersistentvolumeclaim \"my-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-1\" deleted​\npersistentvolumeclaim \"my-release-pulsar-zookeeper-data-my-release-pulsar-zookeeper-0\" deleted​\n\n","cat pulsar-pvs.txt |xargs -I {} kubectl -n default delete pvc {} --wait=false​\n\n","Error from server (NotFound): persistentvolumeclaims \"pvc-f590a4de-df31-4ca8-a424-007eac3c619a\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-17b0e215-3e14-4d14-901e-1a1dda9ff5a3\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-72f83c25-6ea1-45ee-9559-0b783f2c530b\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-60dcb6e4-760d-46c7-af1a-d1fc153b0caf\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-2da33f64-c053-42b9-bb72-c5d50779aa0a\" not found​\n\n","kubectl -n default get milvus my-release -o yaml > milvus.yaml​\nhead milvus.yaml -n 20​\n\n","apiVersion: milvus.io/v1beta1​\nkind: Milvus​\nmetadata:​\n annotations:​\n milvus.io/dependency-values-merged: \"true\"​\n 
milvus.io/pod-service-label-added: \"true\"​\n milvus.io/querynode-current-group-id: \"0\"​\n creationTimestamp: \"2024-11-22T08:06:59Z\"​\n finalizers:​\n - milvus.milvus.io/finalizer​\n generation: 3​\n labels:​\n app: milvus​\n milvus.io/operator-version: 1.1.2​\nname: my-release​\nnamespace: default​\nresourceVersion: \"692217324\"​\nuid: 7a469ed0-9df1-494e-bd9a-340fac4305b5​\nspec:​\n components:​\n\n","# a patch to retain etcd & storage data and delete pulsar data while delete milvus​\nspec:​\n dependencies:​\n etcd:​\n inCluster:​\n deletionPolicy: Retain​\n pvcDeletion: false​\n storage:​\n inCluster:​\n deletionPolicy: Retain​\n pvcDeletion: false​\n pulsar:​\n inCluster:​\n deletionPolicy: Delete​\n pvcDeletion: true​\n\n","kubectl -n default patch milvus my-release --patch-file patch.yaml --type=merge​\n\n","milvus.milvus.io/my-release patched​\n\n","kubectl -n default delete milvus my-release --wait=false​\nkubectl -n default get milvus my-release​\nkubectl -n default delete milvus my-release --wait=true​\n\n","milvus.milvus.io \"my-release\" deleted​\nNAME MODE STATUS UPDATED AGE​\nmy-release cluster Deleting True 41m​\nmilvus.milvus.io \"my-release\" deleted​\n\n","kubectl -n default get milvus my-release​\n\n","No resources found in default namespace.​\n\n","# change the following:​\npulsar:​\n enabled: false # set to false​\n # you may also clean up rest fields under pulsar field​\n # it's ok to keep them though.​\npulsarv3:​\n enabled: true​\n # append other values for pulsar v3 chart if needs​\n\n","helm repo add zilliztech https://zilliztech.github.io/milvus-helm​\nhelm repo update zilliztech​\n\n","\"zilliztech\" already exists with the same configuration, skipping​\nHang tight while we grab the latest from your chart repositories...​\n...Successfully got an update from the \"zilliztech\" chart repository​\nUpdate Complete. 
⎈Happy Helming!⎈​\n\n","helm -n default install my-release zilliztech/milvus --reset-values -f values.yaml​\n\n","NAME: my-release​\nLAST DEPLOYED: Fri Nov 22 15:31:27 2024​\nNAMESPACE: default​\nSTATUS: deployed​\nREVISION: 1​\nTEST SUITE: None​\n\n","NAME READY STATUS RESTARTS AGE​\nmy-release-etcd-0 1/1 Running 0 4m3s​\nmy-release-milvus-datanode-56487bc4bc-s6mbd 1/1 Running 0 4m5s​\nmy-release-milvus-indexnode-6476894d6-rv85d 1/1 Running 0 4m5s​\nmy-release-milvus-mixcoord-6d8875cb9c-67fcq 1/1 Running 0 4m4s​\nmy-release-milvus-proxy-7bc45d57c5-2qf8m 1/1 Running 0 4m4s​\nmy-release-milvus-querynode-77465747b-kt7f4 1/1 Running 0 4m4s​\nmy-release-minio-684ff4f5df-pnc97 1/1 Running 0 4m5s​\nmy-release-pulsarv3-bookie-0 1/1 Running 0 4m3s​\nmy-release-pulsarv3-bookie-1 1/1 Running 0 4m3s​\nmy-release-pulsarv3-bookie-2 1/1 Running 0 4m3s​\nmy-release-pulsarv3-bookie-init-6z4tk 0/1 Completed 0 4m1s​\nmy-release-pulsarv3-broker-0 1/1 Running 0 4m2s​\nmy-release-pulsarv3-broker-1 1/1 Running 0 4m2s​\nmy-release-pulsarv3-proxy-0 1/1 Running 0 4m2s​\nmy-release-pulsarv3-proxy-1 1/1 Running 0 4m2s​\nmy-release-pulsarv3-pulsar-init-wvqpc 0/1 Completed 0 4m1s​\nmy-release-pulsarv3-recovery-0 1/1 Running 0 4m3s​\nmy-release-pulsarv3-zookeeper-0 1/1 Running 0 4m2s​\nmy-release-pulsarv3-zookeeper-1 1/1 Running 0 4m2s​\nmy-release-pulsarv3-zookeeper-2 1/1 Running 0 4m2s​\n\n","# change the followings fields:​\napiVersion: milvus.io/v1beta1​\nkind: Milvus​\nmetadata:​\n annotations: null # this field should be removed or set to null​\n resourceVersion: null # this field should be removed or set to null​\n uid: null # this field should be removed or set to null​\nspec:​\n dependencies:​\n pulsar:​\n inCluster:​\n chartVersion: pulsar-v3​\n # delete all previous values for pulsar v2 and set it to null.​\n # you may add additional values here for pulsar v3 if you're sure about it.​\n values: null​\n\n","helm repo add milvus-operator https://zilliztech.github.io/milvus-operator​\nhelm repo update milvus-operator​\nhelm -n milvus-operator upgrade milvus-operator milvus-operator/milvus-operator​\n\n","kubectl create -f milvus.yaml​\n\n","milvus.milvus.io/my-release created​\n\n","NAME READY STATUS RESTARTS AGE​\nmy-release-etcd-0 1/1 Running 0 65m​\nmy-release-milvus-datanode-57fd59ff58-5mdrk 1/1 Running 0 93s​\nmy-release-milvus-indexnode-67867c6b9b-4wsbw 1/1 Running 0 93s​\nmy-release-milvus-mixcoord-797849f9bb-sf8z5 1/1 Running 0 93s​\nmy-release-milvus-proxy-5d5bf98445-c55m6 1/1 Running 0 93s​\nmy-release-milvus-querynode-0-64797f5c9-lw4rh 1/1 Running 0 92s​\nmy-release-minio-79476ccb49-zvt2h 1/1 Running 0 65m​\nmy-release-pulsar-bookie-0 1/1 Running 0 5m10s​\nmy-release-pulsar-bookie-1 1/1 Running 0 5m10s​\nmy-release-pulsar-bookie-2 1/1 Running 0 5m10s​\nmy-release-pulsar-bookie-init-v8fdj 0/1 Completed 0 5m11s​\nmy-release-pulsar-broker-0 1/1 Running 0 5m11s​\nmy-release-pulsar-broker-1 1/1 Running 0 5m10s​\nmy-release-pulsar-proxy-0 1/1 Running 0 5m11s​\nmy-release-pulsar-proxy-1 1/1 Running 0 5m10s​\nmy-release-pulsar-pulsar-init-5lhx7 0/1 Completed 0 5m11s​\nmy-release-pulsar-recovery-0 1/1 Running 0 5m11s​\nmy-release-pulsar-zookeeper-0 1/1 Running 0 5m11s​\nmy-release-pulsar-zookeeper-1 1/1 Running 0 5m10s​\nmy-release-pulsar-zookeeper-2 1/1 Running 0 5m10s​\n\n"],"headingContent":"Upgrading Pulsar ​","anchorList":[{"label":"Actualización de Pulsar","href":"Upgrading-Pulsar-​","type":1,"isActive":false},{"label":"Hoja de 
ruta","href":"Roadmap","type":2,"isActive":false},{"label":"Procedimientos","href":"Procedures","type":2,"isActive":false}]} \ No newline at end of file diff --git a/localization/v2.5.x/site/es/adminGuide/upgrade-pulsar-v3.md b/localization/v2.5.x/site/es/adminGuide/upgrade-pulsar-v3.md index 6d24c1e6d..4e7d29177 100644 --- a/localization/v2.5.x/site/es/adminGuide/upgrade-pulsar-v3.md +++ b/localization/v2.5.x/site/es/adminGuide/upgrade-pulsar-v3.md @@ -113,11 +113,11 @@ Forwarding from 127.

    Salida.

    {​
    -"segmentIDs": [​
    +  "segmentIDs": [​
         454097953998181000,​
         454097953999383600,​
         454097953998180800​
    -]​
    +  ]​
     }​
     
     
    @@ -141,7 +141,7 @@ Forwarding from 127.

    Detener Milvus y borrar Pulsar V2

    En este paso, necesita detener el pod Milvus y eliminar el despliegue Pulsar V2. Hay dos secciones separadas disponibles:

    • Para usuarios de Milvus Helm

      -

      Si ha instalado Milvus utilizando la tabla de Milvus Helm, vaya a Para usuarios de Helm.

    • +

      Si ha instalado Milvus utilizando la tabla Milvus Helm, vaya a Para usuarios de Helm.

    • Para usuarios de Milvus Operator

      Si ha instalado Milvus utilizando el Milvus Operator, vaya a Para usuarios de Milvus Operator.

    @@ -223,15 +223,15 @@ head milvus.yaml -n 20
    apiVersion: milvus.io/v1beta1​
     kind: Milvus​
     metadata:​
    -annotations:​
    +  annotations:​
         milvus.io/dependency-values-merged: "true"​
         milvus.io/pod-service-label-added: "true"​
         milvus.io/querynode-current-group-id: "0"​
    -creationTimestamp: "2024-11-22T08:06:59Z"​
    -finalizers:​
    -- milvus.milvus.io/finalizer​
    -generation: 3​
    -labels:​
    +  creationTimestamp: "2024-11-22T08:06:59Z"​
    +  finalizers:​
    +  - milvus.milvus.io/finalizer​
    +  generation: 3​
    +  labels:​
         app: milvus​
         milvus.io/operator-version: 1.1.2​
     name: my-release​
    @@ -239,23 +239,23 @@ namespace: default​
     resourceVersion: "692217324"​
     uid: 7a469ed0-9df1-494e-bd9a-340fac4305b5​
     spec:​
    -components:​
    +  components:​
     
     
  • Cree un archivo patch.yaml con el siguiente contenido.

    # a patch to retain etcd & storage data and delete pulsar data while delete milvus​
     spec:​
    -dependencies:​
    +  dependencies:​
         etcd:​
    -    inCluster:​
    +      inCluster:​
             deletionPolicy: Retain​
             pvcDeletion: false​
         storage:​
    -    inCluster:​
    +      inCluster:​
             deletionPolicy: Retain​
             pvcDeletion: false​
         pulsar:​
    -    inCluster:​
    +      inCluster:​
             deletionPolicy: Delete​
             pvcDeletion: true​
     
    @@ -302,12 +302,12 @@ milvus.milvus.io "my-release" deleted
     
  • Edite el values.yaml guardado en el paso anterior.

    # change the following:​
     pulsar:​
    -enabled: false # set to false​
    -# you may also clean up rest fields under pulsar field​
    -# it's ok to keep them though.​
    +  enabled: false # set to false​
    +  # you may also clean up rest fields under pulsar field​
    +  # it's ok to keep them though.​
     pulsarv3:​
    -enabled: true​
    -# append other values for pulsar v3 chart if needs​
    +  enabled: true​
    +  # append other values for pulsar v3 chart if needs​
     
     
  • Actualice su helm repo local

    @@ -368,13 +368,13 @@ my-release-pulsarv3-zookeeper-2 < apiVersion: milvus.io/v1beta1​ kind: Milvus​ metadata:​ -annotations: null # this field should be removed or set to null​ -resourceVersion: null # this field should be removed or set to null​ -uid: null # this field should be removed or set to null​ + annotations: null # this field should be removed or set to null​ + resourceVersion: null # this field should be removed or set to null​ + uid: null # this field should be removed or set to null​ spec:​ -dependencies:​ + dependencies:​ pulsar:​ - inCluster:​ + inCluster:​ chartVersion: pulsar-v3​ # delete all previous values for pulsar v2 and set it to null.​ # you may add additional values here for pulsar v3 if you're sure about it.​ diff --git a/localization/v2.5.x/site/es/getstarted/run-milvus-k8s/install_cluster-helm.json b/localization/v2.5.x/site/es/getstarted/run-milvus-k8s/install_cluster-helm.json index ac7d6087d..d248b83ab 100644 --- a/localization/v2.5.x/site/es/getstarted/run-milvus-k8s/install_cluster-helm.json +++ b/localization/v2.5.x/site/es/getstarted/run-milvus-k8s/install_cluster-helm.json @@ -1 +1 @@ -{"codeList":["$ kubectl get sc\n\nNAME PROVISIONER RECLAIMPOLICY VOLUMEBIINDINGMODE ALLOWVOLUMEEXPANSION AGE\nstandard (default) k8s.io/minikube-hostpath Delete Immediate false \n","$ helm repo add milvus https://github.com/zilliztech/milvus-helm\n","helm repo add zilliztech https://github.com/zilliztech/milvus-helm\nhelm repo update\n# upgrade existing helm release\nhelm upgrade my-release zilliztech/milvus\n","$ helm repo update\n","$ helm install my-release milvus/milvus\n","$ kubectl get pods\n","NAME READY STATUS RESTARTS AGE\nmy-release-etcd-0 1/1 Running 0 3m23s\nmy-release-etcd-1 1/1 Running 0 3m23s\nmy-release-etcd-2 1/1 Running 0 3m23s\nmy-release-milvus-datanode-68cb87dcbd-4khpm 1/1 Running 0 3m23s\nmy-release-milvus-indexnode-5c5f7b5bd9-l8hjg 1/1 Running 0 3m24s\nmy-release-milvus-mixcoord-7fb9488465-dmbbj 1/1 Running 0 3m23s\nmy-release-milvus-proxy-6bd7f5587-ds2xv 1/1 Running 0 3m24s\nmy-release-milvus-querynode-5cd8fff495-k6gtg 1/1 Running 0 3m24s\nmy-release-minio-0 1/1 Running 0 3m23s\nmy-release-minio-1 1/1 Running 0 3m23s\nmy-release-minio-2 1/1 Running 0 3m23s\nmy-release-minio-3 1/1 Running 0 3m23s\nmy-release-pulsar-autorecovery-86f5dbdf77-lchpc 1/1 Running 0 3m24s\nmy-release-pulsar-bookkeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-bookkeeper-1 1/1 Running 0 98s\nmy-release-pulsar-broker-556ff89d4c-2m29m 1/1 Running 0 3m23s\nmy-release-pulsar-proxy-6fbd75db75-nhg4v 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-metadata-98zbr 0/1 Completed 0 3m24s\n","$ kubectl get pod my-release-milvus-proxy-6bd7f5587-ds2xv --template\n='{{(index (index .spec.containers 0).ports 0).containerPort}}{{\"\\n\"}}'\n19530\n","$ kubectl port-forward service/my-release-milvus 27017:19530\nForwarding from 127.0.0.1:27017 -> 19530\n","$ kubectl port-forward --address 0.0.0.0 service/my-release-milvus 27017:19530\nForwarding from 0.0.0.0:27017 -> 19530\n","$ helm template my-release milvus/milvus > milvus_manifest.yaml\n","$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/requirements.txt\n$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/save_image.py\n","$ pip3 install -r requirements.txt\n$ python3 save_image.py --manifest milvus_manifest.yaml\n","$ for image in $(find . 
-type f -name \"*.tar.gz\") ; do gunzip -c $image | docker load; done\n","$ kubectl apply -f milvus_manifest.yaml\n","$ helm repo update\n$ helm upgrade my-release zilliztech/milvus\n","$ helm uninstall my-release\n"],"headingContent":"Run Milvus in Kubernetes with Helm","anchorList":[{"label":"Ejecutar Milvus en Kubernetes con Helm","href":"Run-Milvus-in-Kubernetes-with-Helm","type":1,"isActive":false},{"label":"Visión general","href":"Overview","type":2,"isActive":false},{"label":"Requisitos previos","href":"Prerequisites","type":2,"isActive":false},{"label":"Instalar Milvus Helm Chart","href":"Install-Milvus-Helm-Chart","type":2,"isActive":false},{"label":"Instalación en línea","href":"Online-install","type":2,"isActive":false},{"label":"Instalación fuera de línea","href":"Offline-install","type":2,"isActive":false},{"label":"Actualice el cluster Milvus en ejecución","href":"Upgrade-running-Milvus-cluster","type":2,"isActive":false},{"label":"Desinstalar Milvus","href":"Uninstall-Milvus","type":2,"isActive":false},{"label":"Lo que sigue","href":"Whats-next","type":2,"isActive":false}]} \ No newline at end of file +{"codeList":["$ kubectl get sc\n\nNAME PROVISIONER RECLAIMPOLICY VOLUMEBIINDINGMODE ALLOWVOLUMEEXPANSION AGE\nstandard (default) k8s.io/minikube-hostpath Delete Immediate false \n","$ helm repo add milvus https://zilliztech.github.io/milvus-helm/\n","helm repo add zilliztech https://zilliztech.github.io/milvus-helm/\nhelm repo update\n# upgrade existing helm release\nhelm upgrade my-release zilliztech/milvus\n","$ helm repo update\n","$ helm install my-release milvus/milvus\n","$ kubectl get pods\n","NAME READY STATUS RESTARTS AGE\nmy-release-etcd-0 1/1 Running 0 3m23s\nmy-release-etcd-1 1/1 Running 0 3m23s\nmy-release-etcd-2 1/1 Running 0 3m23s\nmy-release-milvus-datanode-68cb87dcbd-4khpm 1/1 Running 0 3m23s\nmy-release-milvus-indexnode-5c5f7b5bd9-l8hjg 1/1 Running 0 3m24s\nmy-release-milvus-mixcoord-7fb9488465-dmbbj 1/1 Running 0 3m23s\nmy-release-milvus-proxy-6bd7f5587-ds2xv 1/1 Running 0 3m24s\nmy-release-milvus-querynode-5cd8fff495-k6gtg 1/1 Running 0 3m24s\nmy-release-minio-0 1/1 Running 0 3m23s\nmy-release-minio-1 1/1 Running 0 3m23s\nmy-release-minio-2 1/1 Running 0 3m23s\nmy-release-minio-3 1/1 Running 0 3m23s\nmy-release-pulsar-autorecovery-86f5dbdf77-lchpc 1/1 Running 0 3m24s\nmy-release-pulsar-bookkeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-bookkeeper-1 1/1 Running 0 98s\nmy-release-pulsar-broker-556ff89d4c-2m29m 1/1 Running 0 3m23s\nmy-release-pulsar-proxy-6fbd75db75-nhg4v 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-metadata-98zbr 0/1 Completed 0 3m24s\n","$ kubectl get pod my-release-milvus-proxy-6bd7f5587-ds2xv --template\n='{{(index (index .spec.containers 0).ports 0).containerPort}}{{\"\\n\"}}'\n19530\n","$ kubectl port-forward service/my-release-milvus 27017:19530\nForwarding from 127.0.0.1:27017 -> 19530\n","$ kubectl port-forward --address 0.0.0.0 service/my-release-milvus 27017:19530\nForwarding from 0.0.0.0:27017 -> 19530\n","$ helm template my-release milvus/milvus > milvus_manifest.yaml\n","$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/requirements.txt\n$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/save_image.py\n","$ pip3 install -r requirements.txt\n$ python3 save_image.py --manifest milvus_manifest.yaml\n","$ for image in $(find . 
-type f -name \"*.tar.gz\") ; do gunzip -c $image | docker load; done\n","$ kubectl apply -f milvus_manifest.yaml\n","$ helm repo update\n$ helm upgrade my-release zilliztech/milvus\n","$ helm uninstall my-release\n"],"headingContent":"Run Milvus in Kubernetes with Helm","anchorList":[{"label":"Ejecutar Milvus en Kubernetes con Helm","href":"Run-Milvus-in-Kubernetes-with-Helm","type":1,"isActive":false},{"label":"Visión general","href":"Overview","type":2,"isActive":false},{"label":"Requisitos previos","href":"Prerequisites","type":2,"isActive":false},{"label":"Instalar Milvus Helm Chart","href":"Install-Milvus-Helm-Chart","type":2,"isActive":false},{"label":"Instalación en línea","href":"Online-install","type":2,"isActive":false},{"label":"Instalación fuera de línea","href":"Offline-install","type":2,"isActive":false},{"label":"Actualice el cluster Milvus en ejecución","href":"Upgrade-running-Milvus-cluster","type":2,"isActive":false},{"label":"Desinstalar Milvus","href":"Uninstall-Milvus","type":2,"isActive":false},{"label":"Lo que sigue","href":"Whats-next","type":2,"isActive":false}]} \ No newline at end of file diff --git a/localization/v2.5.x/site/es/getstarted/run-milvus-k8s/install_cluster-helm.md b/localization/v2.5.x/site/es/getstarted/run-milvus-k8s/install_cluster-helm.md index 23b6f356d..aa4ade3b9 100644 --- a/localization/v2.5.x/site/es/getstarted/run-milvus-k8s/install_cluster-helm.md +++ b/localization/v2.5.x/site/es/getstarted/run-milvus-k8s/install_cluster-helm.md @@ -62,7 +62,7 @@ NAME PROVISIONER RECLAIMPOLICY VOLUMEBIINDI standard (default) k8s.io/minikube-hostpath Delete Immediate false
  • Compruebe los requisitos de hardware y software antes de la instalación.

  • -
  • Antes de instalar Milvus, se recomienda utilizar Milvus Sizing Tool para estimar los requisitos de hardware basándose en el tamaño de sus datos. Esto ayuda a garantizar un rendimiento y una asignación de recursos óptimos para su instalación de Milvus.

  • +
  • Antes de instalar Milvus, se recomienda utilizar Milvus Sizing Tool para estimar los requisitos de hardware basados en el tamaño de sus datos. Esto ayuda a garantizar un rendimiento y una asignación de recursos óptimos para su instalación de Milvus.

  • Si encuentra algún problema al tirar de la imagen, póngase en contacto con nosotros en community@zilliz.com con detalles sobre el problema, y le proporcionaremos el soporte necesario.

    @@ -83,11 +83,11 @@ NAME PROVISIONER RECLAIMPOLICY VOLUMEBIINDI >

    Antes de instalar Milvus Helm Charts, necesita añadir el repositorio Milvus Helm.

    -
    $ helm repo add milvus https://github.com/zilliztech/milvus-helm
    +
    $ helm repo add milvus https://zilliztech.github.io/milvus-helm/
     

    El repositorio de Milvus Helm Charts en https://github.com/milvus-io/milvus-helm ha sido archivado y puede obtener más actualizaciones en https://github.com/zilliztech/milvus-helm como se indica a continuación:

    -
    helm repo add zilliztech https://github.com/zilliztech/milvus-helm
    +
    helm repo add zilliztech https://zilliztech.github.io/milvus-helm/
     helm repo update
     # upgrade existing helm release
     helm upgrade my-release zilliztech/milvus
    diff --git a/localization/v2.5.x/site/es/home/home.md b/localization/v2.5.x/site/es/home/home.md
    index ddfe6cc3e..13ed95db0 100644
    --- a/localization/v2.5.x/site/es/home/home.md
    +++ b/localization/v2.5.x/site/es/home/home.md
    @@ -114,7 +114,7 @@ Aquí aprenderá qué es Milvus y cómo instalar, utilizar y desplegar Milvus pa
         

    Nov 2024 - Lanzamiento de Milvus 2.5.0

    • Añadida guía sobre cómo realizar búsquedas de texto completo.
    • -
    • Se ha añadido una guía sobre cómo realizar coincidencias de palabras clave.
    • +
    • Se ha añadido una guía sobre cómo realizar una coincidencia de texto.
    • Añadida guía sobre cómo habilitar valores nulos y por defecto.
    • Se han añadido descripciones de los analizadores.
    • Añadidas descripciones de los índices de mapa de bits.
    • diff --git a/localization/v2.5.x/site/es/menuStructure/es.json b/localization/v2.5.x/site/es/menuStructure/es.json index dd13d979f..08c331af4 100644 --- a/localization/v2.5.x/site/es/menuStructure/es.json +++ b/localization/v2.5.x/site/es/menuStructure/es.json @@ -286,6 +286,18 @@ "order": 8, "children": [] }, + { + "label": "Tipos métricos", + "id": "metric.md", + "order": 9, + "children": [] + }, + { + "label": "Nivel de coherencia", + "id": "consistency.md", + "order": 10, + "children": [] + }, { "label": "Réplica en memoria", "id": "replica.md", @@ -602,31 +614,39 @@ ] }, { - "label": "Gestionar índices", + "label": "Índices", "id": "manage_indexes", "order": 4, "isMenu": true, "children": [ { - "label": "Campos vectoriales de índice", + "label": "Índices vectoriales", "id": "index-vector-fields.md", "order": 0, "children": [] }, { - "label": "Índice Campos escalares", - "id": "index-scalar-fields.md", + "label": "Índices escalares", + "id": "scalar-index", "order": 1, - "children": [] - }, - { - "label": "Índice BITMAP", - "id": "bitmap.md", - "order": 2, - "children": [] + "isMenu": true, + "children": [ + { + "label": "Índice Campos escalares", + "id": "index-scalar-fields.md", + "order": 1, + "children": [] + }, + { + "label": "Índice de mapa de bits", + "id": "bitmap.md", + "order": 2, + "children": [] + } + ] }, { - "label": "Índice con GPU", + "label": "Índices habilitados para GPU", "id": "index-with-gpu.md", "order": 3, "children": [] @@ -682,7 +702,7 @@ "children": [] }, { - "label": "Coincidencia de palabras clave", + "label": "Coincidencia de texto", "id": "keyword-match.md", "order": 7, "children": [] @@ -699,30 +719,6 @@ "order": 9, "children": [] }, - { - "label": "Utilizar mmap", - "id": "mmap.md", - "order": 10, - "children": [] - }, - { - "label": "Agrupación Compactación", - "id": "clustering-compaction.md", - "order": 11, - "children": [] - }, - { - "label": "Nivel de coherencia", - "id": "consistency.md", - "order": 12, - "children": [] - }, - { - "label": "Tipos métricos", - "id": "metric.md", - "order": 13, - "children": [] - }, { "label": "Filtrado de metadatos", "id": "boolean.md", @@ -736,26 +732,6 @@ "children": [] } ] - }, - { - "label": "Importación de datos", - "id": "data_import", - "order": 6, - "isMenu": true, - "children": [ - { - "label": "Preparar los datos de origen", - "id": "prepare-source-data.md", - "order": 0, - "children": [] - }, - { - "label": "Importar datos", - "id": "import-data.md", - "order": 1, - "children": [] - } - ] } ] }, @@ -897,11 +873,31 @@ } ] }, + { + "label": "Importación de datos", + "id": "data_import", + "order": 5, + "isMenu": true, + "children": [ + { + "label": "Preparar los datos de origen", + "id": "prepare-source-data.md", + "order": 0, + "children": [] + }, + { + "label": "Importar datos", + "id": "import-data.md", + "order": 1, + "children": [] + } + ] + }, { "label": "Migración Milvus", "id": "milvus_migration", "isMenu": true, - "order": 5, + "order": 6, "children": [ { "label": "Visión general", @@ -998,7 +994,7 @@ "children": [] }, { - "label": "Despliegue en GCP", + "label": "Implantación en GCP", "id": "gcp.md", "order": 2, "children": [] @@ -1299,7 +1295,7 @@ "isMenu": true, "children": [ { - "label": "Rastreo de Jaeger", + "label": "Seguimiento de Jaeger", "id": "config_jaeger_tracing.md", "order": 0, "children": [] @@ -1321,10 +1317,30 @@ } ] }, + { + "label": "Optimización del almacenamiento", + "id": "storage_optimization", + "order": 10, + "isMenu": true, + "children": [ + { + "label": "Utilizar 
mmap", + "id": "mmap.md", + "order": 0, + "children": [] + }, + { + "label": "Agrupación Compactación", + "id": "clustering-compaction.md", + "order": 1, + "children": [] + } + ] + }, { "label": "Seguridad", "id": "security", - "order": 10, + "order": 11, "isMenu": true, "children": [ { diff --git a/localization/v2.5.x/site/es/release_notes.md b/localization/v2.5.x/site/es/release_notes.md index 71e2dd82e..d2200870d 100644 --- a/localization/v2.5.x/site/es/release_notes.md +++ b/localization/v2.5.x/site/es/release_notes.md @@ -50,7 +50,7 @@ title: Notas de la versión

      WebUI de gestión de clústeres (Beta)

      Para soportar mejor los datos masivos y las ricas funciones, el sofisticado diseño de Milvus incluye varias dependencias, numerosos roles de nodo, estructuras de datos complejas y mucho más. Estos aspectos pueden plantear retos de uso y mantenimiento.

      Milvus 2.5 introduce una WebUI integrada de gestión de clústeres, que reduce la dificultad de mantenimiento del sistema mediante la visualización de la compleja información del entorno de ejecución de Milvus. Esto incluye detalles de bases de datos y colecciones, segmentos, canales, dependencias, estado de salud de los nodos, información de tareas, consultas lentas, etc.

      Coincidencia de texto

      Milvus 2.5 aprovecha los analizadores y la indexación de Tantivy para el preprocesamiento de textos y la creación de índices, lo que permite una correspondencia precisa en lenguaje natural de datos de texto basados en términos específicos. Esta función se utiliza principalmente para la búsqueda filtrada para satisfacer condiciones específicas y puede incorporar el filtrado escalar para refinar los resultados de la consulta, permitiendo búsquedas de similitud dentro de vectores que cumplen criterios escalares.

      -

      Para obtener más información, consulte Coincidencia de palabras clave.

      +

      Para obtener más información, consulte Coincidencia de texto.

      Índice de mapa de bits

      Se ha añadido un nuevo índice de datos escalares a la familia Milvus. El índice BitMap utiliza una matriz de bits, de longitud igual al número de filas, para representar la existencia de valores y acelerar las búsquedas.

      Los índices Bitmap han sido tradicionalmente eficaces para los campos de baja cardinalidad, que tienen un número modesto de valores distintos, por ejemplo, una columna que contiene información sobre el sexo con sólo dos valores posibles: masculino y femenino.

      Para obtener más información, consulte Índice de mapa de bits.
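A modo de esbozo orientativo (los nombres de colección y de campo son hipotéticos), un índice BITMAP sobre un campo escalar de baja cardinalidad se declara igual que cualquier otro índice escalar con el cliente de Python:

```python
from pymilvus import MilvusClient

client = MilvusClient(uri="http://localhost:19530")

index_params = client.prepare_index_params()
index_params.add_index(
    field_name="gender",      # hypothetical low-cardinality scalar field
    index_type="BITMAP",
)

client.create_index(collection_name="my_collection", index_params=index_params)
```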

      diff --git a/localization/v2.5.x/site/es/tutorials/hybrid_search_with_milvus.md b/localization/v2.5.x/site/es/tutorials/hybrid_search_with_milvus.md index bc77a0055..d3288b847 100644 --- a/localization/v2.5.x/site/es/tutorials/hybrid_search_with_milvus.md +++ b/localization/v2.5.x/site/es/tutorials/hybrid_search_with_milvus.md @@ -25,7 +25,7 @@ title: Búsqueda híbrida con Milvus

      Milvus admite métodos de recuperación densos, dispersos e híbridos:

      • Recuperación densa: Utiliza el contexto semántico para comprender el significado de las consultas.
      • -
      • Recuperación dispersa: Hace hincapié en la concordancia de palabras clave para encontrar resultados basados en términos específicos, lo que equivale a una búsqueda de texto completo.
      • +
      • Recuperación dispersa: Hace hincapié en la concordancia de textos para encontrar resultados basados en términos específicos, lo que equivale a una búsqueda de texto completo.
      • Recuperación híbrida: Combina los enfoques Dense y Sparse, capturando el contexto completo y las palabras clave específicas para obtener resultados de búsqueda completos.

      Al integrar estos métodos, la búsqueda híbrida de Milvus equilibra las similitudes semánticas y léxicas, mejorando la relevancia global de los resultados de la búsqueda. Este cuaderno mostrará el proceso de configuración y uso de estas estrategias de recuperación, destacando su eficacia en varios escenarios de búsqueda.
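Como esbozo mínimo y orientativo de estos tres modos (no es el código del cuaderno; los nombres de colección y de campos, así como los vectores de consulta, son suposiciones), una búsqueda híbrida combina una petición densa y otra dispersa y fusiona ambos resultados con un ranker:

```python
from pymilvus import MilvusClient, AnnSearchRequest, RRFRanker

client = MilvusClient(uri="http://localhost:19530")

# Placeholder query vectors (in practice, produced by your embedding models)
dense_query = [0.1] * 768
sparse_query = {17: 0.6, 4820: 0.3, 99999: 0.1}

# Dense request: semantic similarity on the dense embedding field
dense_req = AnnSearchRequest(
    data=[dense_query],
    anns_field="dense_vector",
    param={"metric_type": "IP", "params": {"nprobe": 10}},
    limit=10,
)

# Sparse request: term-based matching on the sparse embedding field
sparse_req = AnnSearchRequest(
    data=[sparse_query],
    anns_field="sparse_vector",
    param={"metric_type": "IP", "params": {"drop_ratio_search": 0.2}},
    limit=10,
)

# Hybrid retrieval: fuse both result lists with Reciprocal Rank Fusion
results = client.hybrid_search(
    collection_name="hybrid_demo",
    reqs=[dense_req, sparse_req],
    ranker=RRFRanker(),
    limit=10,
    output_fields=["text"],
)
```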

      diff --git a/localization/v2.5.x/site/es/userGuide/collections/manage-collections.md b/localization/v2.5.x/site/es/userGuide/collections/manage-collections.md index b0efd24c9..79235df40 100644 --- a/localization/v2.5.x/site/es/userGuide/collections/manage-collections.md +++ b/localization/v2.5.x/site/es/userGuide/collections/manage-collections.md @@ -161,7 +161,7 @@ title: Explicación de las colecciones
    • Iterador de búsqueda

    • Consulta

    • Búsqueda de texto completo

    • -
    • Coincidencia de palabras clave

    • +
    • Coincidencia de texto

    Además, Milvus también proporciona mejoras para aumentar el rendimiento y la eficacia de la búsqueda. Están desactivadas por defecto, y usted puede activarlas y utilizarlas según sus necesidades de servicio. Son las siguientes

      diff --git a/localization/v2.5.x/site/es/userGuide/manage-indexes/index-with-gpu.md b/localization/v2.5.x/site/es/userGuide/manage-indexes/index-with-gpu.md index 5e91c638c..d0f6d4de9 100644 --- a/localization/v2.5.x/site/es/userGuide/manage-indexes/index-with-gpu.md +++ b/localization/v2.5.x/site/es/userGuide/manage-indexes/index-with-gpu.md @@ -203,8 +203,8 @@ collection.search(

      Cuando utilice índices GPU, tenga en cuenta ciertas restricciones:

        -
      • Para GPU_IVF_FLAT, el valor máximo para limit es 256.

      • -
      • Para GPU_IVF_PQ y GPU_CAGRA, el valor máximo de limit es 1024.

      • +
• Para GPU_IVF_FLAT, el valor máximo para limit es 1024.

      • +
      • Para GPU_IVF_PQ y GPU_CAGRA, el valor máximo para limit es 1024.

      • Aunque no hay un límite establecido para GPU_BRUTE_FORCE, se recomienda no superar los 4096 para evitar posibles problemas de rendimiento.

• Actualmente, los índices GPU no soportan la distancia COSINE. Si se requiere la distancia COSINE, los datos deben normalizarse primero y, a continuación, puede utilizarse la distancia de producto interno (IP) como sustituto (véase el esbozo que sigue a esta lista).

      • La carga de la protección OOM para los índices de la GPU no está totalmente soportada, demasiados datos pueden provocar fallos en el QueryNode.
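Como esbozo orientativo del sustituto de COSINE mencionado en la lista (datos y nombres hipotéticos), basta con normalizar los vectores con la norma L2 antes de insertarlos y construir el índice GPU con metric_type="IP":

```python
import numpy as np

def l2_normalize(vectors: np.ndarray) -> np.ndarray:
    # Divide each row by its L2 norm so that inner product equals cosine similarity
    norms = np.linalg.norm(vectors, axis=1, keepdims=True)
    return vectors / np.clip(norms, 1e-12, None)

embeddings = np.random.rand(1000, 128).astype(np.float32)   # stand-in data
normalized = l2_normalize(embeddings)
# Insert `normalized` into the collection and build the GPU index with metric_type="IP".
```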

      • diff --git a/localization/v2.5.x/site/es/userGuide/schema/analyzer/analyzer-overview.md b/localization/v2.5.x/site/es/userGuide/schema/analyzer/analyzer-overview.md index ae04ccd10..371ad5066 100644 --- a/localization/v2.5.x/site/es/userGuide/schema/analyzer/analyzer-overview.md +++ b/localization/v2.5.x/site/es/userGuide/schema/analyzer/analyzer-overview.md @@ -24,12 +24,12 @@ summary: >- >

        En el tratamiento de textos, un analizador es un componente crucial que convierte el texto en bruto en un formato estructurado que permite realizar búsquedas. Cada analizador suele constar de dos elementos básicos: un tokenizador y un filtro. Juntos, transforman el texto de entrada en tokens, refinan estos tokens y los preparan para una indexación y recuperación eficientes.

        -

        En Milvus, los analizadores se configuran durante la creación de la colección cuando se añaden los campos VARCHAR al esquema de la colección. Los tokens producidos por un analizador pueden utilizarse para construir un índice para la concordancia de palabras clave o convertirse en incrustaciones dispersas para la búsqueda de texto completo. Para obtener más información, consulte Coincidencia de palabras clave o Búsqueda de texto completo.

        +

        En Milvus, los analizadores se configuran durante la creación de la colección cuando se añaden los campos VARCHAR al esquema de la colección. Los tokens producidos por un analizador pueden utilizarse para construir un índice para la correspondencia de texto o convertirse en incrustaciones dispersas para la búsqueda de texto completo. Para obtener más información, consulte Coincidencia de texto o Búsqueda de texto completo.

        El uso de analizadores puede afectar al rendimiento.

          -
        • Búsqueda de texto completo: Para la búsqueda de texto completo, los canales DataNode y QueryNode consumen datos más lentamente porque deben esperar a que se complete la tokenización. Como resultado, los datos recién ingestados tardan más en estar disponibles para la búsqueda.

        • -
        • Coincidencia de palabras clave: En el caso de la concordancia de palabras clave, la creación de índices también es más lenta, ya que la tokenización debe finalizar antes de que se pueda crear un índice.

        • +
        • Búsqueda de texto completo: Para la búsqueda de texto completo, los canales DataNode y QueryNode consumen datos más lentamente porque deben esperar a que se complete la tokenización. Como resultado, los datos recién ingresados tardan más en estar disponibles para la búsqueda.

        • +
        • Coincidencia de texto: Para la coincidencia de texto, la creación de índices también es más lenta, ya que la tokenización debe finalizar antes de que se pueda crear un índice.
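A modo de esbozo mínimo (el nombre del campo y el analizador "english" se usan solo como ejemplo), así se asocia un analizador a un campo VARCHAR al definir el esquema de la colección:

```python
from pymilvus import MilvusClient, DataType

schema = MilvusClient.create_schema(auto_id=True)
schema.add_field(field_name="id", datatype=DataType.INT64, is_primary=True)
schema.add_field(
    field_name="text",
    datatype=DataType.VARCHAR,
    max_length=1000,
    enable_analyzer=True,                 # tokenize this field with an analyzer
    analyzer_params={"type": "english"},  # built-in English analyzer
    enable_match=True,                    # build the inverted index used by TEXT_MATCH
)
```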

        Anatomía de un analizador

    -

    Para obtener más información sobre los parámetros de búsqueda por similitud, consulte Búsqueda básica de RNA.

    +

    Para obtener más información sobre los parámetros de búsqueda de similitudes, consulte Búsqueda básica de RNA.

    +

    Límites

    Cuando utilice vectores dispersos en Milvus, tenga en cuenta los siguientes límites:

    +
      +
    • Actualmente, sólo se admite la métrica de distancia IP para vectores dispersos. La alta dimensionalidad de los vectores dispersos hace que las distancias L2 y coseno sean poco prácticas.

    • +
    • Para los campos de vectores dispersos, sólo se admiten los tipos de índice SPARSE_INVERTED_INDEX y SPARSE_WAND.

    • +
    • Tipos de datos admitidos para vectores dispersos:

      +
        +
      • La parte de dimensión debe ser un entero de 32 bits sin signo;
      • +
      • La parte de valor puede ser un número de coma flotante de 32 bits no negativo.
      • +
    • +
    • Los vectores dispersos deben cumplir los siguientes requisitos para la inserción y la búsqueda:

      +
        +
      • Al menos un valor del vector es distinto de cero;
      • +
      • Los índices del vector no son negativos.
      • +
    • +
    +
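Un esbozo orientativo que refleja estos límites (los nombres de colección y de campo son suposiciones): el vector disperso se pasa como un diccionario {dimensión: valor}, se indexa con SPARSE_INVERTED_INDEX sobre la métrica IP y se consulta con drop_ratio_search:

```python
from pymilvus import MilvusClient, DataType

client = MilvusClient(uri="http://localhost:19530")

schema = client.create_schema(auto_id=True)
schema.add_field(field_name="id", datatype=DataType.INT64, is_primary=True)
schema.add_field(field_name="sparse", datatype=DataType.SPARSE_FLOAT_VECTOR)

index_params = client.prepare_index_params()
index_params.add_index(
    field_name="sparse",
    index_type="SPARSE_INVERTED_INDEX",
    metric_type="IP",                    # only IP is supported for sparse vectors
    params={"drop_ratio_build": 0.2},    # drop the smallest 20% of values at build time
)
client.create_collection("sparse_demo", schema=schema, index_params=index_params)

# Keys are uint32 dimensions, values are non-negative float32 weights;
# at least one value per vector must be non-zero.
client.insert("sparse_demo", [{"sparse": {10: 0.5, 4096: 0.3, 70000: 0.2}}])

res = client.search(
    collection_name="sparse_demo",
    data=[{10: 0.9, 4096: 0.1}],
    anns_field="sparse",
    limit=3,
    search_params={"params": {"drop_ratio_search": 0.2}},
)
```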

    FAQ

      +
    • ¿Puede explicar la diferencia entre SPARSE_INVERTED_INDEX y SPARSE_WAND, y cómo puedo elegir entre ellos?

      +

      SPARSE_INVERTED_INDEX es un índice invertido tradicional, mientras que SPARSE_WAND utiliza el algoritmo Weak-AND para reducir el número de evaluaciones de distancia IP completa durante la búsqueda. SPARSE_WAND suele ser más rápido, pero su rendimiento puede disminuir al aumentar la densidad del vector. Para elegir entre ellos, realice experimentos y pruebas comparativas basadas en su conjunto de datos y caso de uso específicos.

    • +
    • ¿Cómo debo elegir los parámetros drop_ratio_build y drop_ratio_search?

      +

      La elección de drop_ratio_build y drop_ratio_search depende de las características de los datos y de los requisitos de latencia/rendimiento y precisión de la búsqueda.

    • +
    • ¿Puede la dimensión de una incrustación dispersa ser cualquier valor discreto dentro del espacio uint32?

      +

      Sí, con una excepción. La dimensión de una incrustación dispersa puede ser cualquier valor en el intervalo de [0, maximum of uint32). Esto significa que no se puede utilizar el valor máximo de uint32.

    • +
    • ¿Las búsquedas en segmentos crecientes se realizan a través de un índice o por fuerza bruta?

      +

      Las búsquedas en segmentos crecientes se realizan a través de un índice del mismo tipo que el índice de segmento sellado. Para nuevos segmentos crecientes antes de que se construya el índice, se utiliza una búsqueda por fuerza bruta.

    • +
    • ¿Es posible tener vectores dispersos y densos en una misma colección?

      +

      Sí, con el soporte de tipos de vectores múltiples, puede crear colecciones con columnas de vectores tanto dispersos como densos y realizar búsquedas híbridas en ellas.

    • +
    diff --git a/localization/v2.5.x/site/es/userGuide/search-query-get/boolean.md b/localization/v2.5.x/site/es/userGuide/search-query-get/boolean.md index adacaf1f3..eef4ff25d 100644 --- a/localization/v2.5.x/site/es/userGuide/search-query-get/boolean.md +++ b/localization/v2.5.x/site/es/userGuide/search-query-get/boolean.md @@ -701,9 +701,9 @@ curl --request POST \​

    Match operators​

    Match operators include:​

    • like: Match constants or prefixes (prefix%), infixes (%infix%), and suffixes (%suffix) within constants. It relies on a brute-force search mechanism using wildcards and does not involve text tokenization. While it can achieve exact matches, its query efficiency is relatively low, making it suitable for simple matching tasks or queries on smaller datasets.​

    • -
    • TEXT_MATCH: Match specific terms or keywords on VARCHAR fields, using tokenization and inverted index to enable efficient text search. Compared to like, TEXT_MATCH offers more advanced text tokenization and filtering capabilities. It is suited for large-scale datasets where higher query performance is required for complex text search scenarios.​ -

      -

      To use the TEXT_MATCH filter expression, you must enable text matching for the target VARCHAR field when creating the collection. For details, refer to ​Keyword Match.​

      +
    • TEXT_MATCH: Match specific terms or keywords on VARCHAR fields, using tokenization and inverted index to enable efficient text search. Compared to like, TEXT_MATCH offers more advanced text tokenization and filtering capabilities. It is suited for large-scale datasets where higher query performance is required for complex text search scenarios.​

      +

      +

      To use the TEXT_MATCH filter expression, you must enable text matching for the target VARCHAR field when creating the collection. For details, refer to Text Match.​

    Example 1: Apply filter on scalar field​

The following example demonstrates how to filter products whose color is red. In this case, you can quickly filter all red products by matching the prefix 'red%'. Similarly, you can use the expression color in ['red_7025', 'red_4794', 'red_9392'] to filter all red products. However, when the data is more complex, we recommend using the like operator for more efficient filtering.
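As a rough sketch of the prefix filter described above (the collection and field names are placeholders, not part of the original sample code):

```python
from pymilvus import MilvusClient

client = MilvusClient(uri="http://localhost:19530")

# Prefix match: any color value starting with "red"
filter_like = "color like 'red%'"

# Explicit enumeration for a small, known set of values
filter_in = "color in ['red_7025', 'red_4794', 'red_9392']"

res = client.query(
    collection_name="YOUR_COLLECTION_NAME",
    filter=filter_like,
    output_fields=["id", "color"],
)
```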

    @@ -858,8 +858,8 @@ curl --request POST \​ ]​
    -

    Example 3: Keyword match on VARCHAR fields​

    The TEXT_MATCH expression is used for keyword match on VARCHAR fields. By default, it applies an OR logic, but you can combine it with other logical operators to create more complex query conditions. For details, refer to ​Keyword Match.​

    -

    The following example demonstrates how to use the TEXT_MATCH expression to filter products where the description field contains either the keyword "Apple" or "iPhone":​

    +

    Example 3: Text match on VARCHAR fields​

    The TEXT_MATCH expression is used for text match on VARCHAR fields. By default, it applies an OR logic, but you can combine it with other logical operators to create more complex query conditions. For details, refer to Text Match.​

    +

    The following example demonstrates how to use the TEXT_MATCH expression to filter products where the description field contains either the term "Apple" or "iPhone":​
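For instance, a minimal Python sketch of that filter might look as follows (the collection name is a placeholder):

```python
from pymilvus import MilvusClient

client = MilvusClient(uri="http://localhost:19530")

# OR semantics: entities whose description contains "Apple" or "iPhone"
filter = "TEXT_MATCH(description, 'Apple iPhone')"

res = client.query(
    collection_name="YOUR_COLLECTION_NAME",
    filter=filter,
    output_fields=["id", "description"],
)
```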

    Python Java diff --git a/localization/v2.5.x/site/es/userGuide/search-query-get/full-text-search.json b/localization/v2.5.x/site/es/userGuide/search-query-get/full-text-search.json index 00a0341b5..8b35ce3a3 100644 --- a/localization/v2.5.x/site/es/userGuide/search-query-get/full-text-search.json +++ b/localization/v2.5.x/site/es/userGuide/search-query-get/full-text-search.json @@ -1 +1 @@ -{"codeList":["from pymilvus import MilvusClient, DataType, Function, FunctionType​\n​\nschema = MilvusClient.create_schema()​\n​\nschema.add_field(field_name=\"id\", datatype=DataType.INT64, is_primary=True, auto_id=True)​\nschema.add_field(field_name=\"text\", datatype=DataType.VARCHAR, max_length=1000, enable_analyzer=True)​\nschema.add_field(field_name=\"sparse\", datatype=DataType.SPARSE_FLOAT_VECTOR)​\n\n","bm25_function = Function(​\n name=\"text_bm25_emb\", # Function name​\n input_field_names=[\"text\"], # Name of the VARCHAR field containing raw text data​\n output_field_names=[\"sparse\"], # Name of the SPARSE_FLOAT_VECTOR field reserved to store generated embeddings​\n function_type=FunctionType.BM25,​\n)​\n​\nschema.add_function(bm25_function)​\n\n","index_params = MilvusClient.prepare_index_params()​\n​\nindex_params.add_index(​\n field_name=\"sparse\",​\n index_type=\"AUTOINDEX\", ​\n metric_type=\"BM25\"​\n)​\n\n","MilvusClient.create_collection(​\n collection_name='demo', ​\n schema=schema, ​\n index_params=index_params​\n)​\n\n","MilvusClient.insert('demo', [​\n {'text': 'Artificial intelligence was founded as an academic discipline in 1956.'},​\n {'text': 'Alan Turing was the first person to conduct substantial research in AI.'},​\n {'text': 'Born in Maida Vale, London, Turing was raised in southern England.'},​\n])​\n\n","search_params = {​\n 'params': {'drop_ratio_search': 0.6},​\n}​\n​\nMilvusClient.search(​\n collection_name='demo', ​\n data=['Who started AI research?'],​\n anns_field='sparse',​\n limit=3,​\n search_params=search_params​\n)​\n\n"],"headingContent":"Full Text Search​","anchorList":[{"label":"Búsqueda de texto completo","href":"Full-Text-Search​","type":1,"isActive":false},{"label":"Descripción general","href":"Overview​","type":2,"isActive":false},{"label":"Crear una colección para la búsqueda de texto completo","href":"Create-a-collection-for-full-text-search​","type":2,"isActive":false},{"label":"Inserte los datos de texto","href":"Insert-text-data","type":2,"isActive":false},{"label":"Búsqueda de texto completo","href":"Perform-full-text-search","type":2,"isActive":false}]} \ No newline at end of file +{"codeList":["from pymilvus import MilvusClient, DataType, Function, FunctionType​\n​\nschema = MilvusClient.create_schema()​\n​\nschema.add_field(field_name=\"id\", datatype=DataType.INT64, is_primary=True, auto_id=True)​\nschema.add_field(field_name=\"text\", datatype=DataType.VARCHAR, max_length=1000, enable_analyzer=True)​\nschema.add_field(field_name=\"sparse\", datatype=DataType.SPARSE_FLOAT_VECTOR)​\n\n","import io.milvus.v2.common.DataType;\nimport io.milvus.v2.service.collection.request.AddFieldReq;\nimport io.milvus.v2.service.collection.request.CreateCollectionReq;\n\nCreateCollectionReq.CollectionSchema schema = CreateCollectionReq.CollectionSchema.builder()\n .build();\nschema.addField(AddFieldReq.builder()\n .fieldName(\"id\")\n .dataType(DataType.Int64)\n .isPrimaryKey(true)\n .autoID(true)\n .build());\nschema.addField(AddFieldReq.builder()\n .fieldName(\"text\")\n .dataType(DataType.VarChar)\n .maxLength(1000)\n .enableAnalyzer(true)\n 
.build());\nschema.addField(AddFieldReq.builder()\n .fieldName(\"sparse\")\n .dataType(DataType.SparseFloatVector)\n .build());\n","import { MilvusClient, DataType } from \"@zilliz/milvus2-sdk-node\";\n\nconst address = \"http://localhost:19530\";\nconst token = \"root:Milvus\";\nconst client = new MilvusClient({address, token});\nconst schema = [\n {\n name: \"id\",\n data_type: DataType.Int64,\n is_primary_key: true,\n },\n {\n name: \"text\",\n data_type: \"VarChar\",\n enable_analyzer: true,\n enable_match: true,\n max_length: 1000,\n },\n {\n name: \"sparse\",\n data_type: DataType.SparseFloatVector,\n },\n];\n\n\nconsole.log(res.results)\n","export schema='{\n \"autoId\": true,\n \"enabledDynamicField\": false,\n \"fields\": [\n {\n \"fieldName\": \"id\",\n \"dataType\": \"Int64\",\n \"isPrimary\": true\n },\n {\n \"fieldName\": \"text\",\n \"dataType\": \"VarChar\",\n \"elementTypeParams\": {\n \"max_length\": 1000,\n \"enable_analyzer\": true\n }\n },\n {\n \"fieldName\": \"sparse\",\n \"dataType\": \"SparseFloatVector\"\n }\n ]\n }'\n","bm25_function = Function(​\n name=\"text_bm25_emb\", # Function name​\n input_field_names=[\"text\"], # Name of the VARCHAR field containing raw text data​\n output_field_names=[\"sparse\"], # Name of the SPARSE_FLOAT_VECTOR field reserved to store generated embeddings​\n function_type=FunctionType.BM25,​\n)​\n​\nschema.add_function(bm25_function)​\n\n","import io.milvus.common.clientenum.FunctionType;\nimport io.milvus.v2.service.collection.request.CreateCollectionReq.Function;\n\nimport java.util.*;\n\nschema.addFunction(Function.builder()\n .functionType(FunctionType.BM25)\n .name(\"text_bm25_emb\")\n .inputFieldNames(Collections.singletonList(\"text\"))\n .outputFieldNames(Collections.singletonList(\"vector\"))\n .build());\n","const functions = [\n {\n name: 'text_bm25_emb',\n description: 'bm25 function',\n type: FunctionType.BM25,\n input_field_names: ['text'],\n output_field_names: ['vector'],\n params: {},\n },\n];\n","export schema='{\n \"autoId\": true,\n \"enabledDynamicField\": false,\n \"fields\": [\n {\n \"fieldName\": \"id\",\n \"dataType\": \"Int64\",\n \"isPrimary\": true\n },\n {\n \"fieldName\": \"text\",\n \"dataType\": \"VarChar\",\n \"elementTypeParams\": {\n \"max_length\": 1000,\n \"enable_analyzer\": true\n }\n },\n {\n \"fieldName\": \"sparse\",\n \"dataType\": \"SparseFloatVector\"\n }\n ],\n \"functions\": [\n {\n \"name\": \"text_bm25_emb\",\n \"type\": \"BM25\",\n \"inputFieldNames\": [\"text\"],\n \"outputFieldNames\": [\"sparse\"],\n \"params\": {}\n }\n ]\n }'\n","index_params = MilvusClient.prepare_index_params()​\n​\nindex_params.add_index(​\n field_name=\"sparse\",​\n index_type=\"AUTOINDEX\", ​\n metric_type=\"BM25\"​\n)​\n\n","import io.milvus.v2.common.IndexParam;\n\nList indexes = new ArrayList<>();\nindexes.add(IndexParam.builder()\n .fieldName(\"sparse\")\n .indexType(IndexParam.IndexType.SPARSE_INVERTED_INDEX)\n .metricType(IndexParam.MetricType.BM25)\n .build());\n","const index_params = [\n {\n fieldName: \"sparse\",\n metricType: \"BM25\",\n indexType: \"AUTOINDEX\",\n },\n];\n","export indexParams='[\n {\n \"fieldName\": \"sparse\",\n \"metricType\": \"BM25\",\n \"indexType\": \"AUTOINDEX\"\n }\n ]'\n","MilvusClient.create_collection(​\n collection_name='demo', ​\n schema=schema, ​\n index_params=index_params​\n)​\n\n","import io.milvus.v2.service.collection.request.CreateCollectionReq;\n\nCreateCollectionReq requestCreate = CreateCollectionReq.builder()\n .collectionName(\"demo\")\n 
.collectionSchema(schema)\n .indexParams(indexes)\n .build();\nclient.createCollection(requestCreate);\n","await client.create_collection(\n collection_name: 'demo', \n schema: schema, \n index_params: index_params\n);\n","export CLUSTER_ENDPOINT=\"http://localhost:19530\"\nexport TOKEN=\"root:Milvus\"\n\ncurl --request POST \\\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/collections/create\" \\\n--header \"Authorization: Bearer ${TOKEN}\" \\\n--header \"Content-Type: application/json\" \\\n-d \"{\n \\\"collectionName\\\": \\\"demo\\\",\n \\\"schema\\\": $schema,\n \\\"indexParams\\\": $indexParams\n}\"\n","client.insert('demo', [\n {'text': 'information retrieval is a field of study.'},\n {'text': 'information retrieval focuses on finding relevant information in large datasets.'},\n {'text': 'data mining and information retrieval overlap in research.'},\n])\n\n","import com.google.gson.Gson;\nimport com.google.gson.JsonObject;\n\nimport io.milvus.v2.service.vector.request.InsertReq;\n\nGson gson = new Gson();\nList rows = Arrays.asList(\n gson.fromJson(\"{\\\"text\\\": \\\"information retrieval is a field of study.\\\"}\", JsonObject.class),\n gson.fromJson(\"{\\\"text\\\": \\\"information retrieval focuses on finding relevant information in large datasets.\\\"}\", JsonObject.class),\n gson.fromJson(\"{\\\"text\\\": \\\"data mining and information retrieval overlap in research.\\\"}\", JsonObject.class)\n);\n\nclient.insert(InsertReq.builder()\n .collectionName(\"demo\")\n .data(rows)\n .build());\n","await client.insert({\ncollection_name: 'demo', \ndata: [\n {'text': 'information retrieval is a field of study.'},\n {'text': 'information retrieval focuses on finding relevant information in large datasets.'},\n {'text': 'data mining and information retrieval overlap in research.'},\n]);\n","curl --request POST \\\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/entities/insert\" \\\n--header \"Authorization: Bearer ${TOKEN}\" \\\n--header \"Content-Type: application/json\" \\\n-d '{\n \"data\": [\n {\"text\": \"information retrieval is a field of study.\"},\n {\"text\": \"information retrieval focuses on finding relevant information in large datasets.\"},\n {\"text\": \"data mining and information retrieval overlap in research.\"} \n ],\n \"collectionName\": \"demo\"\n}'\n","search_params = {​\n 'params': {'drop_ratio_search': 0.6},​\n}​\n​\nMilvusClient.search(​\n collection_name='demo', ​\n data=['whats the focus of information retrieval?'],​\n anns_field='sparse',​\n limit=3,​\n search_params=search_params​\n)​\n\n","import io.milvus.v2.service.vector.request.SearchReq;\nimport io.milvus.v2.service.vector.request.data.EmbeddedText;\nimport io.milvus.v2.service.vector.response.SearchResp;\n\nMap searchParams = new HashMap<>();\nsearchParams.put(\"drop_ratio_search\", 0.6);\nSearchResp searchResp = client.search(SearchReq.builder()\n .collectionName(\"demo\")\n .data(Collections.singletonList(new EmbeddedText(\"whats the focus of information retrieval?\")))\n .annsField(\"sparse\")\n .topK(3)\n .searchParams(searchParams)\n .outputFields(Collections.singletonList(\"text\"))\n .build());\n","await client.search(\n collection_name: 'demo', \n data: ['whats the focus of information retrieval?'],\n anns_field: 'sparse',\n limit: 3,\n params: {'drop_ratio_search': 0.6},\n)\n","curl --request POST \\\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/entities/search\" \\\n--header \"Authorization: Bearer ${TOKEN}\" \\\n--header \"Content-Type: application/json\" \\\n--data-raw '{\n \"collectionName\": \"demo\",\n 
\"data\": [\n \"whats the focus of information retrieval?\"\n ],\n \"annsField\": \"sparse\",\n \"limit\": 3,\n \"outputFields\": [\n \"text\"\n ],\n \"searchParams\":{\n \"params\":{\n \"drop_ratio_search\":0.6\n }\n }\n}'\n"],"headingContent":"Full Text Search​","anchorList":[{"label":"Búsqueda de texto completo","href":"Full-Text-Search​","type":1,"isActive":false},{"label":"Descripción general","href":"Overview​","type":2,"isActive":false},{"label":"Crear una colección para la búsqueda de texto completo","href":"Create-a-collection-for-full-text-search​","type":2,"isActive":false},{"label":"Insertar datos de texto","href":"Insert-text-data","type":2,"isActive":false},{"label":"Búsqueda de texto completo","href":"Perform-full-text-search","type":2,"isActive":false}]} \ No newline at end of file diff --git a/localization/v2.5.x/site/es/userGuide/search-query-get/full-text-search.md b/localization/v2.5.x/site/es/userGuide/search-query-get/full-text-search.md index 45a6bcb76..a1ae67d5c 100644 --- a/localization/v2.5.x/site/es/userGuide/search-query-get/full-text-search.md +++ b/localization/v2.5.x/site/es/userGuide/search-query-get/full-text-search.md @@ -82,6 +82,8 @@ summary: >-
  • Un campo SPARSE_FLOAT_VECTOR reservado para almacenar incrustaciones dispersas que Milvus generará automáticamente para el campo VARCHAR.

  • Definir el esquema de la colección

    En primer lugar, cree el esquema y añada los campos necesarios.

    +
    from pymilvus import MilvusClient, DataType, Function, FunctionType​
     ​
     schema = MilvusClient.create_schema()​
    @@ -90,6 +92,80 @@ schema.add_field(field_name="id", dat
     schema.add_field(field_name="text", datatype=DataType.VARCHAR, max_length=1000, enable_analyzer=True)​
     schema.add_field(field_name="sparse", datatype=DataType.SPARSE_FLOAT_VECTOR)​
     
    +
    +
    import io.milvus.v2.common.DataType;
    +import io.milvus.v2.service.collection.request.AddFieldReq;
    +import io.milvus.v2.service.collection.request.CreateCollectionReq;
    +
    +CreateCollectionReq.CollectionSchema schema = CreateCollectionReq.CollectionSchema.builder()
    +        .build();
    +schema.addField(AddFieldReq.builder()
    +        .fieldName("id")
    +        .dataType(DataType.Int64)
    +        .isPrimaryKey(true)
    +        .autoID(true)
    +        .build());
    +schema.addField(AddFieldReq.builder()
    +        .fieldName("text")
    +        .dataType(DataType.VarChar)
    +        .maxLength(1000)
    +        .enableAnalyzer(true)
    +        .build());
    +schema.addField(AddFieldReq.builder()
    +        .fieldName("sparse")
    +        .dataType(DataType.SparseFloatVector)
    +        .build());
    +
    +
    import { MilvusClient, DataType } from "@zilliz/milvus2-sdk-node";
    +
    +const address = "http://localhost:19530";
    +const token = "root:Milvus";
    +const client = new MilvusClient({address, token});
    +const schema = [
    +  {
    +    name: "id",
    +    data_type: DataType.Int64,
    +    is_primary_key: true,
    +  },
    +  {
    +    name: "text",
    +    data_type: "VarChar",
    +    enable_analyzer: true,
    +    enable_match: true,
    +    max_length: 1000,
    +  },
    +  {
    +    name: "sparse",
    +    data_type: DataType.SparseFloatVector,
    +  },
    +];
    +
    +
    +console.log(res.results)
    +
    +
    export schema='{
    +        "autoId": true,
    +        "enabledDynamicField": false,
    +        "fields": [
    +            {
    +                "fieldName": "id",
    +                "dataType": "Int64",
    +                "isPrimary": true
    +            },
    +            {
    +                "fieldName": "text",
    +                "dataType": "VarChar",
    +                "elementTypeParams": {
    +                    "max_length": 1000,
    +                    "enable_analyzer": true
    +                }
    +            },
    +            {
    +                "fieldName": "sparse",
    +                "dataType": "SparseFloatVector"
    +            }
    +        ]
    +    }'
     

    En esta configuración

      @@ -98,6 +174,8 @@ schema.add_field(field_name="sparse",
    • sparseCampo vectorial : un campo vectorial reservado para almacenar incrustaciones dispersas generadas internamente para operaciones de búsqueda de texto completo. El tipo de datos debe ser SPARSE_FLOAT_VECTOR.

    Ahora, defina una función que convierta su texto en representaciones vectoriales dispersas y añádala al esquema.

    +
    bm25_function = Function(​
         name="text_bm25_emb", # Function name​
         input_field_names=["text"], # Name of the VARCHAR field containing raw text data​
    @@ -107,6 +185,62 @@ schema.add_field(field_name="sparse",
     ​
     schema.add_function(bm25_function)​
     
    +
    +
    import io.milvus.common.clientenum.FunctionType;
    +import io.milvus.v2.service.collection.request.CreateCollectionReq.Function;
    +
    +import java.util.*;
    +
    +schema.addFunction(Function.builder()
    +        .functionType(FunctionType.BM25)
    +        .name("text_bm25_emb")
    +        .inputFieldNames(Collections.singletonList("text"))
+        .outputFieldNames(Collections.singletonList("sparse"))
    +        .build());
    +
    +
    const functions = [
    +    {
    +      name: 'text_bm25_emb',
    +      description: 'bm25 function',
    +      type: FunctionType.BM25,
    +      input_field_names: ['text'],
+      output_field_names: ['sparse'],
    +      params: {},
    +    },
    +];
    +
    +
    export schema='{
    +        "autoId": true,
    +        "enabledDynamicField": false,
    +        "fields": [
    +            {
    +                "fieldName": "id",
    +                "dataType": "Int64",
    +                "isPrimary": true
    +            },
    +            {
    +                "fieldName": "text",
    +                "dataType": "VarChar",
    +                "elementTypeParams": {
    +                    "max_length": 1000,
    +                    "enable_analyzer": true
    +                }
    +            },
    +            {
    +                "fieldName": "sparse",
    +                "dataType": "SparseFloatVector"
    +            }
    +        ],
    +        "functions": [
    +            {
    +                "name": "text_bm25_emb",
    +                "type": "BM25",
    +                "inputFieldNames": ["text"],
    +                "outputFieldNames": ["sparse"],
    +                "params": {}
    +            }
    +        ]
    +    }'
     

    Parámetro

    Descripción

    @@ -124,6 +258,8 @@ schema.add_function(bm25_function)​

    Para colecciones con múltiples campos VARCHAR que requieran conversión de texto a vectores dispersos, añada funciones separadas al esquema de la colección, asegurándose de que cada función tiene un nombre y un valor output_field_names únicos.
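Un esbozo orientativo con dos campos VARCHAR (los nombres "title" y "content" y los campos de salida son hipotéticos); cada función BM25 recibe un nombre y un output_field_names propios:

```python
from pymilvus import Function, FunctionType

# Assumes `schema` already defines the VARCHAR fields "title" and "content"
# (with enable_analyzer=True) and the SPARSE_FLOAT_VECTOR fields
# "title_sparse" and "content_sparse".
title_bm25 = Function(
    name="title_bm25_emb",                # unique function name
    input_field_names=["title"],
    output_field_names=["title_sparse"],  # unique output field
    function_type=FunctionType.BM25,
)
content_bm25 = Function(
    name="content_bm25_emb",
    input_field_names=["content"],
    output_field_names=["content_sparse"],
    function_type=FunctionType.BM25,
)

schema.add_function(title_bm25)
schema.add_function(content_bm25)
```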

    Configurar el índice

    Tras definir el esquema con los campos necesarios y la función incorporada, configure el índice para su colección. Para simplificar este proceso, utilice AUTOINDEX como index_type, una opción que permite a Milvus elegir y configurar el tipo de índice más adecuado en función de la estructura de sus datos.

    +
    index_params = MilvusClient.prepare_index_params()​
     ​
     index_params.add_index(​
    @@ -132,6 +268,31 @@ index_params.add_index(​
         metric_type="BM25"​
     )​
     
    +
    +
    import io.milvus.v2.common.IndexParam;
    +
    +List<IndexParam> indexes = new ArrayList<>();
    +indexes.add(IndexParam.builder()
    +        .fieldName("sparse")
    +        .indexType(IndexParam.IndexType.SPARSE_INVERTED_INDEX)
    +        .metricType(IndexParam.MetricType.BM25)
    +        .build());
    +
    +
    const index_params = [
    +  {
    +    fieldName: "sparse",
    +    metricType: "BM25",
    +    indexType: "AUTOINDEX",
    +  },
    +];
    +
    +
    export indexParams='[
    +        {
    +            "fieldName": "sparse",
    +            "metricType": "BM25",
    +            "indexType": "AUTOINDEX"
    +        }
    +    ]'
     

    Parámetro

    Descripción

    @@ -142,7 +303,9 @@ index_params.add_index(​

    metric_type

    El valor de este parámetro debe establecerse en BM25 específicamente para la funcionalidad de búsqueda de texto completo.

    -

    Cree la colección

    Cree ahora la colección utilizando los parámetros de esquema e índice definidos.

    +

    Cree la colección

    Ahora cree la colección utilizando los parámetros de esquema e índice definidos.

    +
    MilvusClient.create_collection(​
         collection_name='demo', ​
         schema=schema, ​
    @@ -150,7 +313,35 @@ index_params.add_index(​
     )​
     
     
    -

    Inserte los datos de texto +
await client.createCollection({
+    collection_name: 'demo', 
+    schema: schema, 
+    index_params: index_params
+});
    +
    +
    export CLUSTER_ENDPOINT="http://localhost:19530"
    +export TOKEN="root:Milvus"
    +
    +curl --request POST \
    +--url "${CLUSTER_ENDPOINT}/v2/vectordb/collections/create" \
    +--header "Authorization: Bearer ${TOKEN}" \
    +--header "Content-Type: application/json" \
    +-d "{
    +    \"collectionName\": \"demo\",
    +    \"schema\": $schema,
    +    \"indexParams\": $indexParams
    +}"
    +
    +

    Insertar datos de texto

    Después de configurar la colección y el índice, está listo para insertar datos de texto. En este proceso, sólo tiene que proporcionar el texto sin procesar. La función incorporada que definimos anteriormente genera automáticamente el vector disperso correspondiente para cada entrada de texto.

    -
    MilvusClient.insert('demo', [​
    -    {'text': 'Artificial intelligence was founded as an academic discipline in 1956.'},​
    -    {'text': 'Alan Turing was the first person to conduct substantial research in AI.'},​
    -    {'text': 'Born in Maida Vale, London, Turing was raised in southern England.'},​
    -])​
    +    

    Después de configurar la colección y el índice, estás listo para insertar datos de texto. En este proceso, sólo necesitas proporcionar el texto en bruto. La función incorporada que definimos anteriormente genera automáticamente el vector disperso correspondiente para cada entrada de texto.

    + +
    client.insert('demo', [
    +    {'text': 'information retrieval is a field of study.'},
    +    {'text': 'information retrieval focuses on finding relevant information in large datasets.'},
    +    {'text': 'data mining and information retrieval overlap in research.'},
    +])
     
    +
    +
    import com.google.gson.Gson;
    +import com.google.gson.JsonObject;
    +
    +import io.milvus.v2.service.vector.request.InsertReq;
    +
    +Gson gson = new Gson();
    +List<JsonObject> rows = Arrays.asList(
    +        gson.fromJson("{\"text\": \"information retrieval is a field of study.\"}", JsonObject.class),
    +        gson.fromJson("{\"text\": \"information retrieval focuses on finding relevant information in large datasets.\"}", JsonObject.class),
    +        gson.fromJson("{\"text\": \"data mining and information retrieval overlap in research.\"}", JsonObject.class)
    +);
    +
    +client.insert(InsertReq.builder()
    +        .collectionName("demo")
    +        .data(rows)
    +        .build());
    +
    +
    await client.insert({
    +collection_name: 'demo', 
    +data: [
    +    {'text': 'information retrieval is a field of study.'},
    +    {'text': 'information retrieval focuses on finding relevant information in large datasets.'},
    +    {'text': 'data mining and information retrieval overlap in research.'},
+]});
    +
    +
    curl --request POST \
    +--url "${CLUSTER_ENDPOINT}/v2/vectordb/entities/insert" \
    +--header "Authorization: Bearer ${TOKEN}" \
    +--header "Content-Type: application/json" \
    +-d '{
    +    "data": [
    +        {"text": "information retrieval is a field of study."},
    +        {"text": "information retrieval focuses on finding relevant information in large datasets."},
    +        {"text": "data mining and information retrieval overlap in research."}       
    +    ],
    +    "collectionName": "demo"
    +}'
     

    Una vez que haya insertado datos en su colección, puede realizar búsquedas de texto completo utilizando consultas de texto sin procesar. Milvus convierte automáticamente su consulta en un vector disperso y clasifica los resultados de búsqueda coincidentes utilizando el algoritmo BM25, y luego devuelve los resultados topK (limit).

    +
    search_params = {​
         'params': {'drop_ratio_search': 0.6},​
     }​
     ​
     MilvusClient.search(​
         collection_name='demo', ​
    -    data=['Who started AI research?'],​
    +    data=['whats the focus of information retrieval?'],​
         anns_field='sparse',​
         limit=3,​
         search_params=search_params​
     )​
     
    +
    +
    import io.milvus.v2.service.vector.request.SearchReq;
    +import io.milvus.v2.service.vector.request.data.EmbeddedText;
    +import io.milvus.v2.service.vector.response.SearchResp;
    +
    +Map<String,Object> searchParams = new HashMap<>();
    +searchParams.put("drop_ratio_search", 0.6);
    +SearchResp searchResp = client.search(SearchReq.builder()
    +        .collectionName("demo")
    +        .data(Collections.singletonList(new EmbeddedText("whats the focus of information retrieval?")))
    +        .annsField("sparse")
    +        .topK(3)
    +        .searchParams(searchParams)
    +        .outputFields(Collections.singletonList("text"))
    +        .build());
    +
    +
+await client.search({
+    collection_name: 'demo', 
+    data: ['whats the focus of information retrieval?'],
+    anns_field: 'sparse',
+    limit: 3,
+    params: {'drop_ratio_search': 0.6},
+});
    +
    +
    curl --request POST \
    +--url "${CLUSTER_ENDPOINT}/v2/vectordb/entities/search" \
    +--header "Authorization: Bearer ${TOKEN}" \
    +--header "Content-Type: application/json" \
    +--data-raw '{
    +    "collectionName": "demo",
    +    "data": [
    +        "whats the focus of information retrieval?"
    +    ],
    +    "annsField": "sparse",
    +    "limit": 3,
    +    "outputFields": [
    +        "text"
    +    ],
    +    "searchParams":{
    +        "params":{
    +            "drop_ratio_search":0.6
    +        }
    +    }
    +}'
     

    Parámetro

    Descripción

    search_params

    -

    Un diccionario que contiene los parámetros de búsqueda.

    +

    Diccionario que contiene los parámetros de búsqueda.

    params.drop_ratio_search

    -

    Proporción de términos de baja frecuencia que deben ignorarse durante la búsqueda. Para más detalles, consulte Vector disperso.

    +

    Proporción de términos de baja frecuencia que se ignoran durante la búsqueda. Para más detalles, consulte Vector disperso.

    data

    El texto en bruto de la consulta.

    anns_field

    diff --git a/localization/v2.5.x/site/es/userGuide/search-query-get/keyword-match.json b/localization/v2.5.x/site/es/userGuide/search-query-get/keyword-match.json index 84a506baa..32c609919 100644 --- a/localization/v2.5.x/site/es/userGuide/search-query-get/keyword-match.json +++ b/localization/v2.5.x/site/es/userGuide/search-query-get/keyword-match.json @@ -1 +1 @@ -{"codeList":["from pymilvus import MilvusClient, DataType​\n​\nschema = MilvusClient.create_schema(auto_id=True, enable_dynamic_field=False)​\n​\nschema.add_field(​\n field_name='text', ​\n datatype=DataType.VARCHAR, ​\n max_length=1000, ​\n enable_analyzer=True, # Whether to enable text analysis for this field​\n enable_match=True # Whether to enable text match​\n)​\n\n","analyzer_params={​\n \"type\": \"english\"​\n}​\n​\nschema.add_field(​\n field_name='text', ​\n datatype=DataType.VARCHAR, ​\n max_length=200, ​\n enable_analyzer=True,​\n analyzer_params=analyzer_params,​\n enable_match=True, ​\n)​\n\n","TEXT_MATCH(field_name, text)​\n\n","filter = \"TEXT_MATCH(text, 'machine deep')\"​\n\n","filter = \"TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')\"​\n\n","# Match entities with `keyword1` or `keyword2`​\nfilter = \"TEXT_MATCH(text, 'keyword1 keyword2')\"​\n​\n# Assuming 'embeddings' is the vector field and 'text' is the VARCHAR field​\nresult = MilvusClient.search(​\n collection_name=\"YOUR_COLLECTION_NAME\", # Your collection name​\n anns_field=\"embeddings\", # Vector field name​\n data=[query_vector], # Query vector​\n filter=filter,​\n search_params={\"params\": {\"nprobe\": 10}},​\n limit=10, # Max. number of results to return​\n output_fields=[\"id\", \"text\"] # Fields to return​\n)​\n\n","# Match entities with both `keyword1` and `keyword2`​\nfilter = \"TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')\"​\n​\nresult = MilvusClient.query(​\n collection_name=\"YOUR_COLLECTION_NAME\",​\n filter=filter, ​\n output_fields=[\"id\", \"text\"]​\n)​\n\n"],"headingContent":"Keyword Match​","anchorList":[{"label":"Concordancia de palabras clave","href":"Keyword-Match​","type":1,"isActive":false},{"label":"Visión general","href":"Overview","type":2,"isActive":false},{"label":"Activar la concordancia de palabras clave","href":"Enable-keyword-match","type":2,"isActive":false},{"label":"Utilizar la concordancia de palabras clave","href":"Use-keyword-match","type":2,"isActive":false},{"label":"Consideraciones","href":"Considerations","type":2,"isActive":false}]} \ No newline at end of file +{"codeList":["from pymilvus import MilvusClient, DataType​\n​\nschema = MilvusClient.create_schema(auto_id=True, enable_dynamic_field=False)​\n​\nschema.add_field(​\n field_name='text', ​\n datatype=DataType.VARCHAR, ​\n max_length=1000, ​\n enable_analyzer=True, # Whether to enable text analysis for this field​\n enable_match=True # Whether to enable text match​\n)​\n\n","import io.milvus.v2.common.DataType;\nimport io.milvus.v2.service.collection.request.AddFieldReq;\nimport io.milvus.v2.service.collection.request.CreateCollectionReq;\n\nCreateCollectionReq.CollectionSchema schema = CreateCollectionReq.CollectionSchema.builder()\n .enableDynamicField(false)\n .build();\n\nschema.addField(AddFieldReq.builder()\n .fieldName(\"text\")\n .dataType(DataType.VarChar)\n .maxLength(1000)\n .enableAnalyzer(true)\n .enableMatch(true)\n .build());\n\n","const schema = [\n {\n name: \"id\",\n data_type: DataType.Int64,\n is_primary_key: true,\n },\n {\n name: \"text\",\n data_type: \"VarChar\",\n enable_analyzer: true,\n 
enable_match: true,\n max_length: 1000,\n },\n {\n name: \"sparse\",\n data_type: DataType.SparseFloatVector,\n },\n];\n\n","export schema='{\n \"autoId\": true,\n \"enabledDynamicField\": false,\n \"fields\": [\n {\n \"fieldName\": \"id\",\n \"dataType\": \"Int64\",\n \"isPrimary\": true\n },\n {\n \"fieldName\": \"text\",\n \"dataType\": \"VarChar\",\n \"elementTypeParams\": {\n \"max_length\": 1000,\n \"enable_analyzer\": true,\n \"enable_match\": true\n }\n },\n {\n \"fieldName\": \"sparse\",\n \"dataType\": \"SparseFloatVector\"\n }\n ]\n }'\n\n","analyzer_params={​\n \"type\": \"english\"​\n}​\n​\nschema.add_field(​\n field_name='text', ​\n datatype=DataType.VARCHAR, ​\n max_length=200, ​\n enable_analyzer=True,​\n analyzer_params=analyzer_params,​\n enable_match=True, ​\n)​\n\n","Map analyzerParams = new HashMap<>();\nanalyzerParams.put(\"type\", \"english\");\nschema.addField(AddFieldReq.builder()\n .fieldName(\"text\")\n .dataType(DataType.VarChar)\n .maxLength(200)\n .enableAnalyzer(true)\n .analyzerParams(analyzerParams)\n .enableMatch(true)\n .build());\n\n","const schema = [\n {\n name: \"id\",\n data_type: DataType.Int64,\n is_primary_key: true,\n },\n {\n name: \"text\",\n data_type: \"VarChar\",\n enable_analyzer: true,\n enable_match: true,\n max_length: 1000,\n analyzer_params: { type: 'english' },\n },\n {\n name: \"sparse\",\n data_type: DataType.SparseFloatVector,\n },\n];\n\n","export schema='{\n \"autoId\": true,\n \"enabledDynamicField\": false,\n \"fields\": [\n {\n \"fieldName\": \"id\",\n \"dataType\": \"Int64\",\n \"isPrimary\": true\n },\n {\n \"fieldName\": \"text\",\n \"dataType\": \"VarChar\",\n \"elementTypeParams\": {\n \"max_length\": 200,\n \"enable_analyzer\": true,\n \"enable_match\": true,\n \"analyzer_params\": {\"type\": \"english\"}\n }\n },\n {\n \"fieldName\": \"my_vector\",\n \"dataType\": \"FloatVector\",\n \"elementTypeParams\": {\n \"dim\": \"5\"\n }\n }\n ]\n }'\n\n","TEXT_MATCH(field_name, text)​\n\n","filter = \"TEXT_MATCH(text, 'machine deep')\"​\n","String filter = \"TEXT_MATCH(text, 'machine deep')\";\n","const filter = \"TEXT_MATCH(text, 'machine deep')\";\n","export filter=\"\\\"TEXT_MATCH(text, 'machine deep')\\\"\"\n","filter = \"TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')\"​\n","String filter = \"TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')\";\n","const filter = \"TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')\"\n","export filter=\"\\\"TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')\\\"\"\n","# Match entities with `keyword1` or `keyword2`​\nfilter = \"TEXT_MATCH(text, 'keyword1 keyword2')\"​\n​\n# Assuming 'embeddings' is the vector field and 'text' is the VARCHAR field​\nresult = MilvusClient.search(​\n collection_name=\"YOUR_COLLECTION_NAME\", # Your collection name​\n anns_field=\"embeddings\", # Vector field name​\n data=[query_vector], # Query vector​\n filter=filter,​\n search_params={\"params\": {\"nprobe\": 10}},​\n limit=10, # Max. 
number of results to return​\n output_fields=[\"id\", \"text\"] # Fields to return​\n)​\n\n","String filter = \"TEXT_MATCH(text, 'keyword1 keyword2')\";\n\nSearchResp searchResp = client.search(SearchReq.builder()\n .collectionName(\"YOUR_COLLECTION_NAME\")\n .annsField(\"embeddings\")\n .data(Collections.singletonList(queryVector)))\n .filter(filter)\n .topK(10)\n .outputFields(Arrays.asList(\"id\", \"text\"))\n .build());\n","// Match entities with `keyword1` or `keyword2`\nconst filter = \"TEXT_MATCH(text, 'keyword1 keyword2')\";\n\n// Assuming 'embeddings' is the vector field and 'text' is the VARCHAR field\nconst result = await client.search(\n collection_name: \"YOUR_COLLECTION_NAME\", // Your collection name\n anns_field: \"embeddings\", // Vector field name\n data: [query_vector], // Query vector\n filter: filter,\n params: {\"nprobe\": 10},\n limit: 10, // Max. number of results to return\n output_fields: [\"id\", \"text\"] //Fields to return\n);\n","export filter=\"\\\"TEXT_MATCH(text, 'keyword1 keyword2')\\\"\"\n\nexport CLUSTER_ENDPOINT=\"http://localhost:19530\"\nexport TOKEN=\"root:Milvus\"\n\ncurl --request POST \\\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/entities/search\" \\\n--header \"Authorization: Bearer ${TOKEN}\" \\\n--header \"Content-Type: application/json\" \\\n-d '{\n \"collectionName\": \"demo2\",\n \"annsField\": \"my_vector\",\n \"data\": [[0.19886812562848388, 0.06023560599112088, 0.6976963061752597, 0.2614474506242501, 0.838729485096104]],\n \"filter\": '\"$filter\"',\n \"searchParams\": {\n \"params\": {\n \"nprobe\": 10\n }\n },\n \"limit\": 3,\n \"outputFields\": [\"text\",\"id\"]\n}'\n","# Match entities with both `keyword1` and `keyword2`​\nfilter = \"TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')\"​\n​\nresult = MilvusClient.query(​\n collection_name=\"YOUR_COLLECTION_NAME\",​\n filter=filter, ​\n output_fields=[\"id\", \"text\"]​\n)​\n\n","String filter = \"TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')\";\n\nQueryResp queryResp = client.query(QueryReq.builder()\n .collectionName(\"YOUR_COLLECTION_NAME\")\n .filter(filter)\n .outputFields(Arrays.asList(\"id\", \"text\"))\n .build()\n);\n","// Match entities with both `keyword1` and `keyword2`\nconst filter = \"TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')\";\n\nconst result = await client.query(\n collection_name: \"YOUR_COLLECTION_NAME\",\n filter: filter, \n output_fields: [\"id\", \"text\"]\n)\n","export filter=\"\\\"TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')\\\"\"\n\nexport CLUSTER_ENDPOINT=\"http://localhost:19530\"\nexport TOKEN=\"root:Milvus\"\n\ncurl --request POST \\\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/entities/query\" \\\n--header \"Authorization: Bearer ${TOKEN}\" \\\n--header \"Content-Type: application/json\" \\\n-d '{\n \"collectionName\": \"demo2\",\n \"filter\": '\"$filter\"',\n \"outputFields\": [\"id\", \"text\"]\n}'\n"],"headingContent":"Text Match​","anchorList":[{"label":"Correspondencia de textos","href":"Text-Match​","type":1,"isActive":false},{"label":"Visión general","href":"Overview","type":2,"isActive":false},{"label":"Activar la concordancia de texto","href":"Enable-text-match","type":2,"isActive":false},{"label":"Utilizar la concordancia de texto","href":"Use-text-match","type":2,"isActive":false},{"label":"Consideraciones","href":"Considerations","type":2,"isActive":false}]} \ No newline at end of file diff --git a/localization/v2.5.x/site/es/userGuide/search-query-get/keyword-match.md 
b/localization/v2.5.x/site/es/userGuide/search-query-get/keyword-match.md index ace021f9e..62cce45dd 100644 --- a/localization/v2.5.x/site/es/userGuide/search-query-get/keyword-match.md +++ b/localization/v2.5.x/site/es/userGuide/search-query-get/keyword-match.md @@ -1,15 +1,15 @@ --- id: keyword-match.md summary: >- - La concordancia de palabras clave en Milvus permite una recuperación precisa - de documentos basada en términos específicos. Esta función se utiliza + La concordancia de texto en Milvus permite una recuperación precisa de + documentos basada en términos específicos. Esta función se utiliza principalmente para la búsqueda filtrada para satisfacer condiciones específicas y puede incorporar el filtrado escalar para refinar los resultados de la consulta, lo que permite búsquedas de similitud dentro de vectores que cumplen criterios escalares. -title: Concordancia de palabras clave +title: Correspondencia de textos --- -

    Concordancia de palabras clave

    La concordancia de palabras clave en Milvus permite una recuperación precisa de documentos basada en términos específicos. Esta función se utiliza principalmente para la búsqueda filtrada para satisfacer condiciones específicas y puede incorporar el filtrado escalar para refinar los resultados de la consulta, permitiendo búsquedas de similitud dentro de vectores que cumplen criterios escalares.

    +

    La concordancia de texto en Milvus permite una recuperación precisa de documentos basada en términos específicos. Esta función se utiliza principalmente para la búsqueda filtrada para satisfacer condiciones específicas y puede incorporar el filtrado escalar para refinar los resultados de la consulta, permitiendo búsquedas de similitud dentro de vectores que cumplen criterios escalares.

    -

    La concordancia de palabras clave se centra en la búsqueda de apariciones exactas de los términos de la consulta, sin puntuar la relevancia de los documentos coincidentes. Si desea recuperar los documentos más relevantes basándose en el significado semántico y la importancia de los términos de la consulta, le recomendamos que utilice la búsqueda de texto completo.

    +

    La concordancia de texto se centra en la búsqueda de apariciones exactas de los términos de la consulta, sin puntuar la relevancia de los documentos coincidentes. Si desea recuperar los documentos más relevantes basándose en el significado semántico y la importancia de los términos de la consulta, le recomendamos que utilice la búsqueda de texto completo.

    Visión general

    Milvus integra Tantivy para potenciar su índice invertido subyacente y la búsqueda por palabras clave. Para cada entrada de texto, Milvus lo indexa siguiendo el procedimiento.

    +

Milvus integra Tantivy para potenciar su índice invertido subyacente y la búsqueda de texto basada en términos. Para cada entrada de texto, Milvus la indexa siguiendo este procedimiento:

    1. Analizador: El analizador procesa el texto de entrada convirtiéndolo en palabras individuales, o tokens, y aplicando los filtros necesarios. Esto permite a Milvus construir un índice basado en estos tokens.

    2. Indexación: Tras el análisis del texto, Milvus crea un índice invertido que asigna cada token único a los documentos que lo contienen.

    -

    Cuando un usuario realiza una búsqueda de palabras clave, el índice invertido se utiliza para recuperar rápidamente todos los documentos que las contienen. Esto es mucho más rápido que escanear cada documento individualmente.

    +

    Cuando un usuario realiza una coincidencia de texto, el índice invertido se utiliza para recuperar rápidamente todos los documentos que contienen los términos. Esto es mucho más rápido que escanear cada documento individualmente.
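Como ilustración puramente conceptual (no es código de Milvus ni de Tantivy), un índice invertido puede esbozarse en unas pocas líneas de Python:

# Toy illustration of an inverted index (conceptual only)
docs = {
    1: "machine learning methods",
    2: "deep learning with neural networks",
    3: "classical machine vision",
}

inverted_index = {}
for doc_id, text in docs.items():
    for token in text.lower().split():                       # analyzer step: tokenize
        inverted_index.setdefault(token, set()).add(doc_id)  # indexing step: token -> documents

# Looking up a term returns the matching documents directly,
# without scanning every document
print(inverted_index["machine"])  # {1, 3}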

    - Keyword Match - Concordancia de palabras clave

    -

    Activar la concordancia de palabras clave

    La concordancia de palabras clave funciona en el tipo de campo VARCHAR, que es esencialmente el tipo de datos de cadena en Milvus. Para activar la concordancia de palabras clave, configure tanto enable_analyzer como enable_match en True y, a continuación, configure opcionalmente un analizador para el análisis de texto al definir el esquema de su colección.

    -

    Establezca enable_analyzer y enable_match

    Para habilitar la concordancia de palabras clave para un campo VARCHAR específico, establezca los parámetros enable_analyzer y enable_match en True al definir el esquema del campo. Esto indica a Milvus que tokenice el texto y cree un índice invertido para el campo especificado, lo que permite una comparación rápida y eficaz de las palabras clave.

    +

    La concordancia de texto funciona en el tipo de campo VARCHAR, que es esencialmente el tipo de datos de cadena en Milvus. Para habilitar la coincidencia de texto, configure tanto enable_analyzer como enable_match en True y luego, opcionalmente, configure un analizador para el análisis de texto cuando defina el esquema de su colección.

    +

    Establezca enable_analyzer y enable_match

    Para habilitar la coincidencia de texto para un campo VARCHAR específico, establezca los parámetros enable_analyzer y enable_match en True al definir el esquema del campo. Esto indica a Milvus que tokenice el texto y cree un índice invertido para el campo especificado, lo que permite coincidencias de texto rápidas y eficientes.

    +
    from pymilvus import MilvusClient, DataType​
     ​
     schema = MilvusClient.create_schema(auto_id=True, enable_dynamic_field=False)​
    @@ -83,9 +85,74 @@ schema.add_field(​
     )​
     
     
    -

    Opcional: Configurar un analizador

    El rendimiento y la precisión de la concordancia de palabras clave dependen del analizador seleccionado. Los diferentes analizadores se adaptan a varios idiomas y estructuras de texto, por lo que elegir el adecuado puede afectar significativamente a los resultados de búsqueda para su caso de uso específico.

    +
    import io.milvus.v2.common.DataType;
    +import io.milvus.v2.service.collection.request.AddFieldReq;
    +import io.milvus.v2.service.collection.request.CreateCollectionReq;
    +
    +CreateCollectionReq.CollectionSchema schema = CreateCollectionReq.CollectionSchema.builder()
    +        .enableDynamicField(false)
    +        .build();
    +
    +schema.addField(AddFieldReq.builder()
    +        .fieldName("text")
    +        .dataType(DataType.VarChar)
    +        .maxLength(1000)
    +        .enableAnalyzer(true)
    +        .enableMatch(true)
    +        .build());
    +
    +
    +
    const schema = [
    +  {
    +    name: "id",
    +    data_type: DataType.Int64,
    +    is_primary_key: true,
    +  },
    +  {
    +    name: "text",
    +    data_type: "VarChar",
    +    enable_analyzer: true,
    +    enable_match: true,
    +    max_length: 1000,
    +  },
    +  {
    +    name: "sparse",
    +    data_type: DataType.SparseFloatVector,
    +  },
    +];
    +
    +
    +
    export schema='{
    +        "autoId": true,
    +        "enabledDynamicField": false,
    +        "fields": [
    +            {
    +                "fieldName": "id",
    +                "dataType": "Int64",
    +                "isPrimary": true
    +            },
    +            {
    +                "fieldName": "text",
    +                "dataType": "VarChar",
    +                "elementTypeParams": {
    +                    "max_length": 1000,
    +                    "enable_analyzer": true,
    +                    "enable_match": true
    +                }
    +            },
    +            {
    +                "fieldName": "sparse",
    +                "dataType": "SparseFloatVector"
    +            }
    +        ]
    +    }'
    +
    +
    +

    Opcional: Configurar un analizador

    El rendimiento y la precisión de las comparaciones de texto dependen del analizador seleccionado. Los diferentes analizadores se adaptan a varios lenguajes y estructuras de texto, por lo que elegir el correcto puede tener un impacto significativo en los resultados de búsqueda para su caso de uso específico.

    Por defecto, Milvus utiliza el analizador standard, que tokeniza el texto basándose en los espacios en blanco y la puntuación, elimina los tokens de más de 40 caracteres y convierte el texto a minúsculas. No se necesitan parámetros adicionales para aplicar esta configuración por defecto. Para más información, consulte Estándar.

    En los casos en que se requiera un analizador diferente, puede configurarlo utilizando el parámetro analyzer_params. Por ejemplo, para aplicar el analizador english para procesar texto en inglés.

    +
    analyzer_params={​
         "type": "english"​
     }​
    @@ -100,8 +167,71 @@ schema.add_field(​
     )​
     
     
    -

    Milvus también proporciona otros analizadores adaptados a diferentes idiomas y escenarios. Para más detalles, consulte Visión general.

    -

    Utilizar la concordancia de palabras clave +
    const schema = [
    +  {
    +    name: "id",
    +    data_type: DataType.Int64,
    +    is_primary_key: true,
    +  },
    +  {
    +    name: "text",
    +    data_type: "VarChar",
    +    enable_analyzer: true,
    +    enable_match: true,
    +    max_length: 1000,
    +    analyzer_params: { type: 'english' },
    +  },
    +  {
    +    name: "sparse",
    +    data_type: DataType.SparseFloatVector,
    +  },
    +];
    +
    +
    +
    export schema='{
    +        "autoId": true,
    +        "enabledDynamicField": false,
    +        "fields": [
    +            {
    +                "fieldName": "id",
    +                "dataType": "Int64",
    +                "isPrimary": true
    +            },
    +            {
    +                "fieldName": "text",
    +                "dataType": "VarChar",
    +                "elementTypeParams": {
    +                    "max_length": 200,
    +                    "enable_analyzer": true,
    +                    "enable_match": true,
    +                    "analyzer_params": {"type": "english"}
    +                }
    +            },
    +            {
    +                "fieldName": "my_vector",
    +                "dataType": "FloatVector",
    +                "elementTypeParams": {
    +                    "dim": "5"
    +                }
    +            }
    +        ]
    +    }'
    +
    +
    +

    Milvus también proporciona otros analizadores adecuados para diferentes lenguajes y escenarios. Para más detalles, consulte Visión general.

    +

    Utilizar la concordancia de texto

    Una vez que haya activado la concordancia de palabras clave para un campo VARCHAR en el esquema de su colección, puede realizar concordancias de palabras clave utilizando la expresión TEXT_MATCH.

    -

    Sintaxis de la expresión TEXT_MATCH

    La expresión TEXT_MATCH se utiliza para especificar el campo y las palabras clave que se van a buscar. Su sintaxis es la siguiente

    -
    TEXT_MATCH(field_name, text)​
    +    

    Una vez que haya habilitado la coincidencia de texto para un campo VARCHAR en el esquema de su colección, puede realizar coincidencias de texto utilizando la expresión TEXT_MATCH.

    +

    Sintaxis de la expresión TEXT_MATCH

    La expresión TEXT_MATCH se utiliza para especificar el campo y los términos que se van a buscar. Su sintaxis es la siguiente

    +
    TEXT_MATCH(field_name, text)​
     
     
      -
    • field_name: El nombre del campo VARCHAR que se va a buscar.

    • -
    • text: Las palabras clave a buscar. Las palabras clave múltiples pueden separarse por espacios u otros delimitadores apropiados según el idioma y el analizador configurado.

    • +
    • field_name: El nombre del campo VARCHAR a buscar.

    • +
    • text: Los términos a buscar. Los términos múltiples pueden separarse por espacios u otros delimitadores apropiados según el idioma y el analizador configurado.

    -

    Por defecto, TEXT_MATCH utiliza la lógica de búsqueda OR, lo que significa que devolverá los documentos que contengan cualquiera de las palabras clave especificadas. Por ejemplo, para buscar documentos que contengan las palabras clave machine o deep en el campo text, utilice la siguiente expresión.

    +

    Por defecto, TEXT_MATCH utiliza la lógica de búsqueda OR, lo que significa que devolverá los documentos que contengan cualquiera de los términos especificados. Por ejemplo, para buscar documentos que contengan el término machine o deep en el campo text, utilice la siguiente expresión.

    +
    filter = "TEXT_MATCH(text, 'machine deep')"​
    -
    +
    +
    String filter = "TEXT_MATCH(text, 'machine deep')";
    +
    +
    const filter = "TEXT_MATCH(text, 'machine deep')";
    +
    +
    export filter="\"TEXT_MATCH(text, 'machine deep')\""
     

    También puede combinar varias expresiones TEXT_MATCH utilizando operadores lógicos para realizar una búsqueda AND. Por ejemplo, para buscar documentos que contengan machine y deep en el campo text, utilice la siguiente expresión.

    +
    filter = "TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')"​
    -
     
    -

    Búsqueda con coincidencia de palabra clave

    La concordancia de palabras clave puede utilizarse en combinación con la búsqueda de similitud vectorial para restringir el alcance de la búsqueda y mejorar el rendimiento de la misma. Al filtrar la colección mediante la concordancia de palabras clave antes de la búsqueda por similitud vectorial, puede reducir el número de documentos en los que es necesario buscar, lo que se traduce en tiempos de consulta más rápidos.

    -

    En este ejemplo, la expresión filter filtra los resultados de la búsqueda para incluir sólo los documentos que coinciden con las palabras clave especificadas keyword1 o keyword2. A continuación, la búsqueda por similitud vectorial se realiza en este subconjunto filtrado de documentos.

    +
    String filter = "TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')";
    +
    +
    const filter = "TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')"
    +
    +
    export filter="\"TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')\""
    +
    +

    Búsqueda con coincidencia de texto

    La concordancia de texto puede utilizarse en combinación con la búsqueda por similitud vectorial para acotar el ámbito de búsqueda y mejorar el rendimiento de la misma. Al filtrar la colección utilizando la concordancia de texto antes de la búsqueda por similitud vectorial, puede reducir el número de documentos en los que es necesario buscar, lo que se traduce en tiempos de consulta más rápidos.

    +

    En este ejemplo, la expresión filter filtra los resultados de la búsqueda para incluir únicamente los documentos que coinciden con el término especificado keyword1 o keyword2. A continuación, se realiza la búsqueda de similitud vectorial en este subconjunto filtrado de documentos.

    +
    # Match entities with `keyword1` or `keyword2`​
     filter = "TEXT_MATCH(text, 'keyword1 keyword2')"​
     ​
    @@ -150,8 +296,58 @@ result = MilvusClient.search(​
     )​
     
     
    -

    Consulta con concordancia de palabras clave

    La concordancia de palabras clave también puede utilizarse para el filtrado escalar en las operaciones de consulta. Especificando una expresión TEXT_MATCH en el parámetro expr del método query(), puede recuperar documentos que coincidan con las palabras clave dadas.

    -

    El ejemplo siguiente recupera documentos cuyo campo text contiene las palabras clave keyword1 y keyword2.

    +
    String filter = "TEXT_MATCH(text, 'keyword1 keyword2')";
    +
    +SearchResp searchResp = client.search(SearchReq.builder()
    +        .collectionName("YOUR_COLLECTION_NAME")
    +        .annsField("embeddings")
+        .data(Collections.singletonList(queryVector))
    +        .filter(filter)
    +        .topK(10)
    +        .outputFields(Arrays.asList("id", "text"))
    +        .build());
    +
    +
    // Match entities with `keyword1` or `keyword2`
    +const filter = "TEXT_MATCH(text, 'keyword1 keyword2')";
    +
    +// Assuming 'embeddings' is the vector field and 'text' is the VARCHAR field
+const result = await client.search({
    +    collection_name: "YOUR_COLLECTION_NAME", // Your collection name
    +    anns_field: "embeddings", // Vector field name
    +    data: [query_vector], // Query vector
    +    filter: filter,
    +    params: {"nprobe": 10},
    +    limit: 10, // Max. number of results to return
    +    output_fields: ["id", "text"] //Fields to return
+});
    +
    +
    export filter="\"TEXT_MATCH(text, 'keyword1 keyword2')\""
    +
    +export CLUSTER_ENDPOINT="http://localhost:19530"
    +export TOKEN="root:Milvus"
    +
    +curl --request POST \
    +--url "${CLUSTER_ENDPOINT}/v2/vectordb/entities/search" \
    +--header "Authorization: Bearer ${TOKEN}" \
    +--header "Content-Type: application/json" \
    +-d '{
    +    "collectionName": "demo2",
    +    "annsField": "my_vector",
    +    "data": [[0.19886812562848388, 0.06023560599112088, 0.6976963061752597, 0.2614474506242501, 0.838729485096104]],
    +    "filter": '"$filter"',
    +    "searchParams": {
    +        "params": {
    +            "nprobe": 10
    +        }
    +    },
    +    "limit": 3,
    +    "outputFields": ["text","id"]
    +}'
    +
    +

    Consulta con concordancia de texto

    La concordancia de texto también puede utilizarse para el filtrado escalar en operaciones de consulta. Especificando una expresión TEXT_MATCH en el parámetro expr del método query(), puede recuperar documentos que coincidan con los términos dados.

    +

    El siguiente ejemplo recupera documentos en los que el campo text contiene los términos keyword1 y keyword2.

    +
    # Match entities with both `keyword1` and `keyword2`​
     filter = "TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')"​
     ​
    @@ -161,6 +357,39 @@ result = MilvusClient.query(​
         output_fields=["id", "text"]​
     )​
     
    +
    +
    String filter = "TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')";
    +
    +QueryResp queryResp = client.query(QueryReq.builder()
    +        .collectionName("YOUR_COLLECTION_NAME")
    +        .filter(filter)
    +        .outputFields(Arrays.asList("id", "text"))
    +        .build()
    +);
    +
    +
    // Match entities with both `keyword1` and `keyword2`
    +const filter = "TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')";
    +
+const result = await client.query({
    +    collection_name: "YOUR_COLLECTION_NAME",
    +    filter: filter, 
    +    output_fields: ["id", "text"]
+})
    +
    +
    export filter="\"TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')\""
    +
    +export CLUSTER_ENDPOINT="http://localhost:19530"
    +export TOKEN="root:Milvus"
    +
    +curl --request POST \
    +--url "${CLUSTER_ENDPOINT}/v2/vectordb/entities/query" \
    +--header "Authorization: Bearer ${TOKEN}" \
    +--header "Content-Type: application/json" \
    +-d '{
    +    "collectionName": "demo2",
    +    "filter": '"$filter"',
    +    "outputFields": ["id", "text"]
    +}'
     

    Consideraciones

      -
    • La activación de la concordancia de palabras clave para un campo desencadena la creación de un índice invertido, que consume recursos de almacenamiento. Tenga en cuenta el impacto en el almacenamiento cuando decida activar esta función, ya que varía en función del tamaño del texto, los tokens únicos y el analizador utilizado.

    • +
    • La activación de la concordancia de texto para un campo desencadena la creación de un índice invertido, que consume recursos de almacenamiento. Tenga en cuenta el impacto en el almacenamiento cuando decida activar esta función, ya que varía en función del tamaño del texto, los tokens únicos y el analizador utilizado.

    • Una vez que haya definido un analizador en su esquema, su configuración será permanente para esa colección. Si decide que un analizador diferente se adaptaría mejor a sus necesidades, puede considerar eliminar la colección existente y crear una nueva con la configuración de analizador deseada.
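A modo de esbozo, y suponiendo una colección hipotética my_collection cuyo campo text debe pasar a usar otro analizador, la recreación podría plantearse así con pymilvus:

from pymilvus import MilvusClient, DataType

client = MilvusClient(uri="http://localhost:19530")

# The analyzer configuration cannot be changed in place, so drop the collection first
client.drop_collection(collection_name="my_collection")

# Rebuild the schema with the desired analyzer
schema = MilvusClient.create_schema(auto_id=True, enable_dynamic_field=False)
schema.add_field(
    field_name="text",
    datatype=DataType.VARCHAR,
    max_length=200,
    enable_analyzer=True,
    analyzer_params={"type": "english"},  # new analyzer configuration
    enable_match=True,
)
# Add the remaining fields (primary key, vector field, etc.) and then:
# client.create_collection(collection_name="my_collection", schema=schema)

Recuerde que, tras recrear la colección, deberá volver a insertar los datos.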

    diff --git a/localization/v2.5.x/site/es/userGuide/search-query-get/multi-vector-search.md b/localization/v2.5.x/site/es/userGuide/search-query-get/multi-vector-search.md index ac4b238cc..9f0c8df3f 100644 --- a/localization/v2.5.x/site/es/userGuide/search-query-get/multi-vector-search.md +++ b/localization/v2.5.x/site/es/userGuide/search-query-get/multi-vector-search.md @@ -41,8 +41,8 @@ title: Búsqueda híbrida

    La búsqueda híbrida es adecuada para los dos escenarios siguientes.

    Búsqueda de vectores dispersos-densos

    Diferentes tipos de vectores pueden representar información diferente, y el uso de varios modelos de incrustación puede representar de forma más exhaustiva diferentes características y aspectos de los datos. Por ejemplo, el uso de distintos modelos de incrustación para la misma frase puede generar un vector denso que represente el significado semántico y un vector disperso que represente la frecuencia de palabras en la frase.

      -
    • Vectores dispersos: Los vectores dispersos se caracterizan por su elevada dimensionalidad y la presencia de pocos valores distintos de cero. Esta estructura los hace especialmente adecuados para las aplicaciones tradicionales de recuperación de información. En la mayoría de los casos, el número de dimensiones utilizadas en los vectores dispersos corresponde a diferentes tokens de una o varias lenguas. A cada dimensión se le asigna un valor que indica la importancia relativa de ese token dentro del documento. Esta disposición resulta ventajosa para tareas que implican la concordancia de palabras clave.

    • -
    • Vectores densos: Los vectores densos son incrustaciones derivadas de las redes neuronales. Cuando se disponen en una matriz ordenada, estos vectores capturan la esencia semántica del texto de entrada. Tenga en cuenta que los vectores densos no se limitan al tratamiento de textos, sino que también se utilizan mucho en visión por ordenador para representar la semántica de los datos visuales. Estos vectores densos, normalmente generados por modelos de incrustación de texto, se caracterizan porque la mayoría o todos los elementos son distintos de cero. Así, los vectores densos son especialmente eficaces para las aplicaciones de búsqueda semántica, ya que pueden devolver los resultados más parecidos basándose en la distancia vectorial incluso en ausencia de coincidencias exactas de palabras clave. Esta capacidad permite obtener resultados de búsqueda más matizados y sensibles al contexto, que a menudo captan relaciones entre conceptos que podrían pasar desapercibidas con enfoques basados en palabras clave.

    • +
    • Vectores dispersos: Los vectores dispersos se caracterizan por su elevada dimensionalidad y la presencia de pocos valores distintos de cero. Esta estructura los hace especialmente adecuados para las aplicaciones tradicionales de recuperación de información. En la mayoría de los casos, el número de dimensiones utilizadas en los vectores dispersos corresponde a diferentes tokens de una o varias lenguas. A cada dimensión se le asigna un valor que indica la importancia relativa de ese token dentro del documento. Esta disposición resulta ventajosa para las tareas de comparación de textos.

    • +
    • Vectores densos: Los vectores densos son incrustaciones derivadas de redes neuronales. Cuando se disponen en una matriz ordenada, estos vectores capturan la esencia semántica del texto de entrada. Los vectores densos no se limitan al tratamiento de textos, sino que también se utilizan mucho en visión por ordenador para representar la semántica de los datos visuales. Estos vectores densos, normalmente generados por modelos de incrustación de texto, se caracterizan porque la mayoría o todos los elementos son distintos de cero. Así, los vectores densos son especialmente eficaces para las aplicaciones de búsqueda semántica, ya que pueden devolver los resultados más similares basándose en la distancia vectorial, incluso en ausencia de coincidencias textuales exactas. Esta capacidad permite obtener resultados de búsqueda más matizados y sensibles al contexto, a menudo captando relaciones entre conceptos que podrían pasar desapercibidas con enfoques basados en palabras clave.

    Para más información, consulte Vector disperso y Vector denso.
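A título ilustrativo, un esquema con un campo denso y otro disperso en la misma colección podría esbozarse así con pymilvus (los nombres de campo y la dimensión 5 son hipotéticos):

from pymilvus import MilvusClient, DataType

schema = MilvusClient.create_schema(auto_id=True, enable_dynamic_field=False)
schema.add_field(field_name="id", datatype=DataType.INT64, is_primary=True)
# Dense embeddings capture the semantic meaning of the sentence
schema.add_field(field_name="dense", datatype=DataType.FLOAT_VECTOR, dim=5)
# Sparse embeddings capture term-level importance (word frequency)
schema.add_field(field_name="sparse", datatype=DataType.SPARSE_FLOAT_VECTOR)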

    Búsqueda multimodal

    La búsqueda multimodal se refiere a la búsqueda por similitud de datos no estructurados en múltiples modalidades (como imágenes, vídeos, audio, texto, etc.). Por ejemplo, una persona puede representarse utilizando varias modalidades de datos, como huellas dactilares, huellas vocales y rasgos faciales. La búsqueda híbrida permite realizar varias búsquedas simultáneamente. Por ejemplo, la búsqueda de una persona con huellas dactilares y vocales similares.

    @@ -606,7 +606,7 @@ ranker = RRFRanker(
    -

    Realizar una búsqueda híbrida

    Antes de realizar una búsqueda híbrida, es necesario cargar la colección en memoria. Si algún campo vectorial de la colección no tiene un índice o no está cargado, se producirá un error al llamar al método de Búsqueda Híbrida.

    +

    Realizar una búsqueda híbrida

    Antes de realizar una búsqueda híbrida, es necesario cargar la colección en memoria. Si alguno de los campos vectoriales de la colección no tiene un índice o no está cargado, se producirá un error al llamar al método de Búsqueda Híbrida.

    from pymilvus import MilvusClient​
    @@ -676,7 +676,7 @@ res = await client.
    -

    A continuación se muestra la salida.

    +

    A continuación se muestra el resultado.

    ["['id: 844, distance: 0.006047376897186041, entity: {}', 'id: 876, distance: 0.006422005593776703, entity: {}']"]​
     
     
    diff --git a/localization/v2.5.x/site/es/userGuide/search-query-get/single-vector-search.md b/localization/v2.5.x/site/es/userGuide/search-query-get/single-vector-search.md index 5b4fd30e4..5d2f286ed 100644 --- a/localization/v2.5.x/site/es/userGuide/search-query-get/single-vector-search.md +++ b/localization/v2.5.x/site/es/userGuide/search-query-get/single-vector-search.md @@ -65,8 +65,8 @@ title: Búsqueda ANN básica d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z" > -

    En las búsquedas RNA, una búsqueda monovectorial se refiere a una búsqueda que implica sólo un vector de consulta. Basándose en el índice pre-construido y en el tipo de métrica incluido en la petición de búsqueda, Milvus encontrará los K vectores más similares al vector de consulta.

    -

    En esta sección, aprenderá a realizar una búsqueda de un solo vector. El fragmento de código supone que ha creado una colección de forma rápida. La petición de búsqueda lleva un único vector de consulta y pide a Milvus que utilice Inner Product (IP) para calcular la similitud entre los vectores de consulta y los vectores de la colección y devuelve los tres más similares.

    +

En las búsquedas ANN, una búsqueda de un solo vector se refiere a una búsqueda que implica sólo un vector de consulta. Basándose en el índice pre-construido y en el tipo de métrica incluido en la petición de búsqueda, Milvus encontrará los K vectores más similares al vector de consulta.

    +

    En esta sección, aprenderá a realizar una búsqueda de un solo vector. El fragmento de código asume que ha creado una colección de forma rápida. La petición de búsqueda lleva un único vector de consulta y pide a Milvus que utilice Inner Product (IP) para calcular la similitud entre los vectores de consulta y los vectores de la colección y devuelve los tres más similares.

    from pymilvus import MilvusClient​
    @@ -893,7 +893,7 @@ curl --request POST \​
     

    Puede mejorar la relevancia de los resultados de búsqueda restringiendo la distancia o puntuación de las entidades devueltas dentro de un rango específico. En Milvus, una búsqueda por rango implica dibujar dos círculos concéntricos con el vector incrustado más similar al vector de consulta como centro. La petición de búsqueda especifica el radio de ambos círculos, y Milvus devuelve todas las incrustaciones vectoriales que caen dentro del círculo exterior pero no del círculo interior.

    Para más información sobre la búsqueda por rango, consulte Búsqueda por rango.
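Como esbozo orientativo con el cliente de Python (el nombre de la colección, el campo vectorial y los valores de radius y range_filter son hipotéticos y dependen del tipo de métrica):

from pymilvus import MilvusClient

client = MilvusClient(uri="http://localhost:19530")

query_vector = [0.1, 0.2, 0.3, 0.4, 0.5]  # example query vector

res = client.search(
    collection_name="my_collection",   # hypothetical collection
    anns_field="vector",               # hypothetical vector field
    data=[query_vector],
    limit=5,
    search_params={
        # With the IP metric, returned scores must satisfy: radius < score <= range_filter
        "params": {"radius": 0.4, "range_filter": 0.6}
    },
)
print(res)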

  • Búsqueda por agrupación

    -

    Si las entidades devueltas tienen el mismo valor en un campo específico, es posible que los resultados de la búsqueda no representen la distribución de todas las incrustaciones vectoriales en el espacio vectorial. Para diversificar los resultados de la búsqueda, considere la posibilidad de utilizar la búsqueda de agrupación.

    +

    Si las entidades devueltas tienen el mismo valor en un campo específico, los resultados de la búsqueda pueden no representar la distribución de todas las incrustaciones vectoriales en el espacio vectorial. Para diversificar los resultados de la búsqueda, considere la posibilidad de utilizar la búsqueda de agrupación.

    Para más información sobre la búsqueda agrupada, consulte Búsqueda agrupada.
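A modo de esbozo con el cliente de Python (la colección my_collection y el campo escalar doc_id son hipotéticos):

from pymilvus import MilvusClient

client = MilvusClient(uri="http://localhost:19530")

query_vector = [0.1, 0.2, 0.3, 0.4, 0.5]  # example query vector

res = client.search(
    collection_name="my_collection",
    anns_field="vector",
    data=[query_vector],
    limit=3,
    group_by_field="doc_id",      # return at most one best hit per distinct doc_id
    output_fields=["doc_id"],
)
print(res)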

  • Búsqueda híbrida

    Una colección puede incluir hasta cuatro campos vectoriales para guardar las incrustaciones vectoriales generadas utilizando diferentes modelos de incrustación. De este modo, puede utilizar una búsqueda híbrida para volver a clasificar los resultados de búsqueda de estos campos vectoriales, mejorando el índice de recuperación.

    @@ -904,9 +904,9 @@ curl --request POST \​
  • Búsqueda de texto completo

    La búsqueda de texto completo es una función que recupera documentos que contienen términos o frases específicos en conjuntos de datos de texto y, a continuación, clasifica los resultados en función de su relevancia. Esta función supera las limitaciones de la búsqueda semántica, que puede pasar por alto términos precisos, garantizando que usted reciba los resultados más precisos y contextualmente relevantes. Además, simplifica las búsquedas vectoriales al aceptar la entrada de texto sin formato, convirtiendo automáticamente sus datos de texto en incrustaciones dispersas sin necesidad de generar manualmente incrustaciones vectoriales.

    Para obtener más información sobre la búsqueda de texto completo, consulte Búsqueda de texto completo.
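Como esbozo (suponiendo pymilvus 2.5 y nombres de campo hipotéticos), la conversión automática de texto en incrustaciones dispersas se declara mediante una función BM25 en el esquema:

from pymilvus import MilvusClient, DataType, Function, FunctionType

schema = MilvusClient.create_schema(auto_id=True, enable_dynamic_field=False)
schema.add_field(field_name="id", datatype=DataType.INT64, is_primary=True)
schema.add_field(field_name="text", datatype=DataType.VARCHAR, max_length=1000, enable_analyzer=True)
schema.add_field(field_name="sparse", datatype=DataType.SPARSE_FLOAT_VECTOR)

# BM25 function: Milvus generates the sparse embeddings from the raw text automatically
schema.add_function(Function(
    name="text_bm25_emb",
    input_field_names=["text"],
    output_field_names=["sparse"],
    function_type=FunctionType.BM25,
))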

  • -
  • Concordancia de palabras clave

    -

    La concordancia de palabras clave en Milvus permite una recuperación precisa de documentos basada en términos específicos. Esta función se utiliza principalmente para la búsqueda filtrada para satisfacer condiciones específicas y puede incorporar el filtrado escalar para refinar los resultados de la consulta, permitiendo búsquedas de similitud dentro de los vectores que cumplen los criterios escalares.

    -

    Para obtener más información sobre la concordancia de palabras clave, consulte Concordancia de palabras clave.

  • +
  • Correspondencia de textos

    +

    La concordancia de texto en Milvus permite una recuperación precisa de documentos basada en términos específicos. Esta función se utiliza principalmente para la búsqueda filtrada para satisfacer condiciones específicas y puede incorporar el filtrado escalar para refinar los resultados de la consulta, permitiendo búsquedas de similitud dentro de vectores que cumplen criterios escalares.

    +

    Para obtener más información sobre la concordancia de texto, consulte Concordancia de texto.

  • Utilizar clave de partición

    Involucrar varios campos escalares en el filtrado de metadatos y utilizar una condición de filtrado bastante complicada puede afectar a la eficacia de la búsqueda. Una vez establecido un campo escalar como clave de partición y utilizada una condición de filtrado que incluya la clave de partición en la petición de búsqueda, puede ayudar a restringir el ámbito de búsqueda dentro de las particiones correspondientes a los valores de clave de partición especificados.

    Para más detalles sobre la clave de partición, consulte Utilizar clave de partición.
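A modo de esbozo (el campo tenant_id y los demás nombres son hipotéticos), la clave de partición se declara al definir el esquema y luego se usa en el filtro de la petición de búsqueda:

from pymilvus import MilvusClient, DataType

schema = MilvusClient.create_schema(auto_id=True, enable_dynamic_field=False)
schema.add_field(field_name="id", datatype=DataType.INT64, is_primary=True)
schema.add_field(field_name="vector", datatype=DataType.FLOAT_VECTOR, dim=5)
# Designate a scalar field as the partition key
schema.add_field(
    field_name="tenant_id",
    datatype=DataType.VARCHAR,
    max_length=64,
    is_partition_key=True,
)

# In the search request, a filter on the partition key confines the search
# to the partitions that hold the specified value:
# filter='tenant_id == "tenant_42"'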

  • diff --git a/localization/v2.5.x/site/fr/adminGuide/upgrade-pulsar-v3.json b/localization/v2.5.x/site/fr/adminGuide/upgrade-pulsar-v3.json index bd4f9edf2..3418f8a96 100644 --- a/localization/v2.5.x/site/fr/adminGuide/upgrade-pulsar-v3.json +++ b/localization/v2.5.x/site/fr/adminGuide/upgrade-pulsar-v3.json @@ -1 +1 @@ -{"codeList":["kubectl -n default port-forward deploy/my-release-milvus-proxy 9091:9091 &​\n","[1] 8116​\nForwarding from 127.0.0.1:9091 -> 9091​\n\n","pid=8116​\n\n","curl 127.0.0.1:9091/api/v1/collections \\​\n|curl 127.0.0.1:9091/api/v1/persist -d @/dev/stdin\\​\n|jq '.flush_coll_segIDs'| jq '[.[] | .data[]]' | jq '{segmentIDs: (.)}' \\​\n> flushing_segments.json​\ncat flushing_segments.json​\n\n","{​\n\"segmentIDs\": [​\n 454097953998181000,​\n 454097953999383600,​\n 454097953998180800​\n]​\n}​\n\n","cat flushing_segments.json| curl -X GET 127.0.0.1:9091/api/v1/persist/state -d @/dev/stdin ​\n\n","{\"status\":{},\"flushed\":true}​\n\n","kill $pid​\n\n","[1] + 8116 terminated kubectl -n default port-forward deploy/my-release-milvus-proxy 9091:9091 ​\n\n","helm -n default get values my-release -o yaml > values.yaml​\ncat values.yaml​\n\n","helm -n default uninstall my-release​\n\n","These resources were kept due to the resource policy:​\n[PersistentVolumeClaim] my-release-minio​\n​\nrelease \"my-release\" uninstalled​\n\n","kubectl -n default get pvc -lapp=pulsar,release=my-release |grep -v NAME |awk '{print $1}' > pulsar-pvcs.txt​\nkubectl -n default get pvc -lapp=pulsar,release=my-release -o custom-columns=VOL:.spec.volumeName|grep -v VOL > pulsar-pvs.txt​\necho \"Volume Claims:\"​\ncat pulsar-pvcs.txt​\necho \"Volumes:\"​\ncat pulsar-pvs.txt​\n\n","Volume Claims:​\nmy-release-pulsar-bookie-journal-my-release-pulsar-bookie-0​\nmy-release-pulsar-bookie-journal-my-release-pulsar-bookie-1​\nmy-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-0​\nmy-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-1​\nmy-release-pulsar-zookeeper-data-my-release-pulsar-zookeeper-0​\nVolumes:​\npvc-f590a4de-df31-4ca8-a424-007eac3c619a​\npvc-17b0e215-3e14-4d14-901e-1a1dda9ff5a3​\npvc-72f83c25-6ea1-45ee-9559-0b783f2c530b​\npvc-60dcb6e4-760d-46c7-af1a-d1fc153b0caf​\npvc-2da33f64-c053-42b9-bb72-c5d50779aa0a​\n\n","cat pulsar-pvcs.txt |xargs -I {} kubectl -n default delete pvc {} --wait=false​\n\n","persistentvolumeclaim \"my-release-pulsar-bookie-journal-my-release-pulsar-bookie-0\" deleted​\npersistentvolumeclaim \"my-release-pulsar-bookie-journal-my-release-pulsar-bookie-1\" deleted​\npersistentvolumeclaim \"my-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-0\" deleted​\npersistentvolumeclaim \"my-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-1\" deleted​\npersistentvolumeclaim \"my-release-pulsar-zookeeper-data-my-release-pulsar-zookeeper-0\" deleted​\n\n","cat pulsar-pvs.txt |xargs -I {} kubectl -n default delete pvc {} --wait=false​\n\n","Error from server (NotFound): persistentvolumeclaims \"pvc-f590a4de-df31-4ca8-a424-007eac3c619a\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-17b0e215-3e14-4d14-901e-1a1dda9ff5a3\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-72f83c25-6ea1-45ee-9559-0b783f2c530b\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-60dcb6e4-760d-46c7-af1a-d1fc153b0caf\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-2da33f64-c053-42b9-bb72-c5d50779aa0a\" not found​\n\n","kubectl -n default get milvus my-release -o yaml > milvus.yaml​\nhead 
milvus.yaml -n 20​\n\n","apiVersion: milvus.io/v1beta1​\nkind: Milvus​\nmetadata:​\nannotations:​\n milvus.io/dependency-values-merged: \"true\"​\n milvus.io/pod-service-label-added: \"true\"​\n milvus.io/querynode-current-group-id: \"0\"​\ncreationTimestamp: \"2024-11-22T08:06:59Z\"​\nfinalizers:​\n- milvus.milvus.io/finalizer​\ngeneration: 3​\nlabels:​\n app: milvus​\n milvus.io/operator-version: 1.1.2​\nname: my-release​\nnamespace: default​\nresourceVersion: \"692217324\"​\nuid: 7a469ed0-9df1-494e-bd9a-340fac4305b5​\nspec:​\ncomponents:​\n\n","# a patch to retain etcd & storage data and delete pulsar data while delete milvus​\nspec:​\ndependencies:​\n etcd:​\n inCluster:​\n deletionPolicy: Retain​\n pvcDeletion: false​\n storage:​\n inCluster:​\n deletionPolicy: Retain​\n pvcDeletion: false​\n pulsar:​\n inCluster:​\n deletionPolicy: Delete​\n pvcDeletion: true​\n\n","kubectl -n default patch milvus my-release --patch-file patch.yaml --type=merge​\n\n","milvus.milvus.io/my-release patched​\n\n","kubectl -n default delete milvus my-release --wait=false​\nkubectl -n default get milvus my-release​\nkubectl -n default delete milvus my-release --wait=true​\n\n","milvus.milvus.io \"my-release\" deleted​\nNAME MODE STATUS UPDATED AGE​\nmy-release cluster Deleting True 41m​\nmilvus.milvus.io \"my-release\" deleted​\n\n","kubectl -n default get milvus my-release​\n\n","No resources found in default namespace.​\n\n","# change the following:​\npulsar:​\nenabled: false # set to false​\n# you may also clean up rest fields under pulsar field​\n# it's ok to keep them though.​\npulsarv3:​\nenabled: true​\n# append other values for pulsar v3 chart if needs​\n\n","helm repo add zilliztech https://zilliztech.github.io/milvus-helm​\nhelm repo update zilliztech​\n\n","\"zilliztech\" already exists with the same configuration, skipping​\nHang tight while we grab the latest from your chart repositories...​\n...Successfully got an update from the \"zilliztech\" chart repository​\nUpdate Complete. 
⎈Happy Helming!⎈​\n\n","helm -n default install my-release zilliztech/milvus --reset-values -f values.yaml​\n\n","NAME: my-release​\nLAST DEPLOYED: Fri Nov 22 15:31:27 2024​\nNAMESPACE: default​\nSTATUS: deployed​\nREVISION: 1​\nTEST SUITE: None​\n\n","NAME READY STATUS RESTARTS AGE​\nmy-release-etcd-0 1/1 Running 0 4m3s​\nmy-release-milvus-datanode-56487bc4bc-s6mbd 1/1 Running 0 4m5s​\nmy-release-milvus-indexnode-6476894d6-rv85d 1/1 Running 0 4m5s​\nmy-release-milvus-mixcoord-6d8875cb9c-67fcq 1/1 Running 0 4m4s​\nmy-release-milvus-proxy-7bc45d57c5-2qf8m 1/1 Running 0 4m4s​\nmy-release-milvus-querynode-77465747b-kt7f4 1/1 Running 0 4m4s​\nmy-release-minio-684ff4f5df-pnc97 1/1 Running 0 4m5s​\nmy-release-pulsarv3-bookie-0 1/1 Running 0 4m3s​\nmy-release-pulsarv3-bookie-1 1/1 Running 0 4m3s​\nmy-release-pulsarv3-bookie-2 1/1 Running 0 4m3s​\nmy-release-pulsarv3-bookie-init-6z4tk 0/1 Completed 0 4m1s​\nmy-release-pulsarv3-broker-0 1/1 Running 0 4m2s​\nmy-release-pulsarv3-broker-1 1/1 Running 0 4m2s​\nmy-release-pulsarv3-proxy-0 1/1 Running 0 4m2s​\nmy-release-pulsarv3-proxy-1 1/1 Running 0 4m2s​\nmy-release-pulsarv3-pulsar-init-wvqpc 0/1 Completed 0 4m1s​\nmy-release-pulsarv3-recovery-0 1/1 Running 0 4m3s​\nmy-release-pulsarv3-zookeeper-0 1/1 Running 0 4m2s​\nmy-release-pulsarv3-zookeeper-1 1/1 Running 0 4m2s​\nmy-release-pulsarv3-zookeeper-2 1/1 Running 0 4m2s​\n\n","# change the followings fields:​\napiVersion: milvus.io/v1beta1​\nkind: Milvus​\nmetadata:​\nannotations: null # this field should be removed or set to null​\nresourceVersion: null # this field should be removed or set to null​\nuid: null # this field should be removed or set to null​\nspec:​\ndependencies:​\n pulsar:​\n inCluster:​\n chartVersion: pulsar-v3​\n # delete all previous values for pulsar v2 and set it to null.​\n # you may add additional values here for pulsar v3 if you're sure about it.​\n values: null​\n\n","helm repo add milvus-operator https://zilliztech.github.io/milvus-operator​\nhelm repo update milvus-operator​\nhelm -n milvus-operator upgrade milvus-operator milvus-operator/milvus-operator​\n\n","kubectl create -f milvus.yaml​\n\n","milvus.milvus.io/my-release created​\n\n","NAME READY STATUS RESTARTS AGE​\nmy-release-etcd-0 1/1 Running 0 65m​\nmy-release-milvus-datanode-57fd59ff58-5mdrk 1/1 Running 0 93s​\nmy-release-milvus-indexnode-67867c6b9b-4wsbw 1/1 Running 0 93s​\nmy-release-milvus-mixcoord-797849f9bb-sf8z5 1/1 Running 0 93s​\nmy-release-milvus-proxy-5d5bf98445-c55m6 1/1 Running 0 93s​\nmy-release-milvus-querynode-0-64797f5c9-lw4rh 1/1 Running 0 92s​\nmy-release-minio-79476ccb49-zvt2h 1/1 Running 0 65m​\nmy-release-pulsar-bookie-0 1/1 Running 0 5m10s​\nmy-release-pulsar-bookie-1 1/1 Running 0 5m10s​\nmy-release-pulsar-bookie-2 1/1 Running 0 5m10s​\nmy-release-pulsar-bookie-init-v8fdj 0/1 Completed 0 5m11s​\nmy-release-pulsar-broker-0 1/1 Running 0 5m11s​\nmy-release-pulsar-broker-1 1/1 Running 0 5m10s​\nmy-release-pulsar-proxy-0 1/1 Running 0 5m11s​\nmy-release-pulsar-proxy-1 1/1 Running 0 5m10s​\nmy-release-pulsar-pulsar-init-5lhx7 0/1 Completed 0 5m11s​\nmy-release-pulsar-recovery-0 1/1 Running 0 5m11s​\nmy-release-pulsar-zookeeper-0 1/1 Running 0 5m11s​\nmy-release-pulsar-zookeeper-1 1/1 Running 0 5m10s​\nmy-release-pulsar-zookeeper-2 1/1 Running 0 5m10s​\n\n"],"headingContent":"Upgrading Pulsar ​","anchorList":[{"label":"Mise à jour de Pulsar","href":"Upgrading-Pulsar-​","type":1,"isActive":false},{"label":"Feuille de 
route","href":"Roadmap","type":2,"isActive":false},{"label":"Procédures","href":"Procedures","type":2,"isActive":false}]} \ No newline at end of file +{"codeList":["kubectl -n default port-forward deploy/my-release-milvus-proxy 9091:9091 &​\n","[1] 8116​\nForwarding from 127.0.0.1:9091 -> 9091​\n\n","pid=8116​\n\n","curl 127.0.0.1:9091/api/v1/collections \\​\n|curl 127.0.0.1:9091/api/v1/persist -d @/dev/stdin\\​\n|jq '.flush_coll_segIDs'| jq '[.[] | .data[]]' | jq '{segmentIDs: (.)}' \\​\n> flushing_segments.json​\ncat flushing_segments.json​\n\n","{​\n \"segmentIDs\": [​\n 454097953998181000,​\n 454097953999383600,​\n 454097953998180800​\n ]​\n}​\n\n","cat flushing_segments.json| curl -X GET 127.0.0.1:9091/api/v1/persist/state -d @/dev/stdin ​\n\n","{\"status\":{},\"flushed\":true}​\n\n","kill $pid​\n\n","[1] + 8116 terminated kubectl -n default port-forward deploy/my-release-milvus-proxy 9091:9091 ​\n\n","helm -n default get values my-release -o yaml > values.yaml​\ncat values.yaml​\n\n","helm -n default uninstall my-release​\n\n","These resources were kept due to the resource policy:​\n[PersistentVolumeClaim] my-release-minio​\n​\nrelease \"my-release\" uninstalled​\n\n","kubectl -n default get pvc -lapp=pulsar,release=my-release |grep -v NAME |awk '{print $1}' > pulsar-pvcs.txt​\nkubectl -n default get pvc -lapp=pulsar,release=my-release -o custom-columns=VOL:.spec.volumeName|grep -v VOL > pulsar-pvs.txt​\necho \"Volume Claims:\"​\ncat pulsar-pvcs.txt​\necho \"Volumes:\"​\ncat pulsar-pvs.txt​\n\n","Volume Claims:​\nmy-release-pulsar-bookie-journal-my-release-pulsar-bookie-0​\nmy-release-pulsar-bookie-journal-my-release-pulsar-bookie-1​\nmy-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-0​\nmy-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-1​\nmy-release-pulsar-zookeeper-data-my-release-pulsar-zookeeper-0​\nVolumes:​\npvc-f590a4de-df31-4ca8-a424-007eac3c619a​\npvc-17b0e215-3e14-4d14-901e-1a1dda9ff5a3​\npvc-72f83c25-6ea1-45ee-9559-0b783f2c530b​\npvc-60dcb6e4-760d-46c7-af1a-d1fc153b0caf​\npvc-2da33f64-c053-42b9-bb72-c5d50779aa0a​\n\n","cat pulsar-pvcs.txt |xargs -I {} kubectl -n default delete pvc {} --wait=false​\n\n","persistentvolumeclaim \"my-release-pulsar-bookie-journal-my-release-pulsar-bookie-0\" deleted​\npersistentvolumeclaim \"my-release-pulsar-bookie-journal-my-release-pulsar-bookie-1\" deleted​\npersistentvolumeclaim \"my-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-0\" deleted​\npersistentvolumeclaim \"my-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-1\" deleted​\npersistentvolumeclaim \"my-release-pulsar-zookeeper-data-my-release-pulsar-zookeeper-0\" deleted​\n\n","cat pulsar-pvs.txt |xargs -I {} kubectl -n default delete pvc {} --wait=false​\n\n","Error from server (NotFound): persistentvolumeclaims \"pvc-f590a4de-df31-4ca8-a424-007eac3c619a\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-17b0e215-3e14-4d14-901e-1a1dda9ff5a3\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-72f83c25-6ea1-45ee-9559-0b783f2c530b\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-60dcb6e4-760d-46c7-af1a-d1fc153b0caf\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-2da33f64-c053-42b9-bb72-c5d50779aa0a\" not found​\n\n","kubectl -n default get milvus my-release -o yaml > milvus.yaml​\nhead milvus.yaml -n 20​\n\n","apiVersion: milvus.io/v1beta1​\nkind: Milvus​\nmetadata:​\n annotations:​\n milvus.io/dependency-values-merged: \"true\"​\n 
milvus.io/pod-service-label-added: \"true\"​\n milvus.io/querynode-current-group-id: \"0\"​\n creationTimestamp: \"2024-11-22T08:06:59Z\"​\n finalizers:​\n - milvus.milvus.io/finalizer​\n generation: 3​\n labels:​\n app: milvus​\n milvus.io/operator-version: 1.1.2​\nname: my-release​\nnamespace: default​\nresourceVersion: \"692217324\"​\nuid: 7a469ed0-9df1-494e-bd9a-340fac4305b5​\nspec:​\n components:​\n\n","# a patch to retain etcd & storage data and delete pulsar data while delete milvus​\nspec:​\n dependencies:​\n etcd:​\n inCluster:​\n deletionPolicy: Retain​\n pvcDeletion: false​\n storage:​\n inCluster:​\n deletionPolicy: Retain​\n pvcDeletion: false​\n pulsar:​\n inCluster:​\n deletionPolicy: Delete​\n pvcDeletion: true​\n\n","kubectl -n default patch milvus my-release --patch-file patch.yaml --type=merge​\n\n","milvus.milvus.io/my-release patched​\n\n","kubectl -n default delete milvus my-release --wait=false​\nkubectl -n default get milvus my-release​\nkubectl -n default delete milvus my-release --wait=true​\n\n","milvus.milvus.io \"my-release\" deleted​\nNAME MODE STATUS UPDATED AGE​\nmy-release cluster Deleting True 41m​\nmilvus.milvus.io \"my-release\" deleted​\n\n","kubectl -n default get milvus my-release​\n\n","No resources found in default namespace.​\n\n","# change the following:​\npulsar:​\n enabled: false # set to false​\n # you may also clean up rest fields under pulsar field​\n # it's ok to keep them though.​\npulsarv3:​\n enabled: true​\n # append other values for pulsar v3 chart if needs​\n\n","helm repo add zilliztech https://zilliztech.github.io/milvus-helm​\nhelm repo update zilliztech​\n\n","\"zilliztech\" already exists with the same configuration, skipping​\nHang tight while we grab the latest from your chart repositories...​\n...Successfully got an update from the \"zilliztech\" chart repository​\nUpdate Complete. 
⎈Happy Helming!⎈​\n\n","helm -n default install my-release zilliztech/milvus --reset-values -f values.yaml​\n\n","NAME: my-release​\nLAST DEPLOYED: Fri Nov 22 15:31:27 2024​\nNAMESPACE: default​\nSTATUS: deployed​\nREVISION: 1​\nTEST SUITE: None​\n\n","NAME READY STATUS RESTARTS AGE​\nmy-release-etcd-0 1/1 Running 0 4m3s​\nmy-release-milvus-datanode-56487bc4bc-s6mbd 1/1 Running 0 4m5s​\nmy-release-milvus-indexnode-6476894d6-rv85d 1/1 Running 0 4m5s​\nmy-release-milvus-mixcoord-6d8875cb9c-67fcq 1/1 Running 0 4m4s​\nmy-release-milvus-proxy-7bc45d57c5-2qf8m 1/1 Running 0 4m4s​\nmy-release-milvus-querynode-77465747b-kt7f4 1/1 Running 0 4m4s​\nmy-release-minio-684ff4f5df-pnc97 1/1 Running 0 4m5s​\nmy-release-pulsarv3-bookie-0 1/1 Running 0 4m3s​\nmy-release-pulsarv3-bookie-1 1/1 Running 0 4m3s​\nmy-release-pulsarv3-bookie-2 1/1 Running 0 4m3s​\nmy-release-pulsarv3-bookie-init-6z4tk 0/1 Completed 0 4m1s​\nmy-release-pulsarv3-broker-0 1/1 Running 0 4m2s​\nmy-release-pulsarv3-broker-1 1/1 Running 0 4m2s​\nmy-release-pulsarv3-proxy-0 1/1 Running 0 4m2s​\nmy-release-pulsarv3-proxy-1 1/1 Running 0 4m2s​\nmy-release-pulsarv3-pulsar-init-wvqpc 0/1 Completed 0 4m1s​\nmy-release-pulsarv3-recovery-0 1/1 Running 0 4m3s​\nmy-release-pulsarv3-zookeeper-0 1/1 Running 0 4m2s​\nmy-release-pulsarv3-zookeeper-1 1/1 Running 0 4m2s​\nmy-release-pulsarv3-zookeeper-2 1/1 Running 0 4m2s​\n\n","# change the followings fields:​\napiVersion: milvus.io/v1beta1​\nkind: Milvus​\nmetadata:​\n annotations: null # this field should be removed or set to null​\n resourceVersion: null # this field should be removed or set to null​\n uid: null # this field should be removed or set to null​\nspec:​\n dependencies:​\n pulsar:​\n inCluster:​\n chartVersion: pulsar-v3​\n # delete all previous values for pulsar v2 and set it to null.​\n # you may add additional values here for pulsar v3 if you're sure about it.​\n values: null​\n\n","helm repo add milvus-operator https://zilliztech.github.io/milvus-operator​\nhelm repo update milvus-operator​\nhelm -n milvus-operator upgrade milvus-operator milvus-operator/milvus-operator​\n\n","kubectl create -f milvus.yaml​\n\n","milvus.milvus.io/my-release created​\n\n","NAME READY STATUS RESTARTS AGE​\nmy-release-etcd-0 1/1 Running 0 65m​\nmy-release-milvus-datanode-57fd59ff58-5mdrk 1/1 Running 0 93s​\nmy-release-milvus-indexnode-67867c6b9b-4wsbw 1/1 Running 0 93s​\nmy-release-milvus-mixcoord-797849f9bb-sf8z5 1/1 Running 0 93s​\nmy-release-milvus-proxy-5d5bf98445-c55m6 1/1 Running 0 93s​\nmy-release-milvus-querynode-0-64797f5c9-lw4rh 1/1 Running 0 92s​\nmy-release-minio-79476ccb49-zvt2h 1/1 Running 0 65m​\nmy-release-pulsar-bookie-0 1/1 Running 0 5m10s​\nmy-release-pulsar-bookie-1 1/1 Running 0 5m10s​\nmy-release-pulsar-bookie-2 1/1 Running 0 5m10s​\nmy-release-pulsar-bookie-init-v8fdj 0/1 Completed 0 5m11s​\nmy-release-pulsar-broker-0 1/1 Running 0 5m11s​\nmy-release-pulsar-broker-1 1/1 Running 0 5m10s​\nmy-release-pulsar-proxy-0 1/1 Running 0 5m11s​\nmy-release-pulsar-proxy-1 1/1 Running 0 5m10s​\nmy-release-pulsar-pulsar-init-5lhx7 0/1 Completed 0 5m11s​\nmy-release-pulsar-recovery-0 1/1 Running 0 5m11s​\nmy-release-pulsar-zookeeper-0 1/1 Running 0 5m11s​\nmy-release-pulsar-zookeeper-1 1/1 Running 0 5m10s​\nmy-release-pulsar-zookeeper-2 1/1 Running 0 5m10s​\n\n"],"headingContent":"Upgrading Pulsar ​","anchorList":[{"label":"Mise à jour de Pulsar","href":"Upgrading-Pulsar-​","type":1,"isActive":false},{"label":"Feuille de 
route","href":"Roadmap","type":2,"isActive":false},{"label":"Procédures","href":"Procedures","type":2,"isActive":false}]} \ No newline at end of file diff --git a/localization/v2.5.x/site/fr/adminGuide/upgrade-pulsar-v3.md b/localization/v2.5.x/site/fr/adminGuide/upgrade-pulsar-v3.md index 3c7266bbe..7aaac9238 100644 --- a/localization/v2.5.x/site/fr/adminGuide/upgrade-pulsar-v3.md +++ b/localization/v2.5.x/site/fr/adminGuide/upgrade-pulsar-v3.md @@ -113,11 +113,11 @@ Forwarding from 127.

    Résultat.

    {​
    -"segmentIDs": [​
    +  "segmentIDs": [​
         454097953998181000,​
         454097953999383600,​
         454097953998180800​
    -]​
    +  ]​
     }​
     
     
    @@ -223,15 +223,15 @@ head milvus.yaml -n 20
    apiVersion: milvus.io/v1beta1​
     kind: Milvus​
     metadata:​
    -annotations:​
    +  annotations:​
         milvus.io/dependency-values-merged: "true"​
         milvus.io/pod-service-label-added: "true"​
         milvus.io/querynode-current-group-id: "0"​
    -creationTimestamp: "2024-11-22T08:06:59Z"​
    -finalizers:​
    -- milvus.milvus.io/finalizer​
    -generation: 3​
    -labels:​
    +  creationTimestamp: "2024-11-22T08:06:59Z"​
    +  finalizers:​
    +  - milvus.milvus.io/finalizer​
    +  generation: 3​
    +  labels:​
         app: milvus​
         milvus.io/operator-version: 1.1.2​
     name: my-release​
    @@ -239,23 +239,23 @@ namespace: default​
     resourceVersion: "692217324"​
     uid: 7a469ed0-9df1-494e-bd9a-340fac4305b5​
     spec:​
    -components:​
    +  components:​
     
     
  • Créer un fichier patch.yaml avec le contenu suivant.

    # a patch to retain etcd & storage data and delete pulsar data while delete milvus​
     spec:​
    -dependencies:​
    +  dependencies:​
         etcd:​
    -    inCluster:​
    +      inCluster:​
             deletionPolicy: Retain​
             pvcDeletion: false​
         storage:​
    -    inCluster:​
    +      inCluster:​
             deletionPolicy: Retain​
             pvcDeletion: false​
         pulsar:​
    -    inCluster:​
    +      inCluster:​
             deletionPolicy: Delete​
             pvcDeletion: true​
     
    @@ -274,7 +274,7 @@ kubectl -n default get milvus my-release​
     kubectl -n default delete milvus my-release --wait=true
    -

    Résultat : Notez que cela peut prendre quelques minutes pour que milvus s'arrête de manière gracieuse et que l'opérateur supprime les volumes de pulsar.

    +

    Résultat : Notez que cela peut prendre quelques minutes pour que Milvus s'arrête de manière gracieuse et que l'opérateur supprime les volumes de pulsar.

    milvus.milvus.io "my-release" deleted​
     NAME         MODE      STATUS     UPDATED   AGE​
     my-release   cluster   Deleting   True      41m​
    @@ -302,12 +302,12 @@ milvus.milvus.io "my-release" deleted
     
• Modifier le fichier values.yaml enregistré à l'étape précédente.

    # change the following:​
     pulsar:​
    -enabled: false # set to false​
    -# you may also clean up rest fields under pulsar field​
    -# it's ok to keep them though.​
    +  enabled: false # set to false​
    +  # you may also clean up rest fields under pulsar field​
    +  # it's ok to keep them though.​
     pulsarv3:​
    -enabled: true​
    -# append other values for pulsar v3 chart if needs​
    +  enabled: true​
    +  # append other values for pulsar v3 chart if needs​
     
     
  • Mettez à jour votre répertoire helm local.

    @@ -368,13 +368,13 @@ my-release-pulsarv3-zookeeper-2
     apiVersion: milvus.io/v1beta1​
     kind: Milvus​
     metadata:​
    -annotations: null # this field should be removed or set to null​
    -resourceVersion: null # this field should be removed or set to null​
    -uid: null # this field should be removed or set to null​
    +  annotations: null # this field should be removed or set to null​
    +  resourceVersion: null # this field should be removed or set to null​
    +  uid: null # this field should be removed or set to null​
     spec:​
    -dependencies:​
    +  dependencies:​
         pulsar:​
    -    inCluster:​
    +      inCluster:​
             chartVersion: pulsar-v3​
             # delete all previous values for pulsar v2 and set it to null.​
             # you may add additional values here for pulsar v3 if you're sure about it.​
    @@ -395,7 +395,7 @@ helm -n milvus-operator upgrade milvus-
    milvus.milvus.io/my-release created​
  • -
  • Vérifier les pods pour voir si tous sont planifiés et en cours d'exécution avec kubectl -n default get pods.

    +
  • Vérifiez les pods pour voir si tous sont planifiés et en cours d'exécution avec kubectl -n default get pods.

    Cela peut prendre quelques minutes pour que tous les pods démarrent.

    Le résultat est le suivant.

    NAME                                            READY   STATUS      RESTARTS   AGE​
    diff --git a/localization/v2.5.x/site/fr/getstarted/run-milvus-k8s/install_cluster-helm.json b/localization/v2.5.x/site/fr/getstarted/run-milvus-k8s/install_cluster-helm.json
    index 07e221f22..446a00bdb 100644
    --- a/localization/v2.5.x/site/fr/getstarted/run-milvus-k8s/install_cluster-helm.json
    +++ b/localization/v2.5.x/site/fr/getstarted/run-milvus-k8s/install_cluster-helm.json
    @@ -1 +1 @@
    -{"codeList":["$ kubectl get sc\n\nNAME                  PROVISIONER                  RECLAIMPOLICY    VOLUMEBIINDINGMODE    ALLOWVOLUMEEXPANSION     AGE\nstandard (default)    k8s.io/minikube-hostpath     Delete           Immediate             false \n","$ helm repo add milvus https://github.com/zilliztech/milvus-helm\n","helm repo add zilliztech https://github.com/zilliztech/milvus-helm\nhelm repo update\n# upgrade existing helm release\nhelm upgrade my-release zilliztech/milvus\n","$ helm repo update\n","$ helm install my-release milvus/milvus\n","$ kubectl get pods\n","NAME                                             READY  STATUS   RESTARTS  AGE\nmy-release-etcd-0                                1/1    Running   0        3m23s\nmy-release-etcd-1                                1/1    Running   0        3m23s\nmy-release-etcd-2                                1/1    Running   0        3m23s\nmy-release-milvus-datanode-68cb87dcbd-4khpm      1/1    Running   0        3m23s\nmy-release-milvus-indexnode-5c5f7b5bd9-l8hjg     1/1    Running   0        3m24s\nmy-release-milvus-mixcoord-7fb9488465-dmbbj      1/1    Running   0        3m23s\nmy-release-milvus-proxy-6bd7f5587-ds2xv          1/1    Running   0        3m24s\nmy-release-milvus-querynode-5cd8fff495-k6gtg     1/1    Running   0        3m24s\nmy-release-minio-0                               1/1    Running   0        3m23s\nmy-release-minio-1                               1/1    Running   0        3m23s\nmy-release-minio-2                               1/1    Running   0        3m23s\nmy-release-minio-3                               1/1    Running   0        3m23s\nmy-release-pulsar-autorecovery-86f5dbdf77-lchpc  1/1    Running   0        3m24s\nmy-release-pulsar-bookkeeper-0                   1/1    Running   0        3m23s\nmy-release-pulsar-bookkeeper-1                   1/1    Running   0        98s\nmy-release-pulsar-broker-556ff89d4c-2m29m        1/1    Running   0        3m23s\nmy-release-pulsar-proxy-6fbd75db75-nhg4v         1/1    Running   0        3m23s\nmy-release-pulsar-zookeeper-0                    1/1    Running   0        3m23s\nmy-release-pulsar-zookeeper-metadata-98zbr       0/1   Completed  0        3m24s\n","$ kubectl get pod my-release-milvus-proxy-6bd7f5587-ds2xv --template\n='{{(index (index .spec.containers 0).ports 0).containerPort}}{{\"\\n\"}}'\n19530\n","$ kubectl port-forward service/my-release-milvus 27017:19530\nForwarding from 127.0.0.1:27017 -> 19530\n","$ kubectl port-forward --address 0.0.0.0 service/my-release-milvus 27017:19530\nForwarding from 0.0.0.0:27017 -> 19530\n","$ helm template my-release milvus/milvus > milvus_manifest.yaml\n","$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/requirements.txt\n$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/save_image.py\n","$ pip3 install -r requirements.txt\n$ python3 save_image.py --manifest milvus_manifest.yaml\n","$ for image in $(find . 
-type f -name \"*.tar.gz\") ; do gunzip -c $image | docker load; done\n","$ kubectl apply -f milvus_manifest.yaml\n","$ helm repo update\n$ helm upgrade my-release zilliztech/milvus\n","$ helm uninstall my-release\n"],"headingContent":"Run Milvus in Kubernetes with Helm","anchorList":[{"label":"Exécuter Milvus dans Kubernetes avec Helm","href":"Run-Milvus-in-Kubernetes-with-Helm","type":1,"isActive":false},{"label":"Vue d'ensemble","href":"Overview","type":2,"isActive":false},{"label":"Conditions préalables","href":"Prerequisites","type":2,"isActive":false},{"label":"Installation de Milvus Helm Chart","href":"Install-Milvus-Helm-Chart","type":2,"isActive":false},{"label":"Installation en ligne","href":"Online-install","type":2,"isActive":false},{"label":"Installation hors ligne","href":"Offline-install","type":2,"isActive":false},{"label":"Mise à niveau du cluster Milvus en cours d'exécution","href":"Upgrade-running-Milvus-cluster","type":2,"isActive":false},{"label":"Désinstaller Milvus","href":"Uninstall-Milvus","type":2,"isActive":false},{"label":"Prochaines étapes","href":"Whats-next","type":2,"isActive":false}]}
    \ No newline at end of file
    +{"codeList":["$ kubectl get sc\n\nNAME                  PROVISIONER                  RECLAIMPOLICY    VOLUMEBIINDINGMODE    ALLOWVOLUMEEXPANSION     AGE\nstandard (default)    k8s.io/minikube-hostpath     Delete           Immediate             false \n","$ helm repo add milvus https://zilliztech.github.io/milvus-helm/\n","helm repo add zilliztech https://zilliztech.github.io/milvus-helm/\nhelm repo update\n# upgrade existing helm release\nhelm upgrade my-release zilliztech/milvus\n","$ helm repo update\n","$ helm install my-release milvus/milvus\n","$ kubectl get pods\n","NAME                                             READY  STATUS   RESTARTS  AGE\nmy-release-etcd-0                                1/1    Running   0        3m23s\nmy-release-etcd-1                                1/1    Running   0        3m23s\nmy-release-etcd-2                                1/1    Running   0        3m23s\nmy-release-milvus-datanode-68cb87dcbd-4khpm      1/1    Running   0        3m23s\nmy-release-milvus-indexnode-5c5f7b5bd9-l8hjg     1/1    Running   0        3m24s\nmy-release-milvus-mixcoord-7fb9488465-dmbbj      1/1    Running   0        3m23s\nmy-release-milvus-proxy-6bd7f5587-ds2xv          1/1    Running   0        3m24s\nmy-release-milvus-querynode-5cd8fff495-k6gtg     1/1    Running   0        3m24s\nmy-release-minio-0                               1/1    Running   0        3m23s\nmy-release-minio-1                               1/1    Running   0        3m23s\nmy-release-minio-2                               1/1    Running   0        3m23s\nmy-release-minio-3                               1/1    Running   0        3m23s\nmy-release-pulsar-autorecovery-86f5dbdf77-lchpc  1/1    Running   0        3m24s\nmy-release-pulsar-bookkeeper-0                   1/1    Running   0        3m23s\nmy-release-pulsar-bookkeeper-1                   1/1    Running   0        98s\nmy-release-pulsar-broker-556ff89d4c-2m29m        1/1    Running   0        3m23s\nmy-release-pulsar-proxy-6fbd75db75-nhg4v         1/1    Running   0        3m23s\nmy-release-pulsar-zookeeper-0                    1/1    Running   0        3m23s\nmy-release-pulsar-zookeeper-metadata-98zbr       0/1   Completed  0        3m24s\n","$ kubectl get pod my-release-milvus-proxy-6bd7f5587-ds2xv --template\n='{{(index (index .spec.containers 0).ports 0).containerPort}}{{\"\\n\"}}'\n19530\n","$ kubectl port-forward service/my-release-milvus 27017:19530\nForwarding from 127.0.0.1:27017 -> 19530\n","$ kubectl port-forward --address 0.0.0.0 service/my-release-milvus 27017:19530\nForwarding from 0.0.0.0:27017 -> 19530\n","$ helm template my-release milvus/milvus > milvus_manifest.yaml\n","$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/requirements.txt\n$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/save_image.py\n","$ pip3 install -r requirements.txt\n$ python3 save_image.py --manifest milvus_manifest.yaml\n","$ for image in $(find . 
-type f -name \"*.tar.gz\") ; do gunzip -c $image | docker load; done\n","$ kubectl apply -f milvus_manifest.yaml\n","$ helm repo update\n$ helm upgrade my-release zilliztech/milvus\n","$ helm uninstall my-release\n"],"headingContent":"Run Milvus in Kubernetes with Helm","anchorList":[{"label":"Exécuter Milvus dans Kubernetes avec Helm","href":"Run-Milvus-in-Kubernetes-with-Helm","type":1,"isActive":false},{"label":"Vue d'ensemble","href":"Overview","type":2,"isActive":false},{"label":"Conditions préalables","href":"Prerequisites","type":2,"isActive":false},{"label":"Installation de Milvus Helm Chart","href":"Install-Milvus-Helm-Chart","type":2,"isActive":false},{"label":"Installation en ligne","href":"Online-install","type":2,"isActive":false},{"label":"Installation hors ligne","href":"Offline-install","type":2,"isActive":false},{"label":"Mise à niveau du cluster Milvus en cours d'exécution","href":"Upgrade-running-Milvus-cluster","type":2,"isActive":false},{"label":"Désinstaller Milvus","href":"Uninstall-Milvus","type":2,"isActive":false},{"label":"Prochaines étapes","href":"Whats-next","type":2,"isActive":false}]}
    \ No newline at end of file
    diff --git a/localization/v2.5.x/site/fr/getstarted/run-milvus-k8s/install_cluster-helm.md b/localization/v2.5.x/site/fr/getstarted/run-milvus-k8s/install_cluster-helm.md
    index 7f303e44b..d2944cee1 100644
    --- a/localization/v2.5.x/site/fr/getstarted/run-milvus-k8s/install_cluster-helm.md
    +++ b/localization/v2.5.x/site/fr/getstarted/run-milvus-k8s/install_cluster-helm.md
    @@ -83,11 +83,11 @@ NAME                  PROVISIONER                  RECLAIMPOLICY    VOLUMEBIINDI
             >
           
         

    Avant d'installer Milvus Helm Charts, vous devez ajouter le référentiel Milvus Helm.

    -
    $ helm repo add milvus https://github.com/zilliztech/milvus-helm
    +
    $ helm repo add milvus https://zilliztech.github.io/milvus-helm/
     

    Le dépôt Milvus Helm Charts à l'adresse https://github.com/milvus-io/milvus-helm a été archivé et vous pouvez obtenir d'autres mises à jour à l'adresse https://github.com/zilliztech/milvus-helm comme suit :

    -
    helm repo add zilliztech https://github.com/zilliztech/milvus-helm
    +
    helm repo add zilliztech https://zilliztech.github.io/milvus-helm/
     helm repo update
     # upgrade existing helm release
     helm upgrade my-release zilliztech/milvus
    diff --git a/localization/v2.5.x/site/fr/home/home.md b/localization/v2.5.x/site/fr/home/home.md
    index 8bd5fe13b..3023b74bb 100644
    --- a/localization/v2.5.x/site/fr/home/home.md
    +++ b/localization/v2.5.x/site/fr/home/home.md
    @@ -114,7 +114,7 @@ Vous découvrirez ici ce qu'est Milvus et comment installer, utiliser et déploy
         

    Nov 2024 - Sortie de Milvus 2.5.0

    • Ajout d'indications sur la manière d'effectuer une recherche en texte intégral.
    • -
    • Ajout d'indications sur la façon d'effectuer une recherche par mot-clé.
    • +
    • Ajout d'indications sur la manière d'effectuer une correspondance de texte.
    • Ajout d'indications sur la manière d'activer les valeurs nullables et les valeurs par défaut.
    • Ajout de descriptions des analyseurs.
    • Ajout de descriptions des index bitmap.
    • diff --git a/localization/v2.5.x/site/fr/menuStructure/fr.json b/localization/v2.5.x/site/fr/menuStructure/fr.json index 766ca7af8..a9a9cc0c4 100644 --- a/localization/v2.5.x/site/fr/menuStructure/fr.json +++ b/localization/v2.5.x/site/fr/menuStructure/fr.json @@ -286,6 +286,18 @@ "order": 8, "children": [] }, + { + "label": "Types métriques", + "id": "metric.md", + "order": 9, + "children": [] + }, + { + "label": "Niveau de cohérence", + "id": "consistency.md", + "order": 10, + "children": [] + }, { "label": "Réplique en mémoire", "id": "replica.md", @@ -602,31 +614,39 @@ ] }, { - "label": "Gérer les index", + "label": "Index", "id": "manage_indexes", "order": 4, "isMenu": true, "children": [ { - "label": "Champs vectoriels d'index", + "label": "Indices vectoriels", "id": "index-vector-fields.md", "order": 0, "children": [] }, { - "label": "Champs scalaires indexés", - "id": "index-scalar-fields.md", + "label": "Index scalaires", + "id": "scalar-index", "order": 1, - "children": [] - }, - { - "label": "Index BITMAP", - "id": "bitmap.md", - "order": 2, - "children": [] + "isMenu": true, + "children": [ + { + "label": "Champs scalaires indexés", + "id": "index-scalar-fields.md", + "order": 1, + "children": [] + }, + { + "label": "Index Bitmap", + "id": "bitmap.md", + "order": 2, + "children": [] + } + ] }, { - "label": "Index avec GPU", + "label": "Index compatibles avec le GPU", "id": "index-with-gpu.md", "order": 3, "children": [] @@ -682,7 +702,7 @@ "children": [] }, { - "label": "Correspondance des mots-clés", + "label": "Correspondance des textes", "id": "keyword-match.md", "order": 7, "children": [] @@ -699,30 +719,6 @@ "order": 9, "children": [] }, - { - "label": "Utiliser mmap", - "id": "mmap.md", - "order": 10, - "children": [] - }, - { - "label": "Regroupement Compaction", - "id": "clustering-compaction.md", - "order": 11, - "children": [] - }, - { - "label": "Niveau de cohérence", - "id": "consistency.md", - "order": 12, - "children": [] - }, - { - "label": "Types métriques", - "id": "metric.md", - "order": 13, - "children": [] - }, { "label": "Filtrage des métadonnées", "id": "boolean.md", @@ -736,26 +732,6 @@ "children": [] } ] - }, - { - "label": "Importation de données", - "id": "data_import", - "order": 6, - "isMenu": true, - "children": [ - { - "label": "Préparer les données sources", - "id": "prepare-source-data.md", - "order": 0, - "children": [] - }, - { - "label": "Importer des données", - "id": "import-data.md", - "order": 1, - "children": [] - } - ] } ] }, @@ -897,11 +873,31 @@ } ] }, + { + "label": "Importation de données", + "id": "data_import", + "order": 5, + "isMenu": true, + "children": [ + { + "label": "Préparer les données sources", + "id": "prepare-source-data.md", + "order": 0, + "children": [] + }, + { + "label": "Importer des données", + "id": "import-data.md", + "order": 1, + "children": [] + } + ] + }, { "label": "Milvus Migration", "id": "milvus_migration", "isMenu": true, - "order": 5, + "order": 6, "children": [ { "label": "Vue d'ensemble", @@ -1321,10 +1317,30 @@ } ] }, + { + "label": "Optimisation du stockage", + "id": "storage_optimization", + "order": 10, + "isMenu": true, + "children": [ + { + "label": "Utiliser mmap", + "id": "mmap.md", + "order": 0, + "children": [] + }, + { + "label": "Regroupement Compaction", + "id": "clustering-compaction.md", + "order": 1, + "children": [] + } + ] + }, { "label": "Sécurité", "id": "security", - "order": 10, + "order": 11, "isMenu": true, "children": [ { @@ -1715,7 +1731,7 @@ "children": [] }, { - 
"label": "PrivéGPT", + "label": "PrivateGPT", "id": "use_milvus_in_private_gpt.md", "order": 8, "children": [] diff --git a/localization/v2.5.x/site/fr/release_notes.md b/localization/v2.5.x/site/fr/release_notes.md index c6b2a9087..2693c2c45 100644 --- a/localization/v2.5.x/site/fr/release_notes.md +++ b/localization/v2.5.x/site/fr/release_notes.md @@ -49,8 +49,8 @@ title: Notes de mise à jour

      Pour plus d'informations, reportez-vous à la section Recherche en texte intégral.

      Interface Web de gestion des clusters (Beta)

      Pour mieux prendre en charge les données massives et les fonctionnalités riches, la conception sophistiquée de Milvus inclut diverses dépendances, de nombreux rôles de nœuds, des structures de données complexes, etc. Ces aspects peuvent poser des problèmes d'utilisation et de maintenance.

      Milvus 2.5 introduit une interface Web intégrée de gestion des clusters, qui réduit les difficultés de maintenance du système en visualisant les informations complexes de l'environnement d'exécution de Milvus. Il s'agit notamment des détails des bases de données et des collections, des segments, des canaux, des dépendances, de l'état de santé des nœuds, des informations sur les tâches, des requêtes lentes, etc.

      -

      Correspondance de texte

      Milvus 2.5 exploite les analyseurs et l'indexation de Tantivy pour le prétraitement du texte et la construction de l'index, prenant en charge la correspondance précise en langage naturel des données textuelles basées sur des termes spécifiques. Cette fonctionnalité est principalement utilisée pour la recherche filtrée afin de satisfaire des conditions spécifiques et peut incorporer le filtrage scalaire pour affiner les résultats des requêtes, permettant des recherches de similarité dans les vecteurs qui répondent aux critères scalaires.

      -

      Pour plus d'informations, reportez-vous à la section Correspondance par mot-clé.

      +

      Correspondance de texte

      Milvus 2.5 exploite les analyseurs et l'indexation de Tantivy pour le prétraitement du texte et la construction de l'index, prenant en charge la correspondance précise en langage naturel des données textuelles basées sur des termes spécifiques. Cette fonction est principalement utilisée pour la recherche filtrée afin de satisfaire des conditions spécifiques et peut incorporer le filtrage scalaire pour affiner les résultats des requêtes, permettant des recherches de similarité dans les vecteurs qui répondent aux critères scalaires.

      +

      Pour plus d'informations, reportez-vous à la section Correspondance de texte.

      Index Bitmap

      Un nouvel index de données scalaires a été ajouté à la famille Milvus. L'index BitMap utilise un tableau de bits, d'une longueur égale au nombre de lignes, pour représenter l'existence de valeurs et accélérer les recherches.

      Les index Bitmap sont traditionnellement efficaces pour les champs à faible cardinalité, qui présentent un nombre modeste de valeurs distinctes - par exemple, une colonne contenant des informations sur le sexe avec seulement deux valeurs possibles : homme et femme.

      Pour plus de détails, voir Index bitmap.

      diff --git a/localization/v2.5.x/site/fr/tutorials/hybrid_search_with_milvus.md b/localization/v2.5.x/site/fr/tutorials/hybrid_search_with_milvus.md index 4fd666790..6417f70a0 100644 --- a/localization/v2.5.x/site/fr/tutorials/hybrid_search_with_milvus.md +++ b/localization/v2.5.x/site/fr/tutorials/hybrid_search_with_milvus.md @@ -25,8 +25,8 @@ title: Recherche hybride avec Milvus

      Milvus prend en charge les méthodes de recherche denses, éparses et hybrides :

      • Recherche dense : Utilise le contexte sémantique pour comprendre le sens des requêtes.
      • -
      • Récupération éparse : Elle met l'accent sur la correspondance des mots-clés pour trouver des résultats basés sur des termes spécifiques, ce qui équivaut à une recherche en texte intégral.
      • -
      • Recherche hybride : Combine les approches denses et éparses, en capturant le contexte complet et les mots-clés spécifiques pour obtenir des résultats de recherche complets.
      • +
      • Récupération éparse : Elle met l'accent sur la correspondance des textes pour trouver des résultats basés sur des termes spécifiques, ce qui équivaut à une recherche en texte intégral.
      • +
      • Recherche hybride : Combine les approches denses et éparses, en capturant le contexte complet et les mots-clés spécifiques pour des résultats de recherche complets.

      En intégrant ces méthodes, la recherche hybride Milvus équilibre les similitudes sémantiques et lexicales, améliorant ainsi la pertinence globale des résultats de la recherche. Ce bloc-notes décrit le processus de mise en place et d'utilisation de ces stratégies de recherche, en soulignant leur efficacité dans divers scénarios de recherche.

      Dépendances et environnement

      $ pip install --upgrade pymilvus "pymilvus[model]"
      diff --git a/localization/v2.5.x/site/fr/userGuide/collections/manage-collections.md b/localization/v2.5.x/site/fr/userGuide/collections/manage-collections.md
      index a3b48f5c3..cec5d3111 100644
      --- a/localization/v2.5.x/site/fr/userGuide/collections/manage-collections.md
      +++ b/localization/v2.5.x/site/fr/userGuide/collections/manage-collections.md
      @@ -161,7 +161,7 @@ title: Collection expliquée
       
    • Itérateur de recherche

    • Requête

    • Recherche plein texte

    • -
    • Correspondance de mots-clés

    • +
    • Correspondance de texte

    En outre, Milvus fournit également des améliorations destinées à accroître les performances et l'efficacité de la recherche. Elles sont désactivées par défaut et vous pouvez les activer et les utiliser en fonction de vos besoins. Ces améliorations sont les suivantes

      diff --git a/localization/v2.5.x/site/fr/userGuide/manage-indexes/index-with-gpu.md b/localization/v2.5.x/site/fr/userGuide/manage-indexes/index-with-gpu.md index 43d502151..7c6d24f1d 100644 --- a/localization/v2.5.x/site/fr/userGuide/manage-indexes/index-with-gpu.md +++ b/localization/v2.5.x/site/fr/userGuide/manage-indexes/index-with-gpu.md @@ -203,9 +203,9 @@ collection.search(

      Lorsque vous utilisez des index GPU, vous devez tenir compte de certaines contraintes :

        -
      • Pour GPU_IVF_FLAT, la valeur maximale de la limite est 256.

      • +
      • Pour GPU_IVF_FLAT, la valeur maximale de la limite est de 1024.

      • Pour GPU_IVF_PQ et GPU_CAGRA, la valeur maximale de la limite est de 1024.

      • -
      • Bien qu'il n'y ait pas de limite définie pour GPU_BRUTE_FORCE, il est recommandé de ne pas dépasser 4096 pour éviter les problèmes de performance.

      • +
      • Bien qu'il n'y ait pas de limite fixée pour GPU_BRUTE_FORCE, il est recommandé de ne pas dépasser 4096 pour éviter les problèmes de performance.

      • Actuellement, les index GPU ne prennent pas en charge la distance COSINE. Si la distance COSINE est requise, les données doivent d'abord être normalisées, puis la distance du produit intérieur (IP) peut être utilisée comme substitut.

      • La protection OOM du chargement pour les index GPU n'est pas entièrement prise en charge, une trop grande quantité de données peut entraîner le blocage du QueryNode.

      • Les index GPU ne prennent pas en charge les fonctions de recherche telles que la recherche par plage et la recherche par groupement.
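    À titre d'illustration seulement, voici une esquisse minimale (le nom de collection my_gpu_collection et la dimension 128 sont hypothétiques) qui respecte les contraintes ci-dessus : le vecteur de requête est normalisé afin d'utiliser la métrique IP à la place de COSINE, et la limite reste inférieure ou égale à 1024.

    import numpy as np
    from pymilvus import MilvusClient

    client = MilvusClient(uri="http://localhost:19530")

    # "my_gpu_collection" and the 128-dim vector are hypothetical placeholders.
    query_vector = np.random.random(128).astype(np.float32)
    query_vector /= np.linalg.norm(query_vector)   # normalize so that IP can stand in for COSINE

    res = client.search(
        collection_name="my_gpu_collection",       # hypothetical collection indexed with a GPU index
        data=[query_vector.tolist()],
        limit=1024,                                 # stays within the 1024 limit of GPU_IVF_FLAT / GPU_IVF_PQ / GPU_CAGRA
        search_params={"metric_type": "IP", "params": {"nprobe": 10}},
    )
    print(res)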

      • diff --git a/localization/v2.5.x/site/fr/userGuide/schema/analyzer/analyzer-overview.md b/localization/v2.5.x/site/fr/userGuide/schema/analyzer/analyzer-overview.md index 76459e8e8..ef9df76a5 100644 --- a/localization/v2.5.x/site/fr/userGuide/schema/analyzer/analyzer-overview.md +++ b/localization/v2.5.x/site/fr/userGuide/schema/analyzer/analyzer-overview.md @@ -24,12 +24,12 @@ summary: >- >

        Dans le traitement de texte, un analyseur est un composant crucial qui convertit le texte brut en un format structuré et consultable. Chaque analyseur se compose généralement de deux éléments principaux : le tokéniseur et le filtre. Ensemble, ils transforment le texte d'entrée en tokens, affinent ces tokens et les préparent pour une indexation et une recherche efficaces.

        -

        Dans Milvus, les analyseurs sont configurés lors de la création de la collection lorsque vous ajoutez des champs VARCHAR au schéma de la collection. Les jetons produits par un analyseur peuvent être utilisés pour construire un index pour la recherche par mot-clé ou convertis en encastrements épars pour la recherche en texte intégral. Pour plus d'informations, reportez-vous à la section Correspondance par mot-clé ou recherche en texte intégral.

        +

        Dans Milvus, les analyseurs sont configurés lors de la création de la collection lorsque vous ajoutez des champs VARCHAR au schéma de la collection. Les jetons produits par un analyseur peuvent être utilisés pour construire un index pour la mise en correspondance de texte ou convertis en encastrements épars pour la recherche en texte intégral. Pour plus d'informations, reportez-vous à la section Correspondance de texte ou Recherche en texte intégral.

        L'utilisation d'analyseurs peut avoir un impact sur les performances.

        • Recherche en texte intégral : Pour la recherche en texte intégral, les canaux DataNode et QueryNode consomment les données plus lentement car ils doivent attendre la fin de la tokenisation. Par conséquent, les données nouvellement ingérées mettent plus de temps à être disponibles pour la recherche.

        • -
        • Correspondance par mot-clé : Pour la correspondance par mot-clé, la création de l'index est également plus lente car la tokenisation doit être terminée avant qu'un index puisse être construit.

        • +
        • Correspondance de texte : Pour la correspondance de texte, la création d'index est également plus lente car la tokenisation doit être terminée avant qu'un index puisse être construit.

        Anatomie d'un analyseur

    Analyseur intégré

    Les analyseurs intégrés dans Milvus sont préconfigurés avec des tokenizers et des filtres spécifiques, ce qui vous permet de les utiliser immédiatement sans avoir à définir ces composants vous-même. Chaque analyseur intégré sert de modèle et comprend un tokenizer et des filtres prédéfinis, avec des paramètres facultatifs pour la personnalisation.

    -

    Par exemple, pour utiliser l'analyseur intégré standard, il suffit de spécifier son nom standard comme type et d'inclure éventuellement des configurations supplémentaires spécifiques à ce type d'analyseur, comme stop_words.

    +

    Par exemple, pour utiliser l'analyseur intégré standard, il suffit de spécifier son nom standard comme type et d'inclure éventuellement des configurations supplémentaires spécifiques à ce type d'analyseur, telles que stop_words.

    analyzer_params = {​
         "type": "standard", # Uses the standard built-in analyzer​
         "stop_words": ["a", "an", "for"] # Defines a list of common words (stop words) to exclude from tokenization​
    @@ -121,7 +121,7 @@ summary: >-
     
    • Filtres intégrés: Préconfigurés par Milvus, ils ne nécessitent qu'une configuration minimale. Vous pouvez utiliser ces filtres prêts à l'emploi en spécifiant leur nom. Les filtres ci-dessous sont intégrés pour une utilisation directe.

        -
      • lowercase: Convertit le texte en minuscules pour garantir une correspondance insensible à la casse. Pour plus de détails, voir Minuscules.

      • +
      • lowercase: Convertit le texte en minuscules, ce qui garantit une correspondance insensible à la casse. Pour plus de détails, voir Minuscules.

      • asciifolding: Convertit les caractères non ASCII en équivalents ASCII, ce qui simplifie la gestion des textes multilingues. Pour plus d'informations, reportez-vous à la section Pliage ASCII.

      • alphanumonly: Conserve uniquement les caractères alphanumériques en supprimant les autres. Pour plus de détails, voir Alphanumonly.

      • cnalphanumonly: Supprime les jetons contenant des caractères autres que des caractères chinois, des lettres anglaises ou des chiffres. Pour plus de détails, voir Cnalphanumonly.
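    À titre d'illustration seulement, l'esquisse suivante (le nom de champ "text" est hypothétique) montre comment combiner le tokenizer standard avec plusieurs de ces filtres intégrés via analyzer_params lors de la définition du schéma :

    from pymilvus import MilvusClient, DataType

    schema = MilvusClient.create_schema()
    analyzer_params = {
        "tokenizer": "standard",                                   # standard tokenizer
        "filter": ["lowercase", "asciifolding", "alphanumonly"],   # built-in filters, applied in order
    }
    schema.add_field(
        field_name="text",              # hypothetical VARCHAR field
        datatype=DataType.VARCHAR,
        max_length=1000,
        enable_analyzer=True,
        analyzer_params=analyzer_params,
    )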

      • diff --git a/localization/v2.5.x/site/fr/userGuide/schema/sparse_vector.json b/localization/v2.5.x/site/fr/userGuide/schema/sparse_vector.json index 5b7df020f..34715b9f2 100644 --- a/localization/v2.5.x/site/fr/userGuide/schema/sparse_vector.json +++ b/localization/v2.5.x/site/fr/userGuide/schema/sparse_vector.json @@ -1 +1 @@ -{"codeList":["from scipy.sparse import csr_matrix​\n​\n# Create a sparse matrix​\nrow = [0, 0, 1, 2, 2, 2]​\ncol = [0, 2, 2, 0, 1, 2]​\ndata = [1, 2, 3, 4, 5, 6]​\nsparse_matrix = csr_matrix((data, (row, col)), shape=(3, 3))​\n​\n# Represent sparse vector using the sparse matrix​\nsparse_vector = sparse_matrix.getrow(0)​\n\n","# Represent sparse vector using a dictionary​\nsparse_vector = [{1: 0.5, 100: 0.3, 500: 0.8, 1024: 0.2, 5000: 0.6}]​\n\n","SortedMap sparseVector = new TreeMap<>();​\nsparseVector.put(1L, 0.5f);​\nsparseVector.put(100L, 0.3f);​\nsparseVector.put(500L, 0.8f);​\nsparseVector.put(1024L, 0.2f);​\nsparseVector.put(5000L, 0.6f);​\n\n","# Represent sparse vector using a list of tuples​\nsparse_vector = [[(1, 0.5), (100, 0.3), (500, 0.8), (1024, 0.2), (5000, 0.6)]]​\n\n","from pymilvus import MilvusClient, DataType​\n​\nclient = MilvusClient(uri=\"http://localhost:19530\")​\n​\nclient.drop_collection(collection_name=\"my_sparse_collection\")​\n​\nschema = client.create_schema(​\n auto_id=True,​\n enable_dynamic_fields=True,​\n)​\n​\nschema.add_field(field_name=\"pk\", datatype=DataType.VARCHAR, is_primary=True, max_length=100)​\nschema.add_field(field_name=\"sparse_vector\", datatype=DataType.SPARSE_FLOAT_VECTOR)​\n\n","import io.milvus.v2.client.ConnectConfig;​\nimport io.milvus.v2.client.MilvusClientV2;​\n​\nimport io.milvus.v2.common.DataType;​\nimport io.milvus.v2.service.collection.request.AddFieldReq;​\nimport io.milvus.v2.service.collection.request.CreateCollectionReq;​\n​\nMilvusClientV2 client = new MilvusClientV2(ConnectConfig.builder()​\n .uri(\"http://localhost:19530\")​\n .build());​\n ​\nCreateCollectionReq.CollectionSchema schema = client.createSchema();​\nschema.setEnableDynamicField(true);​\nschema.addField(AddFieldReq.builder()​\n .fieldName(\"pk\")​\n .dataType(DataType.VarChar)​\n .isPrimaryKey(true)​\n .autoID(true)​\n .maxLength(100)​\n .build());​\n​\nschema.addField(AddFieldReq.builder()​\n .fieldName(\"sparse_vector\")​\n .dataType(DataType.SparseFloatVector)​\n .build());​\n\n","import { DataType } from \"@zilliz/milvus2-sdk-node\";​\n​\nconst schema = [​\n {​\n name: \"metadata\",​\n data_type: DataType.JSON,​\n },​\n {​\n name: \"pk\",​\n data_type: DataType.Int64,​\n is_primary_key: true,​\n },​\n {​\n name: \"sparse_vector\",​\n data_type: DataType.SparseFloatVector,​\n }​\n];​\n​\n\n","export primaryField='{​\n \"fieldName\": \"pk\",​\n \"dataType\": \"VarChar\",​\n \"isPrimary\": true,​\n \"elementTypeParams\": {​\n \"max_length\": 100​\n }​\n}'​\n​\nexport vectorField='{​\n \"fieldName\": \"sparse_vector\",​\n \"dataType\": \"SparseFloatVector\"​\n}'​\n​\nexport schema=\"{​\n \\\"autoID\\\": true,​\n \\\"fields\\\": [​\n $primaryField,​\n $vectorField​\n ]​\n}\"​\n\n","index_params = client.prepare_index_params()​\n​\nindex_params.add_index(​\n field_name=\"sparse_vector\",​\n index_name=\"sparse_inverted_index\",​\n index_type=\"SPARSE_INVERTED_INDEX\",​\n metric_type=\"IP\",​\n params={\"drop_ratio_build\": 0.2},​\n)​\n\n","import io.milvus.v2.common.IndexParam;​\nimport java.util.*;​\n​\nList indexes = new ArrayList<>();​\nMap extraParams = new HashMap<>();​\nextraParams.put(\"drop_ratio_build\", 
0.2);​\nindexes.add(IndexParam.builder()​\n .fieldName(\"sparse_vector\")​\n .indexName(\"sparse_inverted_index\")​\n .indexType(IndexParam.IndexType.SPARSE_INVERTED_INDEX)​\n .metricType(IndexParam.MetricType.IP)​\n .extraParams(extraParams)​\n .build());​\n\n","const indexParams = await client.createIndex({​\n index_name: 'sparse_inverted_index',​\n field_name: 'sparse_vector',​\n metric_type: MetricType.IP,​\n index_type: IndexType.SPARSE_WAND,​\n params: {​\n drop_ratio_build: 0.2,​\n },​\n});​\n\n","export indexParams='[​\n {​\n \"fieldName\": \"sparse_vector\",​\n \"metricType\": \"IP\",​\n \"indexName\": \"sparse_inverted_index\",​\n \"indexType\": \"SPARSE_INVERTED_INDEX\",​\n \"params\":{\"drop_ratio_build\": 0.2}​\n }​\n ]'​\n\n","client.create_collection(​\n collection_name=\"my_sparse_collection\",​\n schema=schema,​\n index_params=index_params​\n)​\n\n","import io.milvus.v2.client.ConnectConfig;​\nimport io.milvus.v2.client.MilvusClientV2;​\n​\nMilvusClientV2 client = new MilvusClientV2(ConnectConfig.builder()​\n .uri(\"http://localhost:19530\")​\n .build());​\n ​\nCreateCollectionReq requestCreate = CreateCollectionReq.builder()​\n .collectionName(\"my_sparse_collection\")​\n .collectionSchema(schema)​\n .indexParams(indexes)​\n .build();​\nclient.createCollection(requestCreate);​\n\n","import { MilvusClient } from \"@zilliz/milvus2-sdk-node\";​\n​\nconst client = new MilvusClient({​\n address: 'http://localhost:19530'​\n});​\n​\nawait client.createCollection({​\n collection_name: 'my_sparse_collection',​\n schema: schema,​\n index_params: indexParams​\n});​\n\n","curl --request POST \\​\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/collections/create\" \\​\n--header \"Authorization: Bearer ${TOKEN}\" \\​\n--header \"Content-Type: application/json\" \\​\n-d \"{​\n \\\"collectionName\\\": \\\"my_sparse_collection\\\",​\n \\\"schema\\\": $schema,​\n \\\"indexParams\\\": $indexParams​\n}\"​\n\n","sparse_vectors = [​\n {\"sparse_vector\": {1: 0.5, 100: 0.3, 500: 0.8}},​\n {\"sparse_vector\": {10: 0.1, 200: 0.7, 1000: 0.9}},​\n]​\n​\nclient.insert(​\n collection_name=\"my_sparse_collection\",​\n data=sparse_vectors​\n)​\n\n","import com.google.gson.Gson;​\nimport com.google.gson.JsonObject;​\nimport io.milvus.v2.service.vector.request.InsertReq;​\nimport io.milvus.v2.service.vector.response.InsertResp;​\n​\nList rows = new ArrayList<>();​\nGson gson = new Gson();​\n{​\n JsonObject row = new JsonObject();​\n SortedMap sparse = new TreeMap<>();​\n sparse.put(1L, 0.5f);​\n sparse.put(100L, 0.3f);​\n sparse.put(500L, 0.8f);​\n row.add(\"sparse_vector\", gson.toJsonTree(sparse));​\n rows.add(row);​\n}​\n{​\n JsonObject row = new JsonObject();​\n SortedMap sparse = new TreeMap<>();​\n sparse.put(10L, 0.1f);​\n sparse.put(200L, 0.7f);​\n sparse.put(1000L, 0.9f);​\n row.add(\"sparse_vector\", gson.toJsonTree(sparse));​\n rows.add(row);​\n}​\n​\nInsertResp insertR = client.insert(InsertReq.builder()​\n .collectionName(\"my_sparse_collection\")​\n .data(rows)​\n .build());​\n\n","const data = [​\n { sparse_vector: { \"1\": 0.5, \"100\": 0.3, \"500\": 0.8 } },​\n { sparse_vector: { \"10\": 0.1, \"200\": 0.7, \"1000\": 0.9 } },​\n];​\nclient.insert({​\n collection_name: \"my_sparse_collection\",​\n data: data,​\n});​\n​\n\n","curl --request POST \\​\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/entities/insert\" \\​\n--header \"Authorization: Bearer ${TOKEN}\" \\​\n--header \"Content-Type: application/json\" \\​\n-d '{​\n \"data\": [​\n {\"sparse_vector\": {\"1\": 0.5, \"100\": 0.3, \"500\": 0.8}},​\n 
{\"sparse_vector\": {\"10\": 0.1, \"200\": 0.7, \"1000\": 0.9}} ​\n ],​\n \"collectionName\": \"my_sparse_collection\"​\n}'​\n​\n## {\"code\":0,\"cost\":0,\"data\":{\"insertCount\":2,\"insertIds\":[\"453577185629572534\",\"453577185629572535\"]}}​\n\n","# Prepare search parameters​\nsearch_params = {​\n \"params\": {\"drop_ratio_search\": 0.2}, # Additional optional search parameters​\n}​\n​\n# Prepare the query vector​\nquery_vector = [{1: 0.2, 50: 0.4, 1000: 0.7}]​\n\n","res = client.search(​\n collection_name=\"my_sparse_collection\",​\n data=query_vector,​\n limit=3,​\n output_fields=[\"pk\"],​\n search_params=search_params,​\n)​\n​\nprint(res)​\n​\n# Output​\n# data: [\"[{'id': '453718927992172266', 'distance': 0.6299999952316284, 'entity': {'pk': '453718927992172266'}}, {'id': '453718927992172265', 'distance': 0.10000000149011612, 'entity': {'pk': '453718927992172265'}}]\"]​\n\n","import io.milvus.v2.service.vector.request.SearchReq;​\nimport io.milvus.v2.service.vector.request.data.SparseFloatVec;​\nimport io.milvus.v2.service.vector.response.SearchResp;​\n​\nMap searchParams = new HashMap<>();​\nsearchParams.put(\"drop_ratio_search\", 0.2);​\n​\nSortedMap sparse = new TreeMap<>();​\nsparse.put(10L, 0.1f);​\nsparse.put(200L, 0.7f);​\nsparse.put(1000L, 0.9f);​\n​\nSparseFloatVec queryVector = new SparseFloatVec(sparse);​\n​\nSearchResp searchR = client.search(SearchReq.builder()​\n .collectionName(\"my_sparse_collection\")​\n .data(Collections.singletonList(queryVector))​\n .annsField(\"sparse_vector\")​\n .searchParams(searchParams)​\n .topK(3)​\n .outputFields(Collections.singletonList(\"pk\"))​\n .build());​\n ​\nSystem.out.println(searchR.getSearchResults());​\n​\n// Output​\n//​\n// [[SearchResp.SearchResult(entity={pk=453444327741536759}, score=1.31, id=453444327741536759), SearchResp.SearchResult(entity={pk=453444327741536756}, score=1.31, id=453444327741536756), SearchResp.SearchResult(entity={pk=453444327741536753}, score=1.31, id=453444327741536753)]]​\n\n","client.search({​\n collection_name: 'my_sparse_collection',​\n data: {1: 0.2, 50: 0.4, 1000: 0.7},​\n limit: 3,​\n output_fields: ['pk'],​\n params: {​\n drop_ratio_search: 0.2​\n }​\n});​\n\n","curl --request POST \\​\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/entities/search\" \\​\n--header \"Authorization: Bearer ${TOKEN}\" \\​\n--header \"Content-Type: application/json\" \\​\n-d '{​\n \"collectionName\": \"my_sparse_collection\",​\n \"data\": [​\n {\"1\": 0.2, \"50\": 0.4, \"1000\": 0.7}​\n ],​\n \"annsField\": \"sparse_vector\",​\n \"limit\": 3,​\n \"searchParams\":{​\n \"params\":{\"drop_ratio_search\": 0.2}​\n },​\n \"outputFields\": [\"pk\"]​\n}'​\n​\n## {\"code\":0,\"cost\":0,\"data\":[{\"distance\":0.63,\"id\":\"453577185629572535\",\"pk\":\"453577185629572535\"},{\"distance\":0.1,\"id\":\"453577185629572534\",\"pk\":\"453577185629572534\"}]}​\n\n"],"headingContent":"Sparse Vector​","anchorList":[{"label":"Vecteur épars","href":"Sparse-Vector​","type":1,"isActive":false},{"label":"Vue d'ensemble","href":"Overview​","type":2,"isActive":false},{"label":"Utiliser des vecteurs peu denses dans Milvus","href":"Use-sparse-vectors-in-Milvus​","type":2,"isActive":false}]} \ No newline at end of file +{"codeList":["from scipy.sparse import csr_matrix​\n​\n# Create a sparse matrix​\nrow = [0, 0, 1, 2, 2, 2]​\ncol = [0, 2, 2, 0, 1, 2]​\ndata = [1, 2, 3, 4, 5, 6]​\nsparse_matrix = csr_matrix((data, (row, col)), shape=(3, 3))​\n​\n# Represent sparse vector using the sparse matrix​\nsparse_vector = 
sparse_matrix.getrow(0)​\n\n","# Represent sparse vector using a dictionary​\nsparse_vector = [{1: 0.5, 100: 0.3, 500: 0.8, 1024: 0.2, 5000: 0.6}]​\n\n","SortedMap sparseVector = new TreeMap<>();​\nsparseVector.put(1L, 0.5f);​\nsparseVector.put(100L, 0.3f);​\nsparseVector.put(500L, 0.8f);​\nsparseVector.put(1024L, 0.2f);​\nsparseVector.put(5000L, 0.6f);​\n\n","# Represent sparse vector using a list of tuples​\nsparse_vector = [[(1, 0.5), (100, 0.3), (500, 0.8), (1024, 0.2), (5000, 0.6)]]​\n\n","from pymilvus import MilvusClient, DataType​\n​\nclient = MilvusClient(uri=\"http://localhost:19530\")​\n​\nclient.drop_collection(collection_name=\"my_sparse_collection\")​\n​\nschema = client.create_schema(​\n auto_id=True,​\n enable_dynamic_fields=True,​\n)​\n​\nschema.add_field(field_name=\"pk\", datatype=DataType.VARCHAR, is_primary=True, max_length=100)​\nschema.add_field(field_name=\"sparse_vector\", datatype=DataType.SPARSE_FLOAT_VECTOR)​\n\n","import io.milvus.v2.client.ConnectConfig;​\nimport io.milvus.v2.client.MilvusClientV2;​\n​\nimport io.milvus.v2.common.DataType;​\nimport io.milvus.v2.service.collection.request.AddFieldReq;​\nimport io.milvus.v2.service.collection.request.CreateCollectionReq;​\n​\nMilvusClientV2 client = new MilvusClientV2(ConnectConfig.builder()​\n .uri(\"http://localhost:19530\")​\n .build());​\n ​\nCreateCollectionReq.CollectionSchema schema = client.createSchema();​\nschema.setEnableDynamicField(true);​\nschema.addField(AddFieldReq.builder()​\n .fieldName(\"pk\")​\n .dataType(DataType.VarChar)​\n .isPrimaryKey(true)​\n .autoID(true)​\n .maxLength(100)​\n .build());​\n​\nschema.addField(AddFieldReq.builder()​\n .fieldName(\"sparse_vector\")​\n .dataType(DataType.SparseFloatVector)​\n .build());​\n\n","import { DataType } from \"@zilliz/milvus2-sdk-node\";​\n​\nconst schema = [​\n {​\n name: \"metadata\",​\n data_type: DataType.JSON,​\n },​\n {​\n name: \"pk\",​\n data_type: DataType.Int64,​\n is_primary_key: true,​\n },​\n {​\n name: \"sparse_vector\",​\n data_type: DataType.SparseFloatVector,​\n }​\n];​\n​\n\n","export primaryField='{​\n \"fieldName\": \"pk\",​\n \"dataType\": \"VarChar\",​\n \"isPrimary\": true,​\n \"elementTypeParams\": {​\n \"max_length\": 100​\n }​\n}'​\n​\nexport vectorField='{​\n \"fieldName\": \"sparse_vector\",​\n \"dataType\": \"SparseFloatVector\"​\n}'​\n​\nexport schema=\"{​\n \\\"autoID\\\": true,​\n \\\"fields\\\": [​\n $primaryField,​\n $vectorField​\n ]​\n}\"​\n\n","index_params = client.prepare_index_params()​\n​\nindex_params.add_index(​\n field_name=\"sparse_vector\",​\n index_name=\"sparse_inverted_index\",​\n index_type=\"SPARSE_INVERTED_INDEX\",​\n metric_type=\"IP\",​\n params={\"drop_ratio_build\": 0.2},​\n)​\n\n","import io.milvus.v2.common.IndexParam;​\nimport java.util.*;​\n​\nList indexes = new ArrayList<>();​\nMap extraParams = new HashMap<>();​\nextraParams.put(\"drop_ratio_build\", 0.2);​\nindexes.add(IndexParam.builder()​\n .fieldName(\"sparse_vector\")​\n .indexName(\"sparse_inverted_index\")​\n .indexType(IndexParam.IndexType.SPARSE_INVERTED_INDEX)​\n .metricType(IndexParam.MetricType.IP)​\n .extraParams(extraParams)​\n .build());​\n\n","const indexParams = await client.createIndex({​\n index_name: 'sparse_inverted_index',​\n field_name: 'sparse_vector',​\n metric_type: MetricType.IP,​\n index_type: IndexType.SPARSE_WAND,​\n params: {​\n drop_ratio_build: 0.2,​\n },​\n});​\n\n","export indexParams='[​\n {​\n \"fieldName\": \"sparse_vector\",​\n \"metricType\": \"IP\",​\n \"indexName\": 
\"sparse_inverted_index\",​\n \"indexType\": \"SPARSE_INVERTED_INDEX\",​\n \"params\":{\"drop_ratio_build\": 0.2}​\n }​\n ]'​\n\n","client.create_collection(​\n collection_name=\"my_sparse_collection\",​\n schema=schema,​\n index_params=index_params​\n)​\n\n","import io.milvus.v2.client.ConnectConfig;​\nimport io.milvus.v2.client.MilvusClientV2;​\n​\nMilvusClientV2 client = new MilvusClientV2(ConnectConfig.builder()​\n .uri(\"http://localhost:19530\")​\n .build());​\n ​\nCreateCollectionReq requestCreate = CreateCollectionReq.builder()​\n .collectionName(\"my_sparse_collection\")​\n .collectionSchema(schema)​\n .indexParams(indexes)​\n .build();​\nclient.createCollection(requestCreate);​\n\n","import { MilvusClient } from \"@zilliz/milvus2-sdk-node\";​\n​\nconst client = new MilvusClient({​\n address: 'http://localhost:19530'​\n});​\n​\nawait client.createCollection({​\n collection_name: 'my_sparse_collection',​\n schema: schema,​\n index_params: indexParams​\n});​\n\n","curl --request POST \\​\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/collections/create\" \\​\n--header \"Authorization: Bearer ${TOKEN}\" \\​\n--header \"Content-Type: application/json\" \\​\n-d \"{​\n \\\"collectionName\\\": \\\"my_sparse_collection\\\",​\n \\\"schema\\\": $schema,​\n \\\"indexParams\\\": $indexParams​\n}\"​\n\n","sparse_vectors = [​\n {\"sparse_vector\": {1: 0.5, 100: 0.3, 500: 0.8}},​\n {\"sparse_vector\": {10: 0.1, 200: 0.7, 1000: 0.9}},​\n]​\n​\nclient.insert(​\n collection_name=\"my_sparse_collection\",​\n data=sparse_vectors​\n)​\n\n","import com.google.gson.Gson;​\nimport com.google.gson.JsonObject;​\nimport io.milvus.v2.service.vector.request.InsertReq;​\nimport io.milvus.v2.service.vector.response.InsertResp;​\n​\nList rows = new ArrayList<>();​\nGson gson = new Gson();​\n{​\n JsonObject row = new JsonObject();​\n SortedMap sparse = new TreeMap<>();​\n sparse.put(1L, 0.5f);​\n sparse.put(100L, 0.3f);​\n sparse.put(500L, 0.8f);​\n row.add(\"sparse_vector\", gson.toJsonTree(sparse));​\n rows.add(row);​\n}​\n{​\n JsonObject row = new JsonObject();​\n SortedMap sparse = new TreeMap<>();​\n sparse.put(10L, 0.1f);​\n sparse.put(200L, 0.7f);​\n sparse.put(1000L, 0.9f);​\n row.add(\"sparse_vector\", gson.toJsonTree(sparse));​\n rows.add(row);​\n}​\n​\nInsertResp insertR = client.insert(InsertReq.builder()​\n .collectionName(\"my_sparse_collection\")​\n .data(rows)​\n .build());​\n\n","const data = [​\n { sparse_vector: { \"1\": 0.5, \"100\": 0.3, \"500\": 0.8 } },​\n { sparse_vector: { \"10\": 0.1, \"200\": 0.7, \"1000\": 0.9 } },​\n];​\nclient.insert({​\n collection_name: \"my_sparse_collection\",​\n data: data,​\n});​\n​\n\n","curl --request POST \\​\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/entities/insert\" \\​\n--header \"Authorization: Bearer ${TOKEN}\" \\​\n--header \"Content-Type: application/json\" \\​\n-d '{​\n \"data\": [​\n {\"sparse_vector\": {\"1\": 0.5, \"100\": 0.3, \"500\": 0.8}},​\n {\"sparse_vector\": {\"10\": 0.1, \"200\": 0.7, \"1000\": 0.9}} ​\n ],​\n \"collectionName\": \"my_sparse_collection\"​\n}'​\n​\n## {\"code\":0,\"cost\":0,\"data\":{\"insertCount\":2,\"insertIds\":[\"453577185629572534\",\"453577185629572535\"]}}​\n\n","# Prepare search parameters​\nsearch_params = {​\n \"params\": {\"drop_ratio_search\": 0.2}, # Additional optional search parameters​\n}​\n​\n# Prepare the query vector​\nquery_vector = [{1: 0.2, 50: 0.4, 1000: 0.7}]​\n\n","res = client.search(​\n collection_name=\"my_sparse_collection\",​\n data=query_vector,​\n limit=3,​\n output_fields=[\"pk\"],​\n 
search_params=search_params,​\n)​\n​\nprint(res)​\n​\n# Output​\n# data: [\"[{'id': '453718927992172266', 'distance': 0.6299999952316284, 'entity': {'pk': '453718927992172266'}}, {'id': '453718927992172265', 'distance': 0.10000000149011612, 'entity': {'pk': '453718927992172265'}}]\"]​\n\n","import io.milvus.v2.service.vector.request.SearchReq;​\nimport io.milvus.v2.service.vector.request.data.SparseFloatVec;​\nimport io.milvus.v2.service.vector.response.SearchResp;​\n​\nMap searchParams = new HashMap<>();​\nsearchParams.put(\"drop_ratio_search\", 0.2);​\n​\nSortedMap sparse = new TreeMap<>();​\nsparse.put(10L, 0.1f);​\nsparse.put(200L, 0.7f);​\nsparse.put(1000L, 0.9f);​\n​\nSparseFloatVec queryVector = new SparseFloatVec(sparse);​\n​\nSearchResp searchR = client.search(SearchReq.builder()​\n .collectionName(\"my_sparse_collection\")​\n .data(Collections.singletonList(queryVector))​\n .annsField(\"sparse_vector\")​\n .searchParams(searchParams)​\n .topK(3)​\n .outputFields(Collections.singletonList(\"pk\"))​\n .build());​\n ​\nSystem.out.println(searchR.getSearchResults());​\n​\n// Output​\n//​\n// [[SearchResp.SearchResult(entity={pk=453444327741536759}, score=1.31, id=453444327741536759), SearchResp.SearchResult(entity={pk=453444327741536756}, score=1.31, id=453444327741536756), SearchResp.SearchResult(entity={pk=453444327741536753}, score=1.31, id=453444327741536753)]]​\n\n","client.search({​\n collection_name: 'my_sparse_collection',​\n data: {1: 0.2, 50: 0.4, 1000: 0.7},​\n limit: 3,​\n output_fields: ['pk'],​\n params: {​\n drop_ratio_search: 0.2​\n }​\n});​\n\n","curl --request POST \\​\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/entities/search\" \\​\n--header \"Authorization: Bearer ${TOKEN}\" \\​\n--header \"Content-Type: application/json\" \\​\n-d '{​\n \"collectionName\": \"my_sparse_collection\",​\n \"data\": [​\n {\"1\": 0.2, \"50\": 0.4, \"1000\": 0.7}​\n ],​\n \"annsField\": \"sparse_vector\",​\n \"limit\": 3,​\n \"searchParams\":{​\n \"params\":{\"drop_ratio_search\": 0.2}​\n },​\n \"outputFields\": [\"pk\"]​\n}'​\n​\n## {\"code\":0,\"cost\":0,\"data\":[{\"distance\":0.63,\"id\":\"453577185629572535\",\"pk\":\"453577185629572535\"},{\"distance\":0.1,\"id\":\"453577185629572534\",\"pk\":\"453577185629572534\"}]}​\n\n"],"headingContent":"Sparse Vector​","anchorList":[{"label":"Vecteur épars","href":"Sparse-Vector​","type":1,"isActive":false},{"label":"Vue d'ensemble","href":"Overview​","type":2,"isActive":false},{"label":"Utiliser des vecteurs peu denses dans Milvus","href":"Use-sparse-vectors-in-Milvus​","type":2,"isActive":false},{"label":"Limites","href":"Limits","type":2,"isActive":false},{"label":"FAQ","href":"FAQ","type":2,"isActive":false}]} \ No newline at end of file diff --git a/localization/v2.5.x/site/fr/userGuide/schema/sparse_vector.md b/localization/v2.5.x/site/fr/userGuide/schema/sparse_vector.md index f599ef45b..dc006f11d 100644 --- a/localization/v2.5.x/site/fr/userGuide/schema/sparse_vector.md +++ b/localization/v2.5.x/site/fr/userGuide/schema/sparse_vector.md @@ -479,3 +479,60 @@ sparse.put(1000L,

    Pour plus d'informations sur les paramètres de recherche de similarité, reportez-vous à la section Recherche ANN de base.

    +

    Limites

    Lorsque vous utilisez des vecteurs épars dans Milvus, tenez compte des limites suivantes :

    +
      +
    • Actuellement, seule la métrique de distance IP est prise en charge pour les vecteurs peu denses. La dimensionnalité élevée des vecteurs clairsemés rend les distances L2 et cosinus impraticables.

    • +
    • Seuls les types d'index SPARSE_INVERTED_INDEX et SPARSE_WAND sont pris en charge pour les champs de vecteurs peu denses.

    • +
    • Types de données pris en charge pour les vecteurs peu denses :

      +
        +
      • La partie dimension doit être un entier non signé de 32 bits ;
      • +
      • La partie valeur peut être un nombre à virgule flottante 32 bits non négatif.
      • +
    • +
    • Les vecteurs épars doivent répondre aux exigences suivantes en matière d'insertion et de recherche :

      +
        +
      • Au moins une valeur du vecteur est non nulle ;
      • +
      • Les indices du vecteur sont non négatifs.
      • +
    • +
    +
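    À titre d'illustration de ces contraintes, voici une esquisse hypothétique de représentations valides et invalides côté client :

    # Valid: non-negative uint32 indices, non-negative float values, at least one non-zero value.
    valid_sparse_vector = {1: 0.5, 100: 0.3, 4294967294: 0.8}   # 4294967294 = 2**32 - 2, still below the uint32 maximum

    # Invalid (rejected at insert or search time):
    # {}                  -> no non-zero value
    # {-5: 0.2}           -> negative index
    # {4294967295: 0.1}   -> equals the uint32 maximum, which is excluded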

    FAQ

      +
    • Pouvez-vous expliquer la différence entre SPARSE_INVERTED_INDEX et SPARSE_WAND, et comment choisir entre les deux ?

      +

      SPARSE_INVERTED_INDEX est un index inversé traditionnel, tandis que SPARSE_WAND utilise l'algorithme Weak-AND pour réduire le nombre d'évaluations de la distance IP complète pendant la recherche. SPARSE_WAND est généralement plus rapide, mais ses performances peuvent diminuer avec l'augmentation de la densité des vecteurs. Pour choisir entre les deux, effectuez des expériences et des analyses comparatives en fonction de votre jeu de données et de votre cas d'utilisation spécifiques.

    • +
    • Comment dois-je choisir les paramètres drop_ratio_build et drop_ratio_search ?

      +

      Le choix des paramètres drop_ratio_build et drop_ratio_search dépend des caractéristiques de vos données et de vos exigences en matière de latence, de débit et de précision de la recherche, comme l'illustre l'esquisse donnée à la suite de cette FAQ.​

    • +
    • La dimension d'un encastrement clairsemé peut-elle être n'importe quelle valeur discrète dans l'espace uint32 ?

      +

      Oui, à une exception près. La dimension d'un encastrement clairsemé peut être n'importe quelle valeur de l'intervalle [0, maximum de uint32). Cela signifie que vous ne pouvez pas utiliser la valeur maximale de uint32.​

    • +
    • Les recherches sur les segments croissants sont-elles effectuées à l'aide d'un index ou par force brute ?

      +

      Les recherches sur les segments croissants sont effectuées à l'aide d'un index du même type que l'index du segment scellé. Pour les nouveaux segments croissants avant que l'index ne soit construit, une recherche par force brute est utilisée.

    • +
    • Est-il possible d'avoir à la fois des vecteurs denses et peu denses dans une même collection ?

      +

      Oui, grâce à la prise en charge de plusieurs types de vecteurs, vous pouvez créer des collections avec des colonnes de vecteurs denses et peu denses et effectuer des recherches hybrides sur ces collections.

    • +
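    Esquisse hypothétique illustrant la réponse de la FAQ sur drop_ratio_build et drop_ratio_search, en reprenant la collection my_sparse_collection des exemples précédents : le premier paramètre est fixé à la création de l'index, le second peut être ajusté à chaque recherche.

    from pymilvus import MilvusClient

    client = MilvusClient(uri="http://localhost:19530")

    # drop_ratio_build is fixed when the index is created ...
    index_params = client.prepare_index_params()
    index_params.add_index(
        field_name="sparse_vector",
        index_type="SPARSE_INVERTED_INDEX",
        metric_type="IP",
        params={"drop_ratio_build": 0.2},   # drop the smallest 20% of values at build time
    )
    # index_params is then passed to create_collection()/create_index(), as shown earlier on this page.

    # ... while drop_ratio_search can be tuned per request.
    res = client.search(
        collection_name="my_sparse_collection",
        data=[{1: 0.2, 50: 0.4, 1000: 0.7}],
        limit=3,
        search_params={"params": {"drop_ratio_search": 0.2}},
    )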
    diff --git a/localization/v2.5.x/site/fr/userGuide/search-query-get/boolean.md b/localization/v2.5.x/site/fr/userGuide/search-query-get/boolean.md index 2ee18b680..6159ed093 100644 --- a/localization/v2.5.x/site/fr/userGuide/search-query-get/boolean.md +++ b/localization/v2.5.x/site/fr/userGuide/search-query-get/boolean.md @@ -700,9 +700,9 @@ curl --request POST \​

    Match operators​

    Match operators include:​

    • like: Match constants or prefixes (prefix%), infixes (%infix%), and suffixes (%suffix) within constants. It relies on a brute-force search mechanism using wildcards and does not involve text tokenization. While it can achieve exact matches, its query efficiency is relatively low, making it suitable for simple matching tasks or queries on smaller datasets.​

    • -
    • TEXT_MATCH: Match specific terms or keywords on VARCHAR fields, using tokenization and inverted index to enable efficient text search. Compared to like, TEXT_MATCH offers more advanced text tokenization and filtering capabilities. It is suited for large-scale datasets where higher query performance is required for complex text search scenarios.​ -

      -

      To use the TEXT_MATCH filter expression, you must enable text matching for the target VARCHAR field when creating the collection. For details, refer to ​Keyword Match.​

      +
    • TEXT_MATCH: Match specific terms or keywords on VARCHAR fields, using tokenization and inverted index to enable efficient text search. Compared to like, TEXT_MATCH offers more advanced text tokenization and filtering capabilities. It is suited for large-scale datasets where higher query performance is required for complex text search scenarios.​

      +

      +

      To use the TEXT_MATCH filter expression, you must enable text matching for the target VARCHAR field when creating the collection. For details, refer to Text Match.​
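      For illustration only, a minimal sketch (the field names are hypothetical) of enabling text matching on a VARCHAR field when defining the schema:

    from pymilvus import MilvusClient, DataType

    schema = MilvusClient.create_schema()
    schema.add_field(field_name="id", datatype=DataType.INT64, is_primary=True, auto_id=True)
    schema.add_field(
        field_name="description",       # hypothetical VARCHAR field used in the examples below
        datatype=DataType.VARCHAR,
        max_length=1000,
        enable_analyzer=True,           # tokenize the raw text
        enable_match=True,              # build the inverted index required by TEXT_MATCH
    )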

    Example 1: Apply filter on scalar field​

    The following example demonstrates how to filter products whose color is red. In this case, you can quickly filter all red products by matching the prefix 'red%'. Similarly, you can use the expression color in ['red_7025', 'red_4794', 'red_9392'] to filter all red products. However, when the data is more complex, we recommend using the like operator for more efficient filtering.​
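    For illustration only, a minimal sketch of such a prefix filter (the collection name my_collection and the client setup are assumptions, not part of the original example):

    from pymilvus import MilvusClient

    client = MilvusClient(uri="http://localhost:19530")
    res = client.query(
        collection_name="my_collection",     # hypothetical collection with a VARCHAR "color" field
        filter='color like "red%"',          # prefix match on the color field
        output_fields=["color"],
    )
    print(res)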

    @@ -857,8 +857,8 @@ curl --request POST \​ ]​
    -

    Example 3: Keyword match on VARCHAR fields​

    The TEXT_MATCH expression is used for keyword match on VARCHAR fields. By default, it applies an OR logic, but you can combine it with other logical operators to create more complex query conditions. For details, refer to ​Keyword Match.​

    -

    The following example demonstrates how to use the TEXT_MATCH expression to filter products where the description field contains either the keyword "Apple" or "iPhone":​

    +

    Example 3: Text match on VARCHAR fields​

    The TEXT_MATCH expression is used for text match on VARCHAR fields. By default, it applies an OR logic, but you can combine it with other logical operators to create more complex query conditions. For details, refer to Text Match.​

    +

    The following example demonstrates how to use the TEXT_MATCH expression to filter products where the description field contains either the term "Apple" or "iPhone":​
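    For illustration only, a minimal sketch of that filter (reusing the hypothetical my_collection collection and client from the previous sketch, and assuming text matching is enabled on the description field):

    # Hypothetical client and collection from the previous sketch.
    res = client.query(
        collection_name="my_collection",
        filter='TEXT_MATCH(description, "Apple iPhone")',   # OR semantics over the listed terms
        output_fields=["description"],
    )
    print(res)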

    Python Java diff --git a/localization/v2.5.x/site/fr/userGuide/search-query-get/full-text-search.json b/localization/v2.5.x/site/fr/userGuide/search-query-get/full-text-search.json index 699958c5f..4d8e1cb8c 100644 --- a/localization/v2.5.x/site/fr/userGuide/search-query-get/full-text-search.json +++ b/localization/v2.5.x/site/fr/userGuide/search-query-get/full-text-search.json @@ -1 +1 @@ -{"codeList":["from pymilvus import MilvusClient, DataType, Function, FunctionType​\n​\nschema = MilvusClient.create_schema()​\n​\nschema.add_field(field_name=\"id\", datatype=DataType.INT64, is_primary=True, auto_id=True)​\nschema.add_field(field_name=\"text\", datatype=DataType.VARCHAR, max_length=1000, enable_analyzer=True)​\nschema.add_field(field_name=\"sparse\", datatype=DataType.SPARSE_FLOAT_VECTOR)​\n\n","bm25_function = Function(​\n name=\"text_bm25_emb\", # Function name​\n input_field_names=[\"text\"], # Name of the VARCHAR field containing raw text data​\n output_field_names=[\"sparse\"], # Name of the SPARSE_FLOAT_VECTOR field reserved to store generated embeddings​\n function_type=FunctionType.BM25,​\n)​\n​\nschema.add_function(bm25_function)​\n\n","index_params = MilvusClient.prepare_index_params()​\n​\nindex_params.add_index(​\n field_name=\"sparse\",​\n index_type=\"AUTOINDEX\", ​\n metric_type=\"BM25\"​\n)​\n\n","MilvusClient.create_collection(​\n collection_name='demo', ​\n schema=schema, ​\n index_params=index_params​\n)​\n\n","MilvusClient.insert('demo', [​\n {'text': 'Artificial intelligence was founded as an academic discipline in 1956.'},​\n {'text': 'Alan Turing was the first person to conduct substantial research in AI.'},​\n {'text': 'Born in Maida Vale, London, Turing was raised in southern England.'},​\n])​\n\n","search_params = {​\n 'params': {'drop_ratio_search': 0.6},​\n}​\n​\nMilvusClient.search(​\n collection_name='demo', ​\n data=['Who started AI research?'],​\n anns_field='sparse',​\n limit=3,​\n search_params=search_params​\n)​\n\n"],"headingContent":"Full Text Search​","anchorList":[{"label":"Recherche en texte intégral","href":"Full-Text-Search​","type":1,"isActive":false},{"label":"Vue d'ensemble","href":"Overview​","type":2,"isActive":false},{"label":"Créer une collection pour la recherche en texte intégral","href":"Create-a-collection-for-full-text-search​","type":2,"isActive":false},{"label":"Insérer des données textuelles","href":"Insert-text-data","type":2,"isActive":false},{"label":"Effectuer une recherche en texte intégral","href":"Perform-full-text-search","type":2,"isActive":false}]} \ No newline at end of file +{"codeList":["from pymilvus import MilvusClient, DataType, Function, FunctionType​\n​\nschema = MilvusClient.create_schema()​\n​\nschema.add_field(field_name=\"id\", datatype=DataType.INT64, is_primary=True, auto_id=True)​\nschema.add_field(field_name=\"text\", datatype=DataType.VARCHAR, max_length=1000, enable_analyzer=True)​\nschema.add_field(field_name=\"sparse\", datatype=DataType.SPARSE_FLOAT_VECTOR)​\n\n","import io.milvus.v2.common.DataType;\nimport io.milvus.v2.service.collection.request.AddFieldReq;\nimport io.milvus.v2.service.collection.request.CreateCollectionReq;\n\nCreateCollectionReq.CollectionSchema schema = CreateCollectionReq.CollectionSchema.builder()\n .build();\nschema.addField(AddFieldReq.builder()\n .fieldName(\"id\")\n .dataType(DataType.Int64)\n .isPrimaryKey(true)\n .autoID(true)\n .build());\nschema.addField(AddFieldReq.builder()\n .fieldName(\"text\")\n .dataType(DataType.VarChar)\n .maxLength(1000)\n 
.enableAnalyzer(true)\n .build());\nschema.addField(AddFieldReq.builder()\n .fieldName(\"sparse\")\n .dataType(DataType.SparseFloatVector)\n .build());\n","import { MilvusClient, DataType } from \"@zilliz/milvus2-sdk-node\";\n\nconst address = \"http://localhost:19530\";\nconst token = \"root:Milvus\";\nconst client = new MilvusClient({address, token});\nconst schema = [\n {\n name: \"id\",\n data_type: DataType.Int64,\n is_primary_key: true,\n },\n {\n name: \"text\",\n data_type: \"VarChar\",\n enable_analyzer: true,\n enable_match: true,\n max_length: 1000,\n },\n {\n name: \"sparse\",\n data_type: DataType.SparseFloatVector,\n },\n];\n\n\nconsole.log(res.results)\n","export schema='{\n \"autoId\": true,\n \"enabledDynamicField\": false,\n \"fields\": [\n {\n \"fieldName\": \"id\",\n \"dataType\": \"Int64\",\n \"isPrimary\": true\n },\n {\n \"fieldName\": \"text\",\n \"dataType\": \"VarChar\",\n \"elementTypeParams\": {\n \"max_length\": 1000,\n \"enable_analyzer\": true\n }\n },\n {\n \"fieldName\": \"sparse\",\n \"dataType\": \"SparseFloatVector\"\n }\n ]\n }'\n","bm25_function = Function(​\n name=\"text_bm25_emb\", # Function name​\n input_field_names=[\"text\"], # Name of the VARCHAR field containing raw text data​\n output_field_names=[\"sparse\"], # Name of the SPARSE_FLOAT_VECTOR field reserved to store generated embeddings​\n function_type=FunctionType.BM25,​\n)​\n​\nschema.add_function(bm25_function)​\n\n","import io.milvus.common.clientenum.FunctionType;\nimport io.milvus.v2.service.collection.request.CreateCollectionReq.Function;\n\nimport java.util.*;\n\nschema.addFunction(Function.builder()\n .functionType(FunctionType.BM25)\n .name(\"text_bm25_emb\")\n .inputFieldNames(Collections.singletonList(\"text\"))\n .outputFieldNames(Collections.singletonList(\"vector\"))\n .build());\n","const functions = [\n {\n name: 'text_bm25_emb',\n description: 'bm25 function',\n type: FunctionType.BM25,\n input_field_names: ['text'],\n output_field_names: ['vector'],\n params: {},\n },\n];\n","export schema='{\n \"autoId\": true,\n \"enabledDynamicField\": false,\n \"fields\": [\n {\n \"fieldName\": \"id\",\n \"dataType\": \"Int64\",\n \"isPrimary\": true\n },\n {\n \"fieldName\": \"text\",\n \"dataType\": \"VarChar\",\n \"elementTypeParams\": {\n \"max_length\": 1000,\n \"enable_analyzer\": true\n }\n },\n {\n \"fieldName\": \"sparse\",\n \"dataType\": \"SparseFloatVector\"\n }\n ],\n \"functions\": [\n {\n \"name\": \"text_bm25_emb\",\n \"type\": \"BM25\",\n \"inputFieldNames\": [\"text\"],\n \"outputFieldNames\": [\"sparse\"],\n \"params\": {}\n }\n ]\n }'\n","index_params = MilvusClient.prepare_index_params()​\n​\nindex_params.add_index(​\n field_name=\"sparse\",​\n index_type=\"AUTOINDEX\", ​\n metric_type=\"BM25\"​\n)​\n\n","import io.milvus.v2.common.IndexParam;\n\nList indexes = new ArrayList<>();\nindexes.add(IndexParam.builder()\n .fieldName(\"sparse\")\n .indexType(IndexParam.IndexType.SPARSE_INVERTED_INDEX)\n .metricType(IndexParam.MetricType.BM25)\n .build());\n","const index_params = [\n {\n fieldName: \"sparse\",\n metricType: \"BM25\",\n indexType: \"AUTOINDEX\",\n },\n];\n","export indexParams='[\n {\n \"fieldName\": \"sparse\",\n \"metricType\": \"BM25\",\n \"indexType\": \"AUTOINDEX\"\n }\n ]'\n","MilvusClient.create_collection(​\n collection_name='demo', ​\n schema=schema, ​\n index_params=index_params​\n)​\n\n","import io.milvus.v2.service.collection.request.CreateCollectionReq;\n\nCreateCollectionReq requestCreate = CreateCollectionReq.builder()\n 
.collectionName(\"demo\")\n .collectionSchema(schema)\n .indexParams(indexes)\n .build();\nclient.createCollection(requestCreate);\n","await client.create_collection(\n collection_name: 'demo', \n schema: schema, \n index_params: index_params\n);\n","export CLUSTER_ENDPOINT=\"http://localhost:19530\"\nexport TOKEN=\"root:Milvus\"\n\ncurl --request POST \\\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/collections/create\" \\\n--header \"Authorization: Bearer ${TOKEN}\" \\\n--header \"Content-Type: application/json\" \\\n-d \"{\n \\\"collectionName\\\": \\\"demo\\\",\n \\\"schema\\\": $schema,\n \\\"indexParams\\\": $indexParams\n}\"\n","client.insert('demo', [\n {'text': 'information retrieval is a field of study.'},\n {'text': 'information retrieval focuses on finding relevant information in large datasets.'},\n {'text': 'data mining and information retrieval overlap in research.'},\n])\n\n","import com.google.gson.Gson;\nimport com.google.gson.JsonObject;\n\nimport io.milvus.v2.service.vector.request.InsertReq;\n\nGson gson = new Gson();\nList rows = Arrays.asList(\n gson.fromJson(\"{\\\"text\\\": \\\"information retrieval is a field of study.\\\"}\", JsonObject.class),\n gson.fromJson(\"{\\\"text\\\": \\\"information retrieval focuses on finding relevant information in large datasets.\\\"}\", JsonObject.class),\n gson.fromJson(\"{\\\"text\\\": \\\"data mining and information retrieval overlap in research.\\\"}\", JsonObject.class)\n);\n\nclient.insert(InsertReq.builder()\n .collectionName(\"demo\")\n .data(rows)\n .build());\n","await client.insert({\ncollection_name: 'demo', \ndata: [\n {'text': 'information retrieval is a field of study.'},\n {'text': 'information retrieval focuses on finding relevant information in large datasets.'},\n {'text': 'data mining and information retrieval overlap in research.'},\n]);\n","curl --request POST \\\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/entities/insert\" \\\n--header \"Authorization: Bearer ${TOKEN}\" \\\n--header \"Content-Type: application/json\" \\\n-d '{\n \"data\": [\n {\"text\": \"information retrieval is a field of study.\"},\n {\"text\": \"information retrieval focuses on finding relevant information in large datasets.\"},\n {\"text\": \"data mining and information retrieval overlap in research.\"} \n ],\n \"collectionName\": \"demo\"\n}'\n","search_params = {​\n 'params': {'drop_ratio_search': 0.6},​\n}​\n​\nMilvusClient.search(​\n collection_name='demo', ​\n data=['whats the focus of information retrieval?'],​\n anns_field='sparse',​\n limit=3,​\n search_params=search_params​\n)​\n\n","import io.milvus.v2.service.vector.request.SearchReq;\nimport io.milvus.v2.service.vector.request.data.EmbeddedText;\nimport io.milvus.v2.service.vector.response.SearchResp;\n\nMap searchParams = new HashMap<>();\nsearchParams.put(\"drop_ratio_search\", 0.6);\nSearchResp searchResp = client.search(SearchReq.builder()\n .collectionName(\"demo\")\n .data(Collections.singletonList(new EmbeddedText(\"whats the focus of information retrieval?\")))\n .annsField(\"sparse\")\n .topK(3)\n .searchParams(searchParams)\n .outputFields(Collections.singletonList(\"text\"))\n .build());\n","await client.search(\n collection_name: 'demo', \n data: ['whats the focus of information retrieval?'],\n anns_field: 'sparse',\n limit: 3,\n params: {'drop_ratio_search': 0.6},\n)\n","curl --request POST \\\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/entities/search\" \\\n--header \"Authorization: Bearer ${TOKEN}\" \\\n--header \"Content-Type: application/json\" \\\n--data-raw '{\n 
\"collectionName\": \"demo\",\n \"data\": [\n \"whats the focus of information retrieval?\"\n ],\n \"annsField\": \"sparse\",\n \"limit\": 3,\n \"outputFields\": [\n \"text\"\n ],\n \"searchParams\":{\n \"params\":{\n \"drop_ratio_search\":0.6\n }\n }\n}'\n"],"headingContent":"Full Text Search​","anchorList":[{"label":"Recherche en texte intégral","href":"Full-Text-Search​","type":1,"isActive":false},{"label":"Vue d'ensemble","href":"Overview​","type":2,"isActive":false},{"label":"Créer une collection pour la recherche en texte intégral","href":"Create-a-collection-for-full-text-search​","type":2,"isActive":false},{"label":"Insérer des données textuelles","href":"Insert-text-data","type":2,"isActive":false},{"label":"Effectuer une recherche en texte intégral","href":"Perform-full-text-search","type":2,"isActive":false}]} \ No newline at end of file diff --git a/localization/v2.5.x/site/fr/userGuide/search-query-get/full-text-search.md b/localization/v2.5.x/site/fr/userGuide/search-query-get/full-text-search.md index e7dbf2162..ec1ed7aff 100644 --- a/localization/v2.5.x/site/fr/userGuide/search-query-get/full-text-search.md +++ b/localization/v2.5.x/site/fr/userGuide/search-query-get/full-text-search.md @@ -83,6 +83,8 @@ summary: >-
  • A SPARSE_FLOAT_VECTOR field reserved for storing the sparse embeddings that Milvus automatically generates for the VARCHAR field.

  • Define the collection schema

    Start by creating the schema and adding the required fields.

    +
    from pymilvus import MilvusClient, DataType, Function, FunctionType​
     ​
     schema = MilvusClient.create_schema()​
    @@ -91,6 +93,80 @@ schema.add_field(field_name="id", dat
     schema.add_field(field_name="text", datatype=DataType.VARCHAR, max_length=1000, enable_analyzer=True)​
     schema.add_field(field_name="sparse", datatype=DataType.SPARSE_FLOAT_VECTOR)​
     
    +
    +
    import io.milvus.v2.common.DataType;
    +import io.milvus.v2.service.collection.request.AddFieldReq;
    +import io.milvus.v2.service.collection.request.CreateCollectionReq;
    +
    +CreateCollectionReq.CollectionSchema schema = CreateCollectionReq.CollectionSchema.builder()
    +        .build();
    +schema.addField(AddFieldReq.builder()
    +        .fieldName("id")
    +        .dataType(DataType.Int64)
    +        .isPrimaryKey(true)
    +        .autoID(true)
    +        .build());
    +schema.addField(AddFieldReq.builder()
    +        .fieldName("text")
    +        .dataType(DataType.VarChar)
    +        .maxLength(1000)
    +        .enableAnalyzer(true)
    +        .build());
    +schema.addField(AddFieldReq.builder()
    +        .fieldName("sparse")
    +        .dataType(DataType.SparseFloatVector)
    +        .build());
    +
    +
    import { MilvusClient, DataType } from "@zilliz/milvus2-sdk-node";
    +
    +const address = "http://localhost:19530";
    +const token = "root:Milvus";
    +const client = new MilvusClient({address, token});
    +const schema = [
    +  {
    +    name: "id",
    +    data_type: DataType.Int64,
    +    is_primary_key: true,
    +  },
    +  {
    +    name: "text",
    +    data_type: "VarChar",
    +    enable_analyzer: true,
    +    enable_match: true,
    +    max_length: 1000,
    +  },
    +  {
    +    name: "sparse",
    +    data_type: DataType.SparseFloatVector,
    +  },
    +];
    +
    +
    +console.log(res.results)
    +
    +
    export schema='{
    +        "autoId": true,
    +        "enabledDynamicField": false,
    +        "fields": [
    +            {
    +                "fieldName": "id",
    +                "dataType": "Int64",
    +                "isPrimary": true
    +            },
    +            {
    +                "fieldName": "text",
    +                "dataType": "VarChar",
    +                "elementTypeParams": {
    +                    "max_length": 1000,
    +                    "enable_analyzer": true
    +                }
    +            },
    +            {
    +                "fieldName": "sparse",
    +                "dataType": "SparseFloatVector"
    +            }
    +        ]
    +    }'
     

    In this configuration:

      @@ -99,6 +175,8 @@ schema.add_field(field_name="sparse",
    • sparse: a vector field reserved for storing the internally generated sparse embeddings used in full-text search operations. The data type must be SPARSE_FLOAT_VECTOR.

    Now define a function that converts your text into sparse vector representations, then add it to the schema.

    +
    bm25_function = Function(​
         name="text_bm25_emb", # Function name​
         input_field_names=["text"], # Name of the VARCHAR field containing raw text data​
    @@ -108,6 +186,62 @@ schema.add_field(field_name="sparse",
     ​
     schema.add_function(bm25_function)​
     
    +
    +
    import io.milvus.common.clientenum.FunctionType;
    +import io.milvus.v2.service.collection.request.CreateCollectionReq.Function;
    +
    +import java.util.*;
    +
    +schema.addFunction(Function.builder()
    +        .functionType(FunctionType.BM25)
    +        .name("text_bm25_emb")
    +        .inputFieldNames(Collections.singletonList("text"))
    +        .outputFieldNames(Collections.singletonList("vector"))
    +        .build());
    +
    +
    const functions = [
    +    {
    +      name: 'text_bm25_emb',
    +      description: 'bm25 function',
    +      type: FunctionType.BM25,
    +      input_field_names: ['text'],
    +      output_field_names: ['vector'],
    +      params: {},
    +    },
    +];
    +
    +
    export schema='{
    +        "autoId": true,
    +        "enabledDynamicField": false,
    +        "fields": [
    +            {
    +                "fieldName": "id",
    +                "dataType": "Int64",
    +                "isPrimary": true
    +            },
    +            {
    +                "fieldName": "text",
    +                "dataType": "VarChar",
    +                "elementTypeParams": {
    +                    "max_length": 1000,
    +                    "enable_analyzer": true
    +                }
    +            },
    +            {
    +                "fieldName": "sparse",
    +                "dataType": "SparseFloatVector"
    +            }
    +        ],
    +        "functions": [
    +            {
    +                "name": "text_bm25_emb",
    +                "type": "BM25",
    +                "inputFieldNames": ["text"],
    +                "outputFieldNames": ["sparse"],
    +                "params": {}
    +            }
    +        ]
    +    }'
     

    Parameter

    Description

    @@ -125,6 +259,8 @@ schema.add_function(bm25_function)​

    For collections with multiple VARCHAR fields that require text-to-sparse-vector conversion, add a separate function to the collection schema for each of them, making sure every function has a unique name and a unique output_field_names value, as shown in the sketch below.
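    A minimal sketch of that pattern, reusing the Python API shown above; the field names title and body and their corresponding sparse output fields are illustrative assumptions, not part of the original example:

    from pymilvus import MilvusClient, DataType, Function, FunctionType

    schema = MilvusClient.create_schema()
    schema.add_field(field_name="id", datatype=DataType.INT64, is_primary=True, auto_id=True)
    # Two VARCHAR fields, each with its own reserved sparse output field.
    schema.add_field(field_name="title", datatype=DataType.VARCHAR, max_length=1000, enable_analyzer=True)
    schema.add_field(field_name="body", datatype=DataType.VARCHAR, max_length=1000, enable_analyzer=True)
    schema.add_field(field_name="title_sparse", datatype=DataType.SPARSE_FLOAT_VECTOR)
    schema.add_field(field_name="body_sparse", datatype=DataType.SPARSE_FLOAT_VECTOR)

    # One BM25 function per VARCHAR field, each with a unique name and output field.
    schema.add_function(Function(
        name="title_bm25_emb",
        input_field_names=["title"],
        output_field_names=["title_sparse"],
        function_type=FunctionType.BM25,
    ))
    schema.add_function(Function(
        name="body_bm25_emb",
        input_field_names=["body"],
        output_field_names=["body_sparse"],
        function_type=FunctionType.BM25,
    ))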

    Configure the index

    After defining the schema with the required fields and the built-in function, set up the index for your collection. To simplify this step, use AUTOINDEX as the index_type, an option that lets Milvus choose and configure the most suitable index type based on the structure of your data.

    +
    index_params = MilvusClient.prepare_index_params()​
     ​
     index_params.add_index(​
    @@ -133,9 +269,34 @@ index_params.add_index(​
         metric_type="BM25"​
     )​
     
    +
    +
    import io.milvus.v2.common.IndexParam;
    +
    +List<IndexParam> indexes = new ArrayList<>();
    +indexes.add(IndexParam.builder()
    +        .fieldName("sparse")
    +        .indexType(IndexParam.IndexType.SPARSE_INVERTED_INDEX)
    +        .metricType(IndexParam.MetricType.BM25)
    +        .build());
    +
    +
    const index_params = [
    +  {
    +    fieldName: "sparse",
    +    metricType: "BM25",
    +    indexType: "AUTOINDEX",
    +  },
    +];
    +
    +
    export indexParams='[
    +        {
    +            "fieldName": "sparse",
    +            "metricType": "BM25",
    +            "indexType": "AUTOINDEX"
    +        }
    +    ]'
     

    Parameter

    -

    Index description

    +

    Description

    field_name

    The name of the vector field to index. For full-text search, this must be the field that stores the generated sparse vectors. In this example, the value is sparse.

    index_type

    @@ -144,12 +305,42 @@ index_params.add_index(​

    The value of this parameter must be set to BM25 specifically for the full-text search feature.

    Create the collection

    Now create the collection using the schema and index parameters defined above.

    +
    MilvusClient.create_collection(​
         collection_name='demo', ​
         schema=schema, ​
         index_params=index_params​
     )​
     
    +
    +
    import io.milvus.v2.service.collection.request.CreateCollectionReq;
    +
    +CreateCollectionReq requestCreate = CreateCollectionReq.builder()
    +        .collectionName("demo")
    +        .collectionSchema(schema)
    +        .indexParams(indexes)
    +        .build();
    +client.createCollection(requestCreate);
    +
    +
    await client.create_collection(
    +    collection_name: 'demo', 
    +    schema: schema, 
    +    index_params: index_params
    +);
    +
    +
    export CLUSTER_ENDPOINT="http://localhost:19530"
    +export TOKEN="root:Milvus"
    +
    +curl --request POST \
    +--url "${CLUSTER_ENDPOINT}/v2/vectordb/collections/create" \
    +--header "Authorization: Bearer ${TOKEN}" \
    +--header "Content-Type: application/json" \
    +-d "{
    +    \"collectionName\": \"demo\",
    +    \"schema\": $schema,
    +    \"indexParams\": $indexParams
    +}"
     

    Insert text data

    Once your collection and index are set up, you are ready to insert text data. Simply provide the raw text; the built-in function defined earlier automatically generates the corresponding sparse vector for each text entry.

    -
    MilvusClient.insert('demo', [​
    -    {'text': 'Artificial intelligence was founded as an academic discipline in 1956.'},​
    -    {'text': 'Alan Turing was the first person to conduct substantial research in AI.'},​
    -    {'text': 'Born in Maida Vale, London, Turing was raised in southern England.'},​
    -])​
    +
    +
    client.insert('demo', [
    +    {'text': 'information retrieval is a field of study.'},
    +    {'text': 'information retrieval focuses on finding relevant information in large datasets.'},
    +    {'text': 'data mining and information retrieval overlap in research.'},
    +])
     
    +
    +
    import com.google.gson.Gson;
    +import com.google.gson.JsonObject;
    +
    +import io.milvus.v2.service.vector.request.InsertReq;
    +
    +Gson gson = new Gson();
    +List<JsonObject> rows = Arrays.asList(
    +        gson.fromJson("{\"text\": \"information retrieval is a field of study.\"}", JsonObject.class),
    +        gson.fromJson("{\"text\": \"information retrieval focuses on finding relevant information in large datasets.\"}", JsonObject.class),
    +        gson.fromJson("{\"text\": \"data mining and information retrieval overlap in research.\"}", JsonObject.class)
    +);
    +
    +client.insert(InsertReq.builder()
    +        .collectionName("demo")
    +        .data(rows)
    +        .build());
    +
    +
    await client.insert({
    +collection_name: 'demo', 
    +data: [
    +    {'text': 'information retrieval is a field of study.'},
    +    {'text': 'information retrieval focuses on finding relevant information in large datasets.'},
    +    {'text': 'data mining and information retrieval overlap in research.'},
    +]);
    +
    +
    curl --request POST \
    +--url "${CLUSTER_ENDPOINT}/v2/vectordb/entities/insert" \
    +--header "Authorization: Bearer ${TOKEN}" \
    +--header "Content-Type: application/json" \
    +-d '{
    +    "data": [
    +        {"text": "information retrieval is a field of study."},
    +        {"text": "information retrieval focuses on finding relevant information in large datasets."},
    +        {"text": "data mining and information retrieval overlap in research."}       
    +    ],
    +    "collectionName": "demo"
    +}'
     

    Once you have inserted data into your collection, you can perform full-text searches using raw text queries. Milvus automatically converts your query into a sparse vector, ranks the matching search results using the BM25 algorithm, and returns the topK (limit) results.

    +
    search_params = {​
         'params': {'drop_ratio_search': 0.6},​
     }​
     ​
     MilvusClient.search(​
         collection_name='demo', ​
    -    data=['Who started AI research?'],​
    +    data=['whats the focus of information retrieval?'],​
         anns_field='sparse',​
         limit=3,​
         search_params=search_params​
     )​
     
    +
    +
    import io.milvus.v2.service.vector.request.SearchReq;
    +import io.milvus.v2.service.vector.request.data.EmbeddedText;
    +import io.milvus.v2.service.vector.response.SearchResp;
    +
    +Map<String,Object> searchParams = new HashMap<>();
    +searchParams.put("drop_ratio_search", 0.6);
    +SearchResp searchResp = client.search(SearchReq.builder()
    +        .collectionName("demo")
    +        .data(Collections.singletonList(new EmbeddedText("whats the focus of information retrieval?")))
    +        .annsField("sparse")
    +        .topK(3)
    +        .searchParams(searchParams)
    +        .outputFields(Collections.singletonList("text"))
    +        .build());
    +
    +
    await client.search(
    +    collection_name: 'demo', 
    +    data: ['whats the focus of information retrieval?'],
    +    anns_field: 'sparse',
    +    limit: 3,
    +    params: {'drop_ratio_search': 0.6},
    +)
    +
    +
    curl --request POST \
    +--url "${CLUSTER_ENDPOINT}/v2/vectordb/entities/search" \
    +--header "Authorization: Bearer ${TOKEN}" \
    +--header "Content-Type: application/json" \
    +--data-raw '{
    +    "collectionName": "demo",
    +    "data": [
    +        "whats the focus of information retrieval?"
    +    ],
    +    "annsField": "sparse",
    +    "limit": 3,
    +    "outputFields": [
    +        "text"
    +    ],
    +    "searchParams":{
    +        "params":{
    +            "drop_ratio_search":0.6
    +        }
    +    }
    +}'
     

    Parameter

    Description

    search_params

    -

    Dictionary containing the search parameters.

    +

    A dictionary containing the search parameters.

    params.drop_ratio_search

    Proportion of low-frequency terms to ignore during the search. For details, refer to Sparse Vector.

    data

    diff --git a/localization/v2.5.x/site/fr/userGuide/search-query-get/keyword-match.json b/localization/v2.5.x/site/fr/userGuide/search-query-get/keyword-match.json index eaa40ca3d..6e9db6cf3 100644 --- a/localization/v2.5.x/site/fr/userGuide/search-query-get/keyword-match.json +++ b/localization/v2.5.x/site/fr/userGuide/search-query-get/keyword-match.json @@ -1 +1 @@ -{"codeList":["from pymilvus import MilvusClient, DataType​\n​\nschema = MilvusClient.create_schema(auto_id=True, enable_dynamic_field=False)​\n​\nschema.add_field(​\n field_name='text', ​\n datatype=DataType.VARCHAR, ​\n max_length=1000, ​\n enable_analyzer=True, # Whether to enable text analysis for this field​\n enable_match=True # Whether to enable text match​\n)​\n\n","analyzer_params={​\n \"type\": \"english\"​\n}​\n​\nschema.add_field(​\n field_name='text', ​\n datatype=DataType.VARCHAR, ​\n max_length=200, ​\n enable_analyzer=True,​\n analyzer_params=analyzer_params,​\n enable_match=True, ​\n)​\n\n","TEXT_MATCH(field_name, text)​\n\n","filter = \"TEXT_MATCH(text, 'machine deep')\"​\n\n","filter = \"TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')\"​\n\n","# Match entities with `keyword1` or `keyword2`​\nfilter = \"TEXT_MATCH(text, 'keyword1 keyword2')\"​\n​\n# Assuming 'embeddings' is the vector field and 'text' is the VARCHAR field​\nresult = MilvusClient.search(​\n collection_name=\"YOUR_COLLECTION_NAME\", # Your collection name​\n anns_field=\"embeddings\", # Vector field name​\n data=[query_vector], # Query vector​\n filter=filter,​\n search_params={\"params\": {\"nprobe\": 10}},​\n limit=10, # Max. number of results to return​\n output_fields=[\"id\", \"text\"] # Fields to return​\n)​\n\n","# Match entities with both `keyword1` and `keyword2`​\nfilter = \"TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')\"​\n​\nresult = MilvusClient.query(​\n collection_name=\"YOUR_COLLECTION_NAME\",​\n filter=filter, ​\n output_fields=[\"id\", \"text\"]​\n)​\n\n"],"headingContent":"Keyword Match​","anchorList":[{"label":"Correspondance de mots-clés","href":"Keyword-Match​","type":1,"isActive":false},{"label":"Vue d'ensemble","href":"Overview","type":2,"isActive":false},{"label":"Activer la recherche par mot-clé","href":"Enable-keyword-match","type":2,"isActive":false},{"label":"Utiliser la correspondance de mots-clés","href":"Use-keyword-match","type":2,"isActive":false},{"label":"Points à prendre en considération","href":"Considerations","type":2,"isActive":false}]} \ No newline at end of file +{"codeList":["from pymilvus import MilvusClient, DataType​\n​\nschema = MilvusClient.create_schema(auto_id=True, enable_dynamic_field=False)​\n​\nschema.add_field(​\n field_name='text', ​\n datatype=DataType.VARCHAR, ​\n max_length=1000, ​\n enable_analyzer=True, # Whether to enable text analysis for this field​\n enable_match=True # Whether to enable text match​\n)​\n\n","import io.milvus.v2.common.DataType;\nimport io.milvus.v2.service.collection.request.AddFieldReq;\nimport io.milvus.v2.service.collection.request.CreateCollectionReq;\n\nCreateCollectionReq.CollectionSchema schema = CreateCollectionReq.CollectionSchema.builder()\n .enableDynamicField(false)\n .build();\n\nschema.addField(AddFieldReq.builder()\n .fieldName(\"text\")\n .dataType(DataType.VarChar)\n .maxLength(1000)\n .enableAnalyzer(true)\n .enableMatch(true)\n .build());\n\n","const schema = [\n {\n name: \"id\",\n data_type: DataType.Int64,\n is_primary_key: true,\n },\n {\n name: \"text\",\n data_type: \"VarChar\",\n enable_analyzer: true,\n 
enable_match: true,\n max_length: 1000,\n },\n {\n name: \"sparse\",\n data_type: DataType.SparseFloatVector,\n },\n];\n\n","export schema='{\n \"autoId\": true,\n \"enabledDynamicField\": false,\n \"fields\": [\n {\n \"fieldName\": \"id\",\n \"dataType\": \"Int64\",\n \"isPrimary\": true\n },\n {\n \"fieldName\": \"text\",\n \"dataType\": \"VarChar\",\n \"elementTypeParams\": {\n \"max_length\": 1000,\n \"enable_analyzer\": true,\n \"enable_match\": true\n }\n },\n {\n \"fieldName\": \"sparse\",\n \"dataType\": \"SparseFloatVector\"\n }\n ]\n }'\n\n","analyzer_params={​\n \"type\": \"english\"​\n}​\n​\nschema.add_field(​\n field_name='text', ​\n datatype=DataType.VARCHAR, ​\n max_length=200, ​\n enable_analyzer=True,​\n analyzer_params=analyzer_params,​\n enable_match=True, ​\n)​\n\n","Map analyzerParams = new HashMap<>();\nanalyzerParams.put(\"type\", \"english\");\nschema.addField(AddFieldReq.builder()\n .fieldName(\"text\")\n .dataType(DataType.VarChar)\n .maxLength(200)\n .enableAnalyzer(true)\n .analyzerParams(analyzerParams)\n .enableMatch(true)\n .build());\n\n","const schema = [\n {\n name: \"id\",\n data_type: DataType.Int64,\n is_primary_key: true,\n },\n {\n name: \"text\",\n data_type: \"VarChar\",\n enable_analyzer: true,\n enable_match: true,\n max_length: 1000,\n analyzer_params: { type: 'english' },\n },\n {\n name: \"sparse\",\n data_type: DataType.SparseFloatVector,\n },\n];\n\n","export schema='{\n \"autoId\": true,\n \"enabledDynamicField\": false,\n \"fields\": [\n {\n \"fieldName\": \"id\",\n \"dataType\": \"Int64\",\n \"isPrimary\": true\n },\n {\n \"fieldName\": \"text\",\n \"dataType\": \"VarChar\",\n \"elementTypeParams\": {\n \"max_length\": 200,\n \"enable_analyzer\": true,\n \"enable_match\": true,\n \"analyzer_params\": {\"type\": \"english\"}\n }\n },\n {\n \"fieldName\": \"my_vector\",\n \"dataType\": \"FloatVector\",\n \"elementTypeParams\": {\n \"dim\": \"5\"\n }\n }\n ]\n }'\n\n","TEXT_MATCH(field_name, text)​\n\n","filter = \"TEXT_MATCH(text, 'machine deep')\"​\n","String filter = \"TEXT_MATCH(text, 'machine deep')\";\n","const filter = \"TEXT_MATCH(text, 'machine deep')\";\n","export filter=\"\\\"TEXT_MATCH(text, 'machine deep')\\\"\"\n","filter = \"TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')\"​\n","String filter = \"TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')\";\n","const filter = \"TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')\"\n","export filter=\"\\\"TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')\\\"\"\n","# Match entities with `keyword1` or `keyword2`​\nfilter = \"TEXT_MATCH(text, 'keyword1 keyword2')\"​\n​\n# Assuming 'embeddings' is the vector field and 'text' is the VARCHAR field​\nresult = MilvusClient.search(​\n collection_name=\"YOUR_COLLECTION_NAME\", # Your collection name​\n anns_field=\"embeddings\", # Vector field name​\n data=[query_vector], # Query vector​\n filter=filter,​\n search_params={\"params\": {\"nprobe\": 10}},​\n limit=10, # Max. 
number of results to return​\n output_fields=[\"id\", \"text\"] # Fields to return​\n)​\n\n","String filter = \"TEXT_MATCH(text, 'keyword1 keyword2')\";\n\nSearchResp searchResp = client.search(SearchReq.builder()\n .collectionName(\"YOUR_COLLECTION_NAME\")\n .annsField(\"embeddings\")\n .data(Collections.singletonList(queryVector)))\n .filter(filter)\n .topK(10)\n .outputFields(Arrays.asList(\"id\", \"text\"))\n .build());\n","// Match entities with `keyword1` or `keyword2`\nconst filter = \"TEXT_MATCH(text, 'keyword1 keyword2')\";\n\n// Assuming 'embeddings' is the vector field and 'text' is the VARCHAR field\nconst result = await client.search(\n collection_name: \"YOUR_COLLECTION_NAME\", // Your collection name\n anns_field: \"embeddings\", // Vector field name\n data: [query_vector], // Query vector\n filter: filter,\n params: {\"nprobe\": 10},\n limit: 10, // Max. number of results to return\n output_fields: [\"id\", \"text\"] //Fields to return\n);\n","export filter=\"\\\"TEXT_MATCH(text, 'keyword1 keyword2')\\\"\"\n\nexport CLUSTER_ENDPOINT=\"http://localhost:19530\"\nexport TOKEN=\"root:Milvus\"\n\ncurl --request POST \\\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/entities/search\" \\\n--header \"Authorization: Bearer ${TOKEN}\" \\\n--header \"Content-Type: application/json\" \\\n-d '{\n \"collectionName\": \"demo2\",\n \"annsField\": \"my_vector\",\n \"data\": [[0.19886812562848388, 0.06023560599112088, 0.6976963061752597, 0.2614474506242501, 0.838729485096104]],\n \"filter\": '\"$filter\"',\n \"searchParams\": {\n \"params\": {\n \"nprobe\": 10\n }\n },\n \"limit\": 3,\n \"outputFields\": [\"text\",\"id\"]\n}'\n","# Match entities with both `keyword1` and `keyword2`​\nfilter = \"TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')\"​\n​\nresult = MilvusClient.query(​\n collection_name=\"YOUR_COLLECTION_NAME\",​\n filter=filter, ​\n output_fields=[\"id\", \"text\"]​\n)​\n\n","String filter = \"TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')\";\n\nQueryResp queryResp = client.query(QueryReq.builder()\n .collectionName(\"YOUR_COLLECTION_NAME\")\n .filter(filter)\n .outputFields(Arrays.asList(\"id\", \"text\"))\n .build()\n);\n","// Match entities with both `keyword1` and `keyword2`\nconst filter = \"TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')\";\n\nconst result = await client.query(\n collection_name: \"YOUR_COLLECTION_NAME\",\n filter: filter, \n output_fields: [\"id\", \"text\"]\n)\n","export filter=\"\\\"TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')\\\"\"\n\nexport CLUSTER_ENDPOINT=\"http://localhost:19530\"\nexport TOKEN=\"root:Milvus\"\n\ncurl --request POST \\\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/entities/query\" \\\n--header \"Authorization: Bearer ${TOKEN}\" \\\n--header \"Content-Type: application/json\" \\\n-d '{\n \"collectionName\": \"demo2\",\n \"filter\": '\"$filter\"',\n \"outputFields\": [\"id\", \"text\"]\n}'\n"],"headingContent":"Text Match​","anchorList":[{"label":"Correspondance de texte","href":"Text-Match​","type":1,"isActive":false},{"label":"Vue d'ensemble","href":"Overview","type":2,"isActive":false},{"label":"Activer la correspondance de texte","href":"Enable-text-match","type":2,"isActive":false},{"label":"Utiliser la correspondance de texte","href":"Use-text-match","type":2,"isActive":false},{"label":"Points à prendre en considération","href":"Considerations","type":2,"isActive":false}]} \ No newline at end of file diff --git 
a/localization/v2.5.x/site/fr/userGuide/search-query-get/keyword-match.md b/localization/v2.5.x/site/fr/userGuide/search-query-get/keyword-match.md index c7544c7cd..aef49a422 100644 --- a/localization/v2.5.x/site/fr/userGuide/search-query-get/keyword-match.md +++ b/localization/v2.5.x/site/fr/userGuide/search-query-get/keyword-match.md @@ -1,15 +1,15 @@ --- id: keyword-match.md summary: >- - La correspondance par mot-clé dans Milvus permet une recherche précise de + La correspondance de texte dans Milvus permet une recherche précise de documents sur la base de termes spécifiques. Cette fonction est principalement utilisée pour la recherche filtrée afin de satisfaire des conditions spécifiques et peut incorporer le filtrage scalaire pour affiner les résultats de la requête, permettant des recherches de similarité dans les vecteurs qui répondent aux critères scalaires. -title: Correspondance de mots-clés +title: Correspondance de texte --- -

    Keyword Match

    Keyword match in Milvus enables precise retrieval of documents based on specific terms. This feature is primarily used for filtered search to satisfy specific conditions and can incorporate scalar filtering to refine query results, allowing similarity searches within vectors that meet scalar criteria.

    +

    Text match in Milvus enables precise retrieval of documents based on specific terms. This feature is primarily used for filtered search to satisfy specific conditions and can incorporate scalar filtering to refine query results, allowing similarity searches within vectors that meet scalar criteria.

    -

    Keyword match focuses on finding exact occurrences of the query terms, without scoring the relevance of the matched documents. If you want to retrieve the most relevant documents based on the semantic meaning and importance of the query terms, we recommend using full-text search.

    +

    Text match focuses on finding exact occurrences of the query terms, without scoring the relevance of the matched documents. If you want to retrieve the most relevant documents based on the semantic meaning and importance of the query terms, we recommend using full-text search.

    Overview

    Milvus integrates Tantivy to power its underlying inverted index and keyword search. For each text entry, Milvus indexes it through the following procedure.

    +

    Milvus integrates Tantivy to power its underlying inverted index and term-based text search. For each text entry, Milvus indexes it through the following procedure.

    1. Analyzer: The analyzer processes the input text by tokenizing it into individual words, or tokens, and then applying filters as needed. This allows Milvus to build an index based on these tokens.

    2. Indexing: After text analysis, Milvus creates an inverted index that maps each unique token to the documents that contain it.

    -

    When a user performs a keyword match, the inverted index is used to quickly retrieve all documents containing the keywords. This is much faster than scanning each document individually.

    +

    When a user performs a text match, the inverted index is used to quickly retrieve all documents containing the terms. This is much faster than scanning each document individually.
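    To make this tokenize, index, and lookup flow concrete, here is a small, self-contained Python sketch. It is plain Python for illustration only and is not part of the Milvus API; Milvus builds its inverted index internally through Tantivy.

    from collections import defaultdict

    docs = {
        1: "Machine learning enables deep learning",
        2: "Deep neural networks power modern AI",
        3: "Classical machine vision predates deep learning",
    }

    # 1. Analyzer: split on whitespace and lowercase (a highly simplified analyzer).
    def analyze(text):
        return text.lower().split()

    # 2. Indexing: map each token to the set of document IDs that contain it.
    inverted_index = defaultdict(set)
    for doc_id, text in docs.items():
        for token in analyze(text):
            inverted_index[token].add(doc_id)

    # Lookup: documents containing 'machine' or 'deep' (OR semantics) ...
    print(inverted_index["machine"] | inverted_index["deep"])   # {1, 2, 3}
    # ... and documents containing both terms (AND semantics).
    print(inverted_index["machine"] & inverted_index["deep"])   # {1, 3}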

    - Keyword Match (figure)

    -

    Enable keyword match

    Keyword match works on the VARCHAR field type, which is essentially the string data type in Milvus. To enable keyword match, set enable_analyzer and enable_match to True, and optionally configure an analyzer for text analysis when defining your collection schema.

    -

    Set enable_analyzer and enable_match

    To enable keyword match for a specific VARCHAR field, set both the enable_analyzer and enable_match parameters to True when defining the field schema. This instructs Milvus to tokenize the text and create an inverted index for the specified field, allowing fast and efficient keyword matches.

    +

    Text match works on the VARCHAR field type, which is essentially the string data type in Milvus. To enable text match, set enable_analyzer and enable_match to True, and optionally configure an analyzer for text analysis when defining your collection schema.

    +

    Set enable_analyzer and enable_match

    To enable text match for a specific VARCHAR field, set both the enable_analyzer and enable_match parameters to True when defining the field schema. This instructs Milvus to tokenize the text and create an inverted index for the specified field, allowing fast and efficient text matches.

    +
    from pymilvus import MilvusClient, DataType​
     ​
     schema = MilvusClient.create_schema(auto_id=True, enable_dynamic_field=False)​
    @@ -83,9 +85,74 @@ schema.add_field(​
     )​
     
     
    -

    Optional: Configure an analyzer

    The performance and accuracy of keyword matching depend on the analyzer you select. Different analyzers are tailored to various languages and text structures, so choosing the right one can significantly affect the search results for your specific use case.

    +
    import io.milvus.v2.common.DataType;
    +import io.milvus.v2.service.collection.request.AddFieldReq;
    +import io.milvus.v2.service.collection.request.CreateCollectionReq;
    +
    +CreateCollectionReq.CollectionSchema schema = CreateCollectionReq.CollectionSchema.builder()
    +        .enableDynamicField(false)
    +        .build();
    +
    +schema.addField(AddFieldReq.builder()
    +        .fieldName("text")
    +        .dataType(DataType.VarChar)
    +        .maxLength(1000)
    +        .enableAnalyzer(true)
    +        .enableMatch(true)
    +        .build());
    +
    +
    +
    const schema = [
    +  {
    +    name: "id",
    +    data_type: DataType.Int64,
    +    is_primary_key: true,
    +  },
    +  {
    +    name: "text",
    +    data_type: "VarChar",
    +    enable_analyzer: true,
    +    enable_match: true,
    +    max_length: 1000,
    +  },
    +  {
    +    name: "sparse",
    +    data_type: DataType.SparseFloatVector,
    +  },
    +];
    +
    +
    +
    export schema='{
    +        "autoId": true,
    +        "enabledDynamicField": false,
    +        "fields": [
    +            {
    +                "fieldName": "id",
    +                "dataType": "Int64",
    +                "isPrimary": true
    +            },
    +            {
    +                "fieldName": "text",
    +                "dataType": "VarChar",
    +                "elementTypeParams": {
    +                    "max_length": 1000,
    +                    "enable_analyzer": true,
    +                    "enable_match": true
    +                }
    +            },
    +            {
    +                "fieldName": "sparse",
    +                "dataType": "SparseFloatVector"
    +            }
    +        ]
    +    }'
    +
    +
    +

    Optional: Configure an analyzer

    The performance and accuracy of text matching depend on the analyzer you select. Different analyzers are tailored to various languages and text structures, so choosing the right one can significantly affect the search results for your specific use case.

    By default, Milvus uses the standard analyzer, which tokenizes text based on whitespace and punctuation, removes tokens longer than 40 characters, and converts the text to lowercase. No additional parameters are needed to apply this default. For more information, see Standard.
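    For illustration, a sketch of explicitly selecting the built-in standard analyzer through analyzer_params; leaving the parameter out entirely is assumed to have the same effect:

    # Explicitly selecting the built-in standard analyzer; omitting analyzer_params
    # applies the same default.
    analyzer_params = {
        "type": "standard"
    }

    schema.add_field(
        field_name="text",
        datatype=DataType.VARCHAR,
        max_length=200,
        enable_analyzer=True,
        analyzer_params=analyzer_params,
        enable_match=True,
    )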

    If a different analyzer is required, you can configure it using the analyzer_params parameter. For example, to apply the english analyzer for processing English text:

    +
    analyzer_params={​
         "type": "english"​
     }​
    @@ -100,8 +167,71 @@ schema.add_field(​
     )​
     
     
    -

    Milvus also offers other analyzers suited to different languages and scenarios. For details, refer to the Overview section.

    -

    Use keyword match +
    const schema = [
    +  {
    +    name: "id",
    +    data_type: DataType.Int64,
    +    is_primary_key: true,
    +  },
    +  {
    +    name: "text",
    +    data_type: "VarChar",
    +    enable_analyzer: true,
    +    enable_match: true,
    +    max_length: 1000,
    +    analyzer_params: { type: 'english' },
    +  },
    +  {
    +    name: "sparse",
    +    data_type: DataType.SparseFloatVector,
    +  },
    +];
    +
    +
    +
    export schema='{
    +        "autoId": true,
    +        "enabledDynamicField": false,
    +        "fields": [
    +            {
    +                "fieldName": "id",
    +                "dataType": "Int64",
    +                "isPrimary": true
    +            },
    +            {
    +                "fieldName": "text",
    +                "dataType": "VarChar",
    +                "elementTypeParams": {
    +                    "max_length": 200,
    +                    "enable_analyzer": true,
    +                    "enable_match": true,
    +                    "analyzer_params": {"type": "english"}
    +                }
    +            },
    +            {
    +                "fieldName": "my_vector",
    +                "dataType": "FloatVector",
    +                "elementTypeParams": {
    +                    "dim": "5"
    +                }
    +            }
    +        ]
    +    }'
    +
    +
    +

    Milvus also provides other analyzers suited to different languages and scenarios. For details, refer to the Overview section.

    +

    Use text match

    Once you have enabled keyword match for a VARCHAR field in your collection schema, you can perform keyword matches using the TEXT_MATCH expression.

    -

    TEXT_MATCH expression syntax

    The TEXT_MATCH expression is used to specify the field and the keywords to search for. Its syntax is as follows.

    -
    TEXT_MATCH(field_name, text)​
    +    

    Once you have enabled text match for a VARCHAR field in your collection schema, you can perform text matches using the TEXT_MATCH expression.

    +

    TEXT_MATCH expression syntax

    The TEXT_MATCH expression is used to specify the field and the terms to search for. Its syntax is as follows.

    +
    TEXT_MATCH(field_name, text)​
     
     
    • field_name: The name of the VARCHAR field to search.

    • -
    • text: The keywords to search for. Multiple keywords can be separated by spaces or other appropriate delimiters depending on the language and the configured analyzer.

    • +
    • text: The terms to search for. Multiple terms can be separated by spaces or other appropriate delimiters depending on the language and the configured analyzer.

    -

    By default, TEXT_MATCH uses OR matching logic, meaning it returns documents that contain any of the specified keywords. For example, to search for documents containing the keyword machine or deep in the text field, use the following expression.

    +

    By default, TEXT_MATCH uses OR matching logic, meaning it returns documents that contain any of the specified terms. For example, to search for documents containing the term machine or deep in the text field, use the following expression.

    +
    filter = "TEXT_MATCH(text, 'machine deep')"​
    -
     
    -

    You can also combine multiple TEXT_MATCH expressions with logical operators to perform AND matching. For example, to search for documents containing both the keywords machine and deep in the text field, use the following expression.

    +
    String filter = "TEXT_MATCH(text, 'machine deep')";
    +
    +
    const filter = "TEXT_MATCH(text, 'machine deep')";
    +
    +
    export filter="\"TEXT_MATCH(text, 'machine deep')\""
    +
    +

    You can also combine multiple TEXT_MATCH expressions with logical operators to perform AND matching. For example, to search for documents containing both machine and deep in the text field, use the following expression.

    +
    filter = "TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')"​
    -
     
    -

    Search with keyword match

    Keyword match can be used in combination with vector similarity search to narrow the search scope and improve search performance. By filtering the collection with keyword match before the vector similarity search, you can reduce the number of documents that need to be searched, which speeds up search times.

    -

    In this example, the filter expression restricts the search results to documents matching the specified keyword keyword1 or keyword2. The vector similarity search is then performed on this filtered subset of documents.

    +
    String filter = "TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')";
    +
    +
    const filter = "TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')"
    +
    +
    export filter="\"TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')\""
    +
    +

    Search with text match

    Text match can be used in combination with vector similarity search to narrow the search scope and improve search performance. By filtering the collection with text match before the vector similarity search, you can reduce the number of documents that need to be searched, which speeds up search times.

    +

    In this example, the filter expression restricts the search results to documents matching the specified term keyword1 or keyword2. The vector similarity search is then performed on this filtered subset of documents.

    +
    # Match entities with `keyword1` or `keyword2`​
     filter = "TEXT_MATCH(text, 'keyword1 keyword2')"​
     ​
    @@ -150,8 +296,58 @@ result = MilvusClient.search(​
     )​
     
     
    -

    Query with keyword match

    Keyword match can also be used for scalar filtering in query operations. By specifying a TEXT_MATCH expression in the expr parameter of the query() method, you can retrieve documents that match the given keywords.

    -

    The example below retrieves documents whose text field contains both keywords keyword1 and keyword2.

    +
    String filter = "TEXT_MATCH(text, 'keyword1 keyword2')";
    +
    +SearchResp searchResp = client.search(SearchReq.builder()
    +        .collectionName("YOUR_COLLECTION_NAME")
    +        .annsField("embeddings")
    +        .data(Collections.singletonList(queryVector)))
    +        .filter(filter)
    +        .topK(10)
    +        .outputFields(Arrays.asList("id", "text"))
    +        .build());
    +
    +
    // Match entities with `keyword1` or `keyword2`
    +const filter = "TEXT_MATCH(text, 'keyword1 keyword2')";
    +
    +// Assuming 'embeddings' is the vector field and 'text' is the VARCHAR field
    +const result = await client.search(
    +    collection_name: "YOUR_COLLECTION_NAME", // Your collection name
    +    anns_field: "embeddings", // Vector field name
    +    data: [query_vector], // Query vector
    +    filter: filter,
    +    params: {"nprobe": 10},
    +    limit: 10, // Max. number of results to return
    +    output_fields: ["id", "text"] //Fields to return
    +);
    +
    +
    export filter="\"TEXT_MATCH(text, 'keyword1 keyword2')\""
    +
    +export CLUSTER_ENDPOINT="http://localhost:19530"
    +export TOKEN="root:Milvus"
    +
    +curl --request POST \
    +--url "${CLUSTER_ENDPOINT}/v2/vectordb/entities/search" \
    +--header "Authorization: Bearer ${TOKEN}" \
    +--header "Content-Type: application/json" \
    +-d '{
    +    "collectionName": "demo2",
    +    "annsField": "my_vector",
    +    "data": [[0.19886812562848388, 0.06023560599112088, 0.6976963061752597, 0.2614474506242501, 0.838729485096104]],
    +    "filter": '"$filter"',
    +    "searchParams": {
    +        "params": {
    +            "nprobe": 10
    +        }
    +    },
    +    "limit": 3,
    +    "outputFields": ["text","id"]
    +}'
    +
    +

    Query with text match

    Text match can also be used for scalar filtering in query operations. By specifying a TEXT_MATCH expression in the expr parameter of the query() method, you can retrieve documents that match the given terms.

    +

    The example below retrieves documents whose text field contains both the terms keyword1 and keyword2.

    +
    # Match entities with both `keyword1` and `keyword2`​
     filter = "TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')"​
     ​
    @@ -161,6 +357,39 @@ result = MilvusClient.query(​
         output_fields=["id", "text"]​
     )​
     
    +
    +
    String filter = "TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')";
    +
    +QueryResp queryResp = client.query(QueryReq.builder()
    +        .collectionName("YOUR_COLLECTION_NAME")
    +        .filter(filter)
    +        .outputFields(Arrays.asList("id", "text"))
    +        .build()
    +);
    +
    +
    // Match entities with both `keyword1` and `keyword2`
    +const filter = "TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')";
    +
    +const result = await client.query(
    +    collection_name: "YOUR_COLLECTION_NAME",
    +    filter: filter, 
    +    output_fields: ["id", "text"]
    +)
    +
    +
    export filter="\"TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')\""
    +
    +export CLUSTER_ENDPOINT="http://localhost:19530"
    +export TOKEN="root:Milvus"
    +
    +curl --request POST \
    +--url "${CLUSTER_ENDPOINT}/v2/vectordb/entities/query" \
    +--header "Authorization: Bearer ${TOKEN}" \
    +--header "Content-Type: application/json" \
    +-d '{
    +    "collectionName": "demo2",
    +    "filter": '"$filter"',
    +    "outputFields": ["id", "text"]
    +}'
     

    Considerations

      -
    • Enabling keyword match for a field triggers the creation of an inverted index, which consumes storage resources. Consider the storage impact when deciding whether to enable this feature, as it varies with the text size, the number of unique tokens, and the analyzer used.

    • +
    • Enabling text match for a field triggers the creation of an inverted index, which consumes storage resources. Consider the storage impact when deciding whether to enable this feature, as it varies with the text size, the number of unique tokens, and the analyzer used.

    • Once you define an analyzer in your schema, its settings become permanent for that collection. If you decide that a different analyzer would better suit your needs, consider dropping the existing collection and creating a new one with the desired analyzer configuration, as in the sketch below.
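    A minimal sketch of that recreate step with the Python client; the collection name, the extra id and vector fields, and the choice of the english analyzer are assumptions for the example, and any existing data must be re-inserted afterwards:

    from pymilvus import MilvusClient, DataType

    client = MilvusClient(uri="http://localhost:19530")  # assumed local deployment

    # Drop the collection that was created with the old analyzer settings.
    client.drop_collection(collection_name="YOUR_COLLECTION_NAME")

    # Rebuild the schema with the desired analyzer configuration.
    schema = MilvusClient.create_schema(auto_id=True, enable_dynamic_field=False)
    schema.add_field(field_name="id", datatype=DataType.INT64, is_primary=True)
    schema.add_field(
        field_name="text",
        datatype=DataType.VARCHAR,
        max_length=1000,
        enable_analyzer=True,
        analyzer_params={"type": "english"},  # the new analyzer choice
        enable_match=True,
    )
    schema.add_field(field_name="embeddings", datatype=DataType.FLOAT_VECTOR, dim=5)

    client.create_collection(collection_name="YOUR_COLLECTION_NAME", schema=schema)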

    diff --git a/localization/v2.5.x/site/fr/userGuide/search-query-get/multi-vector-search.md b/localization/v2.5.x/site/fr/userGuide/search-query-get/multi-vector-search.md index 63e1d6589..5bd39c7c5 100644 --- a/localization/v2.5.x/site/fr/userGuide/search-query-get/multi-vector-search.md +++ b/localization/v2.5.x/site/fr/userGuide/search-query-get/multi-vector-search.md @@ -41,11 +41,11 @@ title: Recherche hybride

    Hybrid search is suitable for the following two scenarios.

    Dense and sparse vector search

    Different vector types can represent different information, and using multiple embedding models can capture different features and aspects of the data more completely. For example, applying different embedding models to the same sentence can produce a dense vector representing its semantic meaning and a sparse vector representing the word frequencies within it.

      -
    • Vecteurs épars : Les vecteurs épars se caractérisent par leur dimensionnalité élevée et la présence de quelques valeurs non nulles. Cette structure les rend particulièrement adaptés aux applications traditionnelles de recherche d'informations. Dans la plupart des cas, le nombre de dimensions utilisées dans les vecteurs épars correspond à différents tokens dans une ou plusieurs langues. Chaque dimension se voit attribuer une valeur qui indique l'importance relative de ce mot dans le document. Cette disposition s'avère avantageuse pour les tâches qui impliquent la correspondance de mots-clés.

    • -
    • Vecteurs denses : Les vecteurs denses sont des encastrements dérivés des réseaux neuronaux. Lorsqu'ils sont disposés dans un tableau ordonné, ces vecteurs capturent l'essence sémantique du texte d'entrée. Il convient de noter que les vecteurs denses ne sont pas limités au traitement de texte ; ils sont également largement utilisés dans le domaine de la vision par ordinateur pour représenter la sémantique des données visuelles. Ces vecteurs denses, généralement générés par des modèles d'intégration de texte, sont caractérisés par le fait que la plupart ou tous les éléments sont non nuls. Les vecteurs denses sont donc particulièrement efficaces pour les applications de recherche sémantique, car ils peuvent renvoyer les résultats les plus similaires sur la base de la distance vectorielle, même en l'absence de correspondances exactes entre les mots clés. Cette capacité permet d'obtenir des résultats de recherche plus nuancés et tenant compte du contexte, en saisissant souvent des relations entre des concepts qui pourraient échapper aux approches basées sur les mots-clés.

    • +
    • Vecteurs épars : Les vecteurs épars se caractérisent par leur dimensionnalité élevée et la présence de quelques valeurs non nulles. Cette structure les rend particulièrement adaptés aux applications traditionnelles de recherche d'informations. Dans la plupart des cas, le nombre de dimensions utilisées dans les vecteurs épars correspond à différents tokens dans une ou plusieurs langues. Chaque dimension se voit attribuer une valeur qui indique l'importance relative de ce mot dans le document. Cette disposition s'avère avantageuse pour les tâches qui impliquent la mise en correspondance de textes.

    • +
    • Vecteurs denses : Les vecteurs denses sont des encastrements dérivés des réseaux neuronaux. Lorsqu'ils sont disposés dans un tableau ordonné, ces vecteurs capturent l'essence sémantique du texte d'entrée. Il convient de noter que les vecteurs denses ne sont pas limités au traitement de texte ; ils sont également largement utilisés dans le domaine de la vision par ordinateur pour représenter la sémantique des données visuelles. Ces vecteurs denses, généralement générés par des modèles d'intégration de texte, sont caractérisés par le fait que la plupart ou tous les éléments sont non nuls. Les vecteurs denses sont donc particulièrement efficaces pour les applications de recherche sémantique, car ils peuvent renvoyer les résultats les plus similaires sur la base de la distance vectorielle, même en l'absence de correspondances textuelles exactes. Cette capacité permet d'obtenir des résultats de recherche plus nuancés et tenant compte du contexte, en saisissant souvent des relations entre des concepts qui pourraient échapper aux approches basées sur les mots-clés.

    For details, refer to Sparse Vector and Dense Vector.

    -

    Multimodal search

    Multimodal search refers to similarity search across unstructured data of multiple modalities (such as images, videos, audio, or text). For example, a person can be represented with different data modalities such as fingerprints, voiceprints, and facial features. Hybrid search supports running multiple searches at the same time, for example searching for a person with similar fingerprints and voiceprints.

    +

    Multimodal search

    Multimodal search refers to similarity search across unstructured data of multiple modalities (such as images, videos, audio, or text). For example, a person can be represented with different data modalities such as fingerprints, voiceprints, and facial features. Hybrid search supports running multiple searches at the same time, for example searching for a person with similar fingerprints and voiceprints.

    Workflow

    When running sparse vector searches, you can simplify the generation of sparse embeddings by leveraging the capabilities of full-text search. For details, see Full Text Search.
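    As a minimal illustration of that workflow (assuming pymilvus 2.5, a local Milvus instance at the default port, and placeholder field names), a BM25 function can be attached to the schema so that Milvus derives the sparse embeddings from raw text at insert time:

    from pymilvus import MilvusClient, DataType, Function, FunctionType

    client = MilvusClient(uri="http://localhost:19530")

    schema = client.create_schema(auto_id=True)
    schema.add_field(field_name="id", datatype=DataType.INT64, is_primary=True)
    # enable_analyzer lets Milvus tokenize the raw text stored in this field.
    schema.add_field(field_name="text", datatype=DataType.VARCHAR, max_length=1000, enable_analyzer=True)
    schema.add_field(field_name="sparse", datatype=DataType.SPARSE_FLOAT_VECTOR)

    # The BM25 function fills the sparse field from the text field automatically.
    schema.add_function(Function(
        name="text_bm25",
        input_field_names=["text"],
        output_field_names=["sparse"],
        function_type=FunctionType.BM25,
    ))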

    -

    Create index

    After defining the collection schema, you need to set up the vector indexes and the similarity metrics. In this example, an IVF_FLAT index is created for the dense vector field dense, and a SPARSE_INVERTED_INDEX is created for the sparse vector field sparse. To learn about the supported index types, see Index Explained.

    +

    Create index

    After defining the collection schema, you need to set up the vector indexes and the similarity metrics. In this example, an IVF_FLAT index is created for the dense vector field dense, and a SPARSE_INVERTED_INDEX is created for the sparse vector field sparse. To learn about the supported index types, see Index Explained.

    from pymilvus import MilvusClient​
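    A minimal sketch of such an index configuration with the Python client, assuming a collection named my_collection and purely illustrative parameter values:

    from pymilvus import MilvusClient

    client = MilvusClient(uri="http://localhost:19530")

    index_params = client.prepare_index_params()
    # Dense field: IVF_FLAT with inner-product similarity.
    index_params.add_index(field_name="dense", index_type="IVF_FLAT", metric_type="IP", params={"nlist": 128})
    # Sparse field: inverted index; IP is the only metric supported for sparse vectors.
    index_params.add_index(field_name="sparse", index_type="SPARSE_INVERTED_INDEX", metric_type="IP")

    client.create_index(collection_name="my_collection", index_params=index_params)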
    diff --git a/localization/v2.5.x/site/fr/userGuide/search-query-get/single-vector-search.md b/localization/v2.5.x/site/fr/userGuide/search-query-get/single-vector-search.md
    index 1479b39ed..54db75acd 100644
    --- a/localization/v2.5.x/site/fr/userGuide/search-query-get/single-vector-search.md
    +++ b/localization/v2.5.x/site/fr/userGuide/search-query-get/single-vector-search.md
    @@ -38,7 +38,7 @@ title: Recherche ANN de base
             >
           
         

    ANN search and k-Nearest Neighbors (kNN) search are the usual methods for vector similarity search. In a kNN search, you must compare all vectors in a vector space against the query vector carried in the search request before determining the most similar ones, which consumes a lot of time and resources.

    -

    Unlike kNN searches, an ANN search algorithm requests an index file that records the sorted order of the vector embeddings. When a search request arrives, you can use the index file as a reference to quickly locate a subgroup that is likely to contain the vector embeddings most similar to the query vector. You can then use the specified metric type to measure the similarity between the query vector and those in the subgroup, sort the group members by their similarity to the query vector, and determine the top-K group members.

    +

    Unlike kNN searches, an ANN search algorithm requests an index file that records the sorted order of the vector embeddings. When a search request arrives, you can use the index file as a reference to quickly locate a subgroup that is likely to contain the vector embeddings most similar to the query vector. You can then use the specified metric type to measure the similarity between the query vector and those in the subgroup, sort the group members by their similarity to the query vector, and determine the top-K group members.

    ANN searches depend on pre-built indexes, and search throughput, memory usage, and search correctness may vary with the index types you choose. You need to balance search performance against correctness.

    To reduce the learning curve, Milvus provides AUTOINDEX. With AUTOINDEX, Milvus analyzes the data distribution in your collection while building the index and sets the most optimized index parameters based on that analysis, striking a balance between search performance and correctness.

    For details about AUTOINDEX and the applicable metric types, see AUTOINDEX and Metric Types. In this section, you will find detailed information on the following topics.
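    A minimal sketch of a basic ANN search with the Python client (the collection and field names, the URI, and the query vector are placeholders):

    from pymilvus import MilvusClient

    client = MilvusClient(uri="http://localhost:19530")

    query_vector = [0.1, -0.2, 0.3, 0.4]  # must match the dimension of the indexed field

    res = client.search(
        collection_name="my_collection",
        anns_field="vector",
        data=[query_vector],
        limit=3,  # top-K
        search_params={"metric_type": "IP"},
    )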

    @@ -890,7 +890,7 @@ curl --request POST \​

    You can include filtering conditions in a search request so that Milvus performs metadata filtering before running the ANN search, reducing the search scope from the whole collection to only the entities matching the specified filter conditions.

    To learn more about metadata filtering and filter conditions, see Filtered Search and Metadata Filtering.
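    Continuing with the client and query_vector from the sketch above, a filtered search is a regular search call that carries a filter expression; the color and price fields are placeholders:

    res = client.search(
        collection_name="my_collection",
        data=[query_vector],
        limit=5,
        # Metadata filtering runs before the ANN search itself.
        filter='color == "red" and price < 100',
        output_fields=["color", "price"],
    )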

  • Range search

    -

    You can improve the relevance of search results by restricting the distance or score of the returned entities to a specific range. In Milvus, a range search involves drawing two concentric circles centered on the vector embedding most similar to the query vector. The search request specifies the radii of both circles, and Milvus returns all vector embeddings that fall within the outer circle but not within the inner circle.

    +

    You can improve the relevance of search results by restricting the distance or score of the returned entities to a specific range. In Milvus, a range search involves drawing two concentric circles centered on the vector embedding most similar to the query vector. The search request specifies the radii of both circles, and Milvus returns all vector embeddings that fall within the outer circle but not within the inner circle.

    For more about range search, refer to Range Search.
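    Reusing the same client and query_vector, a range search is expressed through radius (outer circle) and range_filter (inner circle) in the search parameters; the values below are purely illustrative for an L2 metric:

    res = client.search(
        collection_name="my_collection",
        data=[query_vector],
        limit=10,
        search_params={
            "metric_type": "L2",
            # Return entities whose distance lies between range_filter and radius.
            "params": {"radius": 1.0, "range_filter": 0.5},
        },
    )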

  • Grouping search

    If the returned entities all hold the same value in a specific field, the search results may not represent the distribution of all vector embeddings in the vector space. To diversify search results, consider using grouping search.
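    Reusing the same client and query_vector, a grouping search only adds group_by_field; the doc_id field is a placeholder:

    res = client.search(
        collection_name="my_collection",
        data=[query_vector],
        limit=5,
        group_by_field="doc_id",  # diversify results: at most one hit per doc_id value
        output_fields=["doc_id"],
    )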

    @@ -904,9 +904,9 @@ curl --request POST \​
  • Full-text search

    Full-text search is a feature that retrieves documents containing specific terms or phrases from text datasets and then ranks the results by relevance. This feature overcomes the limitations of semantic search, which can miss precise terms, ensuring that you receive the most accurate and contextually relevant results. It also simplifies vector searches by accepting raw text input and automatically converting your text data into sparse embeddings, without the need to generate vector embeddings manually.

    For details on full-text search, see Full Text Search.

  • -
  • Keyword match

    -

    Keyword matching in Milvus enables precise document retrieval based on specific terms. This feature is primarily used in filtered searches to satisfy specific conditions, and it can incorporate scalar filtering to refine query results, allowing similarity searches within vectors that meet the scalar criteria.

    -

    For details on keyword matching, see Keyword Match.

  • +
  • Text match

    +

    Text match in Milvus enables precise document retrieval based on specific terms. This feature is primarily used in filtered searches to satisfy specific conditions, and it can incorporate scalar filtering to refine query results, allowing similarity searches within vectors that meet the scalar criteria.

    +

    For more information on text match, see Text Match.
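    Reusing the same client and query_vector, text match is expressed as a TEXT_MATCH condition inside the filter of a search or query; the text field name and terms are placeholders, and the field is assumed to have been created with the analyzer and match options enabled:

    res = client.search(
        collection_name="my_collection",
        data=[query_vector],
        limit=10,
        # Keep only entities whose text field contains either term.
        filter="TEXT_MATCH(text, 'keyword1 keyword2')",
        output_fields=["text"],
    )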

  • Use Partition Key

    Involving multiple scalar fields in metadata filtering and using fairly complicated filter conditions can affect search efficiency. Once you set a scalar field as the partition key and use a filter condition involving the partition key in the search request, it helps restrict the search scope to the partitions matching the specified partition key values.

    For more information on the partition key, refer to Use Partition Key.
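    A minimal sketch of how a partition key can be declared and then used to narrow a search, assuming placeholder field and collection names (index creation and collection loading are omitted for brevity):

    from pymilvus import MilvusClient, DataType

    client = MilvusClient(uri="http://localhost:19530")

    schema = client.create_schema(auto_id=True)
    schema.add_field(field_name="id", datatype=DataType.INT64, is_primary=True)
    schema.add_field(field_name="vector", datatype=DataType.FLOAT_VECTOR, dim=4)
    # Entities are distributed into partitions by the value of this field.
    schema.add_field(field_name="tenant_id", datatype=DataType.INT64, is_partition_key=True)
    client.create_collection(collection_name="my_collection", schema=schema)

    # A filter on the partition key restricts the search to the matching partitions.
    res = client.search(
        collection_name="my_collection",
        data=[[0.1, -0.2, 0.3, 0.4]],
        limit=10,
        filter="tenant_id == 42",
    )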

  • diff --git a/localization/v2.5.x/site/it/adminGuide/upgrade-pulsar-v3.json b/localization/v2.5.x/site/it/adminGuide/upgrade-pulsar-v3.json index 77a596a98..c0a8da0c8 100644 --- a/localization/v2.5.x/site/it/adminGuide/upgrade-pulsar-v3.json +++ b/localization/v2.5.x/site/it/adminGuide/upgrade-pulsar-v3.json @@ -1 +1 @@ -{"codeList":["kubectl -n default port-forward deploy/my-release-milvus-proxy 9091:9091 &​\n","[1] 8116​\nForwarding from 127.0.0.1:9091 -> 9091​\n\n","pid=8116​\n\n","curl 127.0.0.1:9091/api/v1/collections \\​\n|curl 127.0.0.1:9091/api/v1/persist -d @/dev/stdin\\​\n|jq '.flush_coll_segIDs'| jq '[.[] | .data[]]' | jq '{segmentIDs: (.)}' \\​\n> flushing_segments.json​\ncat flushing_segments.json​\n\n","{​\n\"segmentIDs\": [​\n 454097953998181000,​\n 454097953999383600,​\n 454097953998180800​\n]​\n}​\n\n","cat flushing_segments.json| curl -X GET 127.0.0.1:9091/api/v1/persist/state -d @/dev/stdin ​\n\n","{\"status\":{},\"flushed\":true}​\n\n","kill $pid​\n\n","[1] + 8116 terminated kubectl -n default port-forward deploy/my-release-milvus-proxy 9091:9091 ​\n\n","helm -n default get values my-release -o yaml > values.yaml​\ncat values.yaml​\n\n","helm -n default uninstall my-release​\n\n","These resources were kept due to the resource policy:​\n[PersistentVolumeClaim] my-release-minio​\n​\nrelease \"my-release\" uninstalled​\n\n","kubectl -n default get pvc -lapp=pulsar,release=my-release |grep -v NAME |awk '{print $1}' > pulsar-pvcs.txt​\nkubectl -n default get pvc -lapp=pulsar,release=my-release -o custom-columns=VOL:.spec.volumeName|grep -v VOL > pulsar-pvs.txt​\necho \"Volume Claims:\"​\ncat pulsar-pvcs.txt​\necho \"Volumes:\"​\ncat pulsar-pvs.txt​\n\n","Volume Claims:​\nmy-release-pulsar-bookie-journal-my-release-pulsar-bookie-0​\nmy-release-pulsar-bookie-journal-my-release-pulsar-bookie-1​\nmy-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-0​\nmy-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-1​\nmy-release-pulsar-zookeeper-data-my-release-pulsar-zookeeper-0​\nVolumes:​\npvc-f590a4de-df31-4ca8-a424-007eac3c619a​\npvc-17b0e215-3e14-4d14-901e-1a1dda9ff5a3​\npvc-72f83c25-6ea1-45ee-9559-0b783f2c530b​\npvc-60dcb6e4-760d-46c7-af1a-d1fc153b0caf​\npvc-2da33f64-c053-42b9-bb72-c5d50779aa0a​\n\n","cat pulsar-pvcs.txt |xargs -I {} kubectl -n default delete pvc {} --wait=false​\n\n","persistentvolumeclaim \"my-release-pulsar-bookie-journal-my-release-pulsar-bookie-0\" deleted​\npersistentvolumeclaim \"my-release-pulsar-bookie-journal-my-release-pulsar-bookie-1\" deleted​\npersistentvolumeclaim \"my-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-0\" deleted​\npersistentvolumeclaim \"my-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-1\" deleted​\npersistentvolumeclaim \"my-release-pulsar-zookeeper-data-my-release-pulsar-zookeeper-0\" deleted​\n\n","cat pulsar-pvs.txt |xargs -I {} kubectl -n default delete pvc {} --wait=false​\n\n","Error from server (NotFound): persistentvolumeclaims \"pvc-f590a4de-df31-4ca8-a424-007eac3c619a\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-17b0e215-3e14-4d14-901e-1a1dda9ff5a3\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-72f83c25-6ea1-45ee-9559-0b783f2c530b\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-60dcb6e4-760d-46c7-af1a-d1fc153b0caf\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-2da33f64-c053-42b9-bb72-c5d50779aa0a\" not found​\n\n","kubectl -n default get milvus my-release -o yaml > milvus.yaml​\nhead 
milvus.yaml -n 20​\n\n","apiVersion: milvus.io/v1beta1​\nkind: Milvus​\nmetadata:​\nannotations:​\n milvus.io/dependency-values-merged: \"true\"​\n milvus.io/pod-service-label-added: \"true\"​\n milvus.io/querynode-current-group-id: \"0\"​\ncreationTimestamp: \"2024-11-22T08:06:59Z\"​\nfinalizers:​\n- milvus.milvus.io/finalizer​\ngeneration: 3​\nlabels:​\n app: milvus​\n milvus.io/operator-version: 1.1.2​\nname: my-release​\nnamespace: default​\nresourceVersion: \"692217324\"​\nuid: 7a469ed0-9df1-494e-bd9a-340fac4305b5​\nspec:​\ncomponents:​\n\n","# a patch to retain etcd & storage data and delete pulsar data while delete milvus​\nspec:​\ndependencies:​\n etcd:​\n inCluster:​\n deletionPolicy: Retain​\n pvcDeletion: false​\n storage:​\n inCluster:​\n deletionPolicy: Retain​\n pvcDeletion: false​\n pulsar:​\n inCluster:​\n deletionPolicy: Delete​\n pvcDeletion: true​\n\n","kubectl -n default patch milvus my-release --patch-file patch.yaml --type=merge​\n\n","milvus.milvus.io/my-release patched​\n\n","kubectl -n default delete milvus my-release --wait=false​\nkubectl -n default get milvus my-release​\nkubectl -n default delete milvus my-release --wait=true​\n\n","milvus.milvus.io \"my-release\" deleted​\nNAME MODE STATUS UPDATED AGE​\nmy-release cluster Deleting True 41m​\nmilvus.milvus.io \"my-release\" deleted​\n\n","kubectl -n default get milvus my-release​\n\n","No resources found in default namespace.​\n\n","# change the following:​\npulsar:​\nenabled: false # set to false​\n# you may also clean up rest fields under pulsar field​\n# it's ok to keep them though.​\npulsarv3:​\nenabled: true​\n# append other values for pulsar v3 chart if needs​\n\n","helm repo add zilliztech https://zilliztech.github.io/milvus-helm​\nhelm repo update zilliztech​\n\n","\"zilliztech\" already exists with the same configuration, skipping​\nHang tight while we grab the latest from your chart repositories...​\n...Successfully got an update from the \"zilliztech\" chart repository​\nUpdate Complete. 
⎈Happy Helming!⎈​\n\n","helm -n default install my-release zilliztech/milvus --reset-values -f values.yaml​\n\n","NAME: my-release​\nLAST DEPLOYED: Fri Nov 22 15:31:27 2024​\nNAMESPACE: default​\nSTATUS: deployed​\nREVISION: 1​\nTEST SUITE: None​\n\n","NAME READY STATUS RESTARTS AGE​\nmy-release-etcd-0 1/1 Running 0 4m3s​\nmy-release-milvus-datanode-56487bc4bc-s6mbd 1/1 Running 0 4m5s​\nmy-release-milvus-indexnode-6476894d6-rv85d 1/1 Running 0 4m5s​\nmy-release-milvus-mixcoord-6d8875cb9c-67fcq 1/1 Running 0 4m4s​\nmy-release-milvus-proxy-7bc45d57c5-2qf8m 1/1 Running 0 4m4s​\nmy-release-milvus-querynode-77465747b-kt7f4 1/1 Running 0 4m4s​\nmy-release-minio-684ff4f5df-pnc97 1/1 Running 0 4m5s​\nmy-release-pulsarv3-bookie-0 1/1 Running 0 4m3s​\nmy-release-pulsarv3-bookie-1 1/1 Running 0 4m3s​\nmy-release-pulsarv3-bookie-2 1/1 Running 0 4m3s​\nmy-release-pulsarv3-bookie-init-6z4tk 0/1 Completed 0 4m1s​\nmy-release-pulsarv3-broker-0 1/1 Running 0 4m2s​\nmy-release-pulsarv3-broker-1 1/1 Running 0 4m2s​\nmy-release-pulsarv3-proxy-0 1/1 Running 0 4m2s​\nmy-release-pulsarv3-proxy-1 1/1 Running 0 4m2s​\nmy-release-pulsarv3-pulsar-init-wvqpc 0/1 Completed 0 4m1s​\nmy-release-pulsarv3-recovery-0 1/1 Running 0 4m3s​\nmy-release-pulsarv3-zookeeper-0 1/1 Running 0 4m2s​\nmy-release-pulsarv3-zookeeper-1 1/1 Running 0 4m2s​\nmy-release-pulsarv3-zookeeper-2 1/1 Running 0 4m2s​\n\n","# change the followings fields:​\napiVersion: milvus.io/v1beta1​\nkind: Milvus​\nmetadata:​\nannotations: null # this field should be removed or set to null​\nresourceVersion: null # this field should be removed or set to null​\nuid: null # this field should be removed or set to null​\nspec:​\ndependencies:​\n pulsar:​\n inCluster:​\n chartVersion: pulsar-v3​\n # delete all previous values for pulsar v2 and set it to null.​\n # you may add additional values here for pulsar v3 if you're sure about it.​\n values: null​\n\n","helm repo add milvus-operator https://zilliztech.github.io/milvus-operator​\nhelm repo update milvus-operator​\nhelm -n milvus-operator upgrade milvus-operator milvus-operator/milvus-operator​\n\n","kubectl create -f milvus.yaml​\n\n","milvus.milvus.io/my-release created​\n\n","NAME READY STATUS RESTARTS AGE​\nmy-release-etcd-0 1/1 Running 0 65m​\nmy-release-milvus-datanode-57fd59ff58-5mdrk 1/1 Running 0 93s​\nmy-release-milvus-indexnode-67867c6b9b-4wsbw 1/1 Running 0 93s​\nmy-release-milvus-mixcoord-797849f9bb-sf8z5 1/1 Running 0 93s​\nmy-release-milvus-proxy-5d5bf98445-c55m6 1/1 Running 0 93s​\nmy-release-milvus-querynode-0-64797f5c9-lw4rh 1/1 Running 0 92s​\nmy-release-minio-79476ccb49-zvt2h 1/1 Running 0 65m​\nmy-release-pulsar-bookie-0 1/1 Running 0 5m10s​\nmy-release-pulsar-bookie-1 1/1 Running 0 5m10s​\nmy-release-pulsar-bookie-2 1/1 Running 0 5m10s​\nmy-release-pulsar-bookie-init-v8fdj 0/1 Completed 0 5m11s​\nmy-release-pulsar-broker-0 1/1 Running 0 5m11s​\nmy-release-pulsar-broker-1 1/1 Running 0 5m10s​\nmy-release-pulsar-proxy-0 1/1 Running 0 5m11s​\nmy-release-pulsar-proxy-1 1/1 Running 0 5m10s​\nmy-release-pulsar-pulsar-init-5lhx7 0/1 Completed 0 5m11s​\nmy-release-pulsar-recovery-0 1/1 Running 0 5m11s​\nmy-release-pulsar-zookeeper-0 1/1 Running 0 5m11s​\nmy-release-pulsar-zookeeper-1 1/1 Running 0 5m10s​\nmy-release-pulsar-zookeeper-2 1/1 Running 0 5m10s​\n\n"],"headingContent":"Upgrading Pulsar ​","anchorList":[{"label":"Aggiornamento di Pulsar","href":"Upgrading-Pulsar-​","type":1,"isActive":false},{"label":"Tabella di 
marcia","href":"Roadmap","type":2,"isActive":false},{"label":"Procedure","href":"Procedures","type":2,"isActive":false}]} \ No newline at end of file +{"codeList":["kubectl -n default port-forward deploy/my-release-milvus-proxy 9091:9091 &​\n","[1] 8116​\nForwarding from 127.0.0.1:9091 -> 9091​\n\n","pid=8116​\n\n","curl 127.0.0.1:9091/api/v1/collections \\​\n|curl 127.0.0.1:9091/api/v1/persist -d @/dev/stdin\\​\n|jq '.flush_coll_segIDs'| jq '[.[] | .data[]]' | jq '{segmentIDs: (.)}' \\​\n> flushing_segments.json​\ncat flushing_segments.json​\n\n","{​\n \"segmentIDs\": [​\n 454097953998181000,​\n 454097953999383600,​\n 454097953998180800​\n ]​\n}​\n\n","cat flushing_segments.json| curl -X GET 127.0.0.1:9091/api/v1/persist/state -d @/dev/stdin ​\n\n","{\"status\":{},\"flushed\":true}​\n\n","kill $pid​\n\n","[1] + 8116 terminated kubectl -n default port-forward deploy/my-release-milvus-proxy 9091:9091 ​\n\n","helm -n default get values my-release -o yaml > values.yaml​\ncat values.yaml​\n\n","helm -n default uninstall my-release​\n\n","These resources were kept due to the resource policy:​\n[PersistentVolumeClaim] my-release-minio​\n​\nrelease \"my-release\" uninstalled​\n\n","kubectl -n default get pvc -lapp=pulsar,release=my-release |grep -v NAME |awk '{print $1}' > pulsar-pvcs.txt​\nkubectl -n default get pvc -lapp=pulsar,release=my-release -o custom-columns=VOL:.spec.volumeName|grep -v VOL > pulsar-pvs.txt​\necho \"Volume Claims:\"​\ncat pulsar-pvcs.txt​\necho \"Volumes:\"​\ncat pulsar-pvs.txt​\n\n","Volume Claims:​\nmy-release-pulsar-bookie-journal-my-release-pulsar-bookie-0​\nmy-release-pulsar-bookie-journal-my-release-pulsar-bookie-1​\nmy-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-0​\nmy-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-1​\nmy-release-pulsar-zookeeper-data-my-release-pulsar-zookeeper-0​\nVolumes:​\npvc-f590a4de-df31-4ca8-a424-007eac3c619a​\npvc-17b0e215-3e14-4d14-901e-1a1dda9ff5a3​\npvc-72f83c25-6ea1-45ee-9559-0b783f2c530b​\npvc-60dcb6e4-760d-46c7-af1a-d1fc153b0caf​\npvc-2da33f64-c053-42b9-bb72-c5d50779aa0a​\n\n","cat pulsar-pvcs.txt |xargs -I {} kubectl -n default delete pvc {} --wait=false​\n\n","persistentvolumeclaim \"my-release-pulsar-bookie-journal-my-release-pulsar-bookie-0\" deleted​\npersistentvolumeclaim \"my-release-pulsar-bookie-journal-my-release-pulsar-bookie-1\" deleted​\npersistentvolumeclaim \"my-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-0\" deleted​\npersistentvolumeclaim \"my-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-1\" deleted​\npersistentvolumeclaim \"my-release-pulsar-zookeeper-data-my-release-pulsar-zookeeper-0\" deleted​\n\n","cat pulsar-pvs.txt |xargs -I {} kubectl -n default delete pvc {} --wait=false​\n\n","Error from server (NotFound): persistentvolumeclaims \"pvc-f590a4de-df31-4ca8-a424-007eac3c619a\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-17b0e215-3e14-4d14-901e-1a1dda9ff5a3\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-72f83c25-6ea1-45ee-9559-0b783f2c530b\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-60dcb6e4-760d-46c7-af1a-d1fc153b0caf\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-2da33f64-c053-42b9-bb72-c5d50779aa0a\" not found​\n\n","kubectl -n default get milvus my-release -o yaml > milvus.yaml​\nhead milvus.yaml -n 20​\n\n","apiVersion: milvus.io/v1beta1​\nkind: Milvus​\nmetadata:​\n annotations:​\n milvus.io/dependency-values-merged: \"true\"​\n 
milvus.io/pod-service-label-added: \"true\"​\n milvus.io/querynode-current-group-id: \"0\"​\n creationTimestamp: \"2024-11-22T08:06:59Z\"​\n finalizers:​\n - milvus.milvus.io/finalizer​\n generation: 3​\n labels:​\n app: milvus​\n milvus.io/operator-version: 1.1.2​\nname: my-release​\nnamespace: default​\nresourceVersion: \"692217324\"​\nuid: 7a469ed0-9df1-494e-bd9a-340fac4305b5​\nspec:​\n components:​\n\n","# a patch to retain etcd & storage data and delete pulsar data while delete milvus​\nspec:​\n dependencies:​\n etcd:​\n inCluster:​\n deletionPolicy: Retain​\n pvcDeletion: false​\n storage:​\n inCluster:​\n deletionPolicy: Retain​\n pvcDeletion: false​\n pulsar:​\n inCluster:​\n deletionPolicy: Delete​\n pvcDeletion: true​\n\n","kubectl -n default patch milvus my-release --patch-file patch.yaml --type=merge​\n\n","milvus.milvus.io/my-release patched​\n\n","kubectl -n default delete milvus my-release --wait=false​\nkubectl -n default get milvus my-release​\nkubectl -n default delete milvus my-release --wait=true​\n\n","milvus.milvus.io \"my-release\" deleted​\nNAME MODE STATUS UPDATED AGE​\nmy-release cluster Deleting True 41m​\nmilvus.milvus.io \"my-release\" deleted​\n\n","kubectl -n default get milvus my-release​\n\n","No resources found in default namespace.​\n\n","# change the following:​\npulsar:​\n enabled: false # set to false​\n # you may also clean up rest fields under pulsar field​\n # it's ok to keep them though.​\npulsarv3:​\n enabled: true​\n # append other values for pulsar v3 chart if needs​\n\n","helm repo add zilliztech https://zilliztech.github.io/milvus-helm​\nhelm repo update zilliztech​\n\n","\"zilliztech\" already exists with the same configuration, skipping​\nHang tight while we grab the latest from your chart repositories...​\n...Successfully got an update from the \"zilliztech\" chart repository​\nUpdate Complete. 
⎈Happy Helming!⎈​\n\n","helm -n default install my-release zilliztech/milvus --reset-values -f values.yaml​\n\n","NAME: my-release​\nLAST DEPLOYED: Fri Nov 22 15:31:27 2024​\nNAMESPACE: default​\nSTATUS: deployed​\nREVISION: 1​\nTEST SUITE: None​\n\n","NAME READY STATUS RESTARTS AGE​\nmy-release-etcd-0 1/1 Running 0 4m3s​\nmy-release-milvus-datanode-56487bc4bc-s6mbd 1/1 Running 0 4m5s​\nmy-release-milvus-indexnode-6476894d6-rv85d 1/1 Running 0 4m5s​\nmy-release-milvus-mixcoord-6d8875cb9c-67fcq 1/1 Running 0 4m4s​\nmy-release-milvus-proxy-7bc45d57c5-2qf8m 1/1 Running 0 4m4s​\nmy-release-milvus-querynode-77465747b-kt7f4 1/1 Running 0 4m4s​\nmy-release-minio-684ff4f5df-pnc97 1/1 Running 0 4m5s​\nmy-release-pulsarv3-bookie-0 1/1 Running 0 4m3s​\nmy-release-pulsarv3-bookie-1 1/1 Running 0 4m3s​\nmy-release-pulsarv3-bookie-2 1/1 Running 0 4m3s​\nmy-release-pulsarv3-bookie-init-6z4tk 0/1 Completed 0 4m1s​\nmy-release-pulsarv3-broker-0 1/1 Running 0 4m2s​\nmy-release-pulsarv3-broker-1 1/1 Running 0 4m2s​\nmy-release-pulsarv3-proxy-0 1/1 Running 0 4m2s​\nmy-release-pulsarv3-proxy-1 1/1 Running 0 4m2s​\nmy-release-pulsarv3-pulsar-init-wvqpc 0/1 Completed 0 4m1s​\nmy-release-pulsarv3-recovery-0 1/1 Running 0 4m3s​\nmy-release-pulsarv3-zookeeper-0 1/1 Running 0 4m2s​\nmy-release-pulsarv3-zookeeper-1 1/1 Running 0 4m2s​\nmy-release-pulsarv3-zookeeper-2 1/1 Running 0 4m2s​\n\n","# change the followings fields:​\napiVersion: milvus.io/v1beta1​\nkind: Milvus​\nmetadata:​\n annotations: null # this field should be removed or set to null​\n resourceVersion: null # this field should be removed or set to null​\n uid: null # this field should be removed or set to null​\nspec:​\n dependencies:​\n pulsar:​\n inCluster:​\n chartVersion: pulsar-v3​\n # delete all previous values for pulsar v2 and set it to null.​\n # you may add additional values here for pulsar v3 if you're sure about it.​\n values: null​\n\n","helm repo add milvus-operator https://zilliztech.github.io/milvus-operator​\nhelm repo update milvus-operator​\nhelm -n milvus-operator upgrade milvus-operator milvus-operator/milvus-operator​\n\n","kubectl create -f milvus.yaml​\n\n","milvus.milvus.io/my-release created​\n\n","NAME READY STATUS RESTARTS AGE​\nmy-release-etcd-0 1/1 Running 0 65m​\nmy-release-milvus-datanode-57fd59ff58-5mdrk 1/1 Running 0 93s​\nmy-release-milvus-indexnode-67867c6b9b-4wsbw 1/1 Running 0 93s​\nmy-release-milvus-mixcoord-797849f9bb-sf8z5 1/1 Running 0 93s​\nmy-release-milvus-proxy-5d5bf98445-c55m6 1/1 Running 0 93s​\nmy-release-milvus-querynode-0-64797f5c9-lw4rh 1/1 Running 0 92s​\nmy-release-minio-79476ccb49-zvt2h 1/1 Running 0 65m​\nmy-release-pulsar-bookie-0 1/1 Running 0 5m10s​\nmy-release-pulsar-bookie-1 1/1 Running 0 5m10s​\nmy-release-pulsar-bookie-2 1/1 Running 0 5m10s​\nmy-release-pulsar-bookie-init-v8fdj 0/1 Completed 0 5m11s​\nmy-release-pulsar-broker-0 1/1 Running 0 5m11s​\nmy-release-pulsar-broker-1 1/1 Running 0 5m10s​\nmy-release-pulsar-proxy-0 1/1 Running 0 5m11s​\nmy-release-pulsar-proxy-1 1/1 Running 0 5m10s​\nmy-release-pulsar-pulsar-init-5lhx7 0/1 Completed 0 5m11s​\nmy-release-pulsar-recovery-0 1/1 Running 0 5m11s​\nmy-release-pulsar-zookeeper-0 1/1 Running 0 5m11s​\nmy-release-pulsar-zookeeper-1 1/1 Running 0 5m10s​\nmy-release-pulsar-zookeeper-2 1/1 Running 0 5m10s​\n\n"],"headingContent":"Upgrading Pulsar ​","anchorList":[{"label":"Aggiornamento di Pulsar","href":"Upgrading-Pulsar-​","type":1,"isActive":false},{"label":"Tabella di 
marcia","href":"Roadmap","type":2,"isActive":false},{"label":"Procedure","href":"Procedures","type":2,"isActive":false}]} \ No newline at end of file diff --git a/localization/v2.5.x/site/it/adminGuide/upgrade-pulsar-v3.md b/localization/v2.5.x/site/it/adminGuide/upgrade-pulsar-v3.md index c97c64358..98d5e1288 100644 --- a/localization/v2.5.x/site/it/adminGuide/upgrade-pulsar-v3.md +++ b/localization/v2.5.x/site/it/adminGuide/upgrade-pulsar-v3.md @@ -113,11 +113,11 @@ Forwarding from 127.

    Output.

    {​
    -"segmentIDs": [​
    +  "segmentIDs": [​
         454097953998181000,​
         454097953999383600,​
         454097953998180800​
    -]​
    +  ]​
     }​
     
     
    @@ -223,15 +223,15 @@ head milvus.yaml -n 20
    apiVersion: milvus.io/v1beta1​
     kind: Milvus​
     metadata:​
    -annotations:​
    +  annotations:​
         milvus.io/dependency-values-merged: "true"​
         milvus.io/pod-service-label-added: "true"​
         milvus.io/querynode-current-group-id: "0"​
    -creationTimestamp: "2024-11-22T08:06:59Z"​
    -finalizers:​
    -- milvus.milvus.io/finalizer​
    -generation: 3​
    -labels:​
    +  creationTimestamp: "2024-11-22T08:06:59Z"​
    +  finalizers:​
    +  - milvus.milvus.io/finalizer​
    +  generation: 3​
    +  labels:​
         app: milvus​
         milvus.io/operator-version: 1.1.2​
     name: my-release​
    @@ -239,23 +239,23 @@ namespace: default​
     resourceVersion: "692217324"​
     uid: 7a469ed0-9df1-494e-bd9a-340fac4305b5​
     spec:​
    -components:​
    +  components:​
     
     
  • Create a patch.yaml file with the following content.

    # a patch to retain etcd & storage data and delete pulsar data while delete milvus​
     spec:​
    -dependencies:​
    +  dependencies:​
         etcd:​
    -    inCluster:​
    +      inCluster:​
             deletionPolicy: Retain​
             pvcDeletion: false​
         storage:​
    -    inCluster:​
    +      inCluster:​
             deletionPolicy: Retain​
             pvcDeletion: false​
         pulsar:​
    -    inCluster:​
    +      inCluster:​
             deletionPolicy: Delete​
             pvcDeletion: true​
     
    @@ -302,12 +302,12 @@ milvus.milvus.io "my-release" deleted
     
  • Edit the values.yaml file saved in the previous step.

    # change the following:​
     pulsar:​
    -enabled: false # set to false​
    -# you may also clean up rest fields under pulsar field​
    -# it's ok to keep them though.​
    +  enabled: false # set to false​
    +  # you may also clean up rest fields under pulsar field​
    +  # it's ok to keep them though.​
     pulsarv3:​
    -enabled: true​
    -# append other values for pulsar v3 chart if needs​
    +  enabled: true​
    +  # append other values for pulsar v3 chart if needs​
     
     
  • Update the local Helm repo

    @@ -368,13 +368,13 @@ my-release-pulsarv3-zookeeper-2 < apiVersion: milvus.io/v1beta1​ kind: Milvus​ metadata:​ -annotations: null # this field should be removed or set to null​ -resourceVersion: null # this field should be removed or set to null​ -uid: null # this field should be removed or set to null​ + annotations: null # this field should be removed or set to null​ + resourceVersion: null # this field should be removed or set to null​ + uid: null # this field should be removed or set to null​ spec:​ -dependencies:​ + dependencies:​ pulsar:​ - inCluster:​ + inCluster:​ chartVersion: pulsar-v3​ # delete all previous values for pulsar v2 and set it to null.​ # you may add additional values here for pulsar v3 if you're sure about it.​ diff --git a/localization/v2.5.x/site/it/getstarted/run-milvus-k8s/install_cluster-helm.json b/localization/v2.5.x/site/it/getstarted/run-milvus-k8s/install_cluster-helm.json index 1b7706ce0..32355e59c 100644 --- a/localization/v2.5.x/site/it/getstarted/run-milvus-k8s/install_cluster-helm.json +++ b/localization/v2.5.x/site/it/getstarted/run-milvus-k8s/install_cluster-helm.json @@ -1 +1 @@ -{"codeList":["$ kubectl get sc\n\nNAME PROVISIONER RECLAIMPOLICY VOLUMEBIINDINGMODE ALLOWVOLUMEEXPANSION AGE\nstandard (default) k8s.io/minikube-hostpath Delete Immediate false \n","$ helm repo add milvus https://github.com/zilliztech/milvus-helm\n","helm repo add zilliztech https://github.com/zilliztech/milvus-helm\nhelm repo update\n# upgrade existing helm release\nhelm upgrade my-release zilliztech/milvus\n","$ helm repo update\n","$ helm install my-release milvus/milvus\n","$ kubectl get pods\n","NAME READY STATUS RESTARTS AGE\nmy-release-etcd-0 1/1 Running 0 3m23s\nmy-release-etcd-1 1/1 Running 0 3m23s\nmy-release-etcd-2 1/1 Running 0 3m23s\nmy-release-milvus-datanode-68cb87dcbd-4khpm 1/1 Running 0 3m23s\nmy-release-milvus-indexnode-5c5f7b5bd9-l8hjg 1/1 Running 0 3m24s\nmy-release-milvus-mixcoord-7fb9488465-dmbbj 1/1 Running 0 3m23s\nmy-release-milvus-proxy-6bd7f5587-ds2xv 1/1 Running 0 3m24s\nmy-release-milvus-querynode-5cd8fff495-k6gtg 1/1 Running 0 3m24s\nmy-release-minio-0 1/1 Running 0 3m23s\nmy-release-minio-1 1/1 Running 0 3m23s\nmy-release-minio-2 1/1 Running 0 3m23s\nmy-release-minio-3 1/1 Running 0 3m23s\nmy-release-pulsar-autorecovery-86f5dbdf77-lchpc 1/1 Running 0 3m24s\nmy-release-pulsar-bookkeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-bookkeeper-1 1/1 Running 0 98s\nmy-release-pulsar-broker-556ff89d4c-2m29m 1/1 Running 0 3m23s\nmy-release-pulsar-proxy-6fbd75db75-nhg4v 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-metadata-98zbr 0/1 Completed 0 3m24s\n","$ kubectl get pod my-release-milvus-proxy-6bd7f5587-ds2xv --template\n='{{(index (index .spec.containers 0).ports 0).containerPort}}{{\"\\n\"}}'\n19530\n","$ kubectl port-forward service/my-release-milvus 27017:19530\nForwarding from 127.0.0.1:27017 -> 19530\n","$ kubectl port-forward --address 0.0.0.0 service/my-release-milvus 27017:19530\nForwarding from 0.0.0.0:27017 -> 19530\n","$ helm template my-release milvus/milvus > milvus_manifest.yaml\n","$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/requirements.txt\n$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/save_image.py\n","$ pip3 install -r requirements.txt\n$ python3 save_image.py --manifest milvus_manifest.yaml\n","$ for image in $(find . 
-type f -name \"*.tar.gz\") ; do gunzip -c $image | docker load; done\n","$ kubectl apply -f milvus_manifest.yaml\n","$ helm repo update\n$ helm upgrade my-release zilliztech/milvus\n","$ helm uninstall my-release\n"],"headingContent":"Run Milvus in Kubernetes with Helm","anchorList":[{"label":"Eseguire Milvus in Kubernetes con Helm","href":"Run-Milvus-in-Kubernetes-with-Helm","type":1,"isActive":false},{"label":"Panoramica","href":"Overview","type":2,"isActive":false},{"label":"Prerequisiti","href":"Prerequisites","type":2,"isActive":false},{"label":"Installare Milvus Helm Chart","href":"Install-Milvus-Helm-Chart","type":2,"isActive":false},{"label":"Installazione online","href":"Online-install","type":2,"isActive":false},{"label":"Installazione offline","href":"Offline-install","type":2,"isActive":false},{"label":"Aggiornamento del cluster Milvus in esecuzione","href":"Upgrade-running-Milvus-cluster","type":2,"isActive":false},{"label":"Disinstallare Milvus","href":"Uninstall-Milvus","type":2,"isActive":false},{"label":"Cosa succede dopo","href":"Whats-next","type":2,"isActive":false}]} \ No newline at end of file +{"codeList":["$ kubectl get sc\n\nNAME PROVISIONER RECLAIMPOLICY VOLUMEBIINDINGMODE ALLOWVOLUMEEXPANSION AGE\nstandard (default) k8s.io/minikube-hostpath Delete Immediate false \n","$ helm repo add milvus https://zilliztech.github.io/milvus-helm/\n","helm repo add zilliztech https://zilliztech.github.io/milvus-helm/\nhelm repo update\n# upgrade existing helm release\nhelm upgrade my-release zilliztech/milvus\n","$ helm repo update\n","$ helm install my-release milvus/milvus\n","$ kubectl get pods\n","NAME READY STATUS RESTARTS AGE\nmy-release-etcd-0 1/1 Running 0 3m23s\nmy-release-etcd-1 1/1 Running 0 3m23s\nmy-release-etcd-2 1/1 Running 0 3m23s\nmy-release-milvus-datanode-68cb87dcbd-4khpm 1/1 Running 0 3m23s\nmy-release-milvus-indexnode-5c5f7b5bd9-l8hjg 1/1 Running 0 3m24s\nmy-release-milvus-mixcoord-7fb9488465-dmbbj 1/1 Running 0 3m23s\nmy-release-milvus-proxy-6bd7f5587-ds2xv 1/1 Running 0 3m24s\nmy-release-milvus-querynode-5cd8fff495-k6gtg 1/1 Running 0 3m24s\nmy-release-minio-0 1/1 Running 0 3m23s\nmy-release-minio-1 1/1 Running 0 3m23s\nmy-release-minio-2 1/1 Running 0 3m23s\nmy-release-minio-3 1/1 Running 0 3m23s\nmy-release-pulsar-autorecovery-86f5dbdf77-lchpc 1/1 Running 0 3m24s\nmy-release-pulsar-bookkeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-bookkeeper-1 1/1 Running 0 98s\nmy-release-pulsar-broker-556ff89d4c-2m29m 1/1 Running 0 3m23s\nmy-release-pulsar-proxy-6fbd75db75-nhg4v 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-metadata-98zbr 0/1 Completed 0 3m24s\n","$ kubectl get pod my-release-milvus-proxy-6bd7f5587-ds2xv --template\n='{{(index (index .spec.containers 0).ports 0).containerPort}}{{\"\\n\"}}'\n19530\n","$ kubectl port-forward service/my-release-milvus 27017:19530\nForwarding from 127.0.0.1:27017 -> 19530\n","$ kubectl port-forward --address 0.0.0.0 service/my-release-milvus 27017:19530\nForwarding from 0.0.0.0:27017 -> 19530\n","$ helm template my-release milvus/milvus > milvus_manifest.yaml\n","$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/requirements.txt\n$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/save_image.py\n","$ pip3 install -r requirements.txt\n$ python3 save_image.py --manifest milvus_manifest.yaml\n","$ for image in $(find . 
-type f -name \"*.tar.gz\") ; do gunzip -c $image | docker load; done\n","$ kubectl apply -f milvus_manifest.yaml\n","$ helm repo update\n$ helm upgrade my-release zilliztech/milvus\n","$ helm uninstall my-release\n"],"headingContent":"Run Milvus in Kubernetes with Helm","anchorList":[{"label":"Eseguire Milvus in Kubernetes con Helm","href":"Run-Milvus-in-Kubernetes-with-Helm","type":1,"isActive":false},{"label":"Panoramica","href":"Overview","type":2,"isActive":false},{"label":"Prerequisiti","href":"Prerequisites","type":2,"isActive":false},{"label":"Installare Milvus Helm Chart","href":"Install-Milvus-Helm-Chart","type":2,"isActive":false},{"label":"Installazione online","href":"Online-install","type":2,"isActive":false},{"label":"Installazione offline","href":"Offline-install","type":2,"isActive":false},{"label":"Aggiornamento del cluster Milvus in esecuzione","href":"Upgrade-running-Milvus-cluster","type":2,"isActive":false},{"label":"Disinstallare Milvus","href":"Uninstall-Milvus","type":2,"isActive":false},{"label":"Cosa succede dopo","href":"Whats-next","type":2,"isActive":false}]} \ No newline at end of file diff --git a/localization/v2.5.x/site/it/getstarted/run-milvus-k8s/install_cluster-helm.md b/localization/v2.5.x/site/it/getstarted/run-milvus-k8s/install_cluster-helm.md index 059d2d65f..271437878 100644 --- a/localization/v2.5.x/site/it/getstarted/run-milvus-k8s/install_cluster-helm.md +++ b/localization/v2.5.x/site/it/getstarted/run-milvus-k8s/install_cluster-helm.md @@ -83,11 +83,11 @@ NAME PROVISIONER RECLAIMPOLICY VOLUMEBIINDI >

    Before installing the Milvus Helm Charts, you need to add the Milvus Helm repository.

    -
    $ helm repo add milvus https://github.com/zilliztech/milvus-helm
    +
    $ helm repo add milvus https://zilliztech.github.io/milvus-helm/
     

    The Milvus Helm Charts repo at https://github.com/milvus-io/milvus-helm has been archived, and you can get further updates from https://github.com/zilliztech/milvus-helm as follows:

    -
    helm repo add zilliztech https://github.com/zilliztech/milvus-helm
    +
    helm repo add zilliztech https://zilliztech.github.io/milvus-helm/
     helm repo update
     # upgrade existing helm release
     helm upgrade my-release zilliztech/milvus
    diff --git a/localization/v2.5.x/site/it/home/home.md b/localization/v2.5.x/site/it/home/home.md
    index 642adbe17..442260bf6 100644
    --- a/localization/v2.5.x/site/it/home/home.md
    +++ b/localization/v2.5.x/site/it/home/home.md
    @@ -114,7 +114,7 @@ Qui imparerete cos'è Milvus e come installare, usare e distribuire Milvus per c
         

    Nov 2024 - Milvus 2.5.0 release

    • Added a guide on how to conduct a full-text search.
    • -
    • Added a guide on how to perform keyword matching.
    • +
    • Added a guide on how to perform text matching.
    • Added a guide on how to enable nullable and default values.
    • Added descriptions of analyzers.
    • Added a description of bitmap indexes.
    • diff --git a/localization/v2.5.x/site/it/menuStructure/it.json b/localization/v2.5.x/site/it/menuStructure/it.json index b78127184..698e445e0 100644 --- a/localization/v2.5.x/site/it/menuStructure/it.json +++ b/localization/v2.5.x/site/it/menuStructure/it.json @@ -286,6 +286,18 @@ "order": 8, "children": [] }, + { + "label": "Tipi metrici", + "id": "metric.md", + "order": 9, + "children": [] + }, + { + "label": "Livello di coerenza", + "id": "consistency.md", + "order": 10, + "children": [] + }, { "label": "Replica in memoria", "id": "replica.md", @@ -602,31 +614,39 @@ ] }, { - "label": "Gestire gli indici", + "label": "Indici", "id": "manage_indexes", "order": 4, "isMenu": true, "children": [ { - "label": "Campi vettoriali indice", + "label": "Indici vettoriali", "id": "index-vector-fields.md", "order": 0, "children": [] }, { - "label": "Campi scalari indice", - "id": "index-scalar-fields.md", + "label": "Indici scalari", + "id": "scalar-index", "order": 1, - "children": [] - }, - { - "label": "Indice BITMAP", - "id": "bitmap.md", - "order": 2, - "children": [] + "isMenu": true, + "children": [ + { + "label": "Campi scalari indice", + "id": "index-scalar-fields.md", + "order": 1, + "children": [] + }, + { + "label": "Indice della bitmap", + "id": "bitmap.md", + "order": 2, + "children": [] + } + ] }, { - "label": "Indice con GPU", + "label": "Indici abilitati alle GPU", "id": "index-with-gpu.md", "order": 3, "children": [] @@ -682,7 +702,7 @@ "children": [] }, { - "label": "Parola chiave abbinata", + "label": "Corrispondenza del testo", "id": "keyword-match.md", "order": 7, "children": [] @@ -699,30 +719,6 @@ "order": 9, "children": [] }, - { - "label": "Utilizzare mmap", - "id": "mmap.md", - "order": 10, - "children": [] - }, - { - "label": "Compattazione del clustering", - "id": "clustering-compaction.md", - "order": 11, - "children": [] - }, - { - "label": "Livello di coerenza", - "id": "consistency.md", - "order": 12, - "children": [] - }, - { - "label": "Tipi metrici", - "id": "metric.md", - "order": 13, - "children": [] - }, { "label": "Filtraggio dei metadati", "id": "boolean.md", @@ -736,26 +732,6 @@ "children": [] } ] - }, - { - "label": "Importazione dei dati", - "id": "data_import", - "order": 6, - "isMenu": true, - "children": [ - { - "label": "Preparare i dati di partenza", - "id": "prepare-source-data.md", - "order": 0, - "children": [] - }, - { - "label": "Importazione dei dati", - "id": "import-data.md", - "order": 1, - "children": [] - } - ] } ] }, @@ -897,11 +873,31 @@ } ] }, + { + "label": "Importazione dei dati", + "id": "data_import", + "order": 5, + "isMenu": true, + "children": [ + { + "label": "Preparare i dati di partenza", + "id": "prepare-source-data.md", + "order": 0, + "children": [] + }, + { + "label": "Importazione dei dati", + "id": "import-data.md", + "order": 1, + "children": [] + } + ] + }, { "label": "Migrazione di Milvus", "id": "milvus_migration", "isMenu": true, - "order": 5, + "order": 6, "children": [ { "label": "Panoramica", @@ -1321,10 +1317,30 @@ } ] }, + { + "label": "Ottimizzazione dello stoccaggio", + "id": "storage_optimization", + "order": 10, + "isMenu": true, + "children": [ + { + "label": "Utilizzare mmap", + "id": "mmap.md", + "order": 0, + "children": [] + }, + { + "label": "Compattazione del clustering", + "id": "clustering-compaction.md", + "order": 1, + "children": [] + } + ] + }, { "label": "Sicurezza", "id": "security", - "order": 10, + "order": 11, "isMenu": true, "children": [ { diff --git 
a/localization/v2.5.x/site/it/release_notes.md b/localization/v2.5.x/site/it/release_notes.md index 75f70063f..ce58b03a3 100644 --- a/localization/v2.5.x/site/it/release_notes.md +++ b/localization/v2.5.x/site/it/release_notes.md @@ -50,7 +50,7 @@ title: Note di rilascio

      Cluster Management WebUI (Beta)

      To better support massive data and richer features, Milvus's sophisticated design includes various dependencies, numerous node roles, complex data structures, and more. These aspects can make the system challenging to use and maintain.

      Milvus 2.5 introduces a built-in Cluster Management WebUI that reduces system maintenance difficulty by visualizing complex runtime information of the Milvus environment, including details about databases and collections, segments, channels, dependencies, node health status, task information, slow queries, and more.

      Text Match

      Milvus 2.5 leverages Tantivy analyzers and indexing for text preprocessing and index building, supporting precise natural-language matching of text data based on specific terms. This feature is primarily used in filtered searches to satisfy specific conditions, and it can incorporate scalar filtering to refine query results, allowing similarity searches within vectors that meet scalar criteria.

      -

      For more information, refer to Keyword Match.

      +

      For more information, refer to Text Match.

      Bitmap Index

      A new scalar data index has joined the Milvus family. The BitMap index uses an array of bits, equal in length to the number of rows, to represent the existence of values and to speed up searches.

      Bitmap indexes have traditionally been effective for low-cardinality fields, which hold a modest number of distinct values, for example a column containing gender information with only two possible values: male and female.

      For more information, refer to Bitmap Index.

      diff --git a/localization/v2.5.x/site/it/tutorials/hybrid_search_with_milvus.md b/localization/v2.5.x/site/it/tutorials/hybrid_search_with_milvus.md index 11e8aea40..a2f87df38 100644 --- a/localization/v2.5.x/site/it/tutorials/hybrid_search_with_milvus.md +++ b/localization/v2.5.x/site/it/tutorials/hybrid_search_with_milvus.md @@ -25,10 +25,10 @@ title: Ricerca ibrida con Milvus

      Milvus supports dense, sparse, and hybrid retrieval methods:

      • Dense retrieval: Uses semantic context to understand the meaning behind queries.
      • -
      • Sparse retrieval: Emphasizes keyword matching to find results based on specific terms, equivalent to full-text search.
      • +
      • Sparse retrieval: Emphasizes text matching to find results based on specific terms, equivalent to full-text search.
      • Hybrid retrieval: Combines the dense and sparse approaches, capturing the full context and specific keywords for comprehensive search results.
      -

      By integrating these methods, Milvus hybrid search balances semantic and lexical similarities, improving the overall relevance of search outcomes. This notebook walks through the process of setting up and using these retrieval strategies, highlighting their effectiveness across various search scenarios.

      +

      By integrating these methods, Milvus hybrid search balances semantic and lexical similarities, improving the overall relevance of search outcomes. This notebook walks through the process of setting up and using these retrieval strategies, highlighting their effectiveness across various search scenarios.

      Dependencies and environment

      $ pip install --upgrade pymilvus "pymilvus[model]"
       

      Download the dataset

      To demonstrate search, we need a corpus of documents. Let's use the Quora Duplicate Questions dataset and place it in the local directory.

      @@ -116,7 +116,7 @@ dense_index = {"index_type": "dense_vector", dense_index) col.load()
    -

    Insert data into the Milvus collection

    Insert the documents and their embeddings into the collection.

    +

    Insert data into the Milvus collection

    Insert the documents and their embeddings into the collection.

    # For efficiency, we insert 50 records in each small batch
     for i in range(0, len(docs), 50):
         batched_entities = [
    @@ -290,7 +290,7 @@ formatted_results = doc_text_formatting(ef, query, hybrid_results)
     

    Which business is better to start in Hyderabad?

    Which business is better to start in Hyderabad?

    What is the best way to start robotics? Which is the best development board that I can start working on?

    -

    What math does a beginner need to understand algorithms for computer programming? What books on algorithms are suitable for a complete beginner?

    +

    What math does a beginner need to understand algorithms for computer programming? What books on algorithms are suitable for a complete beginner?

    How do you make life suit you and stop life from abusing you mentally and emotionally?

    Hybrid search results:

    What is the best way to get started with robotics? Which is the best development board I can start working on?

    @@ -305,7 +305,7 @@ formatted_results = doc_text_formatting(ef, query, hybrid_results)

    How do you learn a computer language like Java?

    What is the alternative to machine learning?

    How do you create a new terminal and a new shell in Linux using C programming?

    -

    How do you create a new shell in a new terminal using C programming (Linux terminal)?

    +

    How do you create a new shell in a new terminal using C programming (Linux terminal)?

    Which business is better to start in Hyderabad?

    Which business is better to start in Hyderabad?

    What math does a beginner need to understand algorithms for computer programming? What books on algorithms are suitable for an absolute beginner?

    diff --git a/localization/v2.5.x/site/it/userGuide/collections/manage-collections.md b/localization/v2.5.x/site/it/userGuide/collections/manage-collections.md index 880f7013b..31c8775e6 100644 --- a/localization/v2.5.x/site/it/userGuide/collections/manage-collections.md +++ b/localization/v2.5.x/site/it/userGuide/collections/manage-collections.md @@ -79,7 +79,7 @@ title: Spiegazione delle raccolte

    Similar to the primary key in a relational database, a collection has a primary field to distinguish one entity from another. Each value in the primary field is globally unique and corresponds to one specific entity.

    As shown in the chart above, the field id serves as the primary field, and the first ID 0 corresponds to an entity titled The Mortality Rate of Coronavirus is Not Important. No other entity will have a primary field value of 0.

    -

    A primary field accepts only integers or strings. When inserting entities, you should include the primary field values by default. However, if you enabled AutoId when creating the collection, Milvus generates these values as data is inserted. In this case, exclude primary field values from the entities to insert.

    +

    A primary field accepts only integers or strings. When inserting entities, you should include the primary field values by default. However, if you enabled AutoId when creating the collection, Milvus generates these values as data is inserted. In that case, exclude primary field values from the entities to insert.

    For more information, refer to Primary Field & AutoID.
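    A minimal sketch of a schema that enables AutoId, assuming placeholder names and pymilvus 2.5:

    from pymilvus import MilvusClient, DataType

    client = MilvusClient(uri="http://localhost:19530")

    # With auto_id=True, Milvus generates primary-key values at insert time,
    # so inserted entities must not carry an "id" value of their own.
    schema = client.create_schema(auto_id=True)
    schema.add_field(field_name="id", datatype=DataType.INT64, is_primary=True)
    schema.add_field(field_name="vector", datatype=DataType.FLOAT_VECTOR, dim=4)

    client.create_collection(collection_name="my_collection", schema=schema)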

    Index

    Loading a collection is the prerequisite for running similarity searches and queries over collections. When you load a collection, Milvus loads all index files and the raw data of every field into memory so that it can respond quickly to searches and queries.

    +

    Loading a collection is the prerequisite for running similarity searches and queries over collections. When you load a collection, Milvus loads all index files and the raw data of every field into memory so that it can respond quickly to searches and queries.

    Searches and queries are memory-intensive operations. To save costs, you are advised to release the collections that are not currently in use.

    For more details, refer to Load & Release.
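    A minimal sketch of loading a collection before searching and releasing it afterwards, assuming a collection named my_collection:

    from pymilvus import MilvusClient

    client = MilvusClient(uri="http://localhost:19530")

    client.load_collection(collection_name="my_collection")     # required before search/query
    print(client.get_load_state(collection_name="my_collection"))

    client.release_collection(collection_name="my_collection")  # free memory when the collection is idle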

    Search and query

    -

    No additional parameter configurations are required.

  • +

    No additional parameter configurations are required.

  • GPU_CAGRA index

    search_params = {
         "metric_type": "L2",
    @@ -164,8 +164,8 @@ collection.create_index(
     
    • itopk_size: Determines the size of the intermediate results kept during the search. A larger value may improve recall at the expense of search performance. It should be at least equal to the final top-K (limit) value and is typically a power of 2 (e.g., 16, 32, 64, 128).

    • search_width: Specifies the number of entry points into the CAGRA graph during the search. Increasing this value can improve recall but may affect search performance.

    • -
    • min_iterations / max_iterations: These parameters control the search iteration process. By default they are set to 0, and CAGRA automatically determines the number of iterations based on itopk_size and search_width. Adjusting these values manually can help balance performance and accuracy.

    • -
    • team_size: Specifies the number of CUDA threads used to compute the metric distance on the GPU. The most common values are a power of 2 up to 32 (e.g., 2, 4, 8, 16, 32). It has minimal impact on search performance. The default value is 0, where Milvus automatically selects team_size based on the vector dimension.

    • +
    • min_iterations / max_iterations: These parameters control the search iteration process. By default they are set to 0, and CAGRA automatically determines the number of iterations based on itopk_size and search_width. Adjusting these values manually can help balance performance and precision.

    • +
    • team_size: Specifies the number of CUDA threads used to compute the metric distance on the GPU. Common values are a power of 2 up to 32 (e.g., 2, 4, 8, 16, 32). It has minimal impact on search performance. The default value is 0, where Milvus automatically selects team_size based on the vector dimension.

  • GPU_IVF_FLAT or GPU_IVF_PQ index

    search_params = {
    @@ -203,7 +203,7 @@ collection.search(
           
         

    When using GPU indexes, keep a few constraints in mind:

      -
    • For GPU_IVF_FLAT, the maximum value of limit is 256.

    • +
    • For GPU_IVF_FLAT, the maximum value of limit is 1024.

    • For GPU_IVF_PQ and GPU_CAGRA, the maximum value of limit is 1024.

    • Although no limit has been set for GPU_BRUTE_FORCE, it is recommended not to exceed 4096 to avoid potential performance issues.

    • Currently, GPU indexes do not support the COSINE distance. If COSINE distance is required, the data should be normalized first, and the inner product (IP) distance can then be used as a substitute.
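    A minimal sketch of that normalization step (using numpy; array shapes and values are illustrative):

    import numpy as np

    def l2_normalize(vectors: np.ndarray) -> np.ndarray:
        # Scale each row to unit length so that IP on the result behaves like cosine similarity.
        norms = np.linalg.norm(vectors, axis=1, keepdims=True)
        return vectors / np.clip(norms, 1e-12, None)

    embeddings = np.random.rand(10, 128).astype("float32")
    normalized = l2_normalize(embeddings)
    # Insert `normalized` and build the GPU index with metric_type="IP".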

    • diff --git a/localization/v2.5.x/site/it/userGuide/schema/analyzer/analyzer-overview.md b/localization/v2.5.x/site/it/userGuide/schema/analyzer/analyzer-overview.md index 849849599..d1f3c43cd 100644 --- a/localization/v2.5.x/site/it/userGuide/schema/analyzer/analyzer-overview.md +++ b/localization/v2.5.x/site/it/userGuide/schema/analyzer/analyzer-overview.md @@ -24,12 +24,12 @@ summary: >- >

      Nell'elaborazione del testo, un analizzatore è un componente cruciale che converte il testo grezzo in un formato strutturato e ricercabile. Ogni analizzatore è generalmente composto da due elementi fondamentali: tokenizer e filtro. Insieme, trasformano il testo in ingresso in token, li raffinano e li preparano per un'indicizzazione e un recupero efficienti.


      In Milvus, gli analizzatori vengono configurati durante la creazione della raccolta, quando si aggiungono i campi VARCHAR allo schema della raccolta. I token prodotti da un analizzatore possono essere usati per costruire un indice per la corrispondenza del testo o convertiti in embedding sparsi per la ricerca full text. Per ulteriori informazioni, fare riferimento a Corrispondenza del testo o Ricerca di testo completo.
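      Per fissare le idee, uno schizzo minimo in Python (nomi di campo ipotetici) di come si abilita un analizzatore su un campo VARCHAR al momento della definizione dello schema:

      from pymilvus import MilvusClient, DataType

      schema = MilvusClient.create_schema(auto_id=True, enable_dynamic_field=False)

      # Campo VARCHAR con analisi del testo abilitata; analyzer_params è facoltativo
      schema.add_field(
          field_name="text",
          datatype=DataType.VARCHAR,
          max_length=1000,
          enable_analyzer=True,
          analyzer_params={"type": "english"},
      )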

      L'uso degli analizzatori può influire sulle prestazioni.

      • Ricerca a testo pieno: Per la ricerca full text, i canali DataNode e QueryNode consumano i dati più lentamente perché devono attendere il completamento della tokenizzazione. Di conseguenza, i dati appena ingeriti impiegano più tempo per diventare disponibili per la ricerca.

      • Corrispondenza di testo: Per la corrispondenza del testo, anche la creazione dell'indice è più lenta, poiché la tokenizzazione deve essere completata prima di poter costruire un indice.

      Anatomia di un analizzatore

    Per ulteriori informazioni sui parametri di ricerca per similarità, consultare la sezione Ricerca ANN di base.

    Limiti

    Quando si utilizzano vettori sparsi in Milvus, si devono considerare i seguenti limiti:

    • Attualmente, per i vettori sparsi è supportata solo la metrica di distanza IP. L'elevata dimensionalità dei vettori sparsi rende impraticabili le distanze L2 e coseno.

    • Per i campi vettoriali sparsi sono supportati solo i tipi di indice SPARSE_INVERTED_INDEX e SPARSE_WAND.

    • Tipi di dati supportati per i vettori sparsi:

      • La parte della dimensione deve essere un intero a 32 bit senza segno;
      • La parte del valore può essere un numero in virgola mobile a 32 bit non negativo.

    • I vettori sparsi devono soddisfare i seguenti requisiti per l'inserimento e la ricerca (si veda lo schizzo riportato di seguito):

      • Almeno un valore del vettore è diverso da zero;
      • Gli indici del vettore sono non negativi.
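    Ad esempio, uno schizzo minimo (nomi di collezione e di campo ipotetici) di un vettore sparso che rispetta i requisiti elencati, rappresentato come dizionario {dimensione: valore}:

    from pymilvus import MilvusClient

    client = MilvusClient(uri="http://localhost:19530")

    # Dimensioni: interi senza segno a 32 bit; valori: float non negativi; almeno un valore diverso da zero
    sparse_vector = {120: 0.45, 3087: 0.12, 96520: 0.78}

    client.insert(
        collection_name="my_sparse_collection",           # nome ipotetico
        data=[{"id": 1, "sparse_vector": sparse_vector}]  # schema ipotetico con id non auto-generato
    )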

    DOMANDE FREQUENTI

    • Potete spiegare la differenza tra SPARSE_INVERTED_INDEX e SPARSE_WAND e come posso scegliere tra i due?

      SPARSE_INVERTED_INDEX è un indice invertito tradizionale, mentre SPARSE_WAND utilizza l'algoritmo Weak-AND per ridurre il numero di valutazioni complete della distanza IP durante la ricerca. SPARSE_WAND è in genere più veloce, ma le sue prestazioni possono diminuire con l'aumentare della densità dei vettori. Per scegliere, è necessario condurre esperimenti e benchmark in base al set di dati e al caso d'uso specifici.

    • Come scegliere i parametri drop_ratio_build e drop_ratio_search?

      La scelta di drop_ratio_build e drop_ratio_search dipende dalle caratteristiche dei dati e dai requisiti di latenza/throughput e precisione della ricerca. Uno schizzo di dove si collocano i due parametri è riportato di seguito.
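      Uno schizzo indicativo (valori puramente esemplificativi, nome di campo ipotetico) di dove compaiono i due parametri:

      from pymilvus import MilvusClient

      # drop_ratio_build è un parametro dell'indice sparso...
      index_params = MilvusClient.prepare_index_params()
      index_params.add_index(
          field_name="sparse_vector",            # nome di campo ipotetico
          index_type="SPARSE_INVERTED_INDEX",
          metric_type="IP",
          params={"drop_ratio_build": 0.2},      # quota dei valori più piccoli ignorata in fase di costruzione
      )

      # ... mentre drop_ratio_search si passa al momento della ricerca.
      search_params = {
          "params": {"drop_ratio_search": 0.2},  # quota dei valori più piccoli della query ignorata in ricerca
      }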

    • La dimensione di un embedding sparso può essere un qualsiasi valore discreto all'interno dello spazio uint32?

      Sì, con un'eccezione. La dimensione di un embedding sparso può essere qualsiasi valore nell'intervallo [0, massimo di uint32). Ciò significa che non è possibile utilizzare il valore massimo di uint32.

    • Le ricerche sui segmenti crescenti sono condotte attraverso un indice o con la forza bruta?

      Le ricerche sui segmenti crescenti vengono condotte attraverso un indice dello stesso tipo dell'indice del segmento sigillato. Per i nuovi segmenti crescenti, prima che l'indice sia costruito, si usa una ricerca a forza bruta.

    • È possibile avere vettori sparsi e densi in un'unica collezione?

      Sì, grazie al supporto di più tipi di vettore, è possibile creare collezioni con colonne di vettori sia sparsi che densi ed eseguire ricerche ibride su di esse.
diff --git a/localization/v2.5.x/site/it/userGuide/search-query-get/boolean.md b/localization/v2.5.x/site/it/userGuide/search-query-get/boolean.md

    Un'espressione di filtro può essere utilizzata per filtrare un campo scalare specifico durante una ricerca o un'interrogazione per ottenere risultati esattamente corrispondenti. Questa guida introduce l'uso delle espressioni di filtro in Zilliz attraverso un set di dati di esempio. A scopo dimostrativo, questa guida fornirà solo esempi di operazioni di query.

    Match operators​

    Match operators include:​

    • like: Match constants or prefixes (prefix%), infixes (%infix%), and suffixes (%suffix) within constants. It relies on a brute-force search mechanism using wildcards and does not involve text tokenization. While it can achieve exact matches, its query efficiency is relatively low, making it suitable for simple matching tasks or queries on smaller datasets.​

    • TEXT_MATCH: Match specific terms or keywords on VARCHAR fields, using tokenization and inverted index to enable efficient text search. Compared to like, TEXT_MATCH offers more advanced text tokenization and filtering capabilities. It is suited for large-scale datasets where higher query performance is required for complex text search scenarios.

      To use the TEXT_MATCH filter expression, you must enable text matching for the target VARCHAR field when creating the collection. For details, refer to Text Match.

    Example 1: Apply filter on scalar field​

    The following example demonstrates how to filter products whose color is red. In this case, you can quickly filter all red products by matching the prefix 'red%'. Similarly, you can use the expression color in ['red_7025', 'red_4794', 'red_9392'] to filter all red products. However, when the data is more complex, we recommend using the like operator for more efficient filtering. A minimal sketch follows.
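    Since the code tabs for this example are not reproduced here, the following is a minimal Python sketch; the collection and field names are assumptions:

    from pymilvus import MilvusClient

    client = MilvusClient(uri="http://localhost:19530")

    # Prefix match on the color field using the like operator
    filter = "color like 'red%'"

    res = client.query(
        collection_name="my_collection",   # assumed collection name
        filter=filter,
        output_fields=["id", "color"],
    )

    # Equivalent explicit enumeration of the red products
    filter = "color in ['red_7025', 'red_4794', 'red_9392']"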


    Example 3: Text match on VARCHAR fields​

    The TEXT_MATCH expression is used for text match on VARCHAR fields. By default, it applies an OR logic, but you can combine it with other logical operators to create more complex query conditions. For details, refer to Text Match.​

    +

    The following example demonstrates how to use the TEXT_MATCH expression to filter products where the description field contains either the term "Apple" or "iPhone":​
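    A minimal Python sketch of this example (the description field and collection name are assumptions):

    from pymilvus import MilvusClient

    client = MilvusClient(uri="http://localhost:19530")

    # Products whose description contains either "Apple" or "iPhone" (OR logic by default)
    filter = "TEXT_MATCH(description, 'Apple iPhone')"

    result = client.query(
        collection_name="my_collection",     # assumed collection name
        filter=filter,
        output_fields=["id", "description"],
    )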

diff --git a/localization/v2.5.x/site/it/userGuide/search-query-get/full-text-search.md b/localization/v2.5.x/site/it/userGuide/search-query-get/full-text-search.md

    La ricerca full text è una funzione che recupera i documenti contenenti termini o frasi specifiche nei dataset di testo, classificando poi i risultati in base alla rilevanza. Questa funzione supera le limitazioni della ricerca semantica, che potrebbe trascurare termini precisi, garantendo la ricezione di risultati più accurati e contestualmente rilevanti. Inoltre, semplifica le ricerche vettoriali accettando input di testo grezzo, convertendo automaticamente i dati testuali in embedding sparsi senza la necessità di generare manualmente embedding vettoriali.

    Utilizzando l'algoritmo BM25 per il punteggio di rilevanza, questa funzione è particolarmente preziosa negli scenari di retrieval-augmented generation (RAG), dove dà priorità ai documenti che corrispondono strettamente a termini di ricerca specifici.

    Integrando la ricerca full text con la ricerca vettoriale densa basata sulla semantica, è possibile migliorare l'accuratezza e la pertinenza dei risultati della ricerca. Per ulteriori informazioni, consultare Ricerca ibrida.

    1. Inserimento del testo: L'utente inserisce documenti di testo grezzi o fornisce un testo di query senza necessità di incorporazione manuale.

    2. Analisi del testo: Milvus utilizza un analizzatore per tokenizzare il testo in ingresso in singoli termini ricercabili.

    3. Elaborazione della funzione: La funzione incorporata riceve i termini tokenizzati e li converte in rappresentazioni vettoriali rade.

    4. Memorizzazione nella collezione: Milvus memorizza queste rappresentazioni rade in una raccolta per un recupero efficiente.

    5. Punteggio BM25: Durante la ricerca, Milvus applica l'algoritmo BM25 per calcolare i punteggi dei documenti memorizzati e classifica i risultati corrispondenti in base alla pertinenza con il testo dell'interrogazione (la forma standard della formula è riportata di seguito).
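    Per riferimento, la forma standard della funzione di punteggio BM25 (i valori effettivi dei parametri $k_1$ e $b$ usati internamente da Milvus non sono indicati qui):

    $$\mathrm{score}(D,Q)=\sum_{t\in Q}\mathrm{IDF}(t)\cdot\frac{f(t,D)\,(k_1+1)}{f(t,D)+k_1\left(1-b+b\cdot\frac{|D|}{\mathrm{avgdl}}\right)}$$

    dove $f(t,D)$ è la frequenza del termine $t$ nel documento $D$, $|D|$ la lunghezza del documento e $\mathrm{avgdl}$ la lunghezza media dei documenti della raccolta.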

  • Un campo SPARSE_FLOAT_VECTOR riservato alla memorizzazione di incorporazioni rade che Milvus genererà automaticamente per il campo VARCHAR.

  • Definire lo schema della raccolta

    Per prima cosa, creare lo schema e aggiungere i campi necessari.

    +
    from pymilvus import MilvusClient, DataType, Function, FunctionType​
     ​
     schema = MilvusClient.create_schema()​
     schema.add_field(field_name="id", datatype=DataType.INT64, is_primary=True, auto_id=True)
     schema.add_field(field_name="text", datatype=DataType.VARCHAR, max_length=1000, enable_analyzer=True)​
     schema.add_field(field_name="sparse", datatype=DataType.SPARSE_FLOAT_VECTOR)​
     
    +
    +
    import io.milvus.v2.common.DataType;
    +import io.milvus.v2.service.collection.request.AddFieldReq;
    +import io.milvus.v2.service.collection.request.CreateCollectionReq;
    +
    +CreateCollectionReq.CollectionSchema schema = CreateCollectionReq.CollectionSchema.builder()
    +        .build();
    +schema.addField(AddFieldReq.builder()
    +        .fieldName("id")
    +        .dataType(DataType.Int64)
    +        .isPrimaryKey(true)
    +        .autoID(true)
    +        .build());
    +schema.addField(AddFieldReq.builder()
    +        .fieldName("text")
    +        .dataType(DataType.VarChar)
    +        .maxLength(1000)
    +        .enableAnalyzer(true)
    +        .build());
    +schema.addField(AddFieldReq.builder()
    +        .fieldName("sparse")
    +        .dataType(DataType.SparseFloatVector)
    +        .build());
    +
    +
    import { MilvusClient, DataType } from "@zilliz/milvus2-sdk-node";
    +
    +const address = "http://localhost:19530";
    +const token = "root:Milvus";
    +const client = new MilvusClient({address, token});
    +const schema = [
    +  {
    +    name: "id",
    +    data_type: DataType.Int64,
    +    is_primary_key: true,
    +  },
    +  {
    +    name: "text",
    +    data_type: "VarChar",
    +    enable_analyzer: true,
    +    enable_match: true,
    +    max_length: 1000,
    +  },
    +  {
    +    name: "sparse",
    +    data_type: DataType.SparseFloatVector,
    +  },
    +];
    +
    +
    +
    +
    export schema='{
    +        "autoId": true,
    +        "enabledDynamicField": false,
    +        "fields": [
    +            {
    +                "fieldName": "id",
    +                "dataType": "Int64",
    +                "isPrimary": true
    +            },
    +            {
    +                "fieldName": "text",
    +                "dataType": "VarChar",
    +                "elementTypeParams": {
    +                    "max_length": 1000,
    +                    "enable_analyzer": true
    +                }
    +            },
    +            {
    +                "fieldName": "sparse",
    +                "dataType": "SparseFloatVector"
    +            }
    +        ]
    +    }'
     

    In questa configurazione:

    • sparse: un campo vettoriale riservato alla memorizzazione delle incorporazioni sparse generate internamente per le operazioni di ricerca full text. Il tipo di dati deve essere SPARSE_FLOAT_VECTOR.

    Ora, definire una funzione che converta il testo in rappresentazioni vettoriali rade e aggiungerla allo schema.

    +
    bm25_function = Function(​
         name="text_bm25_emb", # Function name​
         input_field_names=["text"], # Name of the VARCHAR field containing raw text data​
         output_field_names=["sparse"], # Name of the SPARSE_FLOAT_VECTOR field reserved to store generated embeddings
         function_type=FunctionType.BM25,
     )
     ​
     schema.add_function(bm25_function)​
     
    +
    +
    import io.milvus.common.clientenum.FunctionType;
    +import io.milvus.v2.service.collection.request.CreateCollectionReq.Function;
    +
    +import java.util.*;
    +
    +schema.addFunction(Function.builder()
    +        .functionType(FunctionType.BM25)
    +        .name("text_bm25_emb")
    +        .inputFieldNames(Collections.singletonList("text"))
+        .outputFieldNames(Collections.singletonList("sparse"))
    +        .build());
    +
    +
    const functions = [
    +    {
    +      name: 'text_bm25_emb',
    +      description: 'bm25 function',
    +      type: FunctionType.BM25,
    +      input_field_names: ['text'],
+      output_field_names: ['sparse'],
    +      params: {},
    +    },
    +];
    +
    +
    export schema='{
    +        "autoId": true,
    +        "enabledDynamicField": false,
    +        "fields": [
    +            {
    +                "fieldName": "id",
    +                "dataType": "Int64",
    +                "isPrimary": true
    +            },
    +            {
    +                "fieldName": "text",
    +                "dataType": "VarChar",
    +                "elementTypeParams": {
    +                    "max_length": 1000,
    +                    "enable_analyzer": true
    +                }
    +            },
    +            {
    +                "fieldName": "sparse",
    +                "dataType": "SparseFloatVector"
    +            }
    +        ],
    +        "functions": [
    +            {
    +                "name": "text_bm25_emb",
    +                "type": "BM25",
    +                "inputFieldNames": ["text"],
    +                "outputFieldNames": ["sparse"],
    +                "params": {}
    +            }
    +        ]
    +    }'
     

    Parametro

    Descrizione


    Per collezioni con più campi VARCHAR che richiedono la conversione da testo a vettore sparso, aggiungere funzioni separate allo schema della collezione, assicurandosi che ogni funzione abbia un nome e un valore output_field_names unici.
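    Uno schizzo indicativo (nomi di campo ipotetici; schema è quello definito sopra) con due campi VARCHAR, ciascuno con la propria funzione BM25 e il proprio campo sparso di output:

    from pymilvus import Function, FunctionType

    # Due funzioni BM25 distinte: nomi e output_field_names devono essere unici
    schema.add_function(Function(
        name="title_bm25_emb",
        input_field_names=["title"],
        output_field_names=["title_sparse"],
        function_type=FunctionType.BM25,
    ))

    schema.add_function(Function(
        name="content_bm25_emb",
        input_field_names=["content"],
        output_field_names=["content_sparse"],
        function_type=FunctionType.BM25,
    ))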

    Configurare l'indice

    Dopo aver definito lo schema con i campi necessari e la funzione incorporata, configurare l'indice per la raccolta. Per semplificare questo processo, utilizzare AUTOINDEX come index_type, un'opzione che consente a Milvus di scegliere e configurare il tipo di indice più adatto in base alla struttura dei dati.

    +
    index_params = MilvusClient.prepare_index_params()​
     ​
     index_params.add_index(​
         field_name="sparse",
         index_type="AUTOINDEX",
         metric_type="BM25"​
     )​
     
    +
    +
    import io.milvus.v2.common.IndexParam;
    +
    +List<IndexParam> indexes = new ArrayList<>();
    +indexes.add(IndexParam.builder()
    +        .fieldName("sparse")
    +        .indexType(IndexParam.IndexType.SPARSE_INVERTED_INDEX)
    +        .metricType(IndexParam.MetricType.BM25)
    +        .build());
    +
    +
    const index_params = [
    +  {
    +    fieldName: "sparse",
    +    metricType: "BM25",
    +    indexType: "AUTOINDEX",
    +  },
    +];
    +
    +
    export indexParams='[
    +        {
    +            "fieldName": "sparse",
    +            "metricType": "BM25",
    +            "indexType": "AUTOINDEX"
    +        }
    +    ]'
     

    Parametro

    Descrizione


    metric_type

    Il valore di questo parametro deve essere impostato su BM25 per la funzionalità di ricerca full text.

    Creare la collezione

    Creare ora la collezione utilizzando i parametri dello schema e dell'indice definiti.

    +
    MilvusClient.create_collection(​
         collection_name='demo', ​
         schema=schema, ​
         index_params=index_params​
     )​
     
    +
    +
    import io.milvus.v2.service.collection.request.CreateCollectionReq;
    +
    +CreateCollectionReq requestCreate = CreateCollectionReq.builder()
    +        .collectionName("demo")
    +        .collectionSchema(schema)
    +        .indexParams(indexes)
    +        .build();
    +client.createCollection(requestCreate);
    +
    +
    await client.create_collection(
    +    collection_name: 'demo', 
    +    schema: schema, 
    +    index_params: index_params
    +);
    +
    +
    export CLUSTER_ENDPOINT="http://localhost:19530"
    +export TOKEN="root:Milvus"
    +
    +curl --request POST \
    +--url "${CLUSTER_ENDPOINT}/v2/vectordb/collections/create" \
    +--header "Authorization: Bearer ${TOKEN}" \
    +--header "Content-Type: application/json" \
    +-d "{
    +    \"collectionName\": \"demo\",
    +    \"schema\": $schema,
    +    \"indexParams\": $indexParams
    +}"
     

    Inserire i dati di testo

    Dopo aver impostato la raccolta e l'indice, si è pronti a inserire i dati di testo. In questo processo, è sufficiente fornire il testo grezzo. La funzione integrata definita in precedenza genera automaticamente il vettore sparse corrispondente per ogni voce di testo.

    client.insert('demo', [
    +    {'text': 'information retrieval is a field of study.'},
    +    {'text': 'information retrieval focuses on finding relevant information in large datasets.'},
    +    {'text': 'data mining and information retrieval overlap in research.'},
    +])
     
     
    +
    await client.insert({
    +collection_name: 'demo', 
    +data: [
    +    {'text': 'information retrieval is a field of study.'},
    +    {'text': 'information retrieval focuses on finding relevant information in large datasets.'},
    +    {'text': 'data mining and information retrieval overlap in research.'},
    +]);
    +
    +
    curl --request POST \
    +--url "${CLUSTER_ENDPOINT}/v2/vectordb/entities/insert" \
    +--header "Authorization: Bearer ${TOKEN}" \
    +--header "Content-Type: application/json" \
    +-d '{
    +    "data": [
    +        {"text": "information retrieval is a field of study."},
    +        {"text": "information retrieval focuses on finding relevant information in large datasets."},
    +        {"text": "data mining and information retrieval overlap in research."}       
    +    ],
    +    "collectionName": "demo"
    +}'
    +
    +

    Eseguire una ricerca full text

    Una volta inseriti i dati nella raccolta, è possibile eseguire ricerche full text utilizzando query di testo grezzo. Milvus converte automaticamente la query in un vettore sparso e classifica i risultati della ricerca utilizzando l'algoritmo BM25, per poi restituire i risultati topK (limit).

    +
    search_params = {​
         'params': {'drop_ratio_search': 0.6},​
     }​
     ​
     MilvusClient.search(​
         collection_name='demo', ​
    -    data=['Who started AI research?'],​
    +    data=['whats the focus of information retrieval?'],​
         anns_field='sparse',​
         limit=3,​
         search_params=search_params​
     )​
     
    +
    +
    import io.milvus.v2.service.vector.request.SearchReq;
    +import io.milvus.v2.service.vector.request.data.EmbeddedText;
    +import io.milvus.v2.service.vector.response.SearchResp;
    +
    +Map<String,Object> searchParams = new HashMap<>();
    +searchParams.put("drop_ratio_search", 0.6);
    +SearchResp searchResp = client.search(SearchReq.builder()
    +        .collectionName("demo")
    +        .data(Collections.singletonList(new EmbeddedText("whats the focus of information retrieval?")))
    +        .annsField("sparse")
    +        .topK(3)
    +        .searchParams(searchParams)
    +        .outputFields(Collections.singletonList("text"))
    +        .build());
    +
    +
    await client.search(
    +    collection_name: 'demo', 
    +    data: ['whats the focus of information retrieval?'],
    +    anns_field: 'sparse',
    +    limit: 3,
    +    params: {'drop_ratio_search': 0.6},
    +)
    +
    +
    curl --request POST \
    +--url "${CLUSTER_ENDPOINT}/v2/vectordb/entities/search" \
    +--header "Authorization: Bearer ${TOKEN}" \
    +--header "Content-Type: application/json" \
    +--data-raw '{
    +    "collectionName": "demo",
    +    "data": [
    +        "whats the focus of information retrieval?"
    +    ],
    +    "annsField": "sparse",
    +    "limit": 3,
    +    "outputFields": [
    +        "text"
    +    ],
    +    "searchParams":{
    +        "params":{
    +            "drop_ratio_search":0.6
    +        }
    +    }
    +}'
     

    Parametro

    Descrizione

diff --git a/localization/v2.5.x/site/it/userGuide/search-query-get/keyword-match.md b/localization/v2.5.x/site/it/userGuide/search-query-get/keyword-match.md

    Corrispondenza del testo

    La corrispondenza del testo in Milvus consente di recuperare documenti precisi in base a termini specifici. Questa funzione è utilizzata principalmente per la ricerca filtrata per soddisfare condizioni specifiche e può incorporare un filtro scalare per affinare i risultati della query, consentendo ricerche di similarità all'interno di vettori che soddisfano criteri scalari.

    La corrispondenza del testo si concentra sulla ricerca delle occorrenze esatte dei termini della query, senza assegnare un punteggio alla rilevanza dei documenti abbinati. Se si desidera recuperare i documenti più rilevanti in base al significato semantico e all'importanza dei termini della query, si consiglia di utilizzare la ricerca full text.

    Panoramica

    Milvus integra Tantivy per alimentare l'indice inverso e la ricerca testuale basata sui termini. Per ogni inserimento di testo, Milvus lo indicizza seguendo la procedura.

    1. Analizzatore: L'analizzatore elabora il testo in ingresso tokenizzandolo in singole parole, o token, e applicando poi i filtri necessari. Ciò consente a Milvus di costruire un indice basato su questi token.

    2. Indicizzazione: Dopo l'analisi del testo, Milvus crea un indice inverso che mappa ogni singolo token con i documenti che lo contengono.

    Quando un utente esegue una corrispondenza di testo, l'indice invertito viene utilizzato per recuperare rapidamente tutti i documenti che contengono i termini. Questo è molto più veloce della scansione di ogni singolo documento.

    (Figura: Corrispondenza del testo)

    Abilita la corrispondenza del testo

    La corrispondenza del testo funziona sul tipo di campo VARCHAR, che è essenzialmente il tipo di dati stringa di Milvus. Per abilitare la corrispondenza del testo, impostare sia enable_analyzer che enable_match su True e configurare poi, facoltativamente, un analizzatore per l'analisi del testo quando si definisce lo schema della raccolta.

    Impostare enable_analyzer e enable_match

    Per abilitare la corrispondenza del testo per un campo VARCHAR specifico, impostare entrambi i parametri enable_analyzer e enable_match su True quando si definisce lo schema del campo. In questo modo Milvus viene istruito a tokenizzare il testo e a creare un indice invertito per il campo specificato, consentendo corrispondenze di testo rapide ed efficienti.

    +
    from pymilvus import MilvusClient, DataType​
     ​
     schema = MilvusClient.create_schema(auto_id=True, enable_dynamic_field=False)​
     schema.add_field(
         field_name='text',
         datatype=DataType.VARCHAR,
         max_length=1000,
         enable_analyzer=True, # Whether to enable text analysis for this field
         enable_match=True # Whether to enable text match
     )​
     
     

    +
    import io.milvus.v2.common.DataType;
    +import io.milvus.v2.service.collection.request.AddFieldReq;
    +import io.milvus.v2.service.collection.request.CreateCollectionReq;
    +
    +CreateCollectionReq.CollectionSchema schema = CreateCollectionReq.CollectionSchema.builder()
    +        .enableDynamicField(false)
    +        .build();
    +
    +schema.addField(AddFieldReq.builder()
    +        .fieldName("text")
    +        .dataType(DataType.VarChar)
    +        .maxLength(1000)
    +        .enableAnalyzer(true)
    +        .enableMatch(true)
    +        .build());
    +
    +
    +
    const schema = [
    +  {
    +    name: "id",
    +    data_type: DataType.Int64,
    +    is_primary_key: true,
    +  },
    +  {
    +    name: "text",
    +    data_type: "VarChar",
    +    enable_analyzer: true,
    +    enable_match: true,
    +    max_length: 1000,
    +  },
    +  {
    +    name: "sparse",
    +    data_type: DataType.SparseFloatVector,
    +  },
    +];
    +
    +
    +
    export schema='{
    +        "autoId": true,
    +        "enabledDynamicField": false,
    +        "fields": [
    +            {
    +                "fieldName": "id",
    +                "dataType": "Int64",
    +                "isPrimary": true
    +            },
    +            {
    +                "fieldName": "text",
    +                "dataType": "VarChar",
    +                "elementTypeParams": {
    +                    "max_length": 1000,
    +                    "enable_analyzer": true,
    +                    "enable_match": true
    +                }
    +            },
    +            {
    +                "fieldName": "sparse",
    +                "dataType": "SparseFloatVector"
    +            }
    +        ]
    +    }'
    +
    +
    +

    Opzionale: Configurare un analizzatore

    Le prestazioni e l'accuratezza della corrispondenza del testo dipendono dall'analizzatore selezionato. Diversi analizzatori sono adatti a varie lingue e strutture di testo, quindi la scelta di quello giusto può avere un impatto significativo sui risultati della ricerca per il vostro caso d'uso specifico.

    Per impostazione predefinita, Milvus utilizza l'analizzatore standard, che tokenizza il testo in base agli spazi bianchi e alla punteggiatura, rimuove i token più lunghi di 40 caratteri e converte il testo in minuscolo. Non sono necessari parametri aggiuntivi per applicare questa impostazione predefinita. Per ulteriori informazioni, consultare Standard.

    Nei casi in cui sia necessario un analizzatore diverso, è possibile configurarne uno utilizzando il parametro analyzer_params. Ad esempio, per applicare l'analizzatore english per l'elaborazione del testo inglese.

    +
    analyzer_params={​
         "type": "english"​
     }​

     schema.add_field(
         field_name='text',
         datatype=DataType.VARCHAR,
         max_length=200,
         enable_analyzer=True,
         analyzer_params=analyzer_params,
         enable_match=True,
     )​
     
     
    const schema = [
    +  {
    +    name: "id",
    +    data_type: DataType.Int64,
    +    is_primary_key: true,
    +  },
    +  {
    +    name: "text",
    +    data_type: "VarChar",
    +    enable_analyzer: true,
    +    enable_match: true,
    +    max_length: 1000,
    +    analyzer_params: { type: 'english' },
    +  },
    +  {
    +    name: "sparse",
    +    data_type: DataType.SparseFloatVector,
    +  },
    +];
    +
    +
    +
    export schema='{
    +        "autoId": true,
    +        "enabledDynamicField": false,
    +        "fields": [
    +            {
    +                "fieldName": "id",
    +                "dataType": "Int64",
    +                "isPrimary": true
    +            },
    +            {
    +                "fieldName": "text",
    +                "dataType": "VarChar",
    +                "elementTypeParams": {
    +                    "max_length": 200,
    +                    "enable_analyzer": true,
    +                    "enable_match": true,
    +                    "analyzer_params": {"type": "english"}
    +                }
    +            },
    +            {
    +                "fieldName": "my_vector",
    +                "dataType": "FloatVector",
    +                "elementTypeParams": {
    +                    "dim": "5"
    +                }
    +            }
    +        ]
    +    }'
    +
    +
    +

    Milvus offre anche altri analizzatori adatti a diversi linguaggi e scenari. Per maggiori dettagli, consultare la sezione Panoramica.

    +

    Utilizzare la corrispondenza del testo

    Una volta abilitata la corrispondenza di parole chiave per un campo VARCHAR nello schema della raccolta, è possibile eseguire corrispondenze di parole chiave utilizzando l'espressione TEXT_MATCH.

    -

    Sintassi dell'espressione TEXT_MATCH

    L'espressione TEXT_MATCH è usata per specificare il campo e le parole chiave da cercare. La sua sintassi è la seguente.

    -
    TEXT_MATCH(field_name, text)​
    +    

    Una volta abilitata la corrispondenza del testo per un campo VARCHAR nello schema della raccolta, è possibile eseguire corrispondenze di testo utilizzando l'espressione TEXT_MATCH.

    +

    Sintassi dell'espressione TEXT_MATCH

    L'espressione TEXT_MATCH è usata per specificare il campo e i termini da cercare. La sua sintassi è la seguente.

    +
    TEXT_MATCH(field_name, text)​
     
     
    • field_name: Il nome del campo VARCHAR da cercare.

    • -
    • text: Le parole chiave da cercare. Più parole chiave possono essere separate da spazi o da altri delimitatori appropriati in base alla lingua e all'analizzatore configurato.

    • +
    • text: I termini da cercare. I termini multipli possono essere separati da spazi o da altri delimitatori appropriati in base alla lingua e all'analizzatore configurato.

    -

    Per impostazione predefinita, TEXT_MATCH utilizza la logica di corrispondenza OR, ovvero restituisce i documenti che contengono una qualsiasi delle parole chiave specificate. Ad esempio, per cercare documenti contenenti le parole chiave machine o deep nel campo text, utilizzare la seguente espressione.

    +

    Per impostazione predefinita, TEXT_MATCH utilizza la logica di corrispondenza OR, ovvero restituisce i documenti che contengono uno qualsiasi dei termini specificati. Ad esempio, per cercare documenti contenenti il termine machine o deep nel campo text, utilizzare la seguente espressione.

    +
    filter = "TEXT_MATCH(text, 'machine deep')"​
    -
     
    -

    È anche possibile combinare più espressioni TEXT_MATCH utilizzando gli operatori logici per eseguire la corrispondenza AND. Ad esempio, per cercare documenti contenenti sia machine che deep nel campo text, utilizzare la seguente espressione.

    +
    String filter = "TEXT_MATCH(text, 'machine deep')";
    +
    +
    const filter = "TEXT_MATCH(text, 'machine deep')";
    +
    +
    export filter="\"TEXT_MATCH(text, 'machine deep')\""
    +
    +

    È inoltre possibile combinare più espressioni TEXT_MATCH utilizzando gli operatori logici per eseguire la corrispondenza AND. Ad esempio, per cercare documenti contenenti sia machine che deep nel campo text, utilizzare la seguente espressione.

    +
    filter = "TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')"​
    -
     
    -

    Ricerca con corrispondenza di parole chiave

    La corrispondenza per parola chiave può essere utilizzata in combinazione con la ricerca per similarità vettoriale per restringere l'ambito di ricerca e migliorare le prestazioni della ricerca. Filtrando la raccolta con la corrispondenza di parole chiave prima della ricerca per similarità vettoriale, è possibile ridurre il numero di documenti da ricercare, con conseguenti tempi di interrogazione più rapidi.

    -

    In questo esempio, l'espressione filter filtra i risultati della ricerca per includere solo i documenti che corrispondono alle parole chiave specificate keyword1 o keyword2. La ricerca di similarità vettoriale viene quindi eseguita su questo sottoinsieme filtrato di documenti.

    +
    String filter = "TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')";
    +
    +
    const filter = "TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')"
    +
    +
    export filter="\"TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')\""
    +
    +

    Ricerca con corrispondenza di testo

    La corrispondenza del testo può essere utilizzata in combinazione con la ricerca per similarità vettoriale per restringere l'ambito di ricerca e migliorare le prestazioni della ricerca. Filtrando la raccolta con la corrispondenza del testo prima della ricerca per similarità vettoriale, è possibile ridurre il numero di documenti da ricercare, con conseguenti tempi di interrogazione più rapidi.

    +

In questo esempio, l'espressione filter filtra i risultati della ricerca per includere solo i documenti che corrispondono ai termini specificati keyword1 o keyword2. La ricerca di similarità vettoriale viene quindi eseguita su questo sottoinsieme di documenti filtrati.

    +
    # Match entities with `keyword1` or `keyword2`​
     filter = "TEXT_MATCH(text, 'keyword1 keyword2')"​
     ​
    @@ -150,8 +295,58 @@ result = MilvusClient.search(​
     )​
     
     
    -

    Query con corrispondenza di parole chiave

    La corrispondenza di parole chiave può essere utilizzata anche per il filtraggio scalare nelle operazioni di query. Specificando un'espressione TEXT_MATCH nel parametro expr del metodo query(), è possibile recuperare i documenti che corrispondono alle parole chiave indicate.

    -

    L'esempio seguente recupera i documenti in cui il campo text contiene entrambe le parole chiave keyword1 e keyword2.

    +
    String filter = "TEXT_MATCH(text, 'keyword1 keyword2')";
    +
    +SearchResp searchResp = client.search(SearchReq.builder()
    +        .collectionName("YOUR_COLLECTION_NAME")
    +        .annsField("embeddings")
+        .data(Collections.singletonList(queryVector))
    +        .filter(filter)
    +        .topK(10)
    +        .outputFields(Arrays.asList("id", "text"))
    +        .build());
    +
    +
    // Match entities with `keyword1` or `keyword2`
    +const filter = "TEXT_MATCH(text, 'keyword1 keyword2')";
    +
    +// Assuming 'embeddings' is the vector field and 'text' is the VARCHAR field
+const result = await client.search({
    +    collection_name: "YOUR_COLLECTION_NAME", // Your collection name
    +    anns_field: "embeddings", // Vector field name
    +    data: [query_vector], // Query vector
    +    filter: filter,
    +    params: {"nprobe": 10},
    +    limit: 10, // Max. number of results to return
    +    output_fields: ["id", "text"] //Fields to return
+});
    +
    +
    export filter="\"TEXT_MATCH(text, 'keyword1 keyword2')\""
    +
    +export CLUSTER_ENDPOINT="http://localhost:19530"
    +export TOKEN="root:Milvus"
    +
    +curl --request POST \
    +--url "${CLUSTER_ENDPOINT}/v2/vectordb/entities/search" \
    +--header "Authorization: Bearer ${TOKEN}" \
    +--header "Content-Type: application/json" \
    +-d '{
    +    "collectionName": "demo2",
    +    "annsField": "my_vector",
    +    "data": [[0.19886812562848388, 0.06023560599112088, 0.6976963061752597, 0.2614474506242501, 0.838729485096104]],
    +    "filter": '"$filter"',
    +    "searchParams": {
    +        "params": {
    +            "nprobe": 10
    +        }
    +    },
    +    "limit": 3,
    +    "outputFields": ["text","id"]
    +}'
    +
    +

    Query con corrispondenza di testo

    La corrispondenza testuale può essere utilizzata anche per il filtraggio scalare nelle operazioni di query. Specificando un'espressione TEXT_MATCH nel parametro expr del metodo query(), è possibile recuperare i documenti che corrispondono ai termini indicati.

    +

    L'esempio seguente recupera i documenti in cui il campo text contiene entrambi i termini keyword1 e keyword2.

    +
    # Match entities with both `keyword1` and `keyword2`​
     filter = "TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')"​
     ​
    @@ -161,6 +356,39 @@ result = MilvusClient.query(​
         output_fields=["id", "text"]​
     )​
     
    +
    +
    String filter = "TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')";
    +
    +QueryResp queryResp = client.query(QueryReq.builder()
    +        .collectionName("YOUR_COLLECTION_NAME")
    +        .filter(filter)
    +        .outputFields(Arrays.asList("id", "text"))
    +        .build()
    +);
    +
    +
    // Match entities with both `keyword1` and `keyword2`
    +const filter = "TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')";
    +
+const result = await client.query({
    +    collection_name: "YOUR_COLLECTION_NAME",
    +    filter: filter, 
    +    output_fields: ["id", "text"]
+});
    +
    +
    export filter="\"TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')\""
    +
    +export CLUSTER_ENDPOINT="http://localhost:19530"
    +export TOKEN="root:Milvus"
    +
    +curl --request POST \
    +--url "${CLUSTER_ENDPOINT}/v2/vectordb/entities/query" \
    +--header "Authorization: Bearer ${TOKEN}" \
    +--header "Content-Type: application/json" \
    +-d '{
    +    "collectionName": "demo2",
    +    "filter": '"$filter"',
    +    "outputFields": ["id", "text"]
    +}'
     

    Considerazioni

      -
    • L'abilitazione della corrispondenza delle parole chiave per un campo attiva la creazione di un indice invertito, che consuma risorse di archiviazione. Considerare l'impatto sullo storage quando si decide di abilitare questa funzione, poiché varia in base alla dimensione del testo, ai token unici e all'analizzatore utilizzato.

    • +
    • L'abilitazione della corrispondenza del testo per un campo attiva la creazione di un indice invertito, che consuma risorse di memorizzazione. Considerare l'impatto sullo storage quando si decide di abilitare questa funzione, poiché varia in base alla dimensione del testo, ai token unici e all'analizzatore utilizzato.

• Una volta definito un analizzatore nello schema, le sue impostazioni diventano permanenti per quella raccolta. Se si stabilisce che un analizzatore diverso è più adatto alle proprie esigenze, è necessario eliminare la raccolta esistente e crearne una nuova con la configurazione dell'analizzatore desiderata, come illustrato nello schizzo qui sotto.
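Uno schizzo minimo e puramente indicativo (ipotizza pymilvus, un server locale e una raccolta di esempio my_collection) di come eliminare la raccolta e ricrearla con un analizzatore diverso:

from pymilvus import MilvusClient, DataType

client = MilvusClient(uri="http://localhost:19530")

# Eliminare la raccolta esistente (operazione irreversibile: i dati vanno reinseriti)
client.drop_collection(collection_name="my_collection")

# Ricreare lo schema con un analizzatore diverso per il campo di testo
schema = client.create_schema(auto_id=True, enable_dynamic_field=False)
schema.add_field(field_name="id", datatype=DataType.INT64, is_primary=True)
schema.add_field(
    field_name="text",
    datatype=DataType.VARCHAR,
    max_length=1000,
    enable_analyzer=True,
    enable_match=True,
    analyzer_params={"type": "english"},  # nuovo analizzatore desiderato
)
client.create_collection(collection_name="my_collection", schema=schema)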

    diff --git a/localization/v2.5.x/site/it/userGuide/search-query-get/multi-vector-search.md b/localization/v2.5.x/site/it/userGuide/search-query-get/multi-vector-search.md index d22ac5ac8..f4ba14998 100644 --- a/localization/v2.5.x/site/it/userGuide/search-query-get/multi-vector-search.md +++ b/localization/v2.5.x/site/it/userGuide/search-query-get/multi-vector-search.md @@ -39,10 +39,10 @@ title: Ricerca ibrida >

    La ricerca ibrida è adatta ai due scenari seguenti.

    -

    Ricerca vettoriale sparsa e densa

    Diversi tipi di vettori possono rappresentare informazioni diverse e l'uso di vari modelli di incorporamento può rappresentare in modo più completo diverse caratteristiche e aspetti dei dati. Ad esempio, l'uso di diversi modelli di incorporamento per la stessa frase può generare un vettore denso per rappresentare il significato semantico e un vettore rado per rappresentare la frequenza delle parole nella frase.

    +

    Ricerca vettoriale sparsa e densa

    Diversi tipi di vettori possono rappresentare informazioni diverse e l'uso di vari modelli di incorporazione può rappresentare in modo più completo diverse caratteristiche e aspetti dei dati. Ad esempio, l'uso di diversi modelli di incorporamento per la stessa frase può generare un vettore denso per rappresentare il significato semantico e un vettore rado per rappresentare la frequenza delle parole nella frase.

      -
    • Vettori sparsi: I vettori sparsi sono caratterizzati da un'elevata dimensionalità del vettore e dalla presenza di pochi valori non nulli. Questa struttura li rende particolarmente adatti alle applicazioni tradizionali di information retrieval. Nella maggior parte dei casi, il numero di dimensioni utilizzate nei vettori sparsi corrisponde a diversi token in una o più lingue. A ogni dimensione viene assegnato un valore che indica l'importanza relativa di quel token all'interno del documento. Questa disposizione si rivela vantaggiosa per i compiti che prevedono la corrispondenza di parole chiave.

    • -
    • Vettori densi: I vettori densi sono incorporazioni derivate dalle reti neurali. Disposti in un array ordinato, questi vettori catturano l'essenza semantica del testo in ingresso. Si noti che i vettori densi non sono limitati all'elaborazione del testo; sono anche ampiamente utilizzati nella computer vision per rappresentare la semantica dei dati visivi. Questi vettori densi, di solito generati da modelli di incorporazione del testo, sono caratterizzati dalla maggior parte o da tutti gli elementi non nulli. Pertanto, i vettori densi sono particolarmente efficaci per le applicazioni di ricerca semantica, in quanto possono restituire i risultati più simili in base alla distanza vettoriale anche in assenza di corrispondenze esatte tra le parole chiave. Questa capacità consente di ottenere risultati di ricerca più sfumati e consapevoli del contesto, spesso cogliendo relazioni tra concetti che potrebbero sfuggire agli approcci basati sulle parole chiave.

    • +
    • Vettori sparsi: I vettori sparsi sono caratterizzati da un'elevata dimensionalità del vettore e dalla presenza di pochi valori non nulli. Questa struttura li rende particolarmente adatti alle applicazioni tradizionali di information retrieval. Nella maggior parte dei casi, il numero di dimensioni utilizzate nei vettori sparsi corrisponde a diversi token in una o più lingue. A ogni dimensione viene assegnato un valore che indica l'importanza relativa di quel token all'interno del documento. Questa disposizione si rivela vantaggiosa per i compiti che prevedono la corrispondenza del testo.

    • +
    • Vettori densi: I vettori densi sono incorporazioni derivate dalle reti neurali. Disposti in un array ordinato, questi vettori catturano l'essenza semantica del testo in ingresso. Si noti che i vettori densi non sono limitati all'elaborazione del testo; sono anche ampiamente utilizzati nella computer vision per rappresentare la semantica dei dati visivi. Questi vettori densi, di solito generati da modelli di incorporazione del testo, sono caratterizzati dalla maggior parte o da tutti gli elementi non nulli. Pertanto, i vettori densi sono particolarmente efficaci per le applicazioni di ricerca semantica, in quanto possono restituire i risultati più simili in base alla distanza vettoriale anche in assenza di corrispondenze testuali esatte. Questa capacità consente di ottenere risultati di ricerca più sfumati e consapevoli del contesto, spesso cogliendo relazioni tra concetti che potrebbero sfuggire agli approcci basati sulle parole chiave.

    Per maggiori dettagli, consultare Sparse Vector e Dense Vector.

    Ricerca multimodale

    La ricerca multimodale si riferisce alla ricerca di similarità di dati non strutturati attraverso più modalità (come immagini, video, audio, testo, ecc.). Ad esempio, una persona può essere rappresentata utilizzando diverse modalità di dati, come le impronte digitali, le impronte vocali e i tratti del viso. La ricerca ibrida supporta più ricerche simultanee. Ad esempio, la ricerca di una persona con impronte digitali e impronte vocali simili.
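Uno schizzo minimo e puramente indicativo (ipotizza pymilvus, una raccolta my_collection con i campi vettoriali dense e sparse e vettori di query già calcolati) di come due richieste ANN vengano combinate in un'unica ricerca ibrida con riordino RRF:

from pymilvus import MilvusClient, AnnSearchRequest, RRFRanker

client = MilvusClient(uri="http://localhost:19530")

# Una richiesta per il campo vettoriale denso e una per quello sparso
dense_req = AnnSearchRequest(
    data=[dense_query_vector],   # vettore denso di query (ipotetico)
    anns_field="dense",
    param={"nprobe": 10},
    limit=10,
)
sparse_req = AnnSearchRequest(
    data=[sparse_query_vector],  # vettore sparso di query (ipotetico)
    anns_field="sparse",
    param={"drop_ratio_search": 0.2},
    limit=10,
)

# Le due ricerche vengono eseguite insieme e i risultati fusi con Reciprocal Rank Fusion
results = client.hybrid_search(
    collection_name="my_collection",
    reqs=[dense_req, sparse_req],
    ranker=RRFRanker(),
    limit=10,
)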

    @@ -228,7 +228,7 @@ schema.addField(AddFieldReq.builder()​

    Durante le ricerche di vettori sparsi, è possibile semplificare il processo di generazione di vettori di incorporamento sparsi sfruttando le funzionalità di Full Text Search. Per maggiori dettagli, vedere Ricerca a testo completo.

    -

    Creare l'indice

    Dopo aver definito lo schema della collezione, è necessario impostare gli indici dei vettori e le metriche di similarità. In questo esempio, viene creato un indice IVF_FLAT per il campo vettoriale denso dense, e uno SPARSE_INVERTED_INDEX per il campo vettoriale rado sparse. Per conoscere i tipi di indici supportati, vedere Indice spiegato.

    +

    Creare l'indice

    Dopo aver definito lo schema della collezione, è necessario impostare gli indici dei vettori e le metriche di somiglianza. In questo esempio, viene creato un indice IVF_FLAT per il campo vettoriale denso dense, e uno SPARSE_INVERTED_INDEX per il campo vettoriale rado sparse. Per conoscere i tipi di indici supportati, vedere Indice spiegato.

    from pymilvus import MilvusClient​
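# Schizzo puramente indicativo: il nome della raccolta e l'URI del server sono ipotesi
# di esempio; i campi dense e sparse e i tipi di indice sono quelli citati nel testo.
client = MilvusClient(uri="http://localhost:19530")

index_params = client.prepare_index_params()
index_params.add_index(
    field_name="dense",
    index_type="IVF_FLAT",
    metric_type="IP",
    params={"nlist": 128},
)
index_params.add_index(
    field_name="sparse",
    index_type="SPARSE_INVERTED_INDEX",
    metric_type="IP",
)
client.create_index(collection_name="my_collection", index_params=index_params)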
    diff --git a/localization/v2.5.x/site/it/userGuide/search-query-get/single-vector-search.md b/localization/v2.5.x/site/it/userGuide/search-query-get/single-vector-search.md
    index 63efcd456..88d363f5b 100644
    --- a/localization/v2.5.x/site/it/userGuide/search-query-get/single-vector-search.md
    +++ b/localization/v2.5.x/site/it/userGuide/search-query-get/single-vector-search.md
    @@ -38,7 +38,7 @@ title: Ricerca ANN di base
             >
           
         

    La ricerca ANN e la ricerca k-Nearest Neighbors (kNN) sono i metodi abituali nelle ricerche di similarità vettoriale. In una ricerca kNN, è necessario confrontare tutti i vettori di uno spazio vettoriale con il vettore di query contenuto nella richiesta di ricerca prima di individuare quelli più simili, il che richiede molto tempo e risorse.

    -

    A differenza delle ricerche kNN, un algoritmo di ricerca ANN richiede un file di indice che registra l'ordine ordinato delle incorporazioni vettoriali. Quando arriva una richiesta di ricerca, è possibile utilizzare il file di indice come riferimento per individuare rapidamente un sottogruppo contenente probabilmente le incorporazioni vettoriali più simili al vettore interrogato. Quindi, è possibile utilizzare il tipo di metrica specificato per misurare la somiglianza tra il vettore di query e quelli del sottogruppo, ordinare i membri del gruppo in base alla somiglianza con il vettore di query e individuare i primi K membri del gruppo.

    +

    A differenza delle ricerche kNN, un algoritmo di ricerca ANN richiede un file di indice che registra l'ordine ordinato delle incorporazioni vettoriali. Quando arriva una richiesta di ricerca, è possibile utilizzare il file di indice come riferimento per individuare rapidamente un sottogruppo contenente probabilmente le incorporazioni vettoriali più simili al vettore interrogato. Quindi, si può usare il tipo di metrica specificato per misurare la somiglianza tra il vettore di query e quelli del sottogruppo, ordinare i membri del gruppo in base alla somiglianza con il vettore di query e determinare i membri del gruppo top-K.

    Le ricerche ANN dipendono da indici precostituiti e la velocità di ricerca, l'utilizzo della memoria e la correttezza della ricerca possono variare a seconda del tipo di indice scelto. È necessario bilanciare le prestazioni e la correttezza della ricerca.

    Per ridurre la curva di apprendimento, Milvus offre AUTOINDEX. Con AUTOINDEX, Milvus è in grado di analizzare la distribuzione dei dati all'interno della collezione durante la creazione dell'indice e di impostare i parametri dell'indice più ottimizzati in base all'analisi per raggiungere un equilibrio tra prestazioni di ricerca e correttezza.
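Uno schizzo minimo e puramente indicativo (ipotizza pymilvus, una raccolta di esempio my_collection e un campo vettoriale di nome vector) di come richiedere AUTOINDEX lasciando a Milvus la scelta dei parametri dell'indice:

from pymilvus import MilvusClient

client = MilvusClient(uri="http://localhost:19530")

index_params = client.prepare_index_params()
index_params.add_index(
    field_name="vector",      # nome di campo ipotetico
    index_type="AUTOINDEX",   # Milvus sceglie i parametri in base ai dati
    metric_type="COSINE",     # tipo di metrica di esempio
)
client.create_index(collection_name="my_collection", index_params=index_params)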

    Per informazioni dettagliate su AUTOINDEX e sui tipi di metriche applicabili, consultare AUTOINDEX e tipi di metriche. In questa sezione sono riportate informazioni dettagliate sui seguenti argomenti.

    @@ -884,7 +884,7 @@ curl --request POST \​ d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z" > -

    AUTOINDEX appiattisce notevolmente la curva di apprendimento delle ricerche ANN. Tuttavia, i risultati della ricerca potrebbero non essere sempre corretti con l'aumentare del top-K. Riducendo la portata della ricerca, migliorando la pertinenza dei risultati e diversificando i risultati, Milvus elabora i seguenti miglioramenti della ricerca.

    +

AUTOINDEX appiattisce notevolmente la curva di apprendimento delle ricerche ANN. Tuttavia, i risultati della ricerca potrebbero non essere sempre corretti con l'aumentare del top-K. Riducendo la portata della ricerca, migliorando la pertinenza dei risultati e diversificando i risultati, Milvus introduce i seguenti miglioramenti della ricerca.

    • Ricerca filtrata

  È possibile includere condizioni di filtraggio in una richiesta di ricerca, in modo che Milvus esegua un filtraggio dei metadati prima di eseguire le ricerche ANN, riducendo l'ambito di ricerca dall'intera raccolta alle sole entità che corrispondono alle condizioni di filtraggio specificate (vedere lo schizzo dopo questo elenco).

      @@ -904,9 +904,9 @@ curl --request POST \​
    • Ricerca a tutto testo

      La ricerca full text è una funzione che recupera i documenti contenenti termini o frasi specifiche in set di dati di testo, classificando poi i risultati in base alla rilevanza. Questa funzione supera le limitazioni della ricerca semantica, che potrebbe trascurare termini precisi, garantendo la ricezione dei risultati più accurati e contestualmente rilevanti. Inoltre, semplifica le ricerche vettoriali accettando input di testo grezzo, convertendo automaticamente i dati di testo in embedding sparsi senza dover generare manualmente embedding vettoriali.

      Per maggiori informazioni sulla ricerca full-text, consultare la sezione Ricerca full-text.

    • -
    • Corrispondenza di parole chiave

      -

      La corrispondenza delle parole chiave in Milvus consente di recuperare documenti precisi in base a termini specifici. Questa funzione è utilizzata principalmente per la ricerca filtrata per soddisfare condizioni specifiche e può incorporare un filtro scalare per affinare i risultati della query, consentendo ricerche di similarità all'interno di vettori che soddisfano criteri scalari.

      -

      Per informazioni dettagliate sulla corrispondenza delle parole chiave, consultare la sezione Corrispondenza delle parole chiave.

    • +
    • Corrispondenza del testo

      +

      La corrispondenza del testo in Milvus consente di recuperare documenti precisi in base a termini specifici. Questa funzione è utilizzata principalmente per la ricerca filtrata per soddisfare condizioni specifiche e può incorporare un filtro scalare per affinare i risultati della query, consentendo la ricerca di similarità all'interno di vettori che soddisfano criteri scalari.

      +

      Per informazioni dettagliate sulla corrispondenza del testo, consultare la sezione Corrispondenza del testo.

    • Utilizzare la chiave di partizione

      Il coinvolgimento di più campi scalari nel filtraggio dei metadati e l'uso di una condizione di filtraggio piuttosto complicata possono influire sull'efficienza della ricerca. Se si imposta un campo scalare come chiave di partizione e si utilizza una condizione di filtraggio che coinvolge la chiave di partizione nella richiesta di ricerca, si può limitare l'ambito di ricerca alle partizioni corrispondenti ai valori della chiave di partizione specificati.

      Per informazioni dettagliate sulla chiave di partizione, consultare la sezione Uso della chiave di partizione.

    • @@ -914,6 +914,6 @@ curl --request POST \​

  In Milvus, i file mappati in memoria consentono la mappatura diretta del contenuto dei file nella memoria. Questa funzione migliora l'efficienza della memoria, in particolare nelle situazioni in cui la memoria disponibile è scarsa ma il caricamento completo dei dati non è fattibile. Questo meccanismo di ottimizzazione può aumentare la capacità dei dati garantendo le prestazioni fino a un certo limite; tuttavia, quando la quantità di dati supera di gran lunga la memoria disponibile, le prestazioni delle ricerche e delle interrogazioni possono subire un grave degrado.

      Per maggiori dettagli sulle impostazioni di mmap, consultare la sezione Uso di mmap.

    • Compattazione dei cluster

      -

      La compattazione del clustering è progettata per migliorare le prestazioni di ricerca e ridurre i costi di collezioni di grandi dimensioni. Questa guida vi aiuterà a capire la compattazione del clustering e come questa funzione può migliorare le prestazioni di ricerca.

      +

  La compattazione dei cluster è progettata per migliorare le prestazioni di ricerca e ridurre i costi delle collezioni di grandi dimensioni. Questa guida vi aiuterà a capire la compattazione dei cluster e come questa funzione può migliorare le prestazioni di ricerca.

      Per informazioni dettagliate sulle compattazioni di clustering, consultare Compattazione di clustering.
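Uno schizzo minimo e puramente indicativo della ricerca filtrata citata nell'elenco precedente (ipotizza pymilvus, una raccolta di esempio my_collection con un campo scalare color e un vettore di query già calcolato):

from pymilvus import MilvusClient

client = MilvusClient(uri="http://localhost:19530")

# Il filtro sui metadati viene applicato prima della ricerca ANN,
# riducendo l'insieme dei candidati alle sole entità che lo soddisfano.
results = client.search(
    collection_name="my_collection",
    data=[query_vector],         # vettore di query (ipotetico)
    filter='color == "red"',     # condizione di filtraggio di esempio
    limit=5,
    output_fields=["color"],
)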

    diff --git a/localization/v2.5.x/site/ja/adminGuide/upgrade-pulsar-v3.json b/localization/v2.5.x/site/ja/adminGuide/upgrade-pulsar-v3.json index 4af5b3dd3..ace60cdee 100644 --- a/localization/v2.5.x/site/ja/adminGuide/upgrade-pulsar-v3.json +++ b/localization/v2.5.x/site/ja/adminGuide/upgrade-pulsar-v3.json @@ -1 +1 @@ -{"codeList":["kubectl -n default port-forward deploy/my-release-milvus-proxy 9091:9091 &​\n","[1] 8116​\nForwarding from 127.0.0.1:9091 -> 9091​\n\n","pid=8116​\n\n","curl 127.0.0.1:9091/api/v1/collections \\​\n|curl 127.0.0.1:9091/api/v1/persist -d @/dev/stdin\\​\n|jq '.flush_coll_segIDs'| jq '[.[] | .data[]]' | jq '{segmentIDs: (.)}' \\​\n> flushing_segments.json​\ncat flushing_segments.json​\n\n","{​\n\"segmentIDs\": [​\n 454097953998181000,​\n 454097953999383600,​\n 454097953998180800​\n]​\n}​\n\n","cat flushing_segments.json| curl -X GET 127.0.0.1:9091/api/v1/persist/state -d @/dev/stdin ​\n\n","{\"status\":{},\"flushed\":true}​\n\n","kill $pid​\n\n","[1] + 8116 terminated kubectl -n default port-forward deploy/my-release-milvus-proxy 9091:9091 ​\n\n","helm -n default get values my-release -o yaml > values.yaml​\ncat values.yaml​\n\n","helm -n default uninstall my-release​\n\n","These resources were kept due to the resource policy:​\n[PersistentVolumeClaim] my-release-minio​\n​\nrelease \"my-release\" uninstalled​\n\n","kubectl -n default get pvc -lapp=pulsar,release=my-release |grep -v NAME |awk '{print $1}' > pulsar-pvcs.txt​\nkubectl -n default get pvc -lapp=pulsar,release=my-release -o custom-columns=VOL:.spec.volumeName|grep -v VOL > pulsar-pvs.txt​\necho \"Volume Claims:\"​\ncat pulsar-pvcs.txt​\necho \"Volumes:\"​\ncat pulsar-pvs.txt​\n\n","Volume Claims:​\nmy-release-pulsar-bookie-journal-my-release-pulsar-bookie-0​\nmy-release-pulsar-bookie-journal-my-release-pulsar-bookie-1​\nmy-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-0​\nmy-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-1​\nmy-release-pulsar-zookeeper-data-my-release-pulsar-zookeeper-0​\nVolumes:​\npvc-f590a4de-df31-4ca8-a424-007eac3c619a​\npvc-17b0e215-3e14-4d14-901e-1a1dda9ff5a3​\npvc-72f83c25-6ea1-45ee-9559-0b783f2c530b​\npvc-60dcb6e4-760d-46c7-af1a-d1fc153b0caf​\npvc-2da33f64-c053-42b9-bb72-c5d50779aa0a​\n\n","cat pulsar-pvcs.txt |xargs -I {} kubectl -n default delete pvc {} --wait=false​\n\n","persistentvolumeclaim \"my-release-pulsar-bookie-journal-my-release-pulsar-bookie-0\" deleted​\npersistentvolumeclaim \"my-release-pulsar-bookie-journal-my-release-pulsar-bookie-1\" deleted​\npersistentvolumeclaim \"my-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-0\" deleted​\npersistentvolumeclaim \"my-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-1\" deleted​\npersistentvolumeclaim \"my-release-pulsar-zookeeper-data-my-release-pulsar-zookeeper-0\" deleted​\n\n","cat pulsar-pvs.txt |xargs -I {} kubectl -n default delete pvc {} --wait=false​\n\n","Error from server (NotFound): persistentvolumeclaims \"pvc-f590a4de-df31-4ca8-a424-007eac3c619a\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-17b0e215-3e14-4d14-901e-1a1dda9ff5a3\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-72f83c25-6ea1-45ee-9559-0b783f2c530b\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-60dcb6e4-760d-46c7-af1a-d1fc153b0caf\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-2da33f64-c053-42b9-bb72-c5d50779aa0a\" not found​\n\n","kubectl -n default get milvus my-release -o yaml > milvus.yaml​\nhead 
milvus.yaml -n 20​\n\n","apiVersion: milvus.io/v1beta1​\nkind: Milvus​\nmetadata:​\nannotations:​\n milvus.io/dependency-values-merged: \"true\"​\n milvus.io/pod-service-label-added: \"true\"​\n milvus.io/querynode-current-group-id: \"0\"​\ncreationTimestamp: \"2024-11-22T08:06:59Z\"​\nfinalizers:​\n- milvus.milvus.io/finalizer​\ngeneration: 3​\nlabels:​\n app: milvus​\n milvus.io/operator-version: 1.1.2​\nname: my-release​\nnamespace: default​\nresourceVersion: \"692217324\"​\nuid: 7a469ed0-9df1-494e-bd9a-340fac4305b5​\nspec:​\ncomponents:​\n\n","# a patch to retain etcd & storage data and delete pulsar data while delete milvus​\nspec:​\ndependencies:​\n etcd:​\n inCluster:​\n deletionPolicy: Retain​\n pvcDeletion: false​\n storage:​\n inCluster:​\n deletionPolicy: Retain​\n pvcDeletion: false​\n pulsar:​\n inCluster:​\n deletionPolicy: Delete​\n pvcDeletion: true​\n\n","kubectl -n default patch milvus my-release --patch-file patch.yaml --type=merge​\n\n","milvus.milvus.io/my-release patched​\n\n","kubectl -n default delete milvus my-release --wait=false​\nkubectl -n default get milvus my-release​\nkubectl -n default delete milvus my-release --wait=true​\n\n","milvus.milvus.io \"my-release\" deleted​\nNAME MODE STATUS UPDATED AGE​\nmy-release cluster Deleting True 41m​\nmilvus.milvus.io \"my-release\" deleted​\n\n","kubectl -n default get milvus my-release​\n\n","No resources found in default namespace.​\n\n","# change the following:​\npulsar:​\nenabled: false # set to false​\n# you may also clean up rest fields under pulsar field​\n# it's ok to keep them though.​\npulsarv3:​\nenabled: true​\n# append other values for pulsar v3 chart if needs​\n\n","helm repo add zilliztech https://zilliztech.github.io/milvus-helm​\nhelm repo update zilliztech​\n\n","\"zilliztech\" already exists with the same configuration, skipping​\nHang tight while we grab the latest from your chart repositories...​\n...Successfully got an update from the \"zilliztech\" chart repository​\nUpdate Complete. 
⎈Happy Helming!⎈​\n\n","helm -n default install my-release zilliztech/milvus --reset-values -f values.yaml​\n\n","NAME: my-release​\nLAST DEPLOYED: Fri Nov 22 15:31:27 2024​\nNAMESPACE: default​\nSTATUS: deployed​\nREVISION: 1​\nTEST SUITE: None​\n\n","NAME READY STATUS RESTARTS AGE​\nmy-release-etcd-0 1/1 Running 0 4m3s​\nmy-release-milvus-datanode-56487bc4bc-s6mbd 1/1 Running 0 4m5s​\nmy-release-milvus-indexnode-6476894d6-rv85d 1/1 Running 0 4m5s​\nmy-release-milvus-mixcoord-6d8875cb9c-67fcq 1/1 Running 0 4m4s​\nmy-release-milvus-proxy-7bc45d57c5-2qf8m 1/1 Running 0 4m4s​\nmy-release-milvus-querynode-77465747b-kt7f4 1/1 Running 0 4m4s​\nmy-release-minio-684ff4f5df-pnc97 1/1 Running 0 4m5s​\nmy-release-pulsarv3-bookie-0 1/1 Running 0 4m3s​\nmy-release-pulsarv3-bookie-1 1/1 Running 0 4m3s​\nmy-release-pulsarv3-bookie-2 1/1 Running 0 4m3s​\nmy-release-pulsarv3-bookie-init-6z4tk 0/1 Completed 0 4m1s​\nmy-release-pulsarv3-broker-0 1/1 Running 0 4m2s​\nmy-release-pulsarv3-broker-1 1/1 Running 0 4m2s​\nmy-release-pulsarv3-proxy-0 1/1 Running 0 4m2s​\nmy-release-pulsarv3-proxy-1 1/1 Running 0 4m2s​\nmy-release-pulsarv3-pulsar-init-wvqpc 0/1 Completed 0 4m1s​\nmy-release-pulsarv3-recovery-0 1/1 Running 0 4m3s​\nmy-release-pulsarv3-zookeeper-0 1/1 Running 0 4m2s​\nmy-release-pulsarv3-zookeeper-1 1/1 Running 0 4m2s​\nmy-release-pulsarv3-zookeeper-2 1/1 Running 0 4m2s​\n\n","# change the followings fields:​\napiVersion: milvus.io/v1beta1​\nkind: Milvus​\nmetadata:​\nannotations: null # this field should be removed or set to null​\nresourceVersion: null # this field should be removed or set to null​\nuid: null # this field should be removed or set to null​\nspec:​\ndependencies:​\n pulsar:​\n inCluster:​\n chartVersion: pulsar-v3​\n # delete all previous values for pulsar v2 and set it to null.​\n # you may add additional values here for pulsar v3 if you're sure about it.​\n values: null​\n\n","helm repo add milvus-operator https://zilliztech.github.io/milvus-operator​\nhelm repo update milvus-operator​\nhelm -n milvus-operator upgrade milvus-operator milvus-operator/milvus-operator​\n\n","kubectl create -f milvus.yaml​\n\n","milvus.milvus.io/my-release created​\n\n","NAME READY STATUS RESTARTS AGE​\nmy-release-etcd-0 1/1 Running 0 65m​\nmy-release-milvus-datanode-57fd59ff58-5mdrk 1/1 Running 0 93s​\nmy-release-milvus-indexnode-67867c6b9b-4wsbw 1/1 Running 0 93s​\nmy-release-milvus-mixcoord-797849f9bb-sf8z5 1/1 Running 0 93s​\nmy-release-milvus-proxy-5d5bf98445-c55m6 1/1 Running 0 93s​\nmy-release-milvus-querynode-0-64797f5c9-lw4rh 1/1 Running 0 92s​\nmy-release-minio-79476ccb49-zvt2h 1/1 Running 0 65m​\nmy-release-pulsar-bookie-0 1/1 Running 0 5m10s​\nmy-release-pulsar-bookie-1 1/1 Running 0 5m10s​\nmy-release-pulsar-bookie-2 1/1 Running 0 5m10s​\nmy-release-pulsar-bookie-init-v8fdj 0/1 Completed 0 5m11s​\nmy-release-pulsar-broker-0 1/1 Running 0 5m11s​\nmy-release-pulsar-broker-1 1/1 Running 0 5m10s​\nmy-release-pulsar-proxy-0 1/1 Running 0 5m11s​\nmy-release-pulsar-proxy-1 1/1 Running 0 5m10s​\nmy-release-pulsar-pulsar-init-5lhx7 0/1 Completed 0 5m11s​\nmy-release-pulsar-recovery-0 1/1 Running 0 5m11s​\nmy-release-pulsar-zookeeper-0 1/1 Running 0 5m11s​\nmy-release-pulsar-zookeeper-1 1/1 Running 0 5m10s​\nmy-release-pulsar-zookeeper-2 1/1 Running 0 5m10s​\n\n"],"headingContent":"Upgrading Pulsar 
​","anchorList":[{"label":"Pulsarのアップグレード","href":"Upgrading-Pulsar-​","type":1,"isActive":false},{"label":"ロードマップ","href":"Roadmap","type":2,"isActive":false},{"label":"手順","href":"Procedures","type":2,"isActive":false}]} \ No newline at end of file +{"codeList":["kubectl -n default port-forward deploy/my-release-milvus-proxy 9091:9091 &​\n","[1] 8116​\nForwarding from 127.0.0.1:9091 -> 9091​\n\n","pid=8116​\n\n","curl 127.0.0.1:9091/api/v1/collections \\​\n|curl 127.0.0.1:9091/api/v1/persist -d @/dev/stdin\\​\n|jq '.flush_coll_segIDs'| jq '[.[] | .data[]]' | jq '{segmentIDs: (.)}' \\​\n> flushing_segments.json​\ncat flushing_segments.json​\n\n","{​\n \"segmentIDs\": [​\n 454097953998181000,​\n 454097953999383600,​\n 454097953998180800​\n ]​\n}​\n\n","cat flushing_segments.json| curl -X GET 127.0.0.1:9091/api/v1/persist/state -d @/dev/stdin ​\n\n","{\"status\":{},\"flushed\":true}​\n\n","kill $pid​\n\n","[1] + 8116 terminated kubectl -n default port-forward deploy/my-release-milvus-proxy 9091:9091 ​\n\n","helm -n default get values my-release -o yaml > values.yaml​\ncat values.yaml​\n\n","helm -n default uninstall my-release​\n\n","These resources were kept due to the resource policy:​\n[PersistentVolumeClaim] my-release-minio​\n​\nrelease \"my-release\" uninstalled​\n\n","kubectl -n default get pvc -lapp=pulsar,release=my-release |grep -v NAME |awk '{print $1}' > pulsar-pvcs.txt​\nkubectl -n default get pvc -lapp=pulsar,release=my-release -o custom-columns=VOL:.spec.volumeName|grep -v VOL > pulsar-pvs.txt​\necho \"Volume Claims:\"​\ncat pulsar-pvcs.txt​\necho \"Volumes:\"​\ncat pulsar-pvs.txt​\n\n","Volume Claims:​\nmy-release-pulsar-bookie-journal-my-release-pulsar-bookie-0​\nmy-release-pulsar-bookie-journal-my-release-pulsar-bookie-1​\nmy-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-0​\nmy-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-1​\nmy-release-pulsar-zookeeper-data-my-release-pulsar-zookeeper-0​\nVolumes:​\npvc-f590a4de-df31-4ca8-a424-007eac3c619a​\npvc-17b0e215-3e14-4d14-901e-1a1dda9ff5a3​\npvc-72f83c25-6ea1-45ee-9559-0b783f2c530b​\npvc-60dcb6e4-760d-46c7-af1a-d1fc153b0caf​\npvc-2da33f64-c053-42b9-bb72-c5d50779aa0a​\n\n","cat pulsar-pvcs.txt |xargs -I {} kubectl -n default delete pvc {} --wait=false​\n\n","persistentvolumeclaim \"my-release-pulsar-bookie-journal-my-release-pulsar-bookie-0\" deleted​\npersistentvolumeclaim \"my-release-pulsar-bookie-journal-my-release-pulsar-bookie-1\" deleted​\npersistentvolumeclaim \"my-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-0\" deleted​\npersistentvolumeclaim \"my-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-1\" deleted​\npersistentvolumeclaim \"my-release-pulsar-zookeeper-data-my-release-pulsar-zookeeper-0\" deleted​\n\n","cat pulsar-pvs.txt |xargs -I {} kubectl -n default delete pvc {} --wait=false​\n\n","Error from server (NotFound): persistentvolumeclaims \"pvc-f590a4de-df31-4ca8-a424-007eac3c619a\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-17b0e215-3e14-4d14-901e-1a1dda9ff5a3\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-72f83c25-6ea1-45ee-9559-0b783f2c530b\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-60dcb6e4-760d-46c7-af1a-d1fc153b0caf\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-2da33f64-c053-42b9-bb72-c5d50779aa0a\" not found​\n\n","kubectl -n default get milvus my-release -o yaml > milvus.yaml​\nhead milvus.yaml -n 20​\n\n","apiVersion: milvus.io/v1beta1​\nkind: 
Milvus​\nmetadata:​\n annotations:​\n milvus.io/dependency-values-merged: \"true\"​\n milvus.io/pod-service-label-added: \"true\"​\n milvus.io/querynode-current-group-id: \"0\"​\n creationTimestamp: \"2024-11-22T08:06:59Z\"​\n finalizers:​\n - milvus.milvus.io/finalizer​\n generation: 3​\n labels:​\n app: milvus​\n milvus.io/operator-version: 1.1.2​\nname: my-release​\nnamespace: default​\nresourceVersion: \"692217324\"​\nuid: 7a469ed0-9df1-494e-bd9a-340fac4305b5​\nspec:​\n components:​\n\n","# a patch to retain etcd & storage data and delete pulsar data while delete milvus​\nspec:​\n dependencies:​\n etcd:​\n inCluster:​\n deletionPolicy: Retain​\n pvcDeletion: false​\n storage:​\n inCluster:​\n deletionPolicy: Retain​\n pvcDeletion: false​\n pulsar:​\n inCluster:​\n deletionPolicy: Delete​\n pvcDeletion: true​\n\n","kubectl -n default patch milvus my-release --patch-file patch.yaml --type=merge​\n\n","milvus.milvus.io/my-release patched​\n\n","kubectl -n default delete milvus my-release --wait=false​\nkubectl -n default get milvus my-release​\nkubectl -n default delete milvus my-release --wait=true​\n\n","milvus.milvus.io \"my-release\" deleted​\nNAME MODE STATUS UPDATED AGE​\nmy-release cluster Deleting True 41m​\nmilvus.milvus.io \"my-release\" deleted​\n\n","kubectl -n default get milvus my-release​\n\n","No resources found in default namespace.​\n\n","# change the following:​\npulsar:​\n enabled: false # set to false​\n # you may also clean up rest fields under pulsar field​\n # it's ok to keep them though.​\npulsarv3:​\n enabled: true​\n # append other values for pulsar v3 chart if needs​\n\n","helm repo add zilliztech https://zilliztech.github.io/milvus-helm​\nhelm repo update zilliztech​\n\n","\"zilliztech\" already exists with the same configuration, skipping​\nHang tight while we grab the latest from your chart repositories...​\n...Successfully got an update from the \"zilliztech\" chart repository​\nUpdate Complete. 
⎈Happy Helming!⎈​\n\n","helm -n default install my-release zilliztech/milvus --reset-values -f values.yaml​\n\n","NAME: my-release​\nLAST DEPLOYED: Fri Nov 22 15:31:27 2024​\nNAMESPACE: default​\nSTATUS: deployed​\nREVISION: 1​\nTEST SUITE: None​\n\n","NAME READY STATUS RESTARTS AGE​\nmy-release-etcd-0 1/1 Running 0 4m3s​\nmy-release-milvus-datanode-56487bc4bc-s6mbd 1/1 Running 0 4m5s​\nmy-release-milvus-indexnode-6476894d6-rv85d 1/1 Running 0 4m5s​\nmy-release-milvus-mixcoord-6d8875cb9c-67fcq 1/1 Running 0 4m4s​\nmy-release-milvus-proxy-7bc45d57c5-2qf8m 1/1 Running 0 4m4s​\nmy-release-milvus-querynode-77465747b-kt7f4 1/1 Running 0 4m4s​\nmy-release-minio-684ff4f5df-pnc97 1/1 Running 0 4m5s​\nmy-release-pulsarv3-bookie-0 1/1 Running 0 4m3s​\nmy-release-pulsarv3-bookie-1 1/1 Running 0 4m3s​\nmy-release-pulsarv3-bookie-2 1/1 Running 0 4m3s​\nmy-release-pulsarv3-bookie-init-6z4tk 0/1 Completed 0 4m1s​\nmy-release-pulsarv3-broker-0 1/1 Running 0 4m2s​\nmy-release-pulsarv3-broker-1 1/1 Running 0 4m2s​\nmy-release-pulsarv3-proxy-0 1/1 Running 0 4m2s​\nmy-release-pulsarv3-proxy-1 1/1 Running 0 4m2s​\nmy-release-pulsarv3-pulsar-init-wvqpc 0/1 Completed 0 4m1s​\nmy-release-pulsarv3-recovery-0 1/1 Running 0 4m3s​\nmy-release-pulsarv3-zookeeper-0 1/1 Running 0 4m2s​\nmy-release-pulsarv3-zookeeper-1 1/1 Running 0 4m2s​\nmy-release-pulsarv3-zookeeper-2 1/1 Running 0 4m2s​\n\n","# change the followings fields:​\napiVersion: milvus.io/v1beta1​\nkind: Milvus​\nmetadata:​\n annotations: null # this field should be removed or set to null​\n resourceVersion: null # this field should be removed or set to null​\n uid: null # this field should be removed or set to null​\nspec:​\n dependencies:​\n pulsar:​\n inCluster:​\n chartVersion: pulsar-v3​\n # delete all previous values for pulsar v2 and set it to null.​\n # you may add additional values here for pulsar v3 if you're sure about it.​\n values: null​\n\n","helm repo add milvus-operator https://zilliztech.github.io/milvus-operator​\nhelm repo update milvus-operator​\nhelm -n milvus-operator upgrade milvus-operator milvus-operator/milvus-operator​\n\n","kubectl create -f milvus.yaml​\n\n","milvus.milvus.io/my-release created​\n\n","NAME READY STATUS RESTARTS AGE​\nmy-release-etcd-0 1/1 Running 0 65m​\nmy-release-milvus-datanode-57fd59ff58-5mdrk 1/1 Running 0 93s​\nmy-release-milvus-indexnode-67867c6b9b-4wsbw 1/1 Running 0 93s​\nmy-release-milvus-mixcoord-797849f9bb-sf8z5 1/1 Running 0 93s​\nmy-release-milvus-proxy-5d5bf98445-c55m6 1/1 Running 0 93s​\nmy-release-milvus-querynode-0-64797f5c9-lw4rh 1/1 Running 0 92s​\nmy-release-minio-79476ccb49-zvt2h 1/1 Running 0 65m​\nmy-release-pulsar-bookie-0 1/1 Running 0 5m10s​\nmy-release-pulsar-bookie-1 1/1 Running 0 5m10s​\nmy-release-pulsar-bookie-2 1/1 Running 0 5m10s​\nmy-release-pulsar-bookie-init-v8fdj 0/1 Completed 0 5m11s​\nmy-release-pulsar-broker-0 1/1 Running 0 5m11s​\nmy-release-pulsar-broker-1 1/1 Running 0 5m10s​\nmy-release-pulsar-proxy-0 1/1 Running 0 5m11s​\nmy-release-pulsar-proxy-1 1/1 Running 0 5m10s​\nmy-release-pulsar-pulsar-init-5lhx7 0/1 Completed 0 5m11s​\nmy-release-pulsar-recovery-0 1/1 Running 0 5m11s​\nmy-release-pulsar-zookeeper-0 1/1 Running 0 5m11s​\nmy-release-pulsar-zookeeper-1 1/1 Running 0 5m10s​\nmy-release-pulsar-zookeeper-2 1/1 Running 0 5m10s​\n\n"],"headingContent":"Upgrading Pulsar 
​","anchorList":[{"label":"Pulsarのアップグレード","href":"Upgrading-Pulsar-​","type":1,"isActive":false},{"label":"ロードマップ","href":"Roadmap","type":2,"isActive":false},{"label":"手順","href":"Procedures","type":2,"isActive":false}]} \ No newline at end of file diff --git a/localization/v2.5.x/site/ja/adminGuide/upgrade-pulsar-v3.md b/localization/v2.5.x/site/ja/adminGuide/upgrade-pulsar-v3.md index 108ebf219..8edcb5963 100644 --- a/localization/v2.5.x/site/ja/adminGuide/upgrade-pulsar-v3.md +++ b/localization/v2.5.x/site/ja/adminGuide/upgrade-pulsar-v3.md @@ -25,10 +25,10 @@ title: MilvusのパルサーをV2からV3にアップグレード
  • アップグレード作業には短時間のサービス停止が必要です(データ量にもよりますが、通常数分から10分以上かかります)。

  • 作業の前に、全ての稼働中のクライアントからmilvusへのデータ書き込みを停止する必要があります。さもないと、書き込まれたデータが失われる可能性があります。

  • 本記事では、Milvusが名前空間default 、名前my-release にインストールされていることを前提としています。このページからコピーしたコマンドを実行する際には、パラメータをご自身の名前空間とリリース名に変更してください。

  • -
  • 作業環境がKubernetesクラスタ内の上記の名前空間配下にパーミッションがあり、以下のコマンドがインストールされていることを確認してください。

    +
  • 作業環境がKubernetesクラスタの上記の名前空間配下にパーミッションがあり、以下のコマンドがインストールされていることを確認してください。

    a.kubectl >= 1.20

    b.helm >= 3.14.0

    -

c. 文字列操作のためのcat 、grep 、awk

    +

c. 文字列操作のためのcat 、grep 、awk

    d. milvus管理APIと対話するためのcurl またはAttu v2.4+

  • @@ -69,7 +69,7 @@ title: MilvusのパルサーをV2からV3にアップグレード >

    このセクションでは、MilvusでPulsarをV2からV3にアップグレードする詳細手順を説明します。

    -

    Pulsarで消費されなかったデータの永続化

    この手順では、Pulsar内の既存データがオブジェクト・ストレージ・サービスに永続化されていることを確認する必要があります。 2つのアプローチがあり、ニーズに合わせて選択することができます。

    +

    Pulsarで消費されなかったデータの永続化

    この手順では、Pulsar内の既存データがオブジェクト・ストレージ・サービスに永続化されていることを確認する必要があります。 2つのアプローチがあり、ニーズに合わせてお選びいただけます。

    アプローチ1:Attuを使う

    Milvusデプロイメントのコレクション数が少なく、セグメント数もそれほど多くない場合は、Attuを使用してデータをオブジェクト・ストレージ・サービスに永続化することができます。

    1. 全てのデータベースで全てのコレクションを選択し、Segments パネルに入り、Flush ボタンをクリックします。

      @@ -111,11 +111,11 @@ Forwarding from 127.

    出力。

    {​
    -"segmentIDs": [​
    +  "segmentIDs": [​
         454097953998181000,​
         454097953999383600,​
         454097953998180800​
    -]​
    +  ]​
     }​
     
     
    @@ -221,15 +221,15 @@ head milvus.yaml -n 20
    apiVersion: milvus.io/v1beta1​
     kind: Milvus​
     metadata:​
    -annotations:​
    +  annotations:​
         milvus.io/dependency-values-merged: "true"​
         milvus.io/pod-service-label-added: "true"​
         milvus.io/querynode-current-group-id: "0"​
    -creationTimestamp: "2024-11-22T08:06:59Z"​
    -finalizers:​
    -- milvus.milvus.io/finalizer​
    -generation: 3​
    -labels:​
    +  creationTimestamp: "2024-11-22T08:06:59Z"​
    +  finalizers:​
    +  - milvus.milvus.io/finalizer​
    +  generation: 3​
    +  labels:​
         app: milvus​
         milvus.io/operator-version: 1.1.2​
     name: my-release​
    @@ -237,23 +237,23 @@ namespace: default​
     resourceVersion: "692217324"​
     uid: 7a469ed0-9df1-494e-bd9a-340fac4305b5​
     spec:​
    -components:​
    +  components:​
     
     
  • 以下の内容を含むpatch.yaml Fileを作成します。

    # a patch to retain etcd & storage data and delete pulsar data while delete milvus​
     spec:​
    -dependencies:​
    +  dependencies:​
         etcd:​
    -    inCluster:​
    +      inCluster:​
             deletionPolicy: Retain​
             pvcDeletion: false​
         storage:​
    -    inCluster:​
    +      inCluster:​
             deletionPolicy: Retain​
             pvcDeletion: false​
         pulsar:​
    -    inCluster:​
    +      inCluster:​
             deletionPolicy: Delete​
             pvcDeletion: true​
     
    @@ -300,12 +300,12 @@ milvus.milvus.io "my-release" deleted
     
  • 前のステップで保存したvalues.yaml

    # change the following:​
     pulsar:​
    -enabled: false # set to false​
    -# you may also clean up rest fields under pulsar field​
    -# it's ok to keep them though.​
    +  enabled: false # set to false​
    +  # you may also clean up rest fields under pulsar field​
    +  # it's ok to keep them though.​
     pulsarv3:​
    -enabled: true​
    -# append other values for pulsar v3 chart if needs​
    +  enabled: true​
    +  # append other values for pulsar v3 chart if needs​
     
     
  • ローカルのhelmレポを更新する

    @@ -366,13 +366,13 @@ my-release-pulsarv3-zookeeper-2 < apiVersion: milvus.io/v1beta1​ kind: Milvus​ metadata:​ -annotations: null # this field should be removed or set to null​ -resourceVersion: null # this field should be removed or set to null​ -uid: null # this field should be removed or set to null​ + annotations: null # this field should be removed or set to null​ + resourceVersion: null # this field should be removed or set to null​ + uid: null # this field should be removed or set to null​ spec:​ -dependencies:​ + dependencies:​ pulsar:​ - inCluster:​ + inCluster:​ chartVersion: pulsar-v3​ # delete all previous values for pulsar v2 and set it to null.​ # you may add additional values here for pulsar v3 if you're sure about it.​ diff --git a/localization/v2.5.x/site/ja/getstarted/run-milvus-k8s/install_cluster-helm.json b/localization/v2.5.x/site/ja/getstarted/run-milvus-k8s/install_cluster-helm.json index 6012d8dd7..d06f4ff73 100644 --- a/localization/v2.5.x/site/ja/getstarted/run-milvus-k8s/install_cluster-helm.json +++ b/localization/v2.5.x/site/ja/getstarted/run-milvus-k8s/install_cluster-helm.json @@ -1 +1 @@ -{"codeList":["$ kubectl get sc\n\nNAME PROVISIONER RECLAIMPOLICY VOLUMEBIINDINGMODE ALLOWVOLUMEEXPANSION AGE\nstandard (default) k8s.io/minikube-hostpath Delete Immediate false \n","$ helm repo add milvus https://github.com/zilliztech/milvus-helm\n","helm repo add zilliztech https://github.com/zilliztech/milvus-helm\nhelm repo update\n# upgrade existing helm release\nhelm upgrade my-release zilliztech/milvus\n","$ helm repo update\n","$ helm install my-release milvus/milvus\n","$ kubectl get pods\n","NAME READY STATUS RESTARTS AGE\nmy-release-etcd-0 1/1 Running 0 3m23s\nmy-release-etcd-1 1/1 Running 0 3m23s\nmy-release-etcd-2 1/1 Running 0 3m23s\nmy-release-milvus-datanode-68cb87dcbd-4khpm 1/1 Running 0 3m23s\nmy-release-milvus-indexnode-5c5f7b5bd9-l8hjg 1/1 Running 0 3m24s\nmy-release-milvus-mixcoord-7fb9488465-dmbbj 1/1 Running 0 3m23s\nmy-release-milvus-proxy-6bd7f5587-ds2xv 1/1 Running 0 3m24s\nmy-release-milvus-querynode-5cd8fff495-k6gtg 1/1 Running 0 3m24s\nmy-release-minio-0 1/1 Running 0 3m23s\nmy-release-minio-1 1/1 Running 0 3m23s\nmy-release-minio-2 1/1 Running 0 3m23s\nmy-release-minio-3 1/1 Running 0 3m23s\nmy-release-pulsar-autorecovery-86f5dbdf77-lchpc 1/1 Running 0 3m24s\nmy-release-pulsar-bookkeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-bookkeeper-1 1/1 Running 0 98s\nmy-release-pulsar-broker-556ff89d4c-2m29m 1/1 Running 0 3m23s\nmy-release-pulsar-proxy-6fbd75db75-nhg4v 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-metadata-98zbr 0/1 Completed 0 3m24s\n","$ kubectl get pod my-release-milvus-proxy-6bd7f5587-ds2xv --template\n='{{(index (index .spec.containers 0).ports 0).containerPort}}{{\"\\n\"}}'\n19530\n","$ kubectl port-forward service/my-release-milvus 27017:19530\nForwarding from 127.0.0.1:27017 -> 19530\n","$ kubectl port-forward --address 0.0.0.0 service/my-release-milvus 27017:19530\nForwarding from 0.0.0.0:27017 -> 19530\n","$ helm template my-release milvus/milvus > milvus_manifest.yaml\n","$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/requirements.txt\n$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/save_image.py\n","$ pip3 install -r requirements.txt\n$ python3 save_image.py --manifest milvus_manifest.yaml\n","$ for image in $(find . 
-type f -name \"*.tar.gz\") ; do gunzip -c $image | docker load; done\n","$ kubectl apply -f milvus_manifest.yaml\n","$ helm repo update\n$ helm upgrade my-release zilliztech/milvus\n","$ helm uninstall my-release\n"],"headingContent":"Run Milvus in Kubernetes with Helm","anchorList":[{"label":"KubernetesでHelmを使ってMilvusを起動する","href":"Run-Milvus-in-Kubernetes-with-Helm","type":1,"isActive":false},{"label":"概要","href":"Overview","type":2,"isActive":false},{"label":"前提条件","href":"Prerequisites","type":2,"isActive":false},{"label":"Milvus Helm Chartのインストール","href":"Install-Milvus-Helm-Chart","type":2,"isActive":false},{"label":"オンラインインストール","href":"Online-install","type":2,"isActive":false},{"label":"オフラインインストール","href":"Offline-install","type":2,"isActive":false},{"label":"稼働中のMilvusクラスタのアップグレード","href":"Upgrade-running-Milvus-cluster","type":2,"isActive":false},{"label":"Milvusのアンインストール","href":"Uninstall-Milvus","type":2,"isActive":false},{"label":"次の作業","href":"Whats-next","type":2,"isActive":false}]} \ No newline at end of file +{"codeList":["$ kubectl get sc\n\nNAME PROVISIONER RECLAIMPOLICY VOLUMEBIINDINGMODE ALLOWVOLUMEEXPANSION AGE\nstandard (default) k8s.io/minikube-hostpath Delete Immediate false \n","$ helm repo add milvus https://zilliztech.github.io/milvus-helm/\n","helm repo add zilliztech https://zilliztech.github.io/milvus-helm/\nhelm repo update\n# upgrade existing helm release\nhelm upgrade my-release zilliztech/milvus\n","$ helm repo update\n","$ helm install my-release milvus/milvus\n","$ kubectl get pods\n","NAME READY STATUS RESTARTS AGE\nmy-release-etcd-0 1/1 Running 0 3m23s\nmy-release-etcd-1 1/1 Running 0 3m23s\nmy-release-etcd-2 1/1 Running 0 3m23s\nmy-release-milvus-datanode-68cb87dcbd-4khpm 1/1 Running 0 3m23s\nmy-release-milvus-indexnode-5c5f7b5bd9-l8hjg 1/1 Running 0 3m24s\nmy-release-milvus-mixcoord-7fb9488465-dmbbj 1/1 Running 0 3m23s\nmy-release-milvus-proxy-6bd7f5587-ds2xv 1/1 Running 0 3m24s\nmy-release-milvus-querynode-5cd8fff495-k6gtg 1/1 Running 0 3m24s\nmy-release-minio-0 1/1 Running 0 3m23s\nmy-release-minio-1 1/1 Running 0 3m23s\nmy-release-minio-2 1/1 Running 0 3m23s\nmy-release-minio-3 1/1 Running 0 3m23s\nmy-release-pulsar-autorecovery-86f5dbdf77-lchpc 1/1 Running 0 3m24s\nmy-release-pulsar-bookkeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-bookkeeper-1 1/1 Running 0 98s\nmy-release-pulsar-broker-556ff89d4c-2m29m 1/1 Running 0 3m23s\nmy-release-pulsar-proxy-6fbd75db75-nhg4v 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-metadata-98zbr 0/1 Completed 0 3m24s\n","$ kubectl get pod my-release-milvus-proxy-6bd7f5587-ds2xv --template\n='{{(index (index .spec.containers 0).ports 0).containerPort}}{{\"\\n\"}}'\n19530\n","$ kubectl port-forward service/my-release-milvus 27017:19530\nForwarding from 127.0.0.1:27017 -> 19530\n","$ kubectl port-forward --address 0.0.0.0 service/my-release-milvus 27017:19530\nForwarding from 0.0.0.0:27017 -> 19530\n","$ helm template my-release milvus/milvus > milvus_manifest.yaml\n","$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/requirements.txt\n$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/save_image.py\n","$ pip3 install -r requirements.txt\n$ python3 save_image.py --manifest milvus_manifest.yaml\n","$ for image in $(find . 
-type f -name \"*.tar.gz\") ; do gunzip -c $image | docker load; done\n","$ kubectl apply -f milvus_manifest.yaml\n","$ helm repo update\n$ helm upgrade my-release zilliztech/milvus\n","$ helm uninstall my-release\n"],"headingContent":"Run Milvus in Kubernetes with Helm","anchorList":[{"label":"KubernetesでHelmを使ってMilvusを起動する","href":"Run-Milvus-in-Kubernetes-with-Helm","type":1,"isActive":false},{"label":"概要","href":"Overview","type":2,"isActive":false},{"label":"前提条件","href":"Prerequisites","type":2,"isActive":false},{"label":"Milvus Helm Chartのインストール","href":"Install-Milvus-Helm-Chart","type":2,"isActive":false},{"label":"オンラインインストール","href":"Online-install","type":2,"isActive":false},{"label":"オフラインインストール","href":"Offline-install","type":2,"isActive":false},{"label":"稼働中のMilvusクラスタのアップグレード","href":"Upgrade-running-Milvus-cluster","type":2,"isActive":false},{"label":"Milvusのアンインストール","href":"Uninstall-Milvus","type":2,"isActive":false},{"label":"次の作業","href":"Whats-next","type":2,"isActive":false}]} \ No newline at end of file diff --git a/localization/v2.5.x/site/ja/getstarted/run-milvus-k8s/install_cluster-helm.md b/localization/v2.5.x/site/ja/getstarted/run-milvus-k8s/install_cluster-helm.md index 4dcd5dc60..cf39c1d85 100644 --- a/localization/v2.5.x/site/ja/getstarted/run-milvus-k8s/install_cluster-helm.md +++ b/localization/v2.5.x/site/ja/getstarted/run-milvus-k8s/install_cluster-helm.md @@ -83,11 +83,11 @@ NAME PROVISIONER RECLAIMPOLICY VOLUMEBIINDI >

    Milvus Helm Chartsをインストールする前に、Milvus Helmリポジトリを追加する必要があります。

    -
    $ helm repo add milvus https://github.com/zilliztech/milvus-helm
    +
    $ helm repo add milvus https://zilliztech.github.io/milvus-helm/
     

    https://github.com/milvus-io/milvus-helm にある Milvus Helm Charts リポジトリはアーカイブされており、https://github.com/zilliztech/milvus-helm から以下のようにアップデートを入手することができます:

    -
    helm repo add zilliztech https://github.com/zilliztech/milvus-helm
    +
    helm repo add zilliztech https://zilliztech.github.io/milvus-helm/
     helm repo update
     # upgrade existing helm release
     helm upgrade my-release zilliztech/milvus
    diff --git a/localization/v2.5.x/site/ja/home/home.md b/localization/v2.5.x/site/ja/home/home.md
    index 460b40f58..bd512c16e 100644
    --- a/localization/v2.5.x/site/ja/home/home.md
    +++ b/localization/v2.5.x/site/ja/home/home.md
    @@ -80,7 +80,7 @@ Milvusドキュメントへようこそ!

  • Milvusの設定
  • 依存関係の管理
  • クラウドへのデプロイ
  • -
  • Milvusクラスターのスケール
  • +
  • Milvusクラスタのスケール
  • 監視とアラート
  • @@ -114,7 +114,7 @@ Milvusドキュメントへようこそ!

    2024年11月 - Milvus 2.5.0リリース

    • 全文検索のガイダンスを追加しました。
    • -
    • キーワード検索の方法に関するガイダンスを追加しました。
    • +
    • テキストマッチの方法に関するガイダンスを追加しました。
    • NULL値およびデフォルト値の有効化方法に関するガイダンスを追加しました。
    • アナライザーの説明を追加。
    • ビットマップインデックスの説明を追加。
    • diff --git a/localization/v2.5.x/site/ja/menuStructure/ja.json b/localization/v2.5.x/site/ja/menuStructure/ja.json index a2cf2c68c..6dd593591 100644 --- a/localization/v2.5.x/site/ja/menuStructure/ja.json +++ b/localization/v2.5.x/site/ja/menuStructure/ja.json @@ -286,6 +286,18 @@ "order": 8, "children": [] }, + { + "label": "メートル法", + "id": "metric.md", + "order": 9, + "children": [] + }, + { + "label": "一貫性レベル", + "id": "consistency.md", + "order": 10, + "children": [] + }, { "label": "インメモリーレプリカ", "id": "replica.md", @@ -602,31 +614,39 @@ ] }, { - "label": "インデックスの管理", + "label": "インデックス", "id": "manage_indexes", "order": 4, "isMenu": true, "children": [ { - "label": "インデックス・ベクトル・フィールド", + "label": "ベクトル・インデックス", "id": "index-vector-fields.md", "order": 0, "children": [] }, { - "label": "インデックス・スカラー・フィールド", - "id": "index-scalar-fields.md", + "label": "スカラー・インデックス", + "id": "scalar-index", "order": 1, - "children": [] - }, - { - "label": "BITMAPインデックス", - "id": "bitmap.md", - "order": 2, - "children": [] + "isMenu": true, + "children": [ + { + "label": "インデックス・スカラー・フィールド", + "id": "index-scalar-fields.md", + "order": 1, + "children": [] + }, + { + "label": "ビットマップインデックス", + "id": "bitmap.md", + "order": 2, + "children": [] + } + ] }, { - "label": "GPU付きインデックス", + "label": "GPU対応インデックス", "id": "index-with-gpu.md", "order": 3, "children": [] @@ -682,7 +702,7 @@ "children": [] }, { - "label": "キーワード・マッチ", + "label": "テキストマッチ", "id": "keyword-match.md", "order": 7, "children": [] @@ -699,30 +719,6 @@ "order": 9, "children": [] }, - { - "label": "mmapを使用する", - "id": "mmap.md", - "order": 10, - "children": [] - }, - { - "label": "クラスタリング・コンパクション", - "id": "clustering-compaction.md", - "order": 11, - "children": [] - }, - { - "label": "一貫性レベル", - "id": "consistency.md", - "order": 12, - "children": [] - }, - { - "label": "メートル法", - "id": "metric.md", - "order": 13, - "children": [] - }, { "label": "メタデータのフィルタリング", "id": "boolean.md", @@ -736,26 +732,6 @@ "children": [] } ] - }, - { - "label": "データインポート", - "id": "data_import", - "order": 6, - "isMenu": true, - "children": [ - { - "label": "ソースデータの準備", - "id": "prepare-source-data.md", - "order": 0, - "children": [] - }, - { - "label": "インポートデータ", - "id": "import-data.md", - "order": 1, - "children": [] - } - ] } ] }, @@ -826,7 +802,7 @@ "children": [] }, { - "label": "講師", + "label": "インストラクター", "id": "embed-with-instructor.md", "order": 9, "children": [] @@ -897,11 +873,31 @@ } ] }, + { + "label": "データインポート", + "id": "data_import", + "order": 5, + "isMenu": true, + "children": [ + { + "label": "ソースデータの準備", + "id": "prepare-source-data.md", + "order": 0, + "children": [] + }, + { + "label": "インポートデータ", + "id": "import-data.md", + "order": 1, + "children": [] + } + ] + }, { "label": "Milvus移住", "id": "milvus_migration", "isMenu": true, - "order": 5, + "order": 6, "children": [ { "label": "概要", @@ -1321,10 +1317,30 @@ } ] }, + { + "label": "ストレージの最適化", + "id": "storage_optimization", + "order": 10, + "isMenu": true, + "children": [ + { + "label": "mmapを使用する", + "id": "mmap.md", + "order": 0, + "children": [] + }, + { + "label": "クラスタリング・コンパクション", + "id": "clustering-compaction.md", + "order": 1, + "children": [] + } + ] + }, { "label": "セキュリティ", "id": "security", - "order": 10, + "order": 11, "isMenu": true, "children": [ { diff --git a/localization/v2.5.x/site/ja/release_notes.md b/localization/v2.5.x/site/ja/release_notes.md index e236da589..1a092a217 100644 --- a/localization/v2.5.x/site/ja/release_notes.md +++ 
b/localization/v2.5.x/site/ja/release_notes.md @@ -43,14 +43,14 @@ title: リリースノート
  • 2.5.0-beta
    -

    Milvus 2.5.0-betaは、ベクトル検索や大規模なデータ管理を扱うユーザーにとって、ユーザビリティ、スケーラビリティ、パフォーマンスを向上させるための大きな進歩をもたらします。本リリースにより、Milvusはタームベース検索、最適化されたクエリのためのクラスタリングコンパクション、スパースおよびデンスベクトル検索メソッドの多目的なサポートといった強力な新機能を統合しました。クラスタ管理、インデックス作成、データ処理の強化により、Milvusは新たなレベルの柔軟性と使いやすさを導入し、より堅牢で使いやすいベクトルデータベースとなりました。

    +

    Milvus 2.5.0-betaは、ベクトル検索や大規模なデータ管理を扱うユーザーのために、ユーザビリティ、スケーラビリティ、パフォーマンスを向上させる重要な進歩をもたらします。本リリースにより、Milvusはタームベース検索、最適化されたクエリのためのクラスタリングコンパクション、スパースおよびデンスベクトル検索メソッドの多様なサポートといった強力な新機能を統合しました。クラスタ管理、インデックス作成、データ処理の強化により、Milvusは新たなレベルの柔軟性と使いやすさを導入し、より堅牢で使いやすいベクトルデータベースとなりました。

    主な機能

    Milvus2.5はSparse-BM25で実装された全文検索に対応しています!この機能は、Milvusの強力なセマンティック検索機能を補完する重要な機能であり、特に希少語や専門用語が含まれるシナリオで威力を発揮します。以前のバージョンでは、Milvusはキーワード検索シナリオを支援するためにスパースベクトルをサポートしていました。これらのスパースベクトルはSPLADEv2/BGE-M3のようなニューラルモデルやBM25アルゴリズムのような統計モデルによってMilvusの外部で生成されていました。

    Milvus 2.5ではトークン化とスパースベクトル抽出が組み込まれ、APIは入力としてベクトルを受け取るだけでなく、テキストを直接受け取れるように拡張されました。BM25統計情報は、データが挿入されるとリアルタイムで更新され、ユーザビリティと精度が向上します。さらに、近似最近傍(ANN)アルゴリズムに基づくスパースベクトルは、標準的なキーワード検索システムよりも強力なパフォーマンスを提供します。

    詳細は全文検索を参照。

    クラスタ管理WebUI(ベータ版)

    膨大なデータと豊富な機能をより良くサポートするために、Milvusの洗練された設計には様々な依存関係、多数のノードの役割、複雑なデータ構造などが含まれています。このような側面は、使用やメンテナンスに困難をもたらす可能性があります。

    Milvus 2.5では、組み込みのクラスタ管理WebUIを導入し、Milvusの複雑な実行環境情報を可視化することで、システムメンテナンスの難易度を下げています。これにはデータベースやコレクション、セグメント、チャネル、依存関係、ノードのヘルスステータス、タスク情報、スロークエリなどの詳細が含まれます。

    テキストマッチ

    Milvus2.5は、Tantivyのアナライザとインデックスを活用してテキストの前処理とインデックスを構築し、特定の用語に基づいたテキストデータの正確な自然言語マッチングをサポートします。この機能は、主に特定の条件を満たすフィルタリング検索に使用され、スカラーフィルタリングを組み込んでクエリ結果を絞り込むことができるため、スカラー条件を満たすベクトル内の類似検索が可能です。

    -

    詳細については、キーワードマッチを参照してください。

    +

    詳細はテキストマッチを参照。
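    参考までに、pymilvusでテキストマッチを有効にしたVARCHAR フィールドを定義する場合の簡単な例を示します(フィールド名description は説明用の仮の名前です)。

    from pymilvus import MilvusClient, DataType

    schema = MilvusClient.create_schema()
    schema.add_field(field_name="id", datatype=DataType.INT64, is_primary=True, auto_id=True)
    schema.add_field(
        field_name="description",   # hypothetical field name used for illustration
        datatype=DataType.VARCHAR,
        max_length=1000,
        enable_analyzer=True,        # tokenize the raw text with an analyzer
        enable_match=True,           # build the inverted index used by TEXT_MATCH
    )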

    ビットマップインデックス

    Milvusファミリーに新しいスカラーデータインデックスが追加されました。BitMap インデックスは行数と同じ長さのビットの配列を使用して値の存在を表し、検索を高速化します。

    ビットマップインデックスは従来、値の種類が少ない、つまりカーディナリティの低いフィールドに有効とされてきました。たとえば、性別を表すカラムのように、取り得る値が男性と女性の2つしかないケースです。

    詳細はビットマップインデックスを参照してください。
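    参考までに、pymilvusで低カーディナリティのスカラーフィールドにBITMAP インデックスを作成する場合の一例です(コレクション名・フィールド名は説明用の仮の名前です)。

    from pymilvus import MilvusClient

    client = MilvusClient(uri="http://localhost:19530")

    index_params = client.prepare_index_params()
    index_params.add_index(
        field_name="category",              # low-cardinality scalar field (hypothetical name)
        index_type="BITMAP",
        index_name="category_bitmap_index",
    )
    client.create_index(
        collection_name="my_collection",    # hypothetical existing collection
        index_params=index_params,
    )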

    @@ -60,7 +60,7 @@ title: リリースノート

    FaissベースのHNSW SQ/PQ/PRQ

    Faissコミュニティとの緊密な連携により、FaissのHNSWアルゴリズムは、機能と性能の両面で大幅に改善されました。安定性と保守性を考慮し、Milvus 2.5はHNSWのサポートをhnswlibからFaissに正式に移行しました。

    Faissに基づき、Milvus 2.5はHNSWの複数の量子化方式をサポートし、様々なシナリオのニーズに応えます:SQ (Scalar Quantizers)、PQ (Product Quantizer)、PRQ (Product Residual Quantizer)です。SQとPQはより一般的で、SQは優れたクエリ性能と構築速度を提供し、PQは同じ圧縮率でより優れたリコールを提供します。多くのベクトルデータベースでは、SQ量子化の単純な形式であるバイナリ量子化が一般的に使用されています。

    PRQはPQとAQ(Additive Quantizer)を組み合わせた方式です。PQと比較すると、特にバイナリ圧縮のような高い圧縮率において、より優れたリコールを実現できますが、その分、構築時間が長くなります。
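    以下は、量子化付きHNSWインデックスを設定する場合のイメージです。インデックスタイプ名HNSW_SQ やパラメータsq_type はここでの想定に基づくもので、正確な名称とキーは最新のインデックスリファレンスでご確認ください。

    index_params = client.prepare_index_params()
    index_params.add_index(
        field_name="vector",            # dense vector field (hypothetical name)
        index_type="HNSW_SQ",           # assumed name of the scalar-quantized HNSW index
        metric_type="L2",
        params={
            "M": 30,                    # graph connectivity, as in plain HNSW
            "efConstruction": 360,      # build-time search width
            "sq_type": "SQ8",           # assumed quantizer setting
        },
    )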

    -

    クラスタリング圧縮(ベータ)

    Milvus2.5では、大規模なコレクションの検索を高速化し、コストを削減するために、クラスタリングコンパクションが導入された。クラスタリングキーとしてスカラーフィールドを指定することで、データを範囲ごとに再分散し、保存と検索を最適化します。グローバルインデックスのように動作するこの機能により、Milvusはクラスタリングメタデータに基づいたクエリ時に効率的にデータを刈り込み、スカラーフィルタが適用された際の検索パフォーマンスを向上させることができます。

    +

    クラスタリング圧縮(ベータ)

    Milvus2.5では、大規模なコレクションの検索を高速化し、コストを削減するために、クラスタリングコンパクションが導入されました。クラスタリングキーとしてスカラーフィールドを指定することで、データを範囲ごとに再分散し、保存と検索を最適化します。グローバルインデックスのように機能するこの機能により、Milvusはクエリ時にクラスタリングメタデータに基づいて効率的にデータを刈り込み、スカラーフィルタが適用された際の検索パフォーマンスを向上させることができます。

    詳細はクラスタリング・コンパクションをご参照ください。
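    クラスタリングキーの指定は、たとえばpymilvusでは次のようなイメージになります(is_clustering_key パラメータの利用とフィールド名は想定に基づく例で、詳細はクラスタリング・コンパクションのドキュメントを参照してください)。

    from pymilvus import MilvusClient, DataType

    schema = MilvusClient.create_schema()
    schema.add_field(field_name="id", datatype=DataType.INT64, is_primary=True)
    schema.add_field(
        field_name="key",
        datatype=DataType.INT64,
        is_clustering_key=True,   # scalar field used to redistribute data by range
    )
    schema.add_field(field_name="vector", datatype=DataType.FLOAT_VECTOR, dim=5)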

    その他の機能

    ストリーミングノード(ベータ版)

    Milvus 2.5では、Write-Ahead Logging (WAL)サービスを提供するストリーミングノードという新しいコンポーネントが導入されました。これにより、Milvusはチャネルの読み書きの前後でコンセンサスを得ることができるようになり、新たな機能、特徴、最適化を実現します。この機能はMilvus 2.5ではデフォルトで無効になっており、バージョン3.0で正式に利用可能になる予定です。

    IPv6サポート

    MilvusはIPv6をサポートし、ネットワーク接続と互換性の拡張を可能にしました。

    diff --git a/localization/v2.5.x/site/ja/tutorials/hybrid_search_with_milvus.md b/localization/v2.5.x/site/ja/tutorials/hybrid_search_with_milvus.md index b4715a6f3..8555d2d99 100644 --- a/localization/v2.5.x/site/ja/tutorials/hybrid_search_with_milvus.md +++ b/localization/v2.5.x/site/ja/tutorials/hybrid_search_with_milvus.md @@ -25,7 +25,7 @@ title: Milvusを使ったハイブリッド検索

    Milvusは密検索、疎検索、ハイブリッド検索をサポートしています:

    • 密検索:クエリの背後にある意味を理解するためにセマンティックコンテキストを利用します。
    • -
    • スパース検索:キーワードのマッチングを重視し、全文検索に相当する特定の用語に基づいた検索結果を得る。
    • +
    • スパース検索:全文検索に相当する、特定の用語に基づいた結果を見つけるためのテキストマッチングに重点を置く。
    • ハイブリッド検索:DenseとSparseの両アプローチを組み合わせ、包括的な検索結果のために完全な文脈と特定のキーワードを捕捉する。

    Milvusハイブリッド検索は、これらの手法を統合することで、意味的な類似性と語彙的な類似性のバランスをとり、検索結果の全体的な関連性を向上させます。このノートブックでは、これらの検索ストラテジーのセットアップと使用方法を説明し、様々な検索シナリオにおける有効性を強調します。
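    参考までに、pymilvusで密・疎の両ベクトルフィールドを持つコレクションに対してハイブリッド検索を行う場合の簡単なスケッチを示します(コレクション名・フィールド名・クエリ値はすべて説明用の仮の値です)。

    from pymilvus import MilvusClient, AnnSearchRequest, RRFRanker

    client = MilvusClient(uri="http://localhost:19530")

    dense_req = AnnSearchRequest(
        data=[[0.1, 0.2, 0.3, 0.4, 0.5]],        # dense query vector (dummy values)
        anns_field="dense_vector",
        param={"nprobe": 10},
        limit=10,
    )
    sparse_req = AnnSearchRequest(
        data=[{1: 0.5, 100: 0.3}],               # sparse query vector (dummy values)
        anns_field="sparse_vector",
        param={"drop_ratio_search": 0.2},
        limit=10,
    )

    res = client.hybrid_search(
        collection_name="hybrid_demo",           # hypothetical collection with both vector fields
        reqs=[dense_req, sparse_req],
        ranker=RRFRanker(60),                    # reciprocal rank fusion of the two result lists
        limit=5,
    )
    print(res)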

    diff --git a/localization/v2.5.x/site/ja/userGuide/collections/manage-collections.md b/localization/v2.5.x/site/ja/userGuide/collections/manage-collections.md index 3fd1e7f21..068976557 100644 --- a/localization/v2.5.x/site/ja/userGuide/collections/manage-collections.md +++ b/localization/v2.5.x/site/ja/userGuide/collections/manage-collections.md @@ -161,9 +161,9 @@ title: コレクションの説明
  • 検索反復子

  • クエリー

  • 全文検索

  • -
  • キーワードマッチ

  • +
  • テキストマッチ

  • -

    また、Milvusでは検索のパフォーマンスと効率を向上させるための拡張機能も提供しております。これらはデフォルトでは無効になっており、お客様のサービス要件に応じて有効にしたり、使用したりすることができます。それらは以下の通りです。

    +

    さらに、Milvusは検索パフォーマンスと効率を向上させるための拡張機能も提供しています。これらはデフォルトでは無効になっており、お客様のサービス要件に応じて有効にしたり、使用したりすることができます。それらは以下の通りです。

    • パーティションキーの使用

    • mmapの使用

    • diff --git a/localization/v2.5.x/site/ja/userGuide/manage-indexes/index-with-gpu.md b/localization/v2.5.x/site/ja/userGuide/manage-indexes/index-with-gpu.md index 20cf8d2af..87b0a0bb8 100644 --- a/localization/v2.5.x/site/ja/userGuide/manage-indexes/index-with-gpu.md +++ b/localization/v2.5.x/site/ja/userGuide/manage-indexes/index-with-gpu.md @@ -67,7 +67,7 @@ title: GPUによるインデックス

      インデックスパラメータの準備

      GPU インデックスパラメータを設定する際に、index_typemetric_typeparams を定義します:

      • index_type(文字列):ベクトル探索を加速するために使用するインデックスのタイプ。有効なオプションはGPU_CAGRAGPU_IVF_FLATGPU_IVF_PQGPU_BRUTE_FORCEです。

      • -
      • metric_type(文字列):ベクトルの類似度を測定するために使用されるメトリクスのタイプ。有効なオプションはIPL2 です。

      • +
      • metric_type(文字列):ベクトルの類似度を測定するために使用するメトリクスのタイプ。有効なオプションはIPL2 です。

      • params(dict):インデックス固有の構築パラメータ。このパラメータに有効なオプションは、インデックスの種類に依存します。

      以下は、異なるインデックス・タイプの構成例です:
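      たとえばGPU_CAGRA インデックスの場合、次のような設定が考えられます(パラメータ値はあくまで一例です)。

      index_params = {
          'index_type': 'GPU_CAGRA',
          'metric_type': 'L2',
          'params': {
              'intermediate_graph_degree': 64,  # graph degree before pruning
              'graph_degree': 32                # final graph degree after pruning
          }
      }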

      @@ -102,7 +102,7 @@ title: GPUによるインデックス } }
      -

      paramsオプションは、IVF_FLATIVF_PQ で使われているものと同じです。

      +

      paramsオプションはIVF_FLATおよびIVF_PQ で使用されるものと同じです。

    • GPU_BRUTE_FORCEインデックス

      index_params = {
           'index_type': 'GPU_BRUTE_FORCE',
      @@ -201,7 +201,7 @@ collection.search(
             
           

      GPU インデックスを使用する場合、特定の制約に注意してください:

        -
      • GPU_IVF_FLAT の場合、limitの最大値は 256 です。

      • +
      • GPU_IVF_FLAT の場合、limitの最大値は 1024 です。

      • GPU_IVF_PQGPU_CAGRA の場合、limitの最大値は 1024 です。

      • GPU_BRUTE_FORCE にはlimitの上限は設定されていませんが、潜在的なパフォーマンスの問題を避けるために 4096 を超えないことが推奨されます。

      • 現在、GPUインデックスはCOSINE距離をサポートしていません。COSINE 距離が必要な場合は、まずデータを正規化し、それから内積 (IP) 距離で代用することができます。
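      COSINE 距離の代わりに内積(IP)を使う際のデータ正規化は、たとえば次のように行えます(NumPyを使った一例です)。

      import numpy as np

      vectors = np.random.random((3, 128)).astype("float32")   # example raw vectors
      norms = np.linalg.norm(vectors, axis=1, keepdims=True)
      normalized = vectors / norms                              # unit-length vectors

      # With unit-length vectors, ranking by inner product (IP) is equivalent to cosine similarity.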

      • diff --git a/localization/v2.5.x/site/ja/userGuide/schema/analyzer/analyzer-overview.md b/localization/v2.5.x/site/ja/userGuide/schema/analyzer/analyzer-overview.md index 57e7837f5..647eaec54 100644 --- a/localization/v2.5.x/site/ja/userGuide/schema/analyzer/analyzer-overview.md +++ b/localization/v2.5.x/site/ja/userGuide/schema/analyzer/analyzer-overview.md @@ -20,12 +20,12 @@ summary: >- >

        テキスト処理において、アナライザーは生テキストを構造化された検索可能な形式に変換する重要なコンポーネントである。アナライザーは通常、トークナイザーと フィルターという2つのコア要素で構成される。これらは共に入力テキストをトークンに変換し、トークンを洗練させ、効率的なインデックス作成と検索に備えます。

        -

        Milvusでは、アナライザはコレクション作成時にVARCHAR フィールドをコレクションスキーマに追加する際に設定されます。アナライザによって生成されたトークンは、キーワードマッチングのためのインデックスを構築するために使用したり、全文検索のためにスパース埋め込みに変換したりすることができます。詳細については、キーワード・マッチまたは全文検索を参照してください。

        +

        Milvusでは、アナライザはコレクション作成時にVARCHAR フィールドをコレクションスキーマに追加する際に設定されます。アナライザによって生成されたトークンは、テキストマッチングのためのインデックスを構築するために使用したり、全文検索のためにスパース埋め込みに変換したりすることができます。詳細については、テキスト・マッチまたは全文検索を参照してください。

        -

        アナライザの使用は、パフォーマンスに影響を与える場合があります。

        +

        アナライザーの使用はパフォーマンスに影響する場合があります。

        • 全文検索:全文検索の場合、DataNodeとQueryNodeチャネルはトークン化の完了を待つ必要があるため、データの消費が遅くなります。その結果、新しく取り込まれたデータが検索に利用できるようになるまでに時間がかかります。

        • -
        • キーワードマッチ:キーワードマッチの場合、インデックスを構築する前にトークン化が完了する必要があるため、インデックス作成も遅くなります。

        • +
        • テキストマッチ:テキスト照合の場合、インデックスを構築する前にトークン化を完了する必要があるため、インデックス作成も遅くなります。

        アナライザーの構造

        Milvusでは、様々なテキスト処理のニーズに対応するため、2種類のアナライザを提供しています。

          -
        • 内蔵アナライザ:ビルトイン アナライザ: 最小限のセットアップで一般的なテキスト処理タスクをカバーする、定義済みのコンフィギュレーションです。複雑な設定が不要なため、汎用的な検索に最適です。

        • +
        • 内蔵アナライザ:最小限のセットアップで一般的なテキスト処理タスクに対応する定義済みの設定です。複雑な設定が不要なため、汎用的な検索に最適です。

        • カスタムアナライザー:より高度な要件に対応するカスタム・アナライザでは、トークナイザとゼロ個以上のフィルタの両方を指定することで、独自の設定を定義できます。このレベルのカスタマイズは、テキスト処理を正確に制御する必要がある特殊なユースケースで特に役立ちます。

        コレクション作成時にアナライザ設定を省略した場合、Milvusはデフォルトですべてのテキスト処理にstandard アナライザを使用します。詳細については、「標準」を参照してください。

        内蔵アナライザ

        Milvusのビルトインアナライザは、特定のトークナイザやフィルタがあらかじめ設定されており、これらのコンポーネントを自分で定義することなく、すぐに使用することができます。各ビルトインアナライザは、予め設定されたトークナイザーとフィルタを含むテンプレートとして機能し、カスタマイズのためのオプションパラメータが用意されています。

        -

        たとえば、standard 組み込み解析器を使用するには、standard という名前をtype と指定し、オプションでstop_words など、この解析器タイプに固有の追加設定を含めるだけです。

        +

        たとえば、standard 組み込み解析器を使用するには、standard という名前をtype として指定し、オプションでstop_words など、この解析器タイプに固有の追加設定を含めるだけです。

        analyzer_params = {​
             "type": "standard", # Uses the standard built-in analyzer​
             "stop_words": ["a", "an", "for"] # Defines a list of common words (stop words) to exclude from tokenization​
        @@ -101,7 +101,7 @@ summary: >-
         
      • chinese:中国語のテキスト処理に特化し、中国語の言語構造に適応したトークン化を含む。

      カスタムアナライザー

      より高度なテキスト処理のために、Milvusのカスタムアナライザーでは、トークナイザーとフィルターの両方を指定することで、独自のテキスト処理パイプラインを構築することができます。この設定は、精密な制御が必要な特殊なユースケースに最適です。

      -

      トークナイザー

      トークナイザーはカスタムアナライザーに必須のコンポーネントで、入力テキストを個別の単位(トークン)に分解することでアナライザーパイプラインを開始します。トークン化は、トークナイザーのタイプに応じて、空白や句読点による分割など、特定のルールに従います。この処理により、各単語や語句をより正確かつ独立して処理できるようになります。

      +

      トークナイザー

      トークナイザーはカスタムアナライザーに必須のコンポーネントで、入力テキストを個別の単位(トークン)に分解することでアナライザーパイプラインを開始します。トークン化は、トークナイザーのタイプに応じて、空白や句読点による分割など、特定のルールに従います。この処理により、各単語や語句をより正確かつ独立に扱うことができます。

      たとえば、トークナイザーはテキスト"Vector Database Built for Scale" を個別のトークンに変換します。

      ["Vector", "Database", "Built", "for", "Scale"]​
       
      diff --git a/localization/v2.5.x/site/ja/userGuide/schema/sparse_vector.json b/localization/v2.5.x/site/ja/userGuide/schema/sparse_vector.json index e7afd45fb..4e6e3bb01 100644 --- a/localization/v2.5.x/site/ja/userGuide/schema/sparse_vector.json +++ b/localization/v2.5.x/site/ja/userGuide/schema/sparse_vector.json @@ -1 +1 @@ -{"codeList":["from scipy.sparse import csr_matrix​\n​\n# Create a sparse matrix​\nrow = [0, 0, 1, 2, 2, 2]​\ncol = [0, 2, 2, 0, 1, 2]​\ndata = [1, 2, 3, 4, 5, 6]​\nsparse_matrix = csr_matrix((data, (row, col)), shape=(3, 3))​\n​\n# Represent sparse vector using the sparse matrix​\nsparse_vector = sparse_matrix.getrow(0)​\n\n","# Represent sparse vector using a dictionary​\nsparse_vector = [{1: 0.5, 100: 0.3, 500: 0.8, 1024: 0.2, 5000: 0.6}]​\n\n","SortedMap sparseVector = new TreeMap<>();​\nsparseVector.put(1L, 0.5f);​\nsparseVector.put(100L, 0.3f);​\nsparseVector.put(500L, 0.8f);​\nsparseVector.put(1024L, 0.2f);​\nsparseVector.put(5000L, 0.6f);​\n\n","# Represent sparse vector using a list of tuples​\nsparse_vector = [[(1, 0.5), (100, 0.3), (500, 0.8), (1024, 0.2), (5000, 0.6)]]​\n\n","from pymilvus import MilvusClient, DataType​\n​\nclient = MilvusClient(uri=\"http://localhost:19530\")​\n​\nclient.drop_collection(collection_name=\"my_sparse_collection\")​\n​\nschema = client.create_schema(​\n auto_id=True,​\n enable_dynamic_fields=True,​\n)​\n​\nschema.add_field(field_name=\"pk\", datatype=DataType.VARCHAR, is_primary=True, max_length=100)​\nschema.add_field(field_name=\"sparse_vector\", datatype=DataType.SPARSE_FLOAT_VECTOR)​\n\n","import io.milvus.v2.client.ConnectConfig;​\nimport io.milvus.v2.client.MilvusClientV2;​\n​\nimport io.milvus.v2.common.DataType;​\nimport io.milvus.v2.service.collection.request.AddFieldReq;​\nimport io.milvus.v2.service.collection.request.CreateCollectionReq;​\n​\nMilvusClientV2 client = new MilvusClientV2(ConnectConfig.builder()​\n .uri(\"http://localhost:19530\")​\n .build());​\n ​\nCreateCollectionReq.CollectionSchema schema = client.createSchema();​\nschema.setEnableDynamicField(true);​\nschema.addField(AddFieldReq.builder()​\n .fieldName(\"pk\")​\n .dataType(DataType.VarChar)​\n .isPrimaryKey(true)​\n .autoID(true)​\n .maxLength(100)​\n .build());​\n​\nschema.addField(AddFieldReq.builder()​\n .fieldName(\"sparse_vector\")​\n .dataType(DataType.SparseFloatVector)​\n .build());​\n\n","import { DataType } from \"@zilliz/milvus2-sdk-node\";​\n​\nconst schema = [​\n {​\n name: \"metadata\",​\n data_type: DataType.JSON,​\n },​\n {​\n name: \"pk\",​\n data_type: DataType.Int64,​\n is_primary_key: true,​\n },​\n {​\n name: \"sparse_vector\",​\n data_type: DataType.SparseFloatVector,​\n }​\n];​\n​\n\n","export primaryField='{​\n \"fieldName\": \"pk\",​\n \"dataType\": \"VarChar\",​\n \"isPrimary\": true,​\n \"elementTypeParams\": {​\n \"max_length\": 100​\n }​\n}'​\n​\nexport vectorField='{​\n \"fieldName\": \"sparse_vector\",​\n \"dataType\": \"SparseFloatVector\"​\n}'​\n​\nexport schema=\"{​\n \\\"autoID\\\": true,​\n \\\"fields\\\": [​\n $primaryField,​\n $vectorField​\n ]​\n}\"​\n\n","index_params = client.prepare_index_params()​\n​\nindex_params.add_index(​\n field_name=\"sparse_vector\",​\n index_name=\"sparse_inverted_index\",​\n index_type=\"SPARSE_INVERTED_INDEX\",​\n metric_type=\"IP\",​\n params={\"drop_ratio_build\": 0.2},​\n)​\n\n","import io.milvus.v2.common.IndexParam;​\nimport java.util.*;​\n​\nList indexes = new ArrayList<>();​\nMap extraParams = new HashMap<>();​\nextraParams.put(\"drop_ratio_build\", 
0.2);​\nindexes.add(IndexParam.builder()​\n .fieldName(\"sparse_vector\")​\n .indexName(\"sparse_inverted_index\")​\n .indexType(IndexParam.IndexType.SPARSE_INVERTED_INDEX)​\n .metricType(IndexParam.MetricType.IP)​\n .extraParams(extraParams)​\n .build());​\n\n","const indexParams = await client.createIndex({​\n index_name: 'sparse_inverted_index',​\n field_name: 'sparse_vector',​\n metric_type: MetricType.IP,​\n index_type: IndexType.SPARSE_WAND,​\n params: {​\n drop_ratio_build: 0.2,​\n },​\n});​\n\n","export indexParams='[​\n {​\n \"fieldName\": \"sparse_vector\",​\n \"metricType\": \"IP\",​\n \"indexName\": \"sparse_inverted_index\",​\n \"indexType\": \"SPARSE_INVERTED_INDEX\",​\n \"params\":{\"drop_ratio_build\": 0.2}​\n }​\n ]'​\n\n","client.create_collection(​\n collection_name=\"my_sparse_collection\",​\n schema=schema,​\n index_params=index_params​\n)​\n\n","import io.milvus.v2.client.ConnectConfig;​\nimport io.milvus.v2.client.MilvusClientV2;​\n​\nMilvusClientV2 client = new MilvusClientV2(ConnectConfig.builder()​\n .uri(\"http://localhost:19530\")​\n .build());​\n ​\nCreateCollectionReq requestCreate = CreateCollectionReq.builder()​\n .collectionName(\"my_sparse_collection\")​\n .collectionSchema(schema)​\n .indexParams(indexes)​\n .build();​\nclient.createCollection(requestCreate);​\n\n","import { MilvusClient } from \"@zilliz/milvus2-sdk-node\";​\n​\nconst client = new MilvusClient({​\n address: 'http://localhost:19530'​\n});​\n​\nawait client.createCollection({​\n collection_name: 'my_sparse_collection',​\n schema: schema,​\n index_params: indexParams​\n});​\n\n","curl --request POST \\​\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/collections/create\" \\​\n--header \"Authorization: Bearer ${TOKEN}\" \\​\n--header \"Content-Type: application/json\" \\​\n-d \"{​\n \\\"collectionName\\\": \\\"my_sparse_collection\\\",​\n \\\"schema\\\": $schema,​\n \\\"indexParams\\\": $indexParams​\n}\"​\n\n","sparse_vectors = [​\n {\"sparse_vector\": {1: 0.5, 100: 0.3, 500: 0.8}},​\n {\"sparse_vector\": {10: 0.1, 200: 0.7, 1000: 0.9}},​\n]​\n​\nclient.insert(​\n collection_name=\"my_sparse_collection\",​\n data=sparse_vectors​\n)​\n\n","import com.google.gson.Gson;​\nimport com.google.gson.JsonObject;​\nimport io.milvus.v2.service.vector.request.InsertReq;​\nimport io.milvus.v2.service.vector.response.InsertResp;​\n​\nList rows = new ArrayList<>();​\nGson gson = new Gson();​\n{​\n JsonObject row = new JsonObject();​\n SortedMap sparse = new TreeMap<>();​\n sparse.put(1L, 0.5f);​\n sparse.put(100L, 0.3f);​\n sparse.put(500L, 0.8f);​\n row.add(\"sparse_vector\", gson.toJsonTree(sparse));​\n rows.add(row);​\n}​\n{​\n JsonObject row = new JsonObject();​\n SortedMap sparse = new TreeMap<>();​\n sparse.put(10L, 0.1f);​\n sparse.put(200L, 0.7f);​\n sparse.put(1000L, 0.9f);​\n row.add(\"sparse_vector\", gson.toJsonTree(sparse));​\n rows.add(row);​\n}​\n​\nInsertResp insertR = client.insert(InsertReq.builder()​\n .collectionName(\"my_sparse_collection\")​\n .data(rows)​\n .build());​\n\n","const data = [​\n { sparse_vector: { \"1\": 0.5, \"100\": 0.3, \"500\": 0.8 } },​\n { sparse_vector: { \"10\": 0.1, \"200\": 0.7, \"1000\": 0.9 } },​\n];​\nclient.insert({​\n collection_name: \"my_sparse_collection\",​\n data: data,​\n});​\n​\n\n","curl --request POST \\​\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/entities/insert\" \\​\n--header \"Authorization: Bearer ${TOKEN}\" \\​\n--header \"Content-Type: application/json\" \\​\n-d '{​\n \"data\": [​\n {\"sparse_vector\": {\"1\": 0.5, \"100\": 0.3, \"500\": 0.8}},​\n 
{\"sparse_vector\": {\"10\": 0.1, \"200\": 0.7, \"1000\": 0.9}} ​\n ],​\n \"collectionName\": \"my_sparse_collection\"​\n}'​\n​\n## {\"code\":0,\"cost\":0,\"data\":{\"insertCount\":2,\"insertIds\":[\"453577185629572534\",\"453577185629572535\"]}}​\n\n","# Prepare search parameters​\nsearch_params = {​\n \"params\": {\"drop_ratio_search\": 0.2}, # Additional optional search parameters​\n}​\n​\n# Prepare the query vector​\nquery_vector = [{1: 0.2, 50: 0.4, 1000: 0.7}]​\n\n","res = client.search(​\n collection_name=\"my_sparse_collection\",​\n data=query_vector,​\n limit=3,​\n output_fields=[\"pk\"],​\n search_params=search_params,​\n)​\n​\nprint(res)​\n​\n# Output​\n# data: [\"[{'id': '453718927992172266', 'distance': 0.6299999952316284, 'entity': {'pk': '453718927992172266'}}, {'id': '453718927992172265', 'distance': 0.10000000149011612, 'entity': {'pk': '453718927992172265'}}]\"]​\n\n","import io.milvus.v2.service.vector.request.SearchReq;​\nimport io.milvus.v2.service.vector.request.data.SparseFloatVec;​\nimport io.milvus.v2.service.vector.response.SearchResp;​\n​\nMap searchParams = new HashMap<>();​\nsearchParams.put(\"drop_ratio_search\", 0.2);​\n​\nSortedMap sparse = new TreeMap<>();​\nsparse.put(10L, 0.1f);​\nsparse.put(200L, 0.7f);​\nsparse.put(1000L, 0.9f);​\n​\nSparseFloatVec queryVector = new SparseFloatVec(sparse);​\n​\nSearchResp searchR = client.search(SearchReq.builder()​\n .collectionName(\"my_sparse_collection\")​\n .data(Collections.singletonList(queryVector))​\n .annsField(\"sparse_vector\")​\n .searchParams(searchParams)​\n .topK(3)​\n .outputFields(Collections.singletonList(\"pk\"))​\n .build());​\n ​\nSystem.out.println(searchR.getSearchResults());​\n​\n// Output​\n//​\n// [[SearchResp.SearchResult(entity={pk=453444327741536759}, score=1.31, id=453444327741536759), SearchResp.SearchResult(entity={pk=453444327741536756}, score=1.31, id=453444327741536756), SearchResp.SearchResult(entity={pk=453444327741536753}, score=1.31, id=453444327741536753)]]​\n\n","client.search({​\n collection_name: 'my_sparse_collection',​\n data: {1: 0.2, 50: 0.4, 1000: 0.7},​\n limit: 3,​\n output_fields: ['pk'],​\n params: {​\n drop_ratio_search: 0.2​\n }​\n});​\n\n","curl --request POST \\​\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/entities/search\" \\​\n--header \"Authorization: Bearer ${TOKEN}\" \\​\n--header \"Content-Type: application/json\" \\​\n-d '{​\n \"collectionName\": \"my_sparse_collection\",​\n \"data\": [​\n {\"1\": 0.2, \"50\": 0.4, \"1000\": 0.7}​\n ],​\n \"annsField\": \"sparse_vector\",​\n \"limit\": 3,​\n \"searchParams\":{​\n \"params\":{\"drop_ratio_search\": 0.2}​\n },​\n \"outputFields\": [\"pk\"]​\n}'​\n​\n## {\"code\":0,\"cost\":0,\"data\":[{\"distance\":0.63,\"id\":\"453577185629572535\",\"pk\":\"453577185629572535\"},{\"distance\":0.1,\"id\":\"453577185629572534\",\"pk\":\"453577185629572534\"}]}​\n\n"],"headingContent":"Sparse Vector​","anchorList":[{"label":"スパース・ベクトル","href":"Sparse-Vector​","type":1,"isActive":false},{"label":"概要","href":"Overview​","type":2,"isActive":false},{"label":"Milvusでスパースベクトルを使う","href":"Use-sparse-vectors-in-Milvus​","type":2,"isActive":false}]} \ No newline at end of file +{"codeList":["from scipy.sparse import csr_matrix​\n​\n# Create a sparse matrix​\nrow = [0, 0, 1, 2, 2, 2]​\ncol = [0, 2, 2, 0, 1, 2]​\ndata = [1, 2, 3, 4, 5, 6]​\nsparse_matrix = csr_matrix((data, (row, col)), shape=(3, 3))​\n​\n# Represent sparse vector using the sparse matrix​\nsparse_vector = sparse_matrix.getrow(0)​\n\n","# Represent sparse vector using a 
dictionary​\nsparse_vector = [{1: 0.5, 100: 0.3, 500: 0.8, 1024: 0.2, 5000: 0.6}]​\n\n","SortedMap sparseVector = new TreeMap<>();​\nsparseVector.put(1L, 0.5f);​\nsparseVector.put(100L, 0.3f);​\nsparseVector.put(500L, 0.8f);​\nsparseVector.put(1024L, 0.2f);​\nsparseVector.put(5000L, 0.6f);​\n\n","# Represent sparse vector using a list of tuples​\nsparse_vector = [[(1, 0.5), (100, 0.3), (500, 0.8), (1024, 0.2), (5000, 0.6)]]​\n\n","from pymilvus import MilvusClient, DataType​\n​\nclient = MilvusClient(uri=\"http://localhost:19530\")​\n​\nclient.drop_collection(collection_name=\"my_sparse_collection\")​\n​\nschema = client.create_schema(​\n auto_id=True,​\n enable_dynamic_fields=True,​\n)​\n​\nschema.add_field(field_name=\"pk\", datatype=DataType.VARCHAR, is_primary=True, max_length=100)​\nschema.add_field(field_name=\"sparse_vector\", datatype=DataType.SPARSE_FLOAT_VECTOR)​\n\n","import io.milvus.v2.client.ConnectConfig;​\nimport io.milvus.v2.client.MilvusClientV2;​\n​\nimport io.milvus.v2.common.DataType;​\nimport io.milvus.v2.service.collection.request.AddFieldReq;​\nimport io.milvus.v2.service.collection.request.CreateCollectionReq;​\n​\nMilvusClientV2 client = new MilvusClientV2(ConnectConfig.builder()​\n .uri(\"http://localhost:19530\")​\n .build());​\n ​\nCreateCollectionReq.CollectionSchema schema = client.createSchema();​\nschema.setEnableDynamicField(true);​\nschema.addField(AddFieldReq.builder()​\n .fieldName(\"pk\")​\n .dataType(DataType.VarChar)​\n .isPrimaryKey(true)​\n .autoID(true)​\n .maxLength(100)​\n .build());​\n​\nschema.addField(AddFieldReq.builder()​\n .fieldName(\"sparse_vector\")​\n .dataType(DataType.SparseFloatVector)​\n .build());​\n\n","import { DataType } from \"@zilliz/milvus2-sdk-node\";​\n​\nconst schema = [​\n {​\n name: \"metadata\",​\n data_type: DataType.JSON,​\n },​\n {​\n name: \"pk\",​\n data_type: DataType.Int64,​\n is_primary_key: true,​\n },​\n {​\n name: \"sparse_vector\",​\n data_type: DataType.SparseFloatVector,​\n }​\n];​\n​\n\n","export primaryField='{​\n \"fieldName\": \"pk\",​\n \"dataType\": \"VarChar\",​\n \"isPrimary\": true,​\n \"elementTypeParams\": {​\n \"max_length\": 100​\n }​\n}'​\n​\nexport vectorField='{​\n \"fieldName\": \"sparse_vector\",​\n \"dataType\": \"SparseFloatVector\"​\n}'​\n​\nexport schema=\"{​\n \\\"autoID\\\": true,​\n \\\"fields\\\": [​\n $primaryField,​\n $vectorField​\n ]​\n}\"​\n\n","index_params = client.prepare_index_params()​\n​\nindex_params.add_index(​\n field_name=\"sparse_vector\",​\n index_name=\"sparse_inverted_index\",​\n index_type=\"SPARSE_INVERTED_INDEX\",​\n metric_type=\"IP\",​\n params={\"drop_ratio_build\": 0.2},​\n)​\n\n","import io.milvus.v2.common.IndexParam;​\nimport java.util.*;​\n​\nList indexes = new ArrayList<>();​\nMap extraParams = new HashMap<>();​\nextraParams.put(\"drop_ratio_build\", 0.2);​\nindexes.add(IndexParam.builder()​\n .fieldName(\"sparse_vector\")​\n .indexName(\"sparse_inverted_index\")​\n .indexType(IndexParam.IndexType.SPARSE_INVERTED_INDEX)​\n .metricType(IndexParam.MetricType.IP)​\n .extraParams(extraParams)​\n .build());​\n\n","const indexParams = await client.createIndex({​\n index_name: 'sparse_inverted_index',​\n field_name: 'sparse_vector',​\n metric_type: MetricType.IP,​\n index_type: IndexType.SPARSE_WAND,​\n params: {​\n drop_ratio_build: 0.2,​\n },​\n});​\n\n","export indexParams='[​\n {​\n \"fieldName\": \"sparse_vector\",​\n \"metricType\": \"IP\",​\n \"indexName\": \"sparse_inverted_index\",​\n \"indexType\": \"SPARSE_INVERTED_INDEX\",​\n 
\"params\":{\"drop_ratio_build\": 0.2}​\n }​\n ]'​\n\n","client.create_collection(​\n collection_name=\"my_sparse_collection\",​\n schema=schema,​\n index_params=index_params​\n)​\n\n","import io.milvus.v2.client.ConnectConfig;​\nimport io.milvus.v2.client.MilvusClientV2;​\n​\nMilvusClientV2 client = new MilvusClientV2(ConnectConfig.builder()​\n .uri(\"http://localhost:19530\")​\n .build());​\n ​\nCreateCollectionReq requestCreate = CreateCollectionReq.builder()​\n .collectionName(\"my_sparse_collection\")​\n .collectionSchema(schema)​\n .indexParams(indexes)​\n .build();​\nclient.createCollection(requestCreate);​\n\n","import { MilvusClient } from \"@zilliz/milvus2-sdk-node\";​\n​\nconst client = new MilvusClient({​\n address: 'http://localhost:19530'​\n});​\n​\nawait client.createCollection({​\n collection_name: 'my_sparse_collection',​\n schema: schema,​\n index_params: indexParams​\n});​\n\n","curl --request POST \\​\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/collections/create\" \\​\n--header \"Authorization: Bearer ${TOKEN}\" \\​\n--header \"Content-Type: application/json\" \\​\n-d \"{​\n \\\"collectionName\\\": \\\"my_sparse_collection\\\",​\n \\\"schema\\\": $schema,​\n \\\"indexParams\\\": $indexParams​\n}\"​\n\n","sparse_vectors = [​\n {\"sparse_vector\": {1: 0.5, 100: 0.3, 500: 0.8}},​\n {\"sparse_vector\": {10: 0.1, 200: 0.7, 1000: 0.9}},​\n]​\n​\nclient.insert(​\n collection_name=\"my_sparse_collection\",​\n data=sparse_vectors​\n)​\n\n","import com.google.gson.Gson;​\nimport com.google.gson.JsonObject;​\nimport io.milvus.v2.service.vector.request.InsertReq;​\nimport io.milvus.v2.service.vector.response.InsertResp;​\n​\nList rows = new ArrayList<>();​\nGson gson = new Gson();​\n{​\n JsonObject row = new JsonObject();​\n SortedMap sparse = new TreeMap<>();​\n sparse.put(1L, 0.5f);​\n sparse.put(100L, 0.3f);​\n sparse.put(500L, 0.8f);​\n row.add(\"sparse_vector\", gson.toJsonTree(sparse));​\n rows.add(row);​\n}​\n{​\n JsonObject row = new JsonObject();​\n SortedMap sparse = new TreeMap<>();​\n sparse.put(10L, 0.1f);​\n sparse.put(200L, 0.7f);​\n sparse.put(1000L, 0.9f);​\n row.add(\"sparse_vector\", gson.toJsonTree(sparse));​\n rows.add(row);​\n}​\n​\nInsertResp insertR = client.insert(InsertReq.builder()​\n .collectionName(\"my_sparse_collection\")​\n .data(rows)​\n .build());​\n\n","const data = [​\n { sparse_vector: { \"1\": 0.5, \"100\": 0.3, \"500\": 0.8 } },​\n { sparse_vector: { \"10\": 0.1, \"200\": 0.7, \"1000\": 0.9 } },​\n];​\nclient.insert({​\n collection_name: \"my_sparse_collection\",​\n data: data,​\n});​\n​\n\n","curl --request POST \\​\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/entities/insert\" \\​\n--header \"Authorization: Bearer ${TOKEN}\" \\​\n--header \"Content-Type: application/json\" \\​\n-d '{​\n \"data\": [​\n {\"sparse_vector\": {\"1\": 0.5, \"100\": 0.3, \"500\": 0.8}},​\n {\"sparse_vector\": {\"10\": 0.1, \"200\": 0.7, \"1000\": 0.9}} ​\n ],​\n \"collectionName\": \"my_sparse_collection\"​\n}'​\n​\n## {\"code\":0,\"cost\":0,\"data\":{\"insertCount\":2,\"insertIds\":[\"453577185629572534\",\"453577185629572535\"]}}​\n\n","# Prepare search parameters​\nsearch_params = {​\n \"params\": {\"drop_ratio_search\": 0.2}, # Additional optional search parameters​\n}​\n​\n# Prepare the query vector​\nquery_vector = [{1: 0.2, 50: 0.4, 1000: 0.7}]​\n\n","res = client.search(​\n collection_name=\"my_sparse_collection\",​\n data=query_vector,​\n limit=3,​\n output_fields=[\"pk\"],​\n search_params=search_params,​\n)​\n​\nprint(res)​\n​\n# Output​\n# data: [\"[{'id': 
'453718927992172266', 'distance': 0.6299999952316284, 'entity': {'pk': '453718927992172266'}}, {'id': '453718927992172265', 'distance': 0.10000000149011612, 'entity': {'pk': '453718927992172265'}}]\"]​\n\n","import io.milvus.v2.service.vector.request.SearchReq;​\nimport io.milvus.v2.service.vector.request.data.SparseFloatVec;​\nimport io.milvus.v2.service.vector.response.SearchResp;​\n​\nMap searchParams = new HashMap<>();​\nsearchParams.put(\"drop_ratio_search\", 0.2);​\n​\nSortedMap sparse = new TreeMap<>();​\nsparse.put(10L, 0.1f);​\nsparse.put(200L, 0.7f);​\nsparse.put(1000L, 0.9f);​\n​\nSparseFloatVec queryVector = new SparseFloatVec(sparse);​\n​\nSearchResp searchR = client.search(SearchReq.builder()​\n .collectionName(\"my_sparse_collection\")​\n .data(Collections.singletonList(queryVector))​\n .annsField(\"sparse_vector\")​\n .searchParams(searchParams)​\n .topK(3)​\n .outputFields(Collections.singletonList(\"pk\"))​\n .build());​\n ​\nSystem.out.println(searchR.getSearchResults());​\n​\n// Output​\n//​\n// [[SearchResp.SearchResult(entity={pk=453444327741536759}, score=1.31, id=453444327741536759), SearchResp.SearchResult(entity={pk=453444327741536756}, score=1.31, id=453444327741536756), SearchResp.SearchResult(entity={pk=453444327741536753}, score=1.31, id=453444327741536753)]]​\n\n","client.search({​\n collection_name: 'my_sparse_collection',​\n data: {1: 0.2, 50: 0.4, 1000: 0.7},​\n limit: 3,​\n output_fields: ['pk'],​\n params: {​\n drop_ratio_search: 0.2​\n }​\n});​\n\n","curl --request POST \\​\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/entities/search\" \\​\n--header \"Authorization: Bearer ${TOKEN}\" \\​\n--header \"Content-Type: application/json\" \\​\n-d '{​\n \"collectionName\": \"my_sparse_collection\",​\n \"data\": [​\n {\"1\": 0.2, \"50\": 0.4, \"1000\": 0.7}​\n ],​\n \"annsField\": \"sparse_vector\",​\n \"limit\": 3,​\n \"searchParams\":{​\n \"params\":{\"drop_ratio_search\": 0.2}​\n },​\n \"outputFields\": [\"pk\"]​\n}'​\n​\n## {\"code\":0,\"cost\":0,\"data\":[{\"distance\":0.63,\"id\":\"453577185629572535\",\"pk\":\"453577185629572535\"},{\"distance\":0.1,\"id\":\"453577185629572534\",\"pk\":\"453577185629572534\"}]}​\n\n"],"headingContent":"Sparse Vector​","anchorList":[{"label":"スパース・ベクトル","href":"Sparse-Vector​","type":1,"isActive":false},{"label":"概要","href":"Overview​","type":2,"isActive":false},{"label":"Milvusでスパースベクトルを使う","href":"Use-sparse-vectors-in-Milvus​","type":2,"isActive":false},{"label":"限界","href":"Limits","type":2,"isActive":false},{"label":"よくある質問","href":"FAQ","type":2,"isActive":false}]} \ No newline at end of file diff --git a/localization/v2.5.x/site/ja/userGuide/schema/sparse_vector.md b/localization/v2.5.x/site/ja/userGuide/schema/sparse_vector.md index 9714f83ba..e7ef63038 100644 --- a/localization/v2.5.x/site/ja/userGuide/schema/sparse_vector.md +++ b/localization/v2.5.x/site/ja/userGuide/schema/sparse_vector.md @@ -48,13 +48,13 @@ summary: >- 疎ベクトル表現

      スパースベクトルは、テキスト処理におけるTF-IDF(項頻度-逆文書頻度)やBM25など、様々な手法を用いて生成することができます。さらに、Milvusはスパースベクトルの生成と処理を支援する便利なメソッドを提供しています。詳細は埋め込みをご参照ください。

      Milvusはテキストデータの全文検索機能も提供しており、スパースベクトルを生成するために外部の埋め込みモデルを使用することなく、生のテキストデータに対して直接ベクトル検索を行うことができます。詳細は全文検索をご参照ください。

      -

      ベクトル化後、データはMilvusに保存され、管理およびベクトル検索を行うことができます。下図は基本的なプロセスを示しています。

      +

      ベクトル化されたデータはMilvusに保存され、管理やベクトル検索に利用することができます。下図は基本的なプロセスを示しています。

      Use sparse vector in Milvus Milvusでスパースベクトルを使用する。

      -

      Milvusはスパースベクトル以外にも、デンスベクトルやバイナリベクトルにも対応しています。密なベクトルは深い意味的関係を把握するのに適しており、バイナリベクトルは迅速な類似性比較やコンテンツの重複排除などのシナリオに優れています。詳細については、密なベクトルと バイナリベクトルを参照してください。

      +

      Milvusはスパースベクトル以外にも、デンスベクトルやバイナリベクトルにも対応しています。密なベクトルは深い意味的関係を把握するのに理想的であり、バイナリベクトルは迅速な類似性比較やコンテンツの重複排除などのシナリオに優れています。詳細については、密なベクトルと バイナリベクトルを参照してください。

      Milvusでスパースベクトルを使う

      -

      類似検索パラメーターの詳細については、「ANN検索の基本」を参照のこと。

      +

      類似検索パラメーターの詳細については、「基本的なANN検索」を参照のこと。

      +

      限界

      Milvusでスパースベクトルを使用する場合、以下の制限を考慮してください:

      +
        +
      • 現在、スパースベクトルではIP距離メトリックのみがサポートされています。スパースベクトルは次元が高いため、L2距離や余弦距離は実用的ではありません。

      • +
      • 疎なベクトル・フィールドでは、SPARSE_INVERTED_INDEXSPARSE_WANDインデックス型のみがサポートされています。

      • +
      • スパース・ベクトルでサポートされるデータ型:

        +
          +
        • 次元部は符号なし32ビット整数でなければならない;
        • +
        • 値部は非負32ビット浮動小数点数。
        • +
      • +
      • スパース・ベクトルは、挿入と検索に関して以下の要件を満たす必要があります:

        +
          +
        • ベクトル内の少なくとも1つの値が非ゼロである;
        • +
        • ベクトルの添字が非負であること。
        • +
      • +
      +
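      上記の要件を満たすスパースベクトルの表現は、たとえば次のようになります(値は説明用のダミーです)。

      # Valid: non-negative uint32 indices, 32-bit float values, at least one non-zero value
      valid_sparse = {0: 0.12, 4096: 0.73, 100000: 0.05}

      # These would be rejected on insert or search:
      # {-1: 0.5}    -> negative index
      # {10: 0.0}    -> no non-zero value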

      よくある質問

        +
      • SPARSE_INVERTED_INDEX と SPARSE_WAND の違いと、その選択方法を教えてください。

        +

        SPARSE_INVERTED_INDEXは伝統的な転置インデックスで、SPARSE_WANDは Weak-ANDアルゴリズムを使用して検索中のフルIP距離評価数を減らします。SPARSE_WANDは一般的に高速ですが、ベクトル密度が高くなるにつれて性能が低下する可能性があります。どちらかを選択するには、特定のデータセットとユースケースに基づいた実験とベンチマークを実施してください。

      • +
      • drop_ratio_buildとdrop_ratio_searchパラメータはどのように選択すればよいですか?

        +

        drop_ratio_buildと drop_ratio_searchの選択は、データの特性や、検索レイテンシー/スループット、精度に対する要件に依存します。

      • +
      • スパース埋込みの次元は、uint32空間内の任意の離散値にすることができますか?

        +

        はい、ただし1つの例外があります。スパース埋込みの次元は、[0, maximum of uint32) の範囲内の任意の値にすることができます。つまり、uint32の最大値を使うことはできません。

      • +
      • 成長しているセグメントの検索は、インデックスを使って行うのですか?

        +

        成長中のセグメントを検索する際には、セグメントインデックスと同じ型のインデックスを使用します。インデックスが作成される前の新しい成長中のセグメントについては、 総当たり検索を使用します。

      • +
      • 1つのコレクションに、疎なベクトルと密なベクトルの両方を持つことは可能ですか?

        +

        はい、複数のベクトル型をサポートしているため、疎なベクトル列と密なベクトル列の両方を持つコレクションを作成し、それに対してハイブリッド検索を実行することができます。

      • +
      diff --git a/localization/v2.5.x/site/ja/userGuide/search-query-get/boolean.md b/localization/v2.5.x/site/ja/userGuide/search-query-get/boolean.md index ebe47ada2..13e61a4e0 100644 --- a/localization/v2.5.x/site/ja/userGuide/search-query-get/boolean.md +++ b/localization/v2.5.x/site/ja/userGuide/search-query-get/boolean.md @@ -696,9 +696,9 @@ curl --request POST \​

      Match operators​

      Match operators include:​

      • like: Match constants or prefixes (prefix%), infixes (%infix%), and suffixes (%suffix) within constants. It relies on a brute-force search mechanism using wildcards and does not involve text tokenization. While it can achieve exact matches, its query efficiency is relatively low, making it suitable for simple matching tasks or queries on smaller datasets.​

      • -
      • TEXT_MATCH: Match specific terms or keywords on VARCHAR fields, using tokenization and inverted index to enable efficient text search. Compared to like, TEXT_MATCH offers more advanced text tokenization and filtering capabilities. It is suited for large-scale datasets where higher query performance is required for complex text search scenarios.​ -

        -

        To use the TEXT_MATCH filter expression, you must enable text matching for the target VARCHAR field when creating the collection. For details, refer to ​Keyword Match.​

        +
      • TEXT_MATCH: Match specific terms or keywords on VARCHAR fields, using tokenization and inverted index to enable efficient text search. Compared to like, TEXT_MATCH offers more advanced text tokenization and filtering capabilities. It is suited for large-scale datasets where higher query performance is required for complex text search scenarios.​

        +

        +

        To use the TEXT_MATCH filter expression, you must enable text matching for the target VARCHAR field when creating the collection. For details, refer to Text Match.​

      Example 1: Apply filter on scalar field​

      The following example demonstrates how to filter products whose color is red. In this case, you can quickly filter all red products by matching the prefix 'red%'. Similarly, you can use the expression color in ['red_7025', 'red_4794', 'red_9392'] to filter all red products. However, when the data is more complex, we recommend using the like operator for more efficient filtering.​
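      A minimal sketch of such a prefix filter with the Python client might look as follows (the collection and field names are illustrative):

      from pymilvus import MilvusClient

      client = MilvusClient(uri="http://localhost:19530")

      res = client.query(
          collection_name="products",          # hypothetical collection
          filter='color like "red%"',          # prefix match on the color field
          output_fields=["color"],
      )
      print(res)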

      @@ -853,8 +853,8 @@ curl --request POST \​ ]​
      -

      Example 3: Keyword match on VARCHAR fields​

      The TEXT_MATCH expression is used for keyword match on VARCHAR fields. By default, it applies an OR logic, but you can combine it with other logical operators to create more complex query conditions. For details, refer to ​Keyword Match.​

      -

      The following example demonstrates how to use the TEXT_MATCH expression to filter products where the description field contains either the keyword "Apple" or "iPhone":​

      +

      Example 3: Text match on VARCHAR fields​

      The TEXT_MATCH expression is used for text match on VARCHAR fields. By default, it applies an OR logic, but you can combine it with other logical operators to create more complex query conditions. For details, refer to Text Match.​

      +

      The following example demonstrates how to use the TEXT_MATCH expression to filter products where the description field contains either the term "Apple" or "iPhone":​
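      A minimal sketch with the Python client (collection and field names are illustrative, and text match must already be enabled on the field):

      from pymilvus import MilvusClient

      client = MilvusClient(uri="http://localhost:19530")

      # TEXT_MATCH applies OR logic across the whitespace-separated terms by default
      filter = "TEXT_MATCH(description, 'Apple iPhone')"

      res = client.query(
          collection_name="products",
          filter=filter,
          output_fields=["id", "description"],
      )
      print(res)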

      Python Java diff --git a/localization/v2.5.x/site/ja/userGuide/search-query-get/full-text-search.json b/localization/v2.5.x/site/ja/userGuide/search-query-get/full-text-search.json index c2e4693f2..49acea461 100644 --- a/localization/v2.5.x/site/ja/userGuide/search-query-get/full-text-search.json +++ b/localization/v2.5.x/site/ja/userGuide/search-query-get/full-text-search.json @@ -1 +1 @@ -{"codeList":["from pymilvus import MilvusClient, DataType, Function, FunctionType​\n​\nschema = MilvusClient.create_schema()​\n​\nschema.add_field(field_name=\"id\", datatype=DataType.INT64, is_primary=True, auto_id=True)​\nschema.add_field(field_name=\"text\", datatype=DataType.VARCHAR, max_length=1000, enable_analyzer=True)​\nschema.add_field(field_name=\"sparse\", datatype=DataType.SPARSE_FLOAT_VECTOR)​\n\n","bm25_function = Function(​\n name=\"text_bm25_emb\", # Function name​\n input_field_names=[\"text\"], # Name of the VARCHAR field containing raw text data​\n output_field_names=[\"sparse\"], # Name of the SPARSE_FLOAT_VECTOR field reserved to store generated embeddings​\n function_type=FunctionType.BM25,​\n)​\n​\nschema.add_function(bm25_function)​\n\n","index_params = MilvusClient.prepare_index_params()​\n​\nindex_params.add_index(​\n field_name=\"sparse\",​\n index_type=\"AUTOINDEX\", ​\n metric_type=\"BM25\"​\n)​\n\n","MilvusClient.create_collection(​\n collection_name='demo', ​\n schema=schema, ​\n index_params=index_params​\n)​\n\n","MilvusClient.insert('demo', [​\n {'text': 'Artificial intelligence was founded as an academic discipline in 1956.'},​\n {'text': 'Alan Turing was the first person to conduct substantial research in AI.'},​\n {'text': 'Born in Maida Vale, London, Turing was raised in southern England.'},​\n])​\n\n","search_params = {​\n 'params': {'drop_ratio_search': 0.6},​\n}​\n​\nMilvusClient.search(​\n collection_name='demo', ​\n data=['Who started AI research?'],​\n anns_field='sparse',​\n limit=3,​\n search_params=search_params​\n)​\n\n"],"headingContent":"Full Text Search​","anchorList":[{"label":"全文検索","href":"Full-Text-Search​","type":1,"isActive":false},{"label":"概要","href":"Overview​","type":2,"isActive":false},{"label":"全文検索用のコレクションの作成","href":"Create-a-collection-for-full-text-search​","type":2,"isActive":false},{"label":"テキストデータの挿入","href":"Insert-text-data","type":2,"isActive":false},{"label":"全文検索の実行","href":"Perform-full-text-search","type":2,"isActive":false}]} \ No newline at end of file +{"codeList":["from pymilvus import MilvusClient, DataType, Function, FunctionType​\n​\nschema = MilvusClient.create_schema()​\n​\nschema.add_field(field_name=\"id\", datatype=DataType.INT64, is_primary=True, auto_id=True)​\nschema.add_field(field_name=\"text\", datatype=DataType.VARCHAR, max_length=1000, enable_analyzer=True)​\nschema.add_field(field_name=\"sparse\", datatype=DataType.SPARSE_FLOAT_VECTOR)​\n\n","import io.milvus.v2.common.DataType;\nimport io.milvus.v2.service.collection.request.AddFieldReq;\nimport io.milvus.v2.service.collection.request.CreateCollectionReq;\n\nCreateCollectionReq.CollectionSchema schema = CreateCollectionReq.CollectionSchema.builder()\n .build();\nschema.addField(AddFieldReq.builder()\n .fieldName(\"id\")\n .dataType(DataType.Int64)\n .isPrimaryKey(true)\n .autoID(true)\n .build());\nschema.addField(AddFieldReq.builder()\n .fieldName(\"text\")\n .dataType(DataType.VarChar)\n .maxLength(1000)\n .enableAnalyzer(true)\n .build());\nschema.addField(AddFieldReq.builder()\n .fieldName(\"sparse\")\n .dataType(DataType.SparseFloatVector)\n 
.build());\n","import { MilvusClient, DataType } from \"@zilliz/milvus2-sdk-node\";\n\nconst address = \"http://localhost:19530\";\nconst token = \"root:Milvus\";\nconst client = new MilvusClient({address, token});\nconst schema = [\n {\n name: \"id\",\n data_type: DataType.Int64,\n is_primary_key: true,\n },\n {\n name: \"text\",\n data_type: \"VarChar\",\n enable_analyzer: true,\n enable_match: true,\n max_length: 1000,\n },\n {\n name: \"sparse\",\n data_type: DataType.SparseFloatVector,\n },\n];\n\n\nconsole.log(res.results)\n","export schema='{\n \"autoId\": true,\n \"enabledDynamicField\": false,\n \"fields\": [\n {\n \"fieldName\": \"id\",\n \"dataType\": \"Int64\",\n \"isPrimary\": true\n },\n {\n \"fieldName\": \"text\",\n \"dataType\": \"VarChar\",\n \"elementTypeParams\": {\n \"max_length\": 1000,\n \"enable_analyzer\": true\n }\n },\n {\n \"fieldName\": \"sparse\",\n \"dataType\": \"SparseFloatVector\"\n }\n ]\n }'\n","bm25_function = Function(​\n name=\"text_bm25_emb\", # Function name​\n input_field_names=[\"text\"], # Name of the VARCHAR field containing raw text data​\n output_field_names=[\"sparse\"], # Name of the SPARSE_FLOAT_VECTOR field reserved to store generated embeddings​\n function_type=FunctionType.BM25,​\n)​\n​\nschema.add_function(bm25_function)​\n\n","import io.milvus.common.clientenum.FunctionType;\nimport io.milvus.v2.service.collection.request.CreateCollectionReq.Function;\n\nimport java.util.*;\n\nschema.addFunction(Function.builder()\n .functionType(FunctionType.BM25)\n .name(\"text_bm25_emb\")\n .inputFieldNames(Collections.singletonList(\"text\"))\n .outputFieldNames(Collections.singletonList(\"vector\"))\n .build());\n","const functions = [\n {\n name: 'text_bm25_emb',\n description: 'bm25 function',\n type: FunctionType.BM25,\n input_field_names: ['text'],\n output_field_names: ['vector'],\n params: {},\n },\n];\n","export schema='{\n \"autoId\": true,\n \"enabledDynamicField\": false,\n \"fields\": [\n {\n \"fieldName\": \"id\",\n \"dataType\": \"Int64\",\n \"isPrimary\": true\n },\n {\n \"fieldName\": \"text\",\n \"dataType\": \"VarChar\",\n \"elementTypeParams\": {\n \"max_length\": 1000,\n \"enable_analyzer\": true\n }\n },\n {\n \"fieldName\": \"sparse\",\n \"dataType\": \"SparseFloatVector\"\n }\n ],\n \"functions\": [\n {\n \"name\": \"text_bm25_emb\",\n \"type\": \"BM25\",\n \"inputFieldNames\": [\"text\"],\n \"outputFieldNames\": [\"sparse\"],\n \"params\": {}\n }\n ]\n }'\n","index_params = MilvusClient.prepare_index_params()​\n​\nindex_params.add_index(​\n field_name=\"sparse\",​\n index_type=\"AUTOINDEX\", ​\n metric_type=\"BM25\"​\n)​\n\n","import io.milvus.v2.common.IndexParam;\n\nList indexes = new ArrayList<>();\nindexes.add(IndexParam.builder()\n .fieldName(\"sparse\")\n .indexType(IndexParam.IndexType.SPARSE_INVERTED_INDEX)\n .metricType(IndexParam.MetricType.BM25)\n .build());\n","const index_params = [\n {\n fieldName: \"sparse\",\n metricType: \"BM25\",\n indexType: \"AUTOINDEX\",\n },\n];\n","export indexParams='[\n {\n \"fieldName\": \"sparse\",\n \"metricType\": \"BM25\",\n \"indexType\": \"AUTOINDEX\"\n }\n ]'\n","MilvusClient.create_collection(​\n collection_name='demo', ​\n schema=schema, ​\n index_params=index_params​\n)​\n\n","import io.milvus.v2.service.collection.request.CreateCollectionReq;\n\nCreateCollectionReq requestCreate = CreateCollectionReq.builder()\n .collectionName(\"demo\")\n .collectionSchema(schema)\n .indexParams(indexes)\n .build();\nclient.createCollection(requestCreate);\n","await 
client.create_collection(\n collection_name: 'demo', \n schema: schema, \n index_params: index_params\n);\n","export CLUSTER_ENDPOINT=\"http://localhost:19530\"\nexport TOKEN=\"root:Milvus\"\n\ncurl --request POST \\\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/collections/create\" \\\n--header \"Authorization: Bearer ${TOKEN}\" \\\n--header \"Content-Type: application/json\" \\\n-d \"{\n \\\"collectionName\\\": \\\"demo\\\",\n \\\"schema\\\": $schema,\n \\\"indexParams\\\": $indexParams\n}\"\n","client.insert('demo', [\n {'text': 'information retrieval is a field of study.'},\n {'text': 'information retrieval focuses on finding relevant information in large datasets.'},\n {'text': 'data mining and information retrieval overlap in research.'},\n])\n\n","import com.google.gson.Gson;\nimport com.google.gson.JsonObject;\n\nimport io.milvus.v2.service.vector.request.InsertReq;\n\nGson gson = new Gson();\nList rows = Arrays.asList(\n gson.fromJson(\"{\\\"text\\\": \\\"information retrieval is a field of study.\\\"}\", JsonObject.class),\n gson.fromJson(\"{\\\"text\\\": \\\"information retrieval focuses on finding relevant information in large datasets.\\\"}\", JsonObject.class),\n gson.fromJson(\"{\\\"text\\\": \\\"data mining and information retrieval overlap in research.\\\"}\", JsonObject.class)\n);\n\nclient.insert(InsertReq.builder()\n .collectionName(\"demo\")\n .data(rows)\n .build());\n","await client.insert({\ncollection_name: 'demo', \ndata: [\n {'text': 'information retrieval is a field of study.'},\n {'text': 'information retrieval focuses on finding relevant information in large datasets.'},\n {'text': 'data mining and information retrieval overlap in research.'},\n]);\n","curl --request POST \\\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/entities/insert\" \\\n--header \"Authorization: Bearer ${TOKEN}\" \\\n--header \"Content-Type: application/json\" \\\n-d '{\n \"data\": [\n {\"text\": \"information retrieval is a field of study.\"},\n {\"text\": \"information retrieval focuses on finding relevant information in large datasets.\"},\n {\"text\": \"data mining and information retrieval overlap in research.\"} \n ],\n \"collectionName\": \"demo\"\n}'\n","search_params = {​\n 'params': {'drop_ratio_search': 0.6},​\n}​\n​\nMilvusClient.search(​\n collection_name='demo', ​\n data=['whats the focus of information retrieval?'],​\n anns_field='sparse',​\n limit=3,​\n search_params=search_params​\n)​\n\n","import io.milvus.v2.service.vector.request.SearchReq;\nimport io.milvus.v2.service.vector.request.data.EmbeddedText;\nimport io.milvus.v2.service.vector.response.SearchResp;\n\nMap searchParams = new HashMap<>();\nsearchParams.put(\"drop_ratio_search\", 0.6);\nSearchResp searchResp = client.search(SearchReq.builder()\n .collectionName(\"demo\")\n .data(Collections.singletonList(new EmbeddedText(\"whats the focus of information retrieval?\")))\n .annsField(\"sparse\")\n .topK(3)\n .searchParams(searchParams)\n .outputFields(Collections.singletonList(\"text\"))\n .build());\n","await client.search(\n collection_name: 'demo', \n data: ['whats the focus of information retrieval?'],\n anns_field: 'sparse',\n limit: 3,\n params: {'drop_ratio_search': 0.6},\n)\n","curl --request POST \\\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/entities/search\" \\\n--header \"Authorization: Bearer ${TOKEN}\" \\\n--header \"Content-Type: application/json\" \\\n--data-raw '{\n \"collectionName\": \"demo\",\n \"data\": [\n \"whats the focus of information retrieval?\"\n ],\n \"annsField\": \"sparse\",\n \"limit\": 3,\n 
\"outputFields\": [\n \"text\"\n ],\n \"searchParams\":{\n \"params\":{\n \"drop_ratio_search\":0.6\n }\n }\n}'\n"],"headingContent":"Full Text Search​","anchorList":[{"label":"全文検索","href":"Full-Text-Search​","type":1,"isActive":false},{"label":"概要","href":"Overview​","type":2,"isActive":false},{"label":"全文検索用のコレクションの作成","href":"Create-a-collection-for-full-text-search​","type":2,"isActive":false},{"label":"テキストデータの挿入","href":"Insert-text-data","type":2,"isActive":false},{"label":"全文検索の実行","href":"Perform-full-text-search","type":2,"isActive":false}]} \ No newline at end of file diff --git a/localization/v2.5.x/site/ja/userGuide/search-query-get/full-text-search.md b/localization/v2.5.x/site/ja/userGuide/search-query-get/full-text-search.md index 9e864fb99..1bc087e0d 100644 --- a/localization/v2.5.x/site/ja/userGuide/search-query-get/full-text-search.md +++ b/localization/v2.5.x/site/ja/userGuide/search-query-get/full-text-search.md @@ -44,7 +44,7 @@ summary: 全文検索とは、テキストデータセット中の特定の語
    • テキスト入力:テキスト入力: 生のテキスト文書を挿入するか、クエリーテキストを提供します。

    • テキスト分析:Milvusはアナライザーを使って、入力テキストを検索可能な個々の用語にトークン化します。

    • 関数処理:組み込み関数がトークン化された用語を受け取り、スパースベクトル表現に変換します。

    • -
    • コレクションストア:Milvusはこれらのスパース埋め込みをコレクションに保存し、効率的な検索を可能にする。

    • +
    • コレクションストア:Milvusは効率的な検索のために、これらのスパース埋め込みをコレクションに保存する。

    • BM25スコアリング:検索中、MilvusはBM25アルゴリズムを適用して保存された文書のスコアを計算し、クエリテキストとの関連性に基づいてマッチした結果をランク付けします。

    • @@ -75,10 +75,12 @@ summary: 全文検索とは、テキストデータセット中の特定の語

      全文検索を有効にするには、特定のスキーマを持つコレクションを作成します。このスキーマには3つの必要なフィールドが含まれていなければなりません。

      • コレクション内の各エンティティを一意に識別するプライマリ・フィールド。

      • -
      • 生のテキスト文書を格納するVARCHAR フィールド。enable_analyzer 属性はTrue に設定されている。これにより、milvus はテキストを機能処理のために特定の用語にトークン化することができる。

      • +
      • 生のテキスト文書を格納するVARCHAR フィールド。enable_analyzer 属性はTrue に設定されている。これにより、milvus はテキストを特定の用語にトークン化し、機能処理を行うことができる。

      • MilvusがVARCHAR フィールド用に自動生成するスパース埋め込みを格納するために予約されたSPARSE_FLOAT_VECTOR フィールド。

      コレクションスキーマの定義

      まず、スキーマを作成し、必要なフィールドを追加する。

      +
      from pymilvus import MilvusClient, DataType, Function, FunctionType​
       ​
       schema = MilvusClient.create_schema()​
      @@ -88,13 +90,89 @@ schema.add_field(field_name="text", d
       schema.add_field(field_name="sparse", datatype=DataType.SPARSE_FLOAT_VECTOR)​
       
       
      -

      この構成では

      +
      import io.milvus.v2.common.DataType;
      +import io.milvus.v2.service.collection.request.AddFieldReq;
      +import io.milvus.v2.service.collection.request.CreateCollectionReq;
      +
      +CreateCollectionReq.CollectionSchema schema = CreateCollectionReq.CollectionSchema.builder()
      +        .build();
      +schema.addField(AddFieldReq.builder()
      +        .fieldName("id")
      +        .dataType(DataType.Int64)
      +        .isPrimaryKey(true)
      +        .autoID(true)
      +        .build());
      +schema.addField(AddFieldReq.builder()
      +        .fieldName("text")
      +        .dataType(DataType.VarChar)
      +        .maxLength(1000)
      +        .enableAnalyzer(true)
      +        .build());
      +schema.addField(AddFieldReq.builder()
      +        .fieldName("sparse")
      +        .dataType(DataType.SparseFloatVector)
      +        .build());
      +
      +
      import { MilvusClient, DataType } from "@zilliz/milvus2-sdk-node";
      +
      +const address = "http://localhost:19530";
      +const token = "root:Milvus";
      +const client = new MilvusClient({address, token});
      +const schema = [
      +  {
      +    name: "id",
      +    data_type: DataType.Int64,
      +    is_primary_key: true,
      +  },
      +  {
      +    name: "text",
      +    data_type: "VarChar",
      +    enable_analyzer: true,
      +    enable_match: true,
      +    max_length: 1000,
      +  },
      +  {
      +    name: "sparse",
      +    data_type: DataType.SparseFloatVector,
      +  },
      +];
      +
      +
      +
      +
      export schema='{
      +        "autoId": true,
      +        "enabledDynamicField": false,
      +        "fields": [
      +            {
      +                "fieldName": "id",
      +                "dataType": "Int64",
      +                "isPrimary": true
      +            },
      +            {
      +                "fieldName": "text",
      +                "dataType": "VarChar",
      +                "elementTypeParams": {
      +                    "max_length": 1000,
      +                    "enable_analyzer": true
      +                }
      +            },
      +            {
      +                "fieldName": "sparse",
      +                "dataType": "SparseFloatVector"
      +            }
      +        ]
      +    }'
      +
      +

      この設定では

      • id: は主キーとして機能し、auto_id=True で自動的に生成される。

      • -
      • textこの構成では、 : が主キーとなり、 で自動的に生成されます。 : には、全文検索操作のための生のテキストデータが格納されます。データ型はVARCHARVARCHAR はMilvusのテキスト保存用の文字列データ型です。Milvusがテキストをトークン化できるようにするには、enable_analyzer=True を設定します。デフォルトでは、Milvusはテキスト分析に標準アナライザを使用します。別の解析器を設定するには、概要を参照してください。

      • +
      • text全文検索操作のための生のテキスト・データが格納されます。データ型はVARCHAR でなければなりません。VARCHAR はMilvusのテキスト保存用の文字列データ型です。Milvus がテキストをトークン化できるようにするにはenable_analyzer=True を設定します。デフォルトでは、Milvusはテキスト分析に標準アナライザを使用します。別の解析器を設定するには、概要を参照してください。

      • sparse全文検索操作のために内部で生成されたスパース埋め込みを格納するために予約されたベクトルフィールド。データ型はSPARSE_FLOAT_VECTOR でなければなりません。

      次に、テキストをスパース・ベクトル表現に変換する関数を定義し、スキーマに追加します。

      +
      bm25_function = Function(​
           name="text_bm25_emb", # Function name​
           input_field_names=["text"], # Name of the VARCHAR field containing raw text data​
      @@ -104,12 +182,68 @@ schema.add_field(field_name="sparse",
       ​
       schema.add_function(bm25_function)​
       
      +
      +
      import io.milvus.common.clientenum.FunctionType;
      +import io.milvus.v2.service.collection.request.CreateCollectionReq.Function;
      +
      +import java.util.*;
      +
      +schema.addFunction(Function.builder()
      +        .functionType(FunctionType.BM25)
      +        .name("text_bm25_emb")
      +        .inputFieldNames(Collections.singletonList("text"))
+        .outputFieldNames(Collections.singletonList("sparse"))
      +        .build());
      +
      +
      const functions = [
      +    {
      +      name: 'text_bm25_emb',
      +      description: 'bm25 function',
      +      type: FunctionType.BM25,
      +      input_field_names: ['text'],
+      output_field_names: ['sparse'],
      +      params: {},
      +    },
      +];
      +
      +
      export schema='{
      +        "autoId": true,
      +        "enabledDynamicField": false,
      +        "fields": [
      +            {
      +                "fieldName": "id",
      +                "dataType": "Int64",
      +                "isPrimary": true
      +            },
      +            {
      +                "fieldName": "text",
      +                "dataType": "VarChar",
      +                "elementTypeParams": {
      +                    "max_length": 1000,
      +                    "enable_analyzer": true
      +                }
      +            },
      +            {
      +                "fieldName": "sparse",
      +                "dataType": "SparseFloatVector"
      +            }
      +        ],
      +        "functions": [
      +            {
      +                "name": "text_bm25_emb",
      +                "type": "BM25",
      +                "inputFieldNames": ["text"],
      +                "outputFieldNames": ["sparse"],
      +                "params": {}
      +            }
      +        ]
      +    }'
       

      パラメータ

      説明

      name

      -

      関数の名前。この関数は、text フィールドの生テキストを、sparse フィールドに格納される検索可能なベクトルに変換します。

      +

      関数の名前。この関数は、text フィールドの生のテキストを、sparse フィールドに格納される検索可能なベクトルに変換します。

      input_field_names

      テキストからスパース・ベクトルへの変換を必要とするVARCHAR フィールドの名前。FunctionType.BM25 の場合、このパラメータは1つのフィールド名のみを受け付けます。

      output_field_names

      @@ -120,7 +254,9 @@ schema.add_function(bm25_function)​

      テキストからスパース・ベクトルへの変換が必要な複数のVARCHAR フィールドを持つコレクションの場合は、コレクション・スキーマに個別の関数を追加し、各関数が一意の名前とoutput_field_names 値を持つようにします。
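      たとえばtitlecontent の2つのVARCHAR フィールドを変換する場合は、次のように関数を2つ追加するイメージです(フィールド名はいずれも説明用の仮の名前で、上のスキーマ定義とインポートを前提としています)。

      # Each function has a unique name and writes to its own sparse output field
      schema.add_function(Function(
          name="title_bm25_emb",
          input_field_names=["title"],
          output_field_names=["title_sparse"],
          function_type=FunctionType.BM25,
      ))
      schema.add_function(Function(
          name="content_bm25_emb",
          input_field_names=["content"],
          output_field_names=["content_sparse"],
          function_type=FunctionType.BM25,
      ))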

      -

      インデックスの構成

      必要なフィールドと組み込み関数でスキーマを定義した後、コレクションのインデックスを設定します。このプロセスを簡素化するために、index_type としてAUTOINDEX を使用します。このオプションにより、milvus はデータの構造に基づいて最適なインデックスタイプを選択し、設定することができます。

      +

      インデックスの構成

      必要なフィールドと組み込み関数でスキーマを定義した後、コレクションのインデックスを設定します。このプロセスを簡素化するために、AUTOINDEXindex_type として使用します。このオプションを使用すると、Milvusはデータの構造に基づいて最適なインデックスタイプを選択し、設定することができます。

      +
      index_params = MilvusClient.prepare_index_params()​
       ​
       index_params.add_index(​
      @@ -129,23 +265,78 @@ index_params.add_index(​
           metric_type="BM25"​
       )​
       
      +
      +
      import io.milvus.v2.common.IndexParam;
      +
      +List<IndexParam> indexes = new ArrayList<>();
      +indexes.add(IndexParam.builder()
      +        .fieldName("sparse")
      +        .indexType(IndexParam.IndexType.SPARSE_INVERTED_INDEX)
      +        .metricType(IndexParam.MetricType.BM25)
      +        .build());
      +
      +
      const index_params = [
      +  {
      +    fieldName: "sparse",
      +    metricType: "BM25",
      +    indexType: "AUTOINDEX",
      +  },
      +];
      +
      +
      export indexParams='[
      +        {
      +            "fieldName": "sparse",
      +            "metricType": "BM25",
      +            "indexType": "AUTOINDEX"
      +        }
      +    ]'
       

      パラメータ

      説明

      field_name

      -

      インデックスを作成するベクトルフィールドの名前。全文検索の場合は、生成されたスパース・ベクトルを格納するフィールドである必要があります。この例では、値をsparse に設定します。

      +

      インデックスを作成するベクトルフィールドの名前。全文検索の場合は、生成されたスパース・ベクトルを格納するフィールドでなければなりません。この例では、値をsparse に設定します。

      index_type

      作成するインデックスのタイプです。AUTOINDEX 、Milvusは自動的にインデックス設定を最適化します。インデックス設定をより細かく制御する必要がある場合は、Milvusのスパースベクタで利用可能な様々なインデックスタイプから選択することができます。詳細はMilvusでサポートされるインデックスを参照してください。

      metric_type

      全文検索機能を使用する場合は、このパラメータの値をBM25 に設定する必要があります。

      コレクションの作成

      定義したスキーマとインデックスパラメータを使用してコレクションを作成します。

      +
      MilvusClient.create_collection(​
           collection_name='demo', ​
           schema=schema, ​
           index_params=index_params​
       )​
       
      +
      +
      import io.milvus.v2.service.collection.request.CreateCollectionReq;
      +
      +CreateCollectionReq requestCreate = CreateCollectionReq.builder()
      +        .collectionName("demo")
      +        .collectionSchema(schema)
      +        .indexParams(indexes)
      +        .build();
      +client.createCollection(requestCreate);
      +
      +
      await client.create_collection(
      +    collection_name: 'demo', 
      +    schema: schema, 
      +    index_params: index_params
      +);
      +
      +
      export CLUSTER_ENDPOINT="http://localhost:19530"
      +export TOKEN="root:Milvus"
      +
      +curl --request POST \
      +--url "${CLUSTER_ENDPOINT}/v2/vectordb/collections/create" \
      +--header "Authorization: Bearer ${TOKEN}" \
      +--header "Content-Type: application/json" \
      +-d "{
      +    \"collectionName\": \"demo\",
      +    \"schema\": $schema,
      +    \"indexParams\": $indexParams
      +}"
       

      テキストデータの挿入

      コレクションとインデックスを設定したら、テキストデータを挿入する準備が整いました。このプロセスでは、生のテキストを提供するだけです。先ほど定義した組み込み関数が、各テキスト・エントリに対応するスパース・ベクトルを自動生成します。

      -
      MilvusClient.insert('demo', [​
      -    {'text': 'Artificial intelligence was founded as an academic discipline in 1956.'},​
      -    {'text': 'Alan Turing was the first person to conduct substantial research in AI.'},​
      -    {'text': 'Born in Maida Vale, London, Turing was raised in southern England.'},​
      -])​
      +    

コレクションとインデックスを設定したら、テキストデータを挿入する準備ができました。このプロセスでは、生のテキストを提供するだけで済みます。先ほど定義した組み込み関数が、各テキスト・エントリに対応するスパース・ベクトルを自動的に生成します。

      + +
      client.insert('demo', [
      +    {'text': 'information retrieval is a field of study.'},
      +    {'text': 'information retrieval focuses on finding relevant information in large datasets.'},
      +    {'text': 'data mining and information retrieval overlap in research.'},
      +])
       
      +
      +
      import com.google.gson.Gson;
      +import com.google.gson.JsonObject;
      +
      +import io.milvus.v2.service.vector.request.InsertReq;
      +
      +Gson gson = new Gson();
      +List<JsonObject> rows = Arrays.asList(
      +        gson.fromJson("{\"text\": \"information retrieval is a field of study.\"}", JsonObject.class),
      +        gson.fromJson("{\"text\": \"information retrieval focuses on finding relevant information in large datasets.\"}", JsonObject.class),
      +        gson.fromJson("{\"text\": \"data mining and information retrieval overlap in research.\"}", JsonObject.class)
      +);
      +
      +client.insert(InsertReq.builder()
      +        .collectionName("demo")
      +        .data(rows)
      +        .build());
      +
      +
      await client.insert({
      +collection_name: 'demo', 
      +data: [
      +    {'text': 'information retrieval is a field of study.'},
      +    {'text': 'information retrieval focuses on finding relevant information in large datasets.'},
      +    {'text': 'data mining and information retrieval overlap in research.'},
      +]);
      +
      +
      curl --request POST \
      +--url "${CLUSTER_ENDPOINT}/v2/vectordb/entities/insert" \
      +--header "Authorization: Bearer ${TOKEN}" \
      +--header "Content-Type: application/json" \
      +-d '{
      +    "data": [
      +        {"text": "information retrieval is a field of study."},
      +        {"text": "information retrieval focuses on finding relevant information in large datasets."},
      +        {"text": "data mining and information retrieval overlap in research."}       
      +    ],
      +    "collectionName": "demo"
      +}'
       

      データをコレクションに挿入したら、生テキストクエリを使用して全文検索を実行できます。milvusは自動的にクエリをスパースベクトルに変換し、マッチした検索結果をBM25アルゴリズムを使ってランク付けし、topK (limit) の結果を返します。

      +

      データをコレクションに挿入したら、生テキストクエリを使用して全文検索を実行できます。Milvusは自動的にクエリをスパースベクトルに変換し、マッチした検索結果をBM25アルゴリズムを使ってランク付けし、トップK(limit)の結果を返します。

      +
      search_params = {​
           'params': {'drop_ratio_search': 0.6},​
       }​
       ​
       MilvusClient.search(​
           collection_name='demo', ​
      -    data=['Who started AI research?'],​
      +    data=['whats the focus of information retrieval?'],​
           anns_field='sparse',​
           limit=3,​
           search_params=search_params​
       )​
       
      +
      +
      import io.milvus.v2.service.vector.request.SearchReq;
      +import io.milvus.v2.service.vector.request.data.EmbeddedText;
      +import io.milvus.v2.service.vector.response.SearchResp;
      +
      +Map<String,Object> searchParams = new HashMap<>();
      +searchParams.put("drop_ratio_search", 0.6);
      +SearchResp searchResp = client.search(SearchReq.builder()
      +        .collectionName("demo")
      +        .data(Collections.singletonList(new EmbeddedText("whats the focus of information retrieval?")))
      +        .annsField("sparse")
      +        .topK(3)
      +        .searchParams(searchParams)
      +        .outputFields(Collections.singletonList("text"))
      +        .build());
      +
      +
      await client.search(
      +    collection_name: 'demo', 
      +    data: ['whats the focus of information retrieval?'],
      +    anns_field: 'sparse',
      +    limit: 3,
      +    params: {'drop_ratio_search': 0.6},
      +)
      +
      +
      curl --request POST \
      +--url "${CLUSTER_ENDPOINT}/v2/vectordb/entities/search" \
      +--header "Authorization: Bearer ${TOKEN}" \
      +--header "Content-Type: application/json" \
      +--data-raw '{
      +    "collectionName": "demo",
      +    "data": [
      +        "whats the focus of information retrieval?"
      +    ],
      +    "annsField": "sparse",
      +    "limit": 3,
      +    "outputFields": [
      +        "text"
      +    ],
      +    "searchParams":{
      +        "params":{
      +            "drop_ratio_search":0.6
      +        }
      +    }
      +}'
       

      パラメータ

      説明

      diff --git a/localization/v2.5.x/site/ja/userGuide/search-query-get/keyword-match.json b/localization/v2.5.x/site/ja/userGuide/search-query-get/keyword-match.json index 93538525a..7f69a57be 100644 --- a/localization/v2.5.x/site/ja/userGuide/search-query-get/keyword-match.json +++ b/localization/v2.5.x/site/ja/userGuide/search-query-get/keyword-match.json @@ -1 +1 @@ -{"codeList":["from pymilvus import MilvusClient, DataType​\n​\nschema = MilvusClient.create_schema(auto_id=True, enable_dynamic_field=False)​\n​\nschema.add_field(​\n field_name='text', ​\n datatype=DataType.VARCHAR, ​\n max_length=1000, ​\n enable_analyzer=True, # Whether to enable text analysis for this field​\n enable_match=True # Whether to enable text match​\n)​\n\n","analyzer_params={​\n \"type\": \"english\"​\n}​\n​\nschema.add_field(​\n field_name='text', ​\n datatype=DataType.VARCHAR, ​\n max_length=200, ​\n enable_analyzer=True,​\n analyzer_params=analyzer_params,​\n enable_match=True, ​\n)​\n\n","TEXT_MATCH(field_name, text)​\n\n","filter = \"TEXT_MATCH(text, 'machine deep')\"​\n\n","filter = \"TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')\"​\n\n","# Match entities with `keyword1` or `keyword2`​\nfilter = \"TEXT_MATCH(text, 'keyword1 keyword2')\"​\n​\n# Assuming 'embeddings' is the vector field and 'text' is the VARCHAR field​\nresult = MilvusClient.search(​\n collection_name=\"YOUR_COLLECTION_NAME\", # Your collection name​\n anns_field=\"embeddings\", # Vector field name​\n data=[query_vector], # Query vector​\n filter=filter,​\n search_params={\"params\": {\"nprobe\": 10}},​\n limit=10, # Max. number of results to return​\n output_fields=[\"id\", \"text\"] # Fields to return​\n)​\n\n","# Match entities with both `keyword1` and `keyword2`​\nfilter = \"TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')\"​\n​\nresult = MilvusClient.query(​\n collection_name=\"YOUR_COLLECTION_NAME\",​\n filter=filter, ​\n output_fields=[\"id\", \"text\"]​\n)​\n\n"],"headingContent":"Keyword Match​","anchorList":[{"label":"キーワードマッチ","href":"Keyword-Match​","type":1,"isActive":false},{"label":"概要","href":"Overview","type":2,"isActive":false},{"label":"キーワード一致を有効にする","href":"Enable-keyword-match","type":2,"isActive":false},{"label":"キーワード一致の使用","href":"Use-keyword-match","type":2,"isActive":false},{"label":"注意点","href":"Considerations","type":2,"isActive":false}]} \ No newline at end of file +{"codeList":["from pymilvus import MilvusClient, DataType​\n​\nschema = MilvusClient.create_schema(auto_id=True, enable_dynamic_field=False)​\n​\nschema.add_field(​\n field_name='text', ​\n datatype=DataType.VARCHAR, ​\n max_length=1000, ​\n enable_analyzer=True, # Whether to enable text analysis for this field​\n enable_match=True # Whether to enable text match​\n)​\n\n","import io.milvus.v2.common.DataType;\nimport io.milvus.v2.service.collection.request.AddFieldReq;\nimport io.milvus.v2.service.collection.request.CreateCollectionReq;\n\nCreateCollectionReq.CollectionSchema schema = CreateCollectionReq.CollectionSchema.builder()\n .enableDynamicField(false)\n .build();\n\nschema.addField(AddFieldReq.builder()\n .fieldName(\"text\")\n .dataType(DataType.VarChar)\n .maxLength(1000)\n .enableAnalyzer(true)\n .enableMatch(true)\n .build());\n\n","const schema = [\n {\n name: \"id\",\n data_type: DataType.Int64,\n is_primary_key: true,\n },\n {\n name: \"text\",\n data_type: \"VarChar\",\n enable_analyzer: true,\n enable_match: true,\n max_length: 1000,\n },\n {\n name: \"sparse\",\n data_type: DataType.SparseFloatVector,\n 
},\n];\n\n","export schema='{\n \"autoId\": true,\n \"enabledDynamicField\": false,\n \"fields\": [\n {\n \"fieldName\": \"id\",\n \"dataType\": \"Int64\",\n \"isPrimary\": true\n },\n {\n \"fieldName\": \"text\",\n \"dataType\": \"VarChar\",\n \"elementTypeParams\": {\n \"max_length\": 1000,\n \"enable_analyzer\": true,\n \"enable_match\": true\n }\n },\n {\n \"fieldName\": \"sparse\",\n \"dataType\": \"SparseFloatVector\"\n }\n ]\n }'\n\n","analyzer_params={​\n \"type\": \"english\"​\n}​\n​\nschema.add_field(​\n field_name='text', ​\n datatype=DataType.VARCHAR, ​\n max_length=200, ​\n enable_analyzer=True,​\n analyzer_params=analyzer_params,​\n enable_match=True, ​\n)​\n\n","Map analyzerParams = new HashMap<>();\nanalyzerParams.put(\"type\", \"english\");\nschema.addField(AddFieldReq.builder()\n .fieldName(\"text\")\n .dataType(DataType.VarChar)\n .maxLength(200)\n .enableAnalyzer(true)\n .analyzerParams(analyzerParams)\n .enableMatch(true)\n .build());\n\n","const schema = [\n {\n name: \"id\",\n data_type: DataType.Int64,\n is_primary_key: true,\n },\n {\n name: \"text\",\n data_type: \"VarChar\",\n enable_analyzer: true,\n enable_match: true,\n max_length: 1000,\n analyzer_params: { type: 'english' },\n },\n {\n name: \"sparse\",\n data_type: DataType.SparseFloatVector,\n },\n];\n\n","export schema='{\n \"autoId\": true,\n \"enabledDynamicField\": false,\n \"fields\": [\n {\n \"fieldName\": \"id\",\n \"dataType\": \"Int64\",\n \"isPrimary\": true\n },\n {\n \"fieldName\": \"text\",\n \"dataType\": \"VarChar\",\n \"elementTypeParams\": {\n \"max_length\": 200,\n \"enable_analyzer\": true,\n \"enable_match\": true,\n \"analyzer_params\": {\"type\": \"english\"}\n }\n },\n {\n \"fieldName\": \"my_vector\",\n \"dataType\": \"FloatVector\",\n \"elementTypeParams\": {\n \"dim\": \"5\"\n }\n }\n ]\n }'\n\n","TEXT_MATCH(field_name, text)​\n\n","filter = \"TEXT_MATCH(text, 'machine deep')\"​\n","String filter = \"TEXT_MATCH(text, 'machine deep')\";\n","const filter = \"TEXT_MATCH(text, 'machine deep')\";\n","export filter=\"\\\"TEXT_MATCH(text, 'machine deep')\\\"\"\n","filter = \"TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')\"​\n","String filter = \"TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')\";\n","const filter = \"TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')\"\n","export filter=\"\\\"TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')\\\"\"\n","# Match entities with `keyword1` or `keyword2`​\nfilter = \"TEXT_MATCH(text, 'keyword1 keyword2')\"​\n​\n# Assuming 'embeddings' is the vector field and 'text' is the VARCHAR field​\nresult = MilvusClient.search(​\n collection_name=\"YOUR_COLLECTION_NAME\", # Your collection name​\n anns_field=\"embeddings\", # Vector field name​\n data=[query_vector], # Query vector​\n filter=filter,​\n search_params={\"params\": {\"nprobe\": 10}},​\n limit=10, # Max. 
number of results to return​\n output_fields=[\"id\", \"text\"] # Fields to return​\n)​\n\n","String filter = \"TEXT_MATCH(text, 'keyword1 keyword2')\";\n\nSearchResp searchResp = client.search(SearchReq.builder()\n .collectionName(\"YOUR_COLLECTION_NAME\")\n .annsField(\"embeddings\")\n .data(Collections.singletonList(queryVector)))\n .filter(filter)\n .topK(10)\n .outputFields(Arrays.asList(\"id\", \"text\"))\n .build());\n","// Match entities with `keyword1` or `keyword2`\nconst filter = \"TEXT_MATCH(text, 'keyword1 keyword2')\";\n\n// Assuming 'embeddings' is the vector field and 'text' is the VARCHAR field\nconst result = await client.search(\n collection_name: \"YOUR_COLLECTION_NAME\", // Your collection name\n anns_field: \"embeddings\", // Vector field name\n data: [query_vector], // Query vector\n filter: filter,\n params: {\"nprobe\": 10},\n limit: 10, // Max. number of results to return\n output_fields: [\"id\", \"text\"] //Fields to return\n);\n","export filter=\"\\\"TEXT_MATCH(text, 'keyword1 keyword2')\\\"\"\n\nexport CLUSTER_ENDPOINT=\"http://localhost:19530\"\nexport TOKEN=\"root:Milvus\"\n\ncurl --request POST \\\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/entities/search\" \\\n--header \"Authorization: Bearer ${TOKEN}\" \\\n--header \"Content-Type: application/json\" \\\n-d '{\n \"collectionName\": \"demo2\",\n \"annsField\": \"my_vector\",\n \"data\": [[0.19886812562848388, 0.06023560599112088, 0.6976963061752597, 0.2614474506242501, 0.838729485096104]],\n \"filter\": '\"$filter\"',\n \"searchParams\": {\n \"params\": {\n \"nprobe\": 10\n }\n },\n \"limit\": 3,\n \"outputFields\": [\"text\",\"id\"]\n}'\n","# Match entities with both `keyword1` and `keyword2`​\nfilter = \"TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')\"​\n​\nresult = MilvusClient.query(​\n collection_name=\"YOUR_COLLECTION_NAME\",​\n filter=filter, ​\n output_fields=[\"id\", \"text\"]​\n)​\n\n","String filter = \"TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')\";\n\nQueryResp queryResp = client.query(QueryReq.builder()\n .collectionName(\"YOUR_COLLECTION_NAME\")\n .filter(filter)\n .outputFields(Arrays.asList(\"id\", \"text\"))\n .build()\n);\n","// Match entities with both `keyword1` and `keyword2`\nconst filter = \"TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')\";\n\nconst result = await client.query(\n collection_name: \"YOUR_COLLECTION_NAME\",\n filter: filter, \n output_fields: [\"id\", \"text\"]\n)\n","export filter=\"\\\"TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')\\\"\"\n\nexport CLUSTER_ENDPOINT=\"http://localhost:19530\"\nexport TOKEN=\"root:Milvus\"\n\ncurl --request POST \\\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/entities/query\" \\\n--header \"Authorization: Bearer ${TOKEN}\" \\\n--header \"Content-Type: application/json\" \\\n-d '{\n \"collectionName\": \"demo2\",\n \"filter\": '\"$filter\"',\n \"outputFields\": [\"id\", \"text\"]\n}'\n"],"headingContent":"Text Match​","anchorList":[{"label":"テキストマッチ","href":"Text-Match​","type":1,"isActive":false},{"label":"概要","href":"Overview","type":2,"isActive":false},{"label":"テキストマッチを有効にする","href":"Enable-text-match","type":2,"isActive":false},{"label":"テキストマッチの使用","href":"Use-text-match","type":2,"isActive":false},{"label":"注意点","href":"Considerations","type":2,"isActive":false}]} \ No newline at end of file diff --git a/localization/v2.5.x/site/ja/userGuide/search-query-get/keyword-match.md b/localization/v2.5.x/site/ja/userGuide/search-query-get/keyword-match.md index 61f835d23..362cd5127 
100644
--- a/localization/v2.5.x/site/ja/userGuide/search-query-get/keyword-match.md
+++ b/localization/v2.5.x/site/ja/userGuide/search-query-get/keyword-match.md
@@ -1,10 +1,10 @@
 ---
 id: keyword-match.md
 summary: >-
-  Milvusのキーワードマッチは、特定の用語に基づいた正確な文書検索を可能にする。この機能は主に特定の条件を満たすフィルタリング検索に使用され、スカラーフィルタリングを組み込んでクエリ結果を絞り込むことができるため、スカラー条件を満たすベクトル内の類似検索が可能である。
-title: キーワードマッチ
+  Milvusのテキストマッチは、特定の用語に基づいた正確な文書検索を可能にします。この機能は、主に特定の条件を満たすフィルタリング検索に使用され、スカラーフィルタリングを組み込んでクエリ結果を絞り込むことができるため、スカラー条件を満たすベクトル内の類似検索が可能です。
+title: テキストマッチ
 ---
-

      キーワードマッチ

      Milvusのキーワードマッチは、特定の用語に基づいた正確な文書検索を可能にします。この機能は主に特定の条件を満たすためのフィルタリング検索に使用され、クエリ結果を絞り込むためにスカラーフィルタリングを組み込むことができ、スカラー条件を満たすベクトル内の類似検索を可能にします。

      +

      Milvusのテキストマッチは、特定の用語に基づいた正確な文書検索を可能にします。この機能は主に特定の条件を満たすためのフィルタリング検索に使用され、クエリ結果を絞り込むためにスカラーフィルタリングを組み込むことができ、スカラー条件を満たすベクトル内の類似検索を可能にします。

      -

      キーワードマッチは、マッチした文書の関連性をスコアリングすることなく、クエリー用語の正確な出現箇所を見つけることに重点を置いています。クエリー用語の意味や重要性に基づいて最も関連性の高い文書を検索したい場合は、Full Text Searchを使用することをお勧めします。

      +

      テキストマッチは、マッチした文書の関連性をスコアリングすることなく、クエリー用語の正確な出現箇所を見つけることに重点を置いています。クエリー用語の意味や重要性に基づいて最も関連性の高い文書を検索したい場合は、Full Text Searchを使用することをお勧めします。

      概要

      MilvusはTantivyを統合し、転置インデックスとキーワード検索を実現しています。Milvusは各テキストエントリに対して、以下の手順でインデックスを作成します。

      +

      MilvusはTantivyを統合し、転置インデックスと用語ベースのテキスト検索を実現しています。Milvusは各テキストエントリに対して、以下の手順でインデックスを作成します。

  -
1. アナライザー:アナライザは入力テキストを個々の単語(トークン)にトークン化し、必要に応じてフィルタを適用して処理します。これにより、Milvusはこれらのトークンに基づいたインデックスを構築することができる。

  -
2. インデックス作成:テキスト解析後、Milvusは各トークンを含む文書に対応付ける転置インデックスを作成する。

  +
1. アナライザー:アナライザは、入力テキストを個々の単語(トークン)にトークン化し、必要に応じてフィルタを適用することで処理します。これにより、Milvusはこれらのトークンに基づいたインデックスを構築することができます。

  +
2. インデックス作成:テキスト解析後、Milvusは各トークンを含む文書に対応付ける転置インデックスを作成します。

      -

      ユーザがキーワードマッチを実行すると、転置インデックスがキーワードを含む全ての文書を素早く検索するために使用される。これは、各文書を個別にスキャンするよりもはるかに高速です。

      +

      ユーザがテキストマッチを実行すると、転置インデックスがその用語を含む全ての文書を素早く検索するために使用される。これは、各文書を個別にスキャンするよりもはるかに高速です。

      - Keyword Match - キーワードマッチ
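転置インデックスの考え方を補足するための、ごく簡単な概念スケッチです(MilvusやTantivyの実装そのものではありません)。トークンごとに、それを含む文書IDの集合(ポスティングリスト)を保持し、検索時はその集合を参照します。

from collections import defaultdict

# Conceptual sketch only (not Milvus/Tantivy internals).
docs = {
    0: "machine learning and deep learning",
    1: "deep neural networks",
    2: "classical machine vision",
}

inverted_index = defaultdict(set)
for doc_id, text in docs.items():
    for token in text.lower().split():  # stands in for the analyzer step
        inverted_index[token].add(doc_id)

# OR semantics, like TEXT_MATCH(text, 'machine deep'): union of posting lists
print(inverted_index["machine"] | inverted_index["deep"])  # {0, 1, 2}

# AND semantics, like combining two TEXT_MATCH expressions with `and`: intersection
print(inverted_index["machine"] & inverted_index["deep"])  # {0}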

      -

      キーワード一致を有効にする

      キーワードマッチはVARCHAR フィールドタイプで機能します。これは基本的にmilvusの文字列データ型です。キーワード照合を有効にするには、enable_analyzerenable_match の両方をTrue に設定し、コレクションスキーマを定義する際にオプションでテキスト分析用のアナライザを設定します。

      -

      enable_analyzer およびenable_matchを設定します。

      特定のVARCHAR フィールドのキーワード照合を有効にするには、フィールドスキーマの定義時にenable_analyzerenable_match パラメータの両方をTrue に設定します。これにより、Milvusはテキストをトークン化し、指定されたフィールドに対して転置インデックスを作成し、高速かつ効率的なキーワードマッチを可能にします。

      +

      テキストマッチはVARCHAR フィールドタイプで機能します。これは基本的にmilvusの文字列データ型です。テキスト照合を有効にするには、enable_analyzerenable_match の両方をTrue に設定し、コレクションスキーマを定義するときにオプションでテキスト分析用のアナライザを設定します。

      +

      enable_analyzer およびenable_matchを設定します。

      特定のVARCHAR フィールドのテキスト照合を有効にするには、フィールドスキーマの定義時にenable_analyzerenable_match パラメータの両方をTrue に設定します。これにより、Milvusはテキストをトークン化し、指定されたフィールドに対して転置インデックスを作成し、高速で効率的なテキストマッチを可能にします。

      +
      from pymilvus import MilvusClient, DataType​
       ​
       schema = MilvusClient.create_schema(auto_id=True, enable_dynamic_field=False)​
      @@ -78,9 +80,74 @@ schema.add_field(​
       )​
       
       
      -

      オプション:アナライザの設定

      キーワード・マッチングのパフォーマンスと精度は、選択したアナライザに依存します。さまざまなアナライザは、さまざまな言語やテキスト構造に合わせて調整されているため、適切なアナライザを選択することで、特定のユースケースの検索結果に大きな影響を与えることができます。

      +
      import io.milvus.v2.common.DataType;
      +import io.milvus.v2.service.collection.request.AddFieldReq;
      +import io.milvus.v2.service.collection.request.CreateCollectionReq;
      +
      +CreateCollectionReq.CollectionSchema schema = CreateCollectionReq.CollectionSchema.builder()
      +        .enableDynamicField(false)
      +        .build();
      +
      +schema.addField(AddFieldReq.builder()
      +        .fieldName("text")
      +        .dataType(DataType.VarChar)
      +        .maxLength(1000)
      +        .enableAnalyzer(true)
      +        .enableMatch(true)
      +        .build());
      +
      +
      +
      const schema = [
      +  {
      +    name: "id",
      +    data_type: DataType.Int64,
      +    is_primary_key: true,
      +  },
      +  {
      +    name: "text",
      +    data_type: "VarChar",
      +    enable_analyzer: true,
      +    enable_match: true,
      +    max_length: 1000,
      +  },
      +  {
      +    name: "sparse",
      +    data_type: DataType.SparseFloatVector,
      +  },
      +];
      +
      +
      +
      export schema='{
      +        "autoId": true,
      +        "enabledDynamicField": false,
      +        "fields": [
      +            {
      +                "fieldName": "id",
      +                "dataType": "Int64",
      +                "isPrimary": true
      +            },
      +            {
      +                "fieldName": "text",
      +                "dataType": "VarChar",
      +                "elementTypeParams": {
      +                    "max_length": 1000,
      +                    "enable_analyzer": true,
      +                    "enable_match": true
      +                }
      +            },
      +            {
      +                "fieldName": "sparse",
      +                "dataType": "SparseFloatVector"
      +            }
      +        ]
      +    }'
      +
      +
      +

      オプション:アナライザーの設定

      テキスト・マッチングのパフォーマンスと精度は、選択したアナライザーに依存します。異なるアナライザは様々な言語やテキスト構造に合わせて調整されているため、適切なアナライザを選択することで、特定のユースケースの検索結果に大きな影響を与えることができます。

      デフォルトでは、Milvusはstandard アナライザーを使用します。このアナライザーは、空白と句読点に基づいてテキストをトークン化し、40文字以上のトークンを削除し、テキストを小文字に変換します。このデフォルト設定を適用するために追加のパラメータは必要ありません。詳細については、「標準」を参照してください。
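参考として、上記で説明したstandard アナライザーの挙動(空白・句読点での分割、40文字を超えるトークンの除去、小文字化)を模した簡単なスケッチを示します(実際のTantivyの実装ではありません)。

import re

# Illustrative only: approximates the behavior described for the standard analyzer.
def standard_analyze(text: str, max_token_len: int = 40) -> list[str]:
    tokens = re.split(r"[\s\W]+", text)
    return [t.lower() for t in tokens if t and len(t) <= max_token_len]

print(standard_analyze("Milvus integrates Tantivy, enabling FAST text match!"))
# ['milvus', 'integrates', 'tantivy', 'enabling', 'fast', 'text', 'match']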

      -

      別のアナライザが必要な場合は、analyzer_params パラメータを使用してアナライザを設定できます。例えば、英語のテキストを処理するためにenglish アナライザを適用する場合などです。

      +

      別のアナライザが必要な場合は、analyzer_params パラメータを使用してアナライザを設定できます。例えば、英語のテキストを処理するためにenglish アナライザを適用する場合。

      +
      analyzer_params={​
           "type": "english"​
       }​
      @@ -94,9 +161,72 @@ schema.add_field(​
           enable_match=True, ​
       )​
       
      +
      +
      Map<String, Object> analyzerParams = new HashMap<>();
      +analyzerParams.put("type", "english");
      +schema.addField(AddFieldReq.builder()
      +        .fieldName("text")
      +        .dataType(DataType.VarChar)
      +        .maxLength(200)
      +        .enableAnalyzer(true)
      +        .analyzerParams(analyzerParams)
      +        .enableMatch(true)
      +        .build());
      +
      +
      +
      const schema = [
      +  {
      +    name: "id",
      +    data_type: DataType.Int64,
      +    is_primary_key: true,
      +  },
      +  {
      +    name: "text",
      +    data_type: "VarChar",
      +    enable_analyzer: true,
      +    enable_match: true,
      +    max_length: 1000,
      +    analyzer_params: { type: 'english' },
      +  },
      +  {
      +    name: "sparse",
      +    data_type: DataType.SparseFloatVector,
      +  },
      +];
      +
      +
      +
      export schema='{
      +        "autoId": true,
      +        "enabledDynamicField": false,
      +        "fields": [
      +            {
      +                "fieldName": "id",
      +                "dataType": "Int64",
      +                "isPrimary": true
      +            },
      +            {
      +                "fieldName": "text",
      +                "dataType": "VarChar",
      +                "elementTypeParams": {
      +                    "max_length": 200,
      +                    "enable_analyzer": true,
      +                    "enable_match": true,
      +                    "analyzer_params": {"type": "english"}
      +                }
      +            },
      +            {
      +                "fieldName": "my_vector",
      +                "dataType": "FloatVector",
      +                "elementTypeParams": {
      +                    "dim": "5"
      +                }
      +            }
      +        ]
      +    }'
      +
       

      Milvusは他にも様々な言語やシナリオに適したアナライザを提供しています。詳細については、「概要」を参照してください。

      -

      キーワード一致の使用

      コレクションスキーマのVARCHARフィールドでキーワードマッチを有効にすると、TEXT_MATCH 式を使用してキーワードマッチを実行できます。

      -

      TEXT_MATCH 式の構文

      TEXT_MATCH 式を使用して、検索するフィールドとキーワードを指定します。その構文は以下のとおりです。

      -
      TEXT_MATCH(field_name, text)​
      +    

      コレクションスキーマでVARCHARフィールドのテキストマッチを有効にすると、TEXT_MATCH 式を使用してテキストマッチを実行できます。

      +

      TEXT_MATCH 式の構文

      TEXT_MATCH 式を使用して、検索するフィールドと用語を指定します。その構文は以下のとおりです。

      +
      TEXT_MATCH(field_name, text)​
       
       
      • field_name:検索するVARCHARフィールドの名前。

      • -
      • text:検索するキーワード。複数のキーワードは、言語や設定されている解析器に応じて、スペースやその他の適切な区切り文字で区切ることができます。

      • +
      • text:検索する条件。複数の用語は、言語や設定されている解析器に応じて、スペースやその他の適切な区切り文字で区切ることができます。

      -

      デフォルトでは、TEXT_MATCHORマッチング・ロジックを使用します。つまり、指定したキーワードのいずれかを含む文書を返します。たとえば、text フィールドにmachine またはdeep というキーワードを含む文書を検索するには、次の式を使用します。

      +

      デフォルトでは、TEXT_MATCHORマッチング・ロジックを使用します。つまり、指定された用語のいずれかを含む文書を返します。たとえば、text フィールドにmachine またはdeep という用語を含むドキュメントを検索するには、次の式を使用します。

      +
      filter = "TEXT_MATCH(text, 'machine deep')"​
      -
       
      -

      また、論理演算子を使用して複数のTEXT_MATCH 式を組み合わせ、ANDマッチングを実行することもできます。たとえば、text フィールドにmachinedeep の両方を含む文書を検索するには、次の式を使用します。

      +
      String filter = "TEXT_MATCH(text, 'machine deep')";
      +
      +
      const filter = "TEXT_MATCH(text, 'machine deep')";
      +
      +
      export filter="\"TEXT_MATCH(text, 'machine deep')\""
      +
      +

      また、論理演算子を使用して複数のTEXT_MATCH 式を組み合わせ、ANDマッチを実行することもできます。たとえば、text フィールドにmachinedeep の両方を含む文書を検索するには、次の式を使用します。

      +
      filter = "TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')"​
      -
       
      -

      キーワードマッチで検索

      キーワードマッチは、ベクトル類似性検索と組み合わせて使用することで、検索範囲を狭め、検索パフォーマンスを向上させることができます。ベ ク ト ル類似検索の前にキーワー ド 整合を使っ て コ レ ク シ ョ ン を フ ィ ル タ リ ン グす る こ と で、 検索す る 必要があ る 文書の数を減 ら す こ と がで き 、 結果 と し て ク エ リ 時間が短縮 さ れます。

      -

      こ の例では、filter 式が検索結果を フ ィ ル タ し て、 指定 さ れたキー ワー ドkeyword1 またはkeyword2 に一致す る 文書のみを含めます。 次に、 ベ ク ト ル類似性検索は、 こ の フ ィ ル タ さ れた部分集合の文書に対 し て実行 さ れます。

      +
      String filter = "TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')";
      +
      +
      const filter = "TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')"
      +
      +
      export filter="\"TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')\""
      +
      +

      テキストマッチによる検索

      テキスト・マッチをベクトル類似性検索と組み合わせて使用することで、検索範囲を狭め、検索パフォーマンスを向上させることができます。ベクトル類似検索の前にテキストマッチを使ってコレクションをフィルタリングすることで、検索が必要なドキュメントの数を減らすことができ、結果的にクエリー時間を短縮することができます。

      +

      この例では、filter 式は、指定された用語keyword1 またはkeyword2 に一致する文書のみを含むように検索結果をフィルタリングします。その後、ベクトル類似性検索は、このフィルタリングされた文書のサブセットに対して実行されます。

      +
      # Match entities with `keyword1` or `keyword2`​
       filter = "TEXT_MATCH(text, 'keyword1 keyword2')"​
       ​
      @@ -145,8 +291,58 @@ result = MilvusClient.search(​
       )​
       
       
      -

      キーワードマッチによるクエリー

      キーワードマッチはクエリー操作のスカラーフィルタリングにも使うことができます。query() メソッドのexpr パラメータにTEXT_MATCH 式を指定することで、与えられたキーワードにマッチする文書を検索することができます。

      -

      以下の例は、text フィールドがキーワードkeyword1keyword2 の両方を含む文書を検索します。

      +
      String filter = "TEXT_MATCH(text, 'keyword1 keyword2')";
      +
      +SearchResp searchResp = client.search(SearchReq.builder()
      +        .collectionName("YOUR_COLLECTION_NAME")
      +        .annsField("embeddings")
      +        .data(Collections.singletonList(queryVector)))
      +        .filter(filter)
      +        .topK(10)
      +        .outputFields(Arrays.asList("id", "text"))
      +        .build());
      +
      +
      // Match entities with `keyword1` or `keyword2`
      +const filter = "TEXT_MATCH(text, 'keyword1 keyword2')";
      +
      +// Assuming 'embeddings' is the vector field and 'text' is the VARCHAR field
      +const result = await client.search(
      +    collection_name: "YOUR_COLLECTION_NAME", // Your collection name
      +    anns_field: "embeddings", // Vector field name
      +    data: [query_vector], // Query vector
      +    filter: filter,
      +    params: {"nprobe": 10},
      +    limit: 10, // Max. number of results to return
      +    output_fields: ["id", "text"] //Fields to return
      +);
      +
      +
      export filter="\"TEXT_MATCH(text, 'keyword1 keyword2')\""
      +
      +export CLUSTER_ENDPOINT="http://localhost:19530"
      +export TOKEN="root:Milvus"
      +
      +curl --request POST \
      +--url "${CLUSTER_ENDPOINT}/v2/vectordb/entities/search" \
      +--header "Authorization: Bearer ${TOKEN}" \
      +--header "Content-Type: application/json" \
      +-d '{
      +    "collectionName": "demo2",
      +    "annsField": "my_vector",
      +    "data": [[0.19886812562848388, 0.06023560599112088, 0.6976963061752597, 0.2614474506242501, 0.838729485096104]],
      +    "filter": '"$filter"',
      +    "searchParams": {
      +        "params": {
      +            "nprobe": 10
      +        }
      +    },
      +    "limit": 3,
      +    "outputFields": ["text","id"]
      +}'
      +
      +

      テキストマッチによるクエリー

テキストマッチは、クエリー操作のスカラーフィルタリングにも使うことができます。query() メソッドのexpr パラメータにTEXT_MATCH 式を指定することで、与えられた条件にマッチするドキュメントを取得することができます。

      +

      以下の例は、text フィールドがkeyword1keyword2 の両方の用語を含むドキュメントを検索します。

      +
      # Match entities with both `keyword1` and `keyword2`​
       filter = "TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')"​
       ​
      @@ -156,6 +352,39 @@ result = MilvusClient.query(​
           output_fields=["id", "text"]​
       )​
       
      +
      +
      String filter = "TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')";
      +
      +QueryResp queryResp = client.query(QueryReq.builder()
      +        .collectionName("YOUR_COLLECTION_NAME")
      +        .filter(filter)
      +        .outputFields(Arrays.asList("id", "text"))
      +        .build()
      +);
      +
      +
      // Match entities with both `keyword1` and `keyword2`
      +const filter = "TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')";
      +
      +const result = await client.query(
      +    collection_name: "YOUR_COLLECTION_NAME",
      +    filter: filter, 
      +    output_fields: ["id", "text"]
      +)
      +
      +
      export filter="\"TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')\""
      +
      +export CLUSTER_ENDPOINT="http://localhost:19530"
      +export TOKEN="root:Milvus"
      +
      +curl --request POST \
      +--url "${CLUSTER_ENDPOINT}/v2/vectordb/entities/query" \
      +--header "Authorization: Bearer ${TOKEN}" \
      +--header "Content-Type: application/json" \
      +-d '{
      +    "collectionName": "demo2",
      +    "filter": '"$filter"',
      +    "outputFields": ["id", "text"]
      +}'
       

      注意点

        -
      • フィールドのキーワード・マッチを有効にすると、転置インデックスが作成され、 ストレージ・リソースが消費されます。この機能を有効にするかどうかは、テキスト・サイズ、一意のトークン、使用する解析器によって異なるため、ストレージへの影響を考慮してください。

      • -
      • スキーマでアナライザを定義すると、その設定はそのコレクションに対して永続的になります。別のアナライザの方がニーズに合っていると判断した場合は、既存のコレクションを削除して、希望するアナライザ設定で新しいコレクションを作成することを検討できます。

      • +
      • フィールドのテキスト・マッチを有効にすると、転置インデックスが作成され、 ストレージ・リソースが消費されます。この機能を有効にするかどうかは、テキストのサイズや一意なトークン、使用する解析器によって異なるので、ストレージへの影響を考慮してください。

      • +
      • スキーマでアナライザを定義すると、その設定はそのコレクションに対して永続的になります。別のアナライザの方がニーズに合っていると判断した場合は、既存のコレクションを削除して、必要なアナライザ設定で新しいコレクションを作成することを検討できます。

diff --git a/localization/v2.5.x/site/ja/userGuide/search-query-get/multi-vector-search.md b/localization/v2.5.x/site/ja/userGuide/search-query-get/multi-vector-search.md
index 2f0aec298..79472322e 100644
--- a/localization/v2.5.x/site/ja/userGuide/search-query-get/multi-vector-search.md
+++ b/localization/v2.5.x/site/ja/userGuide/search-query-get/multi-vector-search.md
@@ -39,10 +39,10 @@ title: ハイブリッド検索

      ハイブリッド検索は以下の2つのシナリオに適しています。

      疎密ベクトル検索

      異なるタイプのベクトルは異なる情報を表現することができ、様々な埋め込みモデルを使用することで、データの異なる特徴や側面をより包括的に表現することができます。例えば、同じ文に対して異なる埋め込みモデルを使用することで、意味的な意味を表す密なベクトルと、文中の単語頻度を表す疎なベクトルを生成することができる。

        -
      • スパース・ベクトル:スパースベクトルは、ベクトル次元が高く、非ゼロ値が少ないという特徴がある。この構造により、従来の情報検索アプリケーションに特に適している。ほとんどの場合、スパース・ベクトルで使用される次元数は、1つまたは複数の言語にわたる異なるトークンに対応します。各次元には、文書内のそのトークンの相対的な重要度を示す値が割り当てられます。このレイアウトは、キーワードのマッチングを伴うタスクに有利です。

      • -
      • 密なベクトル:密なベクトルは、ニューラルネットワークから派生した埋め込みである。順序付けられた配列に配置されたとき、これらのベクトルは入力テキストの意味的本質を捉える。密なベクトルはテキスト処理に限定されるものではなく、視覚データの意味を表現するためにコンピュータビジョンでも広く使用されている。これらの密なベクトルは、通常テキスト埋め込みモデルによって生成され、ほとんどの要素またはすべての要素が非ゼロであることを特徴とする。したがって、密なベクトルは意味検索アプリケーションに特に有効であり、キーワードの完全一致がない場合でも、ベクトル距離に基づいて最も類似した結果を返すことができる。この機能により、キーワードベースのアプローチでは見逃されがちな概念間の関係性を捉えることができ、よりニュアンスや文脈を考慮した検索結果を得ることができる。

      • +
      • スパース・ベクトル:スパースベクトルは、ベクトル次元が高く、非ゼロ値が少ないという特徴がある。この構造により、従来の情報検索アプリケーションに特に適している。ほとんどの場合、スパースベクトルで使用される次元数は、1つ以上の言語にわたる異なるトークンに対応します。各次元には、文書内のそのトークンの相対的な重要度を示す値が割り当てられます。このレイアウトは、テキストのマッチングを伴うタスクに有利です。

      • +
      • 密なベクトル:密なベクトルは、ニューラルネットワークに由来する埋め込みである。順序付けられた配列に配置されたとき、これらのベクトルは入力テキストの意味的本質を捉える。密なベクトルはテキスト処理に限定されるものではなく、視覚データの意味を表現するためにコンピュータビジョンでも広く使用されている。これらの密なベクトルは、通常テキスト埋め込みモデルによって生成され、ほとんどの要素またはすべての要素が非ゼロであることを特徴とする。したがって、密なベクトルは意味検索アプリケーションに特に効果的であり、テキストが完全に一致しない場合でも、ベクトル距離に基づいて最も類似した結果を返すことができる。この機能により、キーワードベースのアプローチでは見逃されがちな概念間の関係性を捉えることができ、よりニュアンスや文脈を考慮した検索結果を得ることができます。

      -

      詳しくは、Sparse Vectorと Dense Vectorを参照してください。

      +

      詳しくは、Sparse Vectorと Dense Vectorをご参照ください。
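参考として、密ベクトルとスパース・ベクトルの典型的な表現の違いを示す仮の例を挙げます(Milvus APIの呼び出しではありません)。なお、pymilvusではスパース・ベクトルを{次元インデックス: 値}の辞書形式で渡すことができます。

# Illustrative only: typical in-memory representations, not a Milvus API call.
dense_vector = [0.12, -0.53, 0.98, 0.07, 0.44]          # nearly every element is non-zero
sparse_vector = {1203: 0.42, 5877: 1.13, 90214: 0.27}   # token index -> weight; zero entries are omitted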

      マルチモーダル検索

      マルチモーダル検索とは、複数のモダリティ(画像、動画、音声、テキストなど)にまたがる非構造化データの類似検索を指す。例えば、人物は指紋、声紋、顔の特徴など様々なモダリティのデータを使って表現することができる。ハイブリッド検索は、複数の検索を同時にサポートする。例えば、指紋と声紋の両方が似ている人物を検索することができます。

      ワークフロー

このセクションでは、テキスト検索の精度を高めるために、疎ベクトルと密ベクトルを使ってハイブリッド検索を行う方法を、具体的な例を使って説明する。

      複数のベクトル・フィールドでコレクションを作成する

コレクションを作成するプロセスには、コレクション・スキーマの定義、インデックス・パラメータの構成、コレクションの作成の3つの部分があります。

      スキーマの定義

      この例では、コレクションスキーマ内で複数のベクトルフィールドを定義する必要があります。現在、各コレクションはデフォルトで最大4つのベクトルフィールドを含むことができます。しかし proxy.maxVectorFieldNum の値を変更することもできます。

      -

      以下の例では、densesparse の2つのベクトル・フィールドをコレクション・スキーマとして定義しています。

      +

      以下の例では、densesparse の2つのベクトル・フィールドをコレクション・スキーマに定義しています。

      • id:このフィールドは、テキストIDを格納するプライマリキーの役割を果たす。このフィールドのデータ型はINT64である。

      • text:このフィールドはテキストコンテンツを格納するために使用される。このフィールドのデータ型は VARCHAR で、最大長は 1000 文字である。

      • @@ -604,7 +604,7 @@ ranker = RRFRanker(
      -

      ハイブリッド検索の実行

      ハイブリッド検索を実行する前に、コレクションをメモリにロードする必要がある。コレクション内のベクトル・フィールドにインデックスがなかったり、ロードされていなかったりすると、Hybrid Searchメソッドを呼び出すときにエラーが発生する。

      +

      ハイブリッド検索の実行

      ハイブリッド検索を実行する前に、コレクションをメモリにロードする必要がある。コレクション内のベクトル・フィールドにインデックスがないか、ロードされていない場合、Hybrid Searchメソッドを呼び出すときにエラーが発生する。

      from pymilvus import MilvusClient​
      diff --git a/localization/v2.5.x/site/ja/userGuide/search-query-get/single-vector-search.md b/localization/v2.5.x/site/ja/userGuide/search-query-get/single-vector-search.md
      index 3c7b59be8..6ae03fcd7 100644
      --- a/localization/v2.5.x/site/ja/userGuide/search-query-get/single-vector-search.md
      +++ b/localization/v2.5.x/site/ja/userGuide/search-query-get/single-vector-search.md
      @@ -38,7 +38,7 @@ title: 基本ANN検索
           

      ANN検索とk-Nearest Neighbors (kNN)検索はベクトル類似検索の常套手段です。kNN検索では、ベクトル空間内のすべてのベクトルを検索リクエストに含まれるクエリーベクトルと比較し、最も類似したベクトルを抽出しなければなりません。

      kNN検索とは異なり、ANN検索アルゴリズムは、ベクトルの埋め込み順をソートしたインデックスファイルを要求します。検索要求が来たとき、インデックスファイルを参照として使用することで、クエリ・ベクトルに最も類似したベクトル埋め込みを含むサブグループを素早く見つけることができます。次に、指定されたメトリックタイプを使用して、クエリベクトルとサブグループ内の類似度を測定し、クエリベクトルとの類似度に基づいてグループメンバーをソートし、上位K個のグループメンバーを割り出すことができます。

      ANN検索は事前に構築されたインデックスに依存しており、検索スループット、メモリ使用量、検索の正しさは、選択したインデックスタイプによって異なる可能性があります。検索のパフォーマンスと正しさのバランスをとる必要がある。

      -

      Milvusは学習曲線を減らすために、AUTOINDEXを提供しています。AUTOINDEXにより、Milvusはインデックスを構築する際にコレクション内のデータ分布を分析し、その分析に基づいて最も最適化されたインデックスパラメータを設定し、検索性能と正しさのバランスを取ることができます。

      +

      Milvusは学習曲線を減らすためにAUTOINDEXを提供しています。AUTOINDEXにより、Milvusはインデックスを構築する際にコレクション内のデータ分布を分析し、その分析に基づいて最も最適化されたインデックスパラメータを設定し、検索性能と正しさのバランスを取ることができます。
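以下は、AUTOINDEX を指定してインデックスを作成する場合の簡単なスケッチです(接続先、コレクション名、フィールド名、メトリックタイプは説明用の仮の値です)。

from pymilvus import MilvusClient

client = MilvusClient(uri="http://localhost:19530")  # assumed endpoint

index_params = client.prepare_index_params()
index_params.add_index(
    field_name="vector",      # assumed vector field name
    index_type="AUTOINDEX",   # let Milvus choose and tune the index
    metric_type="COSINE",     # assumed metric type
)
client.create_index(collection_name="my_collection", index_params=index_params)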

      AUTOINDEXと適用可能なメトリックタイプの詳細については、AUTOINDEXと メトリックタイプをご参照ください。このセクションでは、以下のトピックに関する詳細情報を見つけることができる。

      • 単一ベクトル検索

• @@ -484,7 +484,7 @@ curl --request POST \​

        コレクション内に複数のパーティションを作成し、検索範囲を特定のパーティション数に絞り込むことができるとします。その場合、検索リクエストに対象のパーティション名を含めることで、指定したパーティション内に検索範囲を限定することができます。検索に関わるパーティション数を減らすことで、検索のパフォーマンスが向上します。

        +

        コレクション内に複数のパーティションを作成し、検索範囲を特定のパーティション数に絞り込むことができるとします。その場合、検索リクエストに対象となるパーティション名を含めることで、検索範囲を指定したパーティション内に制限することができます。検索に関わるパーティション数を減らすことで、検索のパフォーマンスが向上します。

        以下のコード・スニペットは、コレクションにPartitionAというパーティションを想定しています。
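  元のスニペットはこの差分には含まれていないため、参考として、検索をPartitionA に限定する場合の一例を示します(コレクション名、フィールド名、クエリベクトルは説明用の仮の値です)。

from pymilvus import MilvusClient

client = MilvusClient(uri="http://localhost:19530")   # assumed endpoint
query_vector = [0.12, -0.53, 0.98, 0.07, 0.44]        # example query vector (dimension is an assumption)

# Restrict the search scope to PartitionA only.
res = client.search(
    collection_name="my_collection",    # assumed collection name
    partition_names=["PartitionA"],
    anns_field="vector",                # assumed vector field name
    data=[query_vector],
    limit=3,
)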

        @@ -902,9 +902,9 @@ curl --request POST \​
      • 全文検索

        全文検索は、テキストデータセット内の特定の語句を含む文書を検索し、関連性に基づいて結果をランク付けする機能である。この機能は、正確な用語を見落とす可能性のあるセマンティック検索の制限を克服し、最も正確で文脈に関連した結果を確実に受け取ることができます。さらに、生のテキスト入力を受け付けることでベクトル検索を簡素化し、ベクトル埋め込みを手動で生成することなく、テキストデータをスパース埋め込みに自動的に変換します。

        全文検索の詳細については、全文検索を参照してください。

      • -
      • キーワードマッチ

        -

        Milvusのキーワードマッチは、特定の用語に基づいた正確な文書検索を可能にします。この機能は、主に特定の条件を満たすフィルタリング検索に使用され、スカラーフィルタリングを組み込んでクエリ結果を絞り込むことができるため、スカラー条件を満たすベクトル内の類似検索が可能です。

        -

        キーワードマッチの詳細については、キーワードマッチを参照してください。

      • +
      • テキストマッチ

        +

        Milvusのテキストマッチは、特定の用語に基づいた正確な文書検索を可能にします。この機能は、主に特定の条件を満たすフィルタリング検索に使用され、スカラーフィルタリングを組み込んでクエリ結果を絞り込むことができるため、スカラー条件を満たすベクトル内の類似検索が可能です。

        +

        テキストマッチの詳細については、テキストマッチを参照してください。

      • パーティション・キーの使用

        複数のスカラー・フィールドをメタデータ・フィルタリングに関与させ、かなり複雑なフィルタリング条件を使用すると、検索効率に影響することがあります。スカラー・フィールドをパーティション・キーに設定し、検索リクエストでパーティション・キーを含むフィルタリング条件を使用すると、指定されたパーティション・キー値に対応するパーティション内に検索範囲を制限することができます。

        パーティション・キーの詳細については、「パーティション・キーの使用」を参照してください。
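  参考として、パーティション・キーを含むフィルタ条件で検索範囲を限定するイメージを示す仮の例です(tenant_id というパーティション・キー名や、コレクション名・フィールド名は説明用の想定です)。

from pymilvus import MilvusClient

client = MilvusClient(uri="http://localhost:19530")   # assumed endpoint
query_vector = [0.12, -0.53, 0.98, 0.07, 0.44]        # example query vector (dimension is an assumption)

# The filter on the (assumed) partition-key field lets Milvus prune the search
# to the partitions that hold the matching key value.
res = client.search(
    collection_name="my_collection",
    anns_field="vector",
    data=[query_vector],
    filter='tenant_id == "tenant_42"',  # tenant_id is assumed to be the partition key
    limit=3,
)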

      • diff --git a/localization/v2.5.x/site/ko/adminGuide/upgrade-pulsar-v3.json b/localization/v2.5.x/site/ko/adminGuide/upgrade-pulsar-v3.json index 094273630..16a1f0a76 100644 --- a/localization/v2.5.x/site/ko/adminGuide/upgrade-pulsar-v3.json +++ b/localization/v2.5.x/site/ko/adminGuide/upgrade-pulsar-v3.json @@ -1 +1 @@ -{"codeList":["kubectl -n default port-forward deploy/my-release-milvus-proxy 9091:9091 &​\n","[1] 8116​\nForwarding from 127.0.0.1:9091 -> 9091​\n\n","pid=8116​\n\n","curl 127.0.0.1:9091/api/v1/collections \\​\n|curl 127.0.0.1:9091/api/v1/persist -d @/dev/stdin\\​\n|jq '.flush_coll_segIDs'| jq '[.[] | .data[]]' | jq '{segmentIDs: (.)}' \\​\n> flushing_segments.json​\ncat flushing_segments.json​\n\n","{​\n\"segmentIDs\": [​\n 454097953998181000,​\n 454097953999383600,​\n 454097953998180800​\n]​\n}​\n\n","cat flushing_segments.json| curl -X GET 127.0.0.1:9091/api/v1/persist/state -d @/dev/stdin ​\n\n","{\"status\":{},\"flushed\":true}​\n\n","kill $pid​\n\n","[1] + 8116 terminated kubectl -n default port-forward deploy/my-release-milvus-proxy 9091:9091 ​\n\n","helm -n default get values my-release -o yaml > values.yaml​\ncat values.yaml​\n\n","helm -n default uninstall my-release​\n\n","These resources were kept due to the resource policy:​\n[PersistentVolumeClaim] my-release-minio​\n​\nrelease \"my-release\" uninstalled​\n\n","kubectl -n default get pvc -lapp=pulsar,release=my-release |grep -v NAME |awk '{print $1}' > pulsar-pvcs.txt​\nkubectl -n default get pvc -lapp=pulsar,release=my-release -o custom-columns=VOL:.spec.volumeName|grep -v VOL > pulsar-pvs.txt​\necho \"Volume Claims:\"​\ncat pulsar-pvcs.txt​\necho \"Volumes:\"​\ncat pulsar-pvs.txt​\n\n","Volume Claims:​\nmy-release-pulsar-bookie-journal-my-release-pulsar-bookie-0​\nmy-release-pulsar-bookie-journal-my-release-pulsar-bookie-1​\nmy-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-0​\nmy-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-1​\nmy-release-pulsar-zookeeper-data-my-release-pulsar-zookeeper-0​\nVolumes:​\npvc-f590a4de-df31-4ca8-a424-007eac3c619a​\npvc-17b0e215-3e14-4d14-901e-1a1dda9ff5a3​\npvc-72f83c25-6ea1-45ee-9559-0b783f2c530b​\npvc-60dcb6e4-760d-46c7-af1a-d1fc153b0caf​\npvc-2da33f64-c053-42b9-bb72-c5d50779aa0a​\n\n","cat pulsar-pvcs.txt |xargs -I {} kubectl -n default delete pvc {} --wait=false​\n\n","persistentvolumeclaim \"my-release-pulsar-bookie-journal-my-release-pulsar-bookie-0\" deleted​\npersistentvolumeclaim \"my-release-pulsar-bookie-journal-my-release-pulsar-bookie-1\" deleted​\npersistentvolumeclaim \"my-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-0\" deleted​\npersistentvolumeclaim \"my-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-1\" deleted​\npersistentvolumeclaim \"my-release-pulsar-zookeeper-data-my-release-pulsar-zookeeper-0\" deleted​\n\n","cat pulsar-pvs.txt |xargs -I {} kubectl -n default delete pvc {} --wait=false​\n\n","Error from server (NotFound): persistentvolumeclaims \"pvc-f590a4de-df31-4ca8-a424-007eac3c619a\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-17b0e215-3e14-4d14-901e-1a1dda9ff5a3\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-72f83c25-6ea1-45ee-9559-0b783f2c530b\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-60dcb6e4-760d-46c7-af1a-d1fc153b0caf\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-2da33f64-c053-42b9-bb72-c5d50779aa0a\" not found​\n\n","kubectl -n default get milvus my-release -o yaml > milvus.yaml​\nhead 
milvus.yaml -n 20​\n\n","apiVersion: milvus.io/v1beta1​\nkind: Milvus​\nmetadata:​\nannotations:​\n milvus.io/dependency-values-merged: \"true\"​\n milvus.io/pod-service-label-added: \"true\"​\n milvus.io/querynode-current-group-id: \"0\"​\ncreationTimestamp: \"2024-11-22T08:06:59Z\"​\nfinalizers:​\n- milvus.milvus.io/finalizer​\ngeneration: 3​\nlabels:​\n app: milvus​\n milvus.io/operator-version: 1.1.2​\nname: my-release​\nnamespace: default​\nresourceVersion: \"692217324\"​\nuid: 7a469ed0-9df1-494e-bd9a-340fac4305b5​\nspec:​\ncomponents:​\n\n","# a patch to retain etcd & storage data and delete pulsar data while delete milvus​\nspec:​\ndependencies:​\n etcd:​\n inCluster:​\n deletionPolicy: Retain​\n pvcDeletion: false​\n storage:​\n inCluster:​\n deletionPolicy: Retain​\n pvcDeletion: false​\n pulsar:​\n inCluster:​\n deletionPolicy: Delete​\n pvcDeletion: true​\n\n","kubectl -n default patch milvus my-release --patch-file patch.yaml --type=merge​\n\n","milvus.milvus.io/my-release patched​\n\n","kubectl -n default delete milvus my-release --wait=false​\nkubectl -n default get milvus my-release​\nkubectl -n default delete milvus my-release --wait=true​\n\n","milvus.milvus.io \"my-release\" deleted​\nNAME MODE STATUS UPDATED AGE​\nmy-release cluster Deleting True 41m​\nmilvus.milvus.io \"my-release\" deleted​\n\n","kubectl -n default get milvus my-release​\n\n","No resources found in default namespace.​\n\n","# change the following:​\npulsar:​\nenabled: false # set to false​\n# you may also clean up rest fields under pulsar field​\n# it's ok to keep them though.​\npulsarv3:​\nenabled: true​\n# append other values for pulsar v3 chart if needs​\n\n","helm repo add zilliztech https://zilliztech.github.io/milvus-helm​\nhelm repo update zilliztech​\n\n","\"zilliztech\" already exists with the same configuration, skipping​\nHang tight while we grab the latest from your chart repositories...​\n...Successfully got an update from the \"zilliztech\" chart repository​\nUpdate Complete. 
⎈Happy Helming!⎈​\n\n","helm -n default install my-release zilliztech/milvus --reset-values -f values.yaml​\n\n","NAME: my-release​\nLAST DEPLOYED: Fri Nov 22 15:31:27 2024​\nNAMESPACE: default​\nSTATUS: deployed​\nREVISION: 1​\nTEST SUITE: None​\n\n","NAME READY STATUS RESTARTS AGE​\nmy-release-etcd-0 1/1 Running 0 4m3s​\nmy-release-milvus-datanode-56487bc4bc-s6mbd 1/1 Running 0 4m5s​\nmy-release-milvus-indexnode-6476894d6-rv85d 1/1 Running 0 4m5s​\nmy-release-milvus-mixcoord-6d8875cb9c-67fcq 1/1 Running 0 4m4s​\nmy-release-milvus-proxy-7bc45d57c5-2qf8m 1/1 Running 0 4m4s​\nmy-release-milvus-querynode-77465747b-kt7f4 1/1 Running 0 4m4s​\nmy-release-minio-684ff4f5df-pnc97 1/1 Running 0 4m5s​\nmy-release-pulsarv3-bookie-0 1/1 Running 0 4m3s​\nmy-release-pulsarv3-bookie-1 1/1 Running 0 4m3s​\nmy-release-pulsarv3-bookie-2 1/1 Running 0 4m3s​\nmy-release-pulsarv3-bookie-init-6z4tk 0/1 Completed 0 4m1s​\nmy-release-pulsarv3-broker-0 1/1 Running 0 4m2s​\nmy-release-pulsarv3-broker-1 1/1 Running 0 4m2s​\nmy-release-pulsarv3-proxy-0 1/1 Running 0 4m2s​\nmy-release-pulsarv3-proxy-1 1/1 Running 0 4m2s​\nmy-release-pulsarv3-pulsar-init-wvqpc 0/1 Completed 0 4m1s​\nmy-release-pulsarv3-recovery-0 1/1 Running 0 4m3s​\nmy-release-pulsarv3-zookeeper-0 1/1 Running 0 4m2s​\nmy-release-pulsarv3-zookeeper-1 1/1 Running 0 4m2s​\nmy-release-pulsarv3-zookeeper-2 1/1 Running 0 4m2s​\n\n","# change the followings fields:​\napiVersion: milvus.io/v1beta1​\nkind: Milvus​\nmetadata:​\nannotations: null # this field should be removed or set to null​\nresourceVersion: null # this field should be removed or set to null​\nuid: null # this field should be removed or set to null​\nspec:​\ndependencies:​\n pulsar:​\n inCluster:​\n chartVersion: pulsar-v3​\n # delete all previous values for pulsar v2 and set it to null.​\n # you may add additional values here for pulsar v3 if you're sure about it.​\n values: null​\n\n","helm repo add milvus-operator https://zilliztech.github.io/milvus-operator​\nhelm repo update milvus-operator​\nhelm -n milvus-operator upgrade milvus-operator milvus-operator/milvus-operator​\n\n","kubectl create -f milvus.yaml​\n\n","milvus.milvus.io/my-release created​\n\n","NAME READY STATUS RESTARTS AGE​\nmy-release-etcd-0 1/1 Running 0 65m​\nmy-release-milvus-datanode-57fd59ff58-5mdrk 1/1 Running 0 93s​\nmy-release-milvus-indexnode-67867c6b9b-4wsbw 1/1 Running 0 93s​\nmy-release-milvus-mixcoord-797849f9bb-sf8z5 1/1 Running 0 93s​\nmy-release-milvus-proxy-5d5bf98445-c55m6 1/1 Running 0 93s​\nmy-release-milvus-querynode-0-64797f5c9-lw4rh 1/1 Running 0 92s​\nmy-release-minio-79476ccb49-zvt2h 1/1 Running 0 65m​\nmy-release-pulsar-bookie-0 1/1 Running 0 5m10s​\nmy-release-pulsar-bookie-1 1/1 Running 0 5m10s​\nmy-release-pulsar-bookie-2 1/1 Running 0 5m10s​\nmy-release-pulsar-bookie-init-v8fdj 0/1 Completed 0 5m11s​\nmy-release-pulsar-broker-0 1/1 Running 0 5m11s​\nmy-release-pulsar-broker-1 1/1 Running 0 5m10s​\nmy-release-pulsar-proxy-0 1/1 Running 0 5m11s​\nmy-release-pulsar-proxy-1 1/1 Running 0 5m10s​\nmy-release-pulsar-pulsar-init-5lhx7 0/1 Completed 0 5m11s​\nmy-release-pulsar-recovery-0 1/1 Running 0 5m11s​\nmy-release-pulsar-zookeeper-0 1/1 Running 0 5m11s​\nmy-release-pulsar-zookeeper-1 1/1 Running 0 5m10s​\nmy-release-pulsar-zookeeper-2 1/1 Running 0 5m10s​\n\n"],"headingContent":"Upgrading Pulsar ​","anchorList":[{"label":"Pulsar 
업그레이드","href":"Upgrading-Pulsar-​","type":1,"isActive":false},{"label":"로드맵","href":"Roadmap","type":2,"isActive":false},{"label":"절차","href":"Procedures","type":2,"isActive":false}]} \ No newline at end of file +{"codeList":["kubectl -n default port-forward deploy/my-release-milvus-proxy 9091:9091 &​\n","[1] 8116​\nForwarding from 127.0.0.1:9091 -> 9091​\n\n","pid=8116​\n\n","curl 127.0.0.1:9091/api/v1/collections \\​\n|curl 127.0.0.1:9091/api/v1/persist -d @/dev/stdin\\​\n|jq '.flush_coll_segIDs'| jq '[.[] | .data[]]' | jq '{segmentIDs: (.)}' \\​\n> flushing_segments.json​\ncat flushing_segments.json​\n\n","{​\n \"segmentIDs\": [​\n 454097953998181000,​\n 454097953999383600,​\n 454097953998180800​\n ]​\n}​\n\n","cat flushing_segments.json| curl -X GET 127.0.0.1:9091/api/v1/persist/state -d @/dev/stdin ​\n\n","{\"status\":{},\"flushed\":true}​\n\n","kill $pid​\n\n","[1] + 8116 terminated kubectl -n default port-forward deploy/my-release-milvus-proxy 9091:9091 ​\n\n","helm -n default get values my-release -o yaml > values.yaml​\ncat values.yaml​\n\n","helm -n default uninstall my-release​\n\n","These resources were kept due to the resource policy:​\n[PersistentVolumeClaim] my-release-minio​\n​\nrelease \"my-release\" uninstalled​\n\n","kubectl -n default get pvc -lapp=pulsar,release=my-release |grep -v NAME |awk '{print $1}' > pulsar-pvcs.txt​\nkubectl -n default get pvc -lapp=pulsar,release=my-release -o custom-columns=VOL:.spec.volumeName|grep -v VOL > pulsar-pvs.txt​\necho \"Volume Claims:\"​\ncat pulsar-pvcs.txt​\necho \"Volumes:\"​\ncat pulsar-pvs.txt​\n\n","Volume Claims:​\nmy-release-pulsar-bookie-journal-my-release-pulsar-bookie-0​\nmy-release-pulsar-bookie-journal-my-release-pulsar-bookie-1​\nmy-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-0​\nmy-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-1​\nmy-release-pulsar-zookeeper-data-my-release-pulsar-zookeeper-0​\nVolumes:​\npvc-f590a4de-df31-4ca8-a424-007eac3c619a​\npvc-17b0e215-3e14-4d14-901e-1a1dda9ff5a3​\npvc-72f83c25-6ea1-45ee-9559-0b783f2c530b​\npvc-60dcb6e4-760d-46c7-af1a-d1fc153b0caf​\npvc-2da33f64-c053-42b9-bb72-c5d50779aa0a​\n\n","cat pulsar-pvcs.txt |xargs -I {} kubectl -n default delete pvc {} --wait=false​\n\n","persistentvolumeclaim \"my-release-pulsar-bookie-journal-my-release-pulsar-bookie-0\" deleted​\npersistentvolumeclaim \"my-release-pulsar-bookie-journal-my-release-pulsar-bookie-1\" deleted​\npersistentvolumeclaim \"my-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-0\" deleted​\npersistentvolumeclaim \"my-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-1\" deleted​\npersistentvolumeclaim \"my-release-pulsar-zookeeper-data-my-release-pulsar-zookeeper-0\" deleted​\n\n","cat pulsar-pvs.txt |xargs -I {} kubectl -n default delete pvc {} --wait=false​\n\n","Error from server (NotFound): persistentvolumeclaims \"pvc-f590a4de-df31-4ca8-a424-007eac3c619a\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-17b0e215-3e14-4d14-901e-1a1dda9ff5a3\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-72f83c25-6ea1-45ee-9559-0b783f2c530b\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-60dcb6e4-760d-46c7-af1a-d1fc153b0caf\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-2da33f64-c053-42b9-bb72-c5d50779aa0a\" not found​\n\n","kubectl -n default get milvus my-release -o yaml > milvus.yaml​\nhead milvus.yaml -n 20​\n\n","apiVersion: milvus.io/v1beta1​\nkind: Milvus​\nmetadata:​\n annotations:​\n 
milvus.io/dependency-values-merged: \"true\"​\n milvus.io/pod-service-label-added: \"true\"​\n milvus.io/querynode-current-group-id: \"0\"​\n creationTimestamp: \"2024-11-22T08:06:59Z\"​\n finalizers:​\n - milvus.milvus.io/finalizer​\n generation: 3​\n labels:​\n app: milvus​\n milvus.io/operator-version: 1.1.2​\nname: my-release​\nnamespace: default​\nresourceVersion: \"692217324\"​\nuid: 7a469ed0-9df1-494e-bd9a-340fac4305b5​\nspec:​\n components:​\n\n","# a patch to retain etcd & storage data and delete pulsar data while delete milvus​\nspec:​\n dependencies:​\n etcd:​\n inCluster:​\n deletionPolicy: Retain​\n pvcDeletion: false​\n storage:​\n inCluster:​\n deletionPolicy: Retain​\n pvcDeletion: false​\n pulsar:​\n inCluster:​\n deletionPolicy: Delete​\n pvcDeletion: true​\n\n","kubectl -n default patch milvus my-release --patch-file patch.yaml --type=merge​\n\n","milvus.milvus.io/my-release patched​\n\n","kubectl -n default delete milvus my-release --wait=false​\nkubectl -n default get milvus my-release​\nkubectl -n default delete milvus my-release --wait=true​\n\n","milvus.milvus.io \"my-release\" deleted​\nNAME MODE STATUS UPDATED AGE​\nmy-release cluster Deleting True 41m​\nmilvus.milvus.io \"my-release\" deleted​\n\n","kubectl -n default get milvus my-release​\n\n","No resources found in default namespace.​\n\n","# change the following:​\npulsar:​\n enabled: false # set to false​\n # you may also clean up rest fields under pulsar field​\n # it's ok to keep them though.​\npulsarv3:​\n enabled: true​\n # append other values for pulsar v3 chart if needs​\n\n","helm repo add zilliztech https://zilliztech.github.io/milvus-helm​\nhelm repo update zilliztech​\n\n","\"zilliztech\" already exists with the same configuration, skipping​\nHang tight while we grab the latest from your chart repositories...​\n...Successfully got an update from the \"zilliztech\" chart repository​\nUpdate Complete. 
⎈Happy Helming!⎈​\n\n","helm -n default install my-release zilliztech/milvus --reset-values -f values.yaml​\n\n","NAME: my-release​\nLAST DEPLOYED: Fri Nov 22 15:31:27 2024​\nNAMESPACE: default​\nSTATUS: deployed​\nREVISION: 1​\nTEST SUITE: None​\n\n","NAME READY STATUS RESTARTS AGE​\nmy-release-etcd-0 1/1 Running 0 4m3s​\nmy-release-milvus-datanode-56487bc4bc-s6mbd 1/1 Running 0 4m5s​\nmy-release-milvus-indexnode-6476894d6-rv85d 1/1 Running 0 4m5s​\nmy-release-milvus-mixcoord-6d8875cb9c-67fcq 1/1 Running 0 4m4s​\nmy-release-milvus-proxy-7bc45d57c5-2qf8m 1/1 Running 0 4m4s​\nmy-release-milvus-querynode-77465747b-kt7f4 1/1 Running 0 4m4s​\nmy-release-minio-684ff4f5df-pnc97 1/1 Running 0 4m5s​\nmy-release-pulsarv3-bookie-0 1/1 Running 0 4m3s​\nmy-release-pulsarv3-bookie-1 1/1 Running 0 4m3s​\nmy-release-pulsarv3-bookie-2 1/1 Running 0 4m3s​\nmy-release-pulsarv3-bookie-init-6z4tk 0/1 Completed 0 4m1s​\nmy-release-pulsarv3-broker-0 1/1 Running 0 4m2s​\nmy-release-pulsarv3-broker-1 1/1 Running 0 4m2s​\nmy-release-pulsarv3-proxy-0 1/1 Running 0 4m2s​\nmy-release-pulsarv3-proxy-1 1/1 Running 0 4m2s​\nmy-release-pulsarv3-pulsar-init-wvqpc 0/1 Completed 0 4m1s​\nmy-release-pulsarv3-recovery-0 1/1 Running 0 4m3s​\nmy-release-pulsarv3-zookeeper-0 1/1 Running 0 4m2s​\nmy-release-pulsarv3-zookeeper-1 1/1 Running 0 4m2s​\nmy-release-pulsarv3-zookeeper-2 1/1 Running 0 4m2s​\n\n","# change the followings fields:​\napiVersion: milvus.io/v1beta1​\nkind: Milvus​\nmetadata:​\n annotations: null # this field should be removed or set to null​\n resourceVersion: null # this field should be removed or set to null​\n uid: null # this field should be removed or set to null​\nspec:​\n dependencies:​\n pulsar:​\n inCluster:​\n chartVersion: pulsar-v3​\n # delete all previous values for pulsar v2 and set it to null.​\n # you may add additional values here for pulsar v3 if you're sure about it.​\n values: null​\n\n","helm repo add milvus-operator https://zilliztech.github.io/milvus-operator​\nhelm repo update milvus-operator​\nhelm -n milvus-operator upgrade milvus-operator milvus-operator/milvus-operator​\n\n","kubectl create -f milvus.yaml​\n\n","milvus.milvus.io/my-release created​\n\n","NAME READY STATUS RESTARTS AGE​\nmy-release-etcd-0 1/1 Running 0 65m​\nmy-release-milvus-datanode-57fd59ff58-5mdrk 1/1 Running 0 93s​\nmy-release-milvus-indexnode-67867c6b9b-4wsbw 1/1 Running 0 93s​\nmy-release-milvus-mixcoord-797849f9bb-sf8z5 1/1 Running 0 93s​\nmy-release-milvus-proxy-5d5bf98445-c55m6 1/1 Running 0 93s​\nmy-release-milvus-querynode-0-64797f5c9-lw4rh 1/1 Running 0 92s​\nmy-release-minio-79476ccb49-zvt2h 1/1 Running 0 65m​\nmy-release-pulsar-bookie-0 1/1 Running 0 5m10s​\nmy-release-pulsar-bookie-1 1/1 Running 0 5m10s​\nmy-release-pulsar-bookie-2 1/1 Running 0 5m10s​\nmy-release-pulsar-bookie-init-v8fdj 0/1 Completed 0 5m11s​\nmy-release-pulsar-broker-0 1/1 Running 0 5m11s​\nmy-release-pulsar-broker-1 1/1 Running 0 5m10s​\nmy-release-pulsar-proxy-0 1/1 Running 0 5m11s​\nmy-release-pulsar-proxy-1 1/1 Running 0 5m10s​\nmy-release-pulsar-pulsar-init-5lhx7 0/1 Completed 0 5m11s​\nmy-release-pulsar-recovery-0 1/1 Running 0 5m11s​\nmy-release-pulsar-zookeeper-0 1/1 Running 0 5m11s​\nmy-release-pulsar-zookeeper-1 1/1 Running 0 5m10s​\nmy-release-pulsar-zookeeper-2 1/1 Running 0 5m10s​\n\n"],"headingContent":"Upgrading Pulsar ​","anchorList":[{"label":"Pulsar 
업그레이드","href":"Upgrading-Pulsar-​","type":1,"isActive":false},{"label":"로드맵","href":"Roadmap","type":2,"isActive":false},{"label":"절차","href":"Procedures","type":2,"isActive":false}]} \ No newline at end of file diff --git a/localization/v2.5.x/site/ko/adminGuide/upgrade-pulsar-v3.md b/localization/v2.5.x/site/ko/adminGuide/upgrade-pulsar-v3.md index c4a3ad320..2066e9146 100644 --- a/localization/v2.5.x/site/ko/adminGuide/upgrade-pulsar-v3.md +++ b/localization/v2.5.x/site/ko/adminGuide/upgrade-pulsar-v3.md @@ -111,11 +111,11 @@ Forwarding from 127.

      출력.

      {​
      -"segmentIDs": [​
      +  "segmentIDs": [​
           454097953998181000,​
           454097953999383600,​
           454097953998180800​
      -]​
      +  ]​
       }​
       
       
      @@ -221,15 +221,15 @@ head milvus.yaml -n 20
      apiVersion: milvus.io/v1beta1​
       kind: Milvus​
       metadata:​
      -annotations:​
      +  annotations:​
           milvus.io/dependency-values-merged: "true"​
           milvus.io/pod-service-label-added: "true"​
           milvus.io/querynode-current-group-id: "0"​
      -creationTimestamp: "2024-11-22T08:06:59Z"​
      -finalizers:​
      -- milvus.milvus.io/finalizer​
      -generation: 3​
      -labels:​
      +  creationTimestamp: "2024-11-22T08:06:59Z"​
      +  finalizers:​
      +  - milvus.milvus.io/finalizer​
      +  generation: 3​
      +  labels:​
           app: milvus​
           milvus.io/operator-version: 1.1.2​
       name: my-release​
      @@ -237,23 +237,23 @@ namespace: default​
       resourceVersion: "692217324"​
       uid: 7a469ed0-9df1-494e-bd9a-340fac4305b5​
       spec:​
      -components:​
      +  components:​
       
       
    • 다음 내용으로 patch.yaml 파일을 생성합니다.

      # a patch to retain etcd & storage data and delete pulsar data while delete milvus​
       spec:​
      -dependencies:​
      +  dependencies:​
           etcd:​
      -    inCluster:​
      +      inCluster:​
               deletionPolicy: Retain​
               pvcDeletion: false​
           storage:​
      -    inCluster:​
      +      inCluster:​
               deletionPolicy: Retain​
               pvcDeletion: false​
           pulsar:​
      -    inCluster:​
      +      inCluster:​
               deletionPolicy: Delete​
               pvcDeletion: true​
       
      @@ -300,12 +300,12 @@ milvus.milvus.io "my-release" deleted
       
    • 이전 단계에서 저장한 values.yaml 을 편집합니다.

      # change the following:​
       pulsar:​
      -enabled: false # set to false​
      -# you may also clean up rest fields under pulsar field​
      -# it's ok to keep them though.​
      +  enabled: false # set to false​
      +  # you may also clean up rest fields under pulsar field​
      +  # it's ok to keep them though.​
       pulsarv3:​
      -enabled: true​
      -# append other values for pulsar v3 chart if needs​
      +  enabled: true​
      +  # append other values for pulsar v3 chart if needs​
       
       
    • 로컬 헬름 리포지토리를 업데이트합니다.

      @@ -366,13 +366,13 @@ my-release-pulsarv3-zookeeper-2 < apiVersion: milvus.io/v1beta1​ kind: Milvus​ metadata:​ -annotations: null # this field should be removed or set to null​ -resourceVersion: null # this field should be removed or set to null​ -uid: null # this field should be removed or set to null​ + annotations: null # this field should be removed or set to null​ + resourceVersion: null # this field should be removed or set to null​ + uid: null # this field should be removed or set to null​ spec:​ -dependencies:​ + dependencies:​ pulsar:​ - inCluster:​ + inCluster:​ chartVersion: pulsar-v3​ # delete all previous values for pulsar v2 and set it to null.​ # you may add additional values here for pulsar v3 if you're sure about it.​ diff --git a/localization/v2.5.x/site/ko/getstarted/run-milvus-k8s/install_cluster-helm.json b/localization/v2.5.x/site/ko/getstarted/run-milvus-k8s/install_cluster-helm.json index a7cc00102..aff122121 100644 --- a/localization/v2.5.x/site/ko/getstarted/run-milvus-k8s/install_cluster-helm.json +++ b/localization/v2.5.x/site/ko/getstarted/run-milvus-k8s/install_cluster-helm.json @@ -1 +1 @@ -{"codeList":["$ kubectl get sc\n\nNAME PROVISIONER RECLAIMPOLICY VOLUMEBIINDINGMODE ALLOWVOLUMEEXPANSION AGE\nstandard (default) k8s.io/minikube-hostpath Delete Immediate false \n","$ helm repo add milvus https://github.com/zilliztech/milvus-helm\n","helm repo add zilliztech https://github.com/zilliztech/milvus-helm\nhelm repo update\n# upgrade existing helm release\nhelm upgrade my-release zilliztech/milvus\n","$ helm repo update\n","$ helm install my-release milvus/milvus\n","$ kubectl get pods\n","NAME READY STATUS RESTARTS AGE\nmy-release-etcd-0 1/1 Running 0 3m23s\nmy-release-etcd-1 1/1 Running 0 3m23s\nmy-release-etcd-2 1/1 Running 0 3m23s\nmy-release-milvus-datanode-68cb87dcbd-4khpm 1/1 Running 0 3m23s\nmy-release-milvus-indexnode-5c5f7b5bd9-l8hjg 1/1 Running 0 3m24s\nmy-release-milvus-mixcoord-7fb9488465-dmbbj 1/1 Running 0 3m23s\nmy-release-milvus-proxy-6bd7f5587-ds2xv 1/1 Running 0 3m24s\nmy-release-milvus-querynode-5cd8fff495-k6gtg 1/1 Running 0 3m24s\nmy-release-minio-0 1/1 Running 0 3m23s\nmy-release-minio-1 1/1 Running 0 3m23s\nmy-release-minio-2 1/1 Running 0 3m23s\nmy-release-minio-3 1/1 Running 0 3m23s\nmy-release-pulsar-autorecovery-86f5dbdf77-lchpc 1/1 Running 0 3m24s\nmy-release-pulsar-bookkeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-bookkeeper-1 1/1 Running 0 98s\nmy-release-pulsar-broker-556ff89d4c-2m29m 1/1 Running 0 3m23s\nmy-release-pulsar-proxy-6fbd75db75-nhg4v 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-metadata-98zbr 0/1 Completed 0 3m24s\n","$ kubectl get pod my-release-milvus-proxy-6bd7f5587-ds2xv --template\n='{{(index (index .spec.containers 0).ports 0).containerPort}}{{\"\\n\"}}'\n19530\n","$ kubectl port-forward service/my-release-milvus 27017:19530\nForwarding from 127.0.0.1:27017 -> 19530\n","$ kubectl port-forward --address 0.0.0.0 service/my-release-milvus 27017:19530\nForwarding from 0.0.0.0:27017 -> 19530\n","$ helm template my-release milvus/milvus > milvus_manifest.yaml\n","$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/requirements.txt\n$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/save_image.py\n","$ pip3 install -r requirements.txt\n$ python3 save_image.py --manifest milvus_manifest.yaml\n","$ for image in $(find . 
-type f -name \"*.tar.gz\") ; do gunzip -c $image | docker load; done\n","$ kubectl apply -f milvus_manifest.yaml\n","$ helm repo update\n$ helm upgrade my-release zilliztech/milvus\n","$ helm uninstall my-release\n"],"headingContent":"Run Milvus in Kubernetes with Helm","anchorList":[{"label":"헬름으로 쿠버네티스에서 밀버스 실행하기","href":"Run-Milvus-in-Kubernetes-with-Helm","type":1,"isActive":false},{"label":"개요","href":"Overview","type":2,"isActive":false},{"label":"전제 조건","href":"Prerequisites","type":2,"isActive":false},{"label":"Milvus 헬름 차트 설치","href":"Install-Milvus-Helm-Chart","type":2,"isActive":false},{"label":"온라인 설치","href":"Online-install","type":2,"isActive":false},{"label":"오프라인 설치","href":"Offline-install","type":2,"isActive":false},{"label":"실행 중인 Milvus 클러스터 업그레이드","href":"Upgrade-running-Milvus-cluster","type":2,"isActive":false},{"label":"Milvus 제거","href":"Uninstall-Milvus","type":2,"isActive":false},{"label":"다음 단계","href":"Whats-next","type":2,"isActive":false}]} \ No newline at end of file +{"codeList":["$ kubectl get sc\n\nNAME PROVISIONER RECLAIMPOLICY VOLUMEBIINDINGMODE ALLOWVOLUMEEXPANSION AGE\nstandard (default) k8s.io/minikube-hostpath Delete Immediate false \n","$ helm repo add milvus https://zilliztech.github.io/milvus-helm/\n","helm repo add zilliztech https://zilliztech.github.io/milvus-helm/\nhelm repo update\n# upgrade existing helm release\nhelm upgrade my-release zilliztech/milvus\n","$ helm repo update\n","$ helm install my-release milvus/milvus\n","$ kubectl get pods\n","NAME READY STATUS RESTARTS AGE\nmy-release-etcd-0 1/1 Running 0 3m23s\nmy-release-etcd-1 1/1 Running 0 3m23s\nmy-release-etcd-2 1/1 Running 0 3m23s\nmy-release-milvus-datanode-68cb87dcbd-4khpm 1/1 Running 0 3m23s\nmy-release-milvus-indexnode-5c5f7b5bd9-l8hjg 1/1 Running 0 3m24s\nmy-release-milvus-mixcoord-7fb9488465-dmbbj 1/1 Running 0 3m23s\nmy-release-milvus-proxy-6bd7f5587-ds2xv 1/1 Running 0 3m24s\nmy-release-milvus-querynode-5cd8fff495-k6gtg 1/1 Running 0 3m24s\nmy-release-minio-0 1/1 Running 0 3m23s\nmy-release-minio-1 1/1 Running 0 3m23s\nmy-release-minio-2 1/1 Running 0 3m23s\nmy-release-minio-3 1/1 Running 0 3m23s\nmy-release-pulsar-autorecovery-86f5dbdf77-lchpc 1/1 Running 0 3m24s\nmy-release-pulsar-bookkeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-bookkeeper-1 1/1 Running 0 98s\nmy-release-pulsar-broker-556ff89d4c-2m29m 1/1 Running 0 3m23s\nmy-release-pulsar-proxy-6fbd75db75-nhg4v 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-metadata-98zbr 0/1 Completed 0 3m24s\n","$ kubectl get pod my-release-milvus-proxy-6bd7f5587-ds2xv --template\n='{{(index (index .spec.containers 0).ports 0).containerPort}}{{\"\\n\"}}'\n19530\n","$ kubectl port-forward service/my-release-milvus 27017:19530\nForwarding from 127.0.0.1:27017 -> 19530\n","$ kubectl port-forward --address 0.0.0.0 service/my-release-milvus 27017:19530\nForwarding from 0.0.0.0:27017 -> 19530\n","$ helm template my-release milvus/milvus > milvus_manifest.yaml\n","$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/requirements.txt\n$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/save_image.py\n","$ pip3 install -r requirements.txt\n$ python3 save_image.py --manifest milvus_manifest.yaml\n","$ for image in $(find . 
-type f -name \"*.tar.gz\") ; do gunzip -c $image | docker load; done\n","$ kubectl apply -f milvus_manifest.yaml\n","$ helm repo update\n$ helm upgrade my-release zilliztech/milvus\n","$ helm uninstall my-release\n"],"headingContent":"Run Milvus in Kubernetes with Helm","anchorList":[{"label":"헬름으로 쿠버네티스에서 밀버스 실행하기","href":"Run-Milvus-in-Kubernetes-with-Helm","type":1,"isActive":false},{"label":"개요","href":"Overview","type":2,"isActive":false},{"label":"전제 조건","href":"Prerequisites","type":2,"isActive":false},{"label":"Milvus 헬름 차트 설치","href":"Install-Milvus-Helm-Chart","type":2,"isActive":false},{"label":"온라인 설치","href":"Online-install","type":2,"isActive":false},{"label":"오프라인 설치","href":"Offline-install","type":2,"isActive":false},{"label":"실행 중인 Milvus 클러스터 업그레이드","href":"Upgrade-running-Milvus-cluster","type":2,"isActive":false},{"label":"Milvus 제거","href":"Uninstall-Milvus","type":2,"isActive":false},{"label":"다음 단계","href":"Whats-next","type":2,"isActive":false}]} \ No newline at end of file diff --git a/localization/v2.5.x/site/ko/getstarted/run-milvus-k8s/install_cluster-helm.md b/localization/v2.5.x/site/ko/getstarted/run-milvus-k8s/install_cluster-helm.md index 44777eb93..9151656fb 100644 --- a/localization/v2.5.x/site/ko/getstarted/run-milvus-k8s/install_cluster-helm.md +++ b/localization/v2.5.x/site/ko/getstarted/run-milvus-k8s/install_cluster-helm.md @@ -61,7 +61,7 @@ title: 헬름으로 Milvus 클러스터 설치 NAME PROVISIONER RECLAIMPOLICY VOLUMEBIINDINGMODE ALLOWVOLUMEEXPANSION AGE standard (default) k8s.io/minikube-hostpath Delete Immediate false
    • -
    • 설치하기 전에 하드웨어 및 소프트웨어 요구 사항을 확인한다.

    • +
    • 설치 전 하드웨어 및 소프트웨어 요구 사항을 확인한다.

    • 밀버스를 설치하기 전에 밀버스 사이징 툴을 사용하여 데이터 크기에 따라 하드웨어 요구 사항을 추정하는 것을 권장합니다. 이렇게 하면 Milvus 설치를 위한 최적의 성능과 리소스 할당을 보장하는 데 도움이 됩니다.

    • @@ -83,11 +83,11 @@ NAME PROVISIONER RECLAIMPOLICY VOLUMEBIINDI >

      밀버스 헬름 차트를 설치하기 전에 밀버스 헬름 리포지토리를 추가해야 합니다.

      -
      $ helm repo add milvus https://github.com/zilliztech/milvus-helm
      +
      $ helm repo add milvus https://zilliztech.github.io/milvus-helm/
       

      Milvus Helm Charts 리포지토리( https://github.com/milvus-io/milvus-helm )는 아카이브되어 있으며, 다음과 같이 https://github.com/zilliztech/milvus-helm 에서 추가 업데이트를 받을 수 있습니다:

      -
      helm repo add zilliztech https://github.com/zilliztech/milvus-helm
      +
      helm repo add zilliztech https://zilliztech.github.io/milvus-helm/
       helm repo update
       # upgrade existing helm release
       helm upgrade my-release zilliztech/milvus
      @@ -122,7 +122,7 @@ helm upgrade my-release zilliztech/milvus
         
      • 릴리스 이름에는 문자, 숫자 및 대시만 포함해야 합니다. 릴리스 이름에는 점을 사용할 수 없습니다.
      • 기본 명령줄은 헬름과 함께 Milvus를 설치하는 동안 클러스터 버전의 Milvus를 설치합니다. Milvus를 독립형으로 설치할 때는 추가 설정이 필요하다.
      • -
      • 쿠버네티스의 더 이상 사용되지 않는 API 마이그레이션 가이드에 따르면, v1.25부터 파드장애예산 정책/v1beta1 API 버전은 더 이상 제공되지 않는다. 대신 정책/v1 API 버전을 사용하도록 매니페스트와 API 클라이언트를 마이그레이션하는 것이 좋다.
        쿠버네티스 v1.25 이상에서 여전히 파드디스럽션버짓의 정책/v1beta1 API 버전을 사용하는 사용자를 위한 해결 방법으로, 다음 명령을 실행하여 밀버스를 설치할 수 있다:
        . helm install my-release milvus/milvus --set pulsar.bookkeeper.pdb.usePolicy=false,pulsar.broker.pdb.usePolicy=false,pulsar.proxy.pdb.usePolicy=false,pulsar.zookeeper.pdb.usePolicy=false
      • +
      • 쿠버네티스의 더 이상 사용되지 않는 API 마이그레이션 가이드에 따르면, v1.25 버전부터 파드장애예산 정책/v1beta1 API 버전은 더 이상 제공되지 않는다. 대신 정책/v1 API 버전을 사용하도록 매니페스트와 API 클라이언트를 마이그레이션하는 것이 좋다.
        쿠버네티스 v1.25 이상에서 여전히 정책/v1beta1 버전의 파드디스럽션예산 API 버전을 사용하는 사용자를 위한 해결 방법으로, 대신 다음 명령을 실행하여 밀버스를 설치할 수 있다:
        . helm install my-release milvus/milvus --set pulsar.bookkeeper.pdb.usePolicy=false,pulsar.broker.pdb.usePolicy=false,pulsar.proxy.pdb.usePolicy=false,pulsar.zookeeper.pdb.usePolicy=false
      • 자세한 내용은 밀버스 헬름 차트와 헬름을 참고한다.
      @@ -285,5 +285,5 @@ $ helm upgrade my-release zilliztech/milvus
    • Milvus 데이터 백업을 위한 오픈 소스 도구인 Milvus Backup을 살펴보세요.

    • Milvus 디버깅 및 동적 구성 업데이트를 위한 오픈 소스 도구인 Birdwatcher를 살펴보세요.

    • 직관적인 Milvus 관리를 위한 오픈 소스 GUI 도구인 Attu를 살펴보세요.

    • -
    • Prometheus로 Milvus 모니터링.

    • +
    • Prometheus로 Milvus 모니터링하기.

    • diff --git a/localization/v2.5.x/site/ko/home/home.md b/localization/v2.5.x/site/ko/home/home.md index d91fb89a6..78d40e17c 100644 --- a/localization/v2.5.x/site/ko/home/home.md +++ b/localization/v2.5.x/site/ko/home/home.md @@ -34,7 +34,7 @@ Milvus 문서에 오신 것을 환영합니다!

      icon -

      도커 컴포즈 또는 쿠버네티스를 사용하여 Milvus를 설치하는 방법을 알아보세요.

      +

      도커 컴포즈 또는 쿠버네티스를 사용하여 Milvus를 설치하는 방법을 알아보세요.

      @@ -114,8 +114,8 @@ Milvus 문서에 오신 것을 환영합니다!

      2024년 11월 - Milvus 2.5.0 출시

      diff --git a/localization/v2.5.x/site/ko/menuStructure/ko.json b/localization/v2.5.x/site/ko/menuStructure/ko.json index e35e152c5..dac5badbd 100644 --- a/localization/v2.5.x/site/ko/menuStructure/ko.json +++ b/localization/v2.5.x/site/ko/menuStructure/ko.json @@ -286,6 +286,18 @@ "order": 8, "children": [] }, + { + "label": "메트릭 유형", + "id": "metric.md", + "order": 9, + "children": [] + }, + { + "label": "일관성 수준", + "id": "consistency.md", + "order": 10, + "children": [] + }, { "label": "인메모리 복제본", "id": "replica.md", @@ -602,31 +614,39 @@ ] }, { - "label": "인덱스 관리", + "label": "색인", "id": "manage_indexes", "order": 4, "isMenu": true, "children": [ { - "label": "인덱스 벡터 필드", + "label": "벡터 인덱스", "id": "index-vector-fields.md", "order": 0, "children": [] }, { - "label": "인덱스 스칼라 필드", - "id": "index-scalar-fields.md", + "label": "스칼라 인덱스", + "id": "scalar-index", "order": 1, - "children": [] - }, - { - "label": "비트맵 색인", - "id": "bitmap.md", - "order": 2, - "children": [] + "isMenu": true, + "children": [ + { + "label": "인덱스 스칼라 필드", + "id": "index-scalar-fields.md", + "order": 1, + "children": [] + }, + { + "label": "비트맵 색인", + "id": "bitmap.md", + "order": 2, + "children": [] + } + ] }, { - "label": "GPU를 사용한 색인", + "label": "GPU 지원 인덱스", "id": "index-with-gpu.md", "order": 3, "children": [] @@ -682,7 +702,7 @@ "children": [] }, { - "label": "키워드 검색", + "label": "텍스트 일치", "id": "keyword-match.md", "order": 7, "children": [] @@ -699,30 +719,6 @@ "order": 9, "children": [] }, - { - "label": "mmap 사용", - "id": "mmap.md", - "order": 10, - "children": [] - }, - { - "label": "클러스터링 압축", - "id": "clustering-compaction.md", - "order": 11, - "children": [] - }, - { - "label": "일관성 수준", - "id": "consistency.md", - "order": 12, - "children": [] - }, - { - "label": "메트릭 유형", - "id": "metric.md", - "order": 13, - "children": [] - }, { "label": "메타데이터 필터링", "id": "boolean.md", @@ -736,26 +732,6 @@ "children": [] } ] - }, - { - "label": "데이터 가져오기", - "id": "data_import", - "order": 6, - "isMenu": true, - "children": [ - { - "label": "소스 데이터 준비", - "id": "prepare-source-data.md", - "order": 0, - "children": [] - }, - { - "label": "데이터 가져오기", - "id": "import-data.md", - "order": 1, - "children": [] - } - ] } ] }, @@ -897,11 +873,31 @@ } ] }, + { + "label": "데이터 가져오기", + "id": "data_import", + "order": 5, + "isMenu": true, + "children": [ + { + "label": "소스 데이터 준비", + "id": "prepare-source-data.md", + "order": 0, + "children": [] + }, + { + "label": "데이터 가져오기", + "id": "import-data.md", + "order": 1, + "children": [] + } + ] + }, { "label": "Milvus 마이그레이션", "id": "milvus_migration", "isMenu": true, - "order": 5, + "order": 6, "children": [ { "label": "개요", @@ -1321,10 +1317,30 @@ } ] }, + { + "label": "스토리지 최적화", + "id": "storage_optimization", + "order": 10, + "isMenu": true, + "children": [ + { + "label": "mmap 사용", + "id": "mmap.md", + "order": 0, + "children": [] + }, + { + "label": "클러스터링 압축", + "id": "clustering-compaction.md", + "order": 1, + "children": [] + } + ] + }, { "label": "보안", "id": "security", - "order": 10, + "order": 11, "isMenu": true, "children": [ { diff --git a/localization/v2.5.x/site/ko/release_notes.md b/localization/v2.5.x/site/ko/release_notes.md index d89aa5b92..d5101e9da 100644 --- a/localization/v2.5.x/site/ko/release_notes.md +++ b/localization/v2.5.x/site/ko/release_notes.md @@ -50,9 +50,9 @@ title: 릴리스 노트

      클러스터 관리 웹 UI(베타)

      대규모 데이터와 풍부한 기능을 보다 효과적으로 지원하기 위해 Milvus의 정교한 설계에는 다양한 종속성, 수많은 노드 역할, 복잡한 데이터 구조 등이 포함되어 있습니다. 이러한 측면은 사용 및 유지 관리에 어려움을 초래할 수 있습니다.

      Milvus 2.5는 빌트인 클러스터 관리 웹UI를 도입하여 Milvus의 복잡한 런타임 환경 정보를 시각화함으로써 시스템 유지 관리의 어려움을 줄여줍니다. 여기에는 데이터베이스 및 컬렉션, 세그먼트, 채널, 종속성, 노드 상태, 작업 정보, 느린 쿼리 등에 대한 세부 정보가 포함됩니다.

      텍스트 일치

      Milvus 2.5는 텍스트 전처리 및 인덱스 구축을 위해 Tantivy의 분석기와 인덱싱을 활용하여 특정 용어를 기반으로 텍스트 데이터의 정확한 자연어 매칭을 지원합니다. 이 기능은 주로 특정 조건을 충족하는 필터링 검색에 사용되며, 스칼라 필터링을 통합하여 쿼리 결과를 구체화함으로써 스칼라 기준을 충족하는 벡터 내에서 유사성 검색을 할 수 있습니다.

      -

      자세한 내용은 키워드 검색을 참조하세요.

      -

      비트맵 색인

      새로운 스칼라 데이터 인덱스가 Milvus 제품군에 추가되었습니다. 비트맵 인덱스는 행 수와 동일한 길이의 비트 배열을 사용해 값의 존재를 나타내고 검색 속도를 높입니다.

      -

      비트맵 인덱스는 일반적으로 고유 값의 수가 적은 낮은 카디널리티 필드(예: 남성, 여성 두 가지 값만 있는 성별 정보가 포함된 열)에 효과적이었습니다.

      +

      자세한 내용은 텍스트 일치를 참조하세요.

      +

      비트맵 색인

      새로운 스칼라 데이터 인덱스가 Milvus 제품군에 추가되었습니다. 비트맵 인덱스는 행 수와 동일한 길이의 비트 배열을 사용하여 값의 존재를 나타내고 검색 속도를 높입니다.

      +

      비트맵 인덱스는 일반적으로 고유 값의 수가 적은 저카디널리티 필드(예: 남성, 여성 두 가지 값만 있는 성별 정보가 포함된 열)에 효과적이었습니다.

      자세한 내용은 비트맵 인덱스를 참조하세요.

      널 가능 및 기본값

      Milvus는 이제 기본 키 필드 이외의 스칼라 필드에 대해 널 가능 속성 및 기본값 설정을 지원합니다. nullable=True 로 표시된 스칼라 필드의 경우 사용자는 데이터를 삽입할 때 해당 필드를 생략할 수 있으며, 시스템에서는 오류를 발생시키지 않고 해당 필드를 널 값 또는 기본값(설정된 경우)으로 처리합니다.

      기본값과 널 가능 속성은 Milvus에 더 큰 유연성을 제공합니다. 사용자는 컬렉션을 만들 때 값이 불확실한 필드에 이 기능을 활용할 수 있습니다. 또한 다른 데이터베이스 시스템에서 Milvus로 데이터 마이그레이션을 간소화하여 원래의 기본값 설정을 유지하면서 null 값이 포함된 데이터 세트를 처리할 수 있습니다.
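To make the nullable/default behavior concrete, here is a minimal pymilvus sketch of declaring a nullable scalar field and a field with a default value. It assumes pymilvus 2.5+ and a Milvus instance at localhost:19530; the collection and field names are made up for illustration.

```python
from pymilvus import MilvusClient, DataType

client = MilvusClient(uri="http://localhost:19530")

schema = client.create_schema(auto_id=True)
schema.add_field(field_name="pk", datatype=DataType.INT64, is_primary=True)
schema.add_field(field_name="vector", datatype=DataType.FLOAT_VECTOR, dim=4)

# Scalar field that may be omitted on insert; Milvus treats the missing value as null.
schema.add_field(field_name="age", datatype=DataType.INT64, nullable=True)

# Scalar field that falls back to "active" when omitted on insert.
schema.add_field(
    field_name="status",
    datatype=DataType.VARCHAR,
    max_length=16,
    default_value="active",
)
```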

      @@ -62,7 +62,7 @@ title: 릴리스 노트

      PRQ는 PQ와 AQ(애디티브 퀀타이저)의 융합입니다. PQ와 비교했을 때, 특히 높은 압축률에서 더 나은 리콜을 제공하기 위해 더 긴 빌드 시간이 필요하며, 이진 압축이라고 합니다.

      클러스터링 압축(베타)

      Milvus 2.5에는 대규모 컬렉션에서 검색을 가속화하고 비용을 절감하기 위해 클러스터링 압축이 도입되었습니다. 스칼라 필드를 클러스터링 키로 지정하면 데이터를 범위별로 재분배하여 저장 및 검색을 최적화할 수 있습니다. 글로벌 인덱스처럼 작동하는 이 기능은 클러스터링 메타데이터를 기반으로 쿼리 중에 데이터를 효율적으로 정리하여 스칼라 필터를 적용할 때 검색 성능을 향상시킬 수 있습니다.

      자세한 내용은 클러스터링 압축을 참조하세요.

      -

      기타 기능

      스트리밍 노드(베타)

      Milvus 2.5에는 미리 쓰기 로깅(WAL) 서비스를 제공하는 스트리밍 노드라는 새로운 구성 요소가 도입되었습니다. 이를 통해 Milvus는 채널 읽기 및 쓰기 전후에 합의를 달성하여 새로운 특징, 기능 및 최적화를 실현할 수 있습니다. 이 기능은 Milvus 2.5에서는 기본적으로 비활성화되어 있으며 3.0 버전에서 공식적으로 제공될 예정입니다.

      +

      기타 기능

      스트리밍 노드(베타)

      Milvus 2.5에는 미리 쓰기 로깅(WAL) 서비스를 제공하는 스트리밍 노드라는 새로운 구성 요소가 도입되었습니다. 이를 통해 Milvus는 채널 읽기 및 쓰기 전후에 합의를 달성하여 새로운 특징, 기능 및 최적화를 실현할 수 있습니다. 이 기능은 Milvus 2.5에서 기본적으로 비활성화되어 있으며 3.0 버전에서 공식적으로 제공될 예정입니다.

      IPv6 지원

      Milvus는 이제 IPv6를 지원하여 네트워크 연결성과 호환성을 확장합니다.

      CSV 일괄 가져오기

      이제 Milvus는 JSON 및 Parquet 형식 외에도 CSV 형식의 데이터를 직접 대량으로 가져올 수 있습니다.

      쿼리 가속화를 위한 표현식 템플릿

      Milvus는 이제 표현식 템플릿을 지원하여 특히 복잡한 표현식이 있는 시나리오에서 표현식 구문 분석의 효율성을 향상시킵니다.
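The snippet below is a sketch of how expression templating looks from pymilvus, based on my reading of the 2.5 filter-templating support; treat the `filter_params` argument and the collection/field names as assumptions rather than a verified reference.

```python
from pymilvus import MilvusClient

client = MilvusClient(uri="http://localhost:19530")

# The placeholder {color_list} stays inside the expression; concrete values are
# passed separately, so the parsed expression plan can be reused across calls.
res = client.query(
    collection_name="my_collection",                      # hypothetical collection
    filter="color in {color_list}",
    filter_params={"color_list": ["red", "green", "blue"]},
    output_fields=["color"],
    limit=10,
)
print(res)
```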

      @@ -78,6 +78,6 @@ title: 릴리스 노트

      종속성 업그레이드

      ETCD 3.5.16 및 Pulsar 3.0.7 LTS로 업그레이드하여 기존 CVE를 수정하고 보안을 강화했습니다. 참고: Pulsar 3.x로의 업그레이드는 이전 2.x 버전과 호환되지 않습니다.

      이미 Milvus를 배포한 사용자의 경우, 새로운 기능을 사용하려면 먼저 ETCD 및 Pulsar 구성 요소를 업그레이드해야 합니다. 자세한 내용은 Pulsar를 2.x에서 3.x로 업그레이드하기를 참조하세요.

      로컬 스토리지 V2

      Milvus 2.5에 새로운 로컬 파일 형식이 도입되어 스칼라 데이터의 로딩 및 쿼리 효율성이 향상되고 메모리 오버헤드가 줄어들며 향후 최적화를 위한 토대가 마련되었습니다.

      -

      표현식 구문 분석 최적화

      반복 표현식에 대한 캐싱 구현, ANTLR 업그레이드, NOT IN 절의 성능 최적화를 통해 표현식 구문 분석이 개선되었습니다.

      +

      표현식 구문 분석 최적화

      반복 표현식에 대한 캐싱을 구현하고, ANTLR을 업그레이드하고, NOT IN 절의 성능을 최적화하여 표현식 구문 분석이 개선되었습니다.

      DDL 동시성 성능 개선

      데이터 정의 언어(DDL) 작업의 동시성 성능을 최적화했습니다.

      RESTful API 기능 조정

      일관성을 위해 RESTful API의 기능을 다른 SDK와 정렬했습니다.

      diff --git a/localization/v2.5.x/site/ko/tutorials/hybrid_search_with_milvus.md b/localization/v2.5.x/site/ko/tutorials/hybrid_search_with_milvus.md index 2672adbc9..d54ba71a8 100644 --- a/localization/v2.5.x/site/ko/tutorials/hybrid_search_with_milvus.md +++ b/localization/v2.5.x/site/ko/tutorials/hybrid_search_with_milvus.md @@ -25,10 +25,10 @@ title: Milvus를 사용한 하이브리드 검색

      Milvus는 밀도, 스파스, 하이브리드 검색 방식을 지원합니다:

      • 밀도 검색: 시맨틱 컨텍스트를 활용하여 쿼리 뒤에 숨겨진 의미를 이해합니다.
      • -
      • 스파스 검색: 키워드 매칭을 강조하여 전체 텍스트 검색과 동일하게 특정 용어에 기반한 결과를 찾습니다.
      • +
      • 스파스 검색: 텍스트 매칭을 강조하여 전체 텍스트 검색과 동일하게 특정 용어를 기반으로 결과를 찾습니다.
      • 하이브리드 검색: 밀도 검색과 스파스 검색 방식을 모두 결합하여 전체 문맥과 특정 키워드를 파악하여 포괄적인 검색 결과를 제공합니다.
      -

      이러한 방법을 통합함으로써 Milvus 하이브리드 검색은 의미론적 유사성과 어휘적 유사성의 균형을 맞춰 검색 결과의 전반적인 관련성을 향상시킵니다. 이 노트북에서는 이러한 검색 전략을 설정하고 사용하는 과정을 안내하며, 다양한 검색 시나리오에서 그 효과를 강조합니다.

      +

      이러한 방법을 통합함으로써 Milvus 하이브리드 검색은 의미론적 유사성과 어휘적 유사성의 균형을 유지하여 검색 결과의 전반적인 관련성을 향상시킵니다. 이 노트북에서는 이러한 검색 전략을 설정하고 사용하는 과정을 안내하며, 다양한 검색 시나리오에서 그 효과를 강조합니다.

      종속성 및 환경

      $ pip install --upgrade pymilvus "pymilvus[model]"
       

      데이터 세트 다운로드

      검색을 시연하려면 문서 말뭉치가 필요합니다. Quora 중복 질문 데이터 집합을 사용하여 로컬 디렉터리에 배치해 보겠습니다.

      diff --git a/localization/v2.5.x/site/ko/userGuide/collections/manage-collections.md b/localization/v2.5.x/site/ko/userGuide/collections/manage-collections.md index 6d8ccafda..22b4bdbeb 100644 --- a/localization/v2.5.x/site/ko/userGuide/collections/manage-collections.md +++ b/localization/v2.5.x/site/ko/userGuide/collections/manage-collections.md @@ -78,7 +78,7 @@ title: 컬렉션 설명 >

      관계형 데이터베이스의 기본 필드와 유사하게 컬렉션에는 엔티티를 다른 엔티티와 구별하는 기본 필드가 있습니다. 기본 필드의 각 값은 전 세계적으로 고유하며 하나의 특정 엔티티에 해당합니다.

      -

      위 차트에서 ID라는 필드가 기본 필드로 사용되며, 첫 번째 ID 0은 제목이 코로나바이러스의 사망률은 중요하지 않다는 엔티티에 해당합니다. 기본 필드가 0인 다른 엔티티는 없습니다.

      +

      위 차트에서 ID라는 필드가 기본 필드로 사용되며, 첫 번째 ID 0은 제목이 ' 코로나바이러스의 사망률은 중요하지 않음'인 엔티티에 해당합니다. 기본 필드가 0인 다른 엔티티는 없습니다.

      기본 필드는 정수 또는 문자열만 허용합니다. 엔티티를 삽입할 때는 기본적으로 기본 필드 값을 포함해야 합니다. 그러나 컬렉션 생성 시 자동 ID를 활성화한 경우, 데이터 삽입 시 Milvus가 해당 값을 생성합니다. 이 경우 삽입할 엔티티에서 기본 필드 값을 제외하세요.

      자세한 내용은 기본 필드 및 자동 ID를 참조하세요.
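As a quick illustration of the auto-ID behavior described above, the sketch below omits the primary key on insert and lets Milvus generate it; the collection name and vector dimension are arbitrary examples.

```python
from pymilvus import MilvusClient, DataType

client = MilvusClient(uri="http://localhost:19530")

schema = client.create_schema(auto_id=True)
# INT64 primary field; values are generated by Milvus because auto_id=True.
schema.add_field(field_name="id", datatype=DataType.INT64, is_primary=True)
schema.add_field(field_name="vector", datatype=DataType.FLOAT_VECTOR, dim=4)

index_params = client.prepare_index_params()
index_params.add_index(field_name="vector", index_type="AUTOINDEX", metric_type="IP")

client.create_collection(
    collection_name="quick_setup",      # arbitrary example name
    schema=schema,
    index_params=index_params,
)

# No "id" key in the row: Milvus assigns the primary key value itself.
client.insert(collection_name="quick_setup", data=[{"vector": [0.1, 0.2, 0.3, 0.4]}])
```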

      인덱스

      GPU 인덱스를 사용할 때는 특정 제약 조건에 유의해야 합니다:

        -
      • GPU_IVF_FLAT의 경우 제한의 최대값은 256입니다.

      • +
      • GPU_IVF_FLAT의 경우, 제한의 최대값은 1024입니다.

      • GPU_IVF_PQGPU_CAGRA의 경우, 제한의 최대값은 1024입니다.

      • GPU_BRUTE_FORCE에 대한 제한은 설정되어 있지 않지만 잠재적인 성능 문제를 피하기 위해 4096을 초과하지 않는 것이 좋습니다.

      • 현재 GPU 인덱스는 COSINE 거리를 지원하지 않습니다. COSINE 거리가 필요한 경우 먼저 데이터를 정규화한 다음 내부 곱(IP) 거리를 대체로 사용할 수 있습니다(아래의 간단한 예시 참조).

      • diff --git a/localization/v2.5.x/site/ko/userGuide/schema/analyzer/analyzer-overview.md b/localization/v2.5.x/site/ko/userGuide/schema/analyzer/analyzer-overview.md index d06b973d0..b155c1db4 100644 --- a/localization/v2.5.x/site/ko/userGuide/schema/analyzer/analyzer-overview.md +++ b/localization/v2.5.x/site/ko/userGuide/schema/analyzer/analyzer-overview.md @@ -22,12 +22,12 @@ summary: >- >

        텍스트 처리에서 분석기는 원시 텍스트를 구조화되고 검색 가능한 형식으로 변환하는 중요한 구성 요소입니다. 각 분석기는 일반적으로 토큰화기와 필터라는 두 가지 핵심 요소로 구성됩니다. 이들은 함께 입력 텍스트를 토큰으로 변환하고, 이러한 토큰을 정제하며, 효율적인 색인 및 검색을 위해 준비합니다.

        -

        Milvus에서 분석기는 컬렉션 스키마에 VARCHAR 필드를 추가할 때 컬렉션 생성 중에 구성됩니다. 분석기가 생성한 토큰은 키워드 매칭을 위한 인덱스를 구축하는 데 사용하거나 전체 텍스트 검색을 위해 스파스 임베딩으로 변환할 수 있습니다. 자세한 내용은 키워드 검색 또는 전체 텍스트 검색을 참조하세요.

        +

        Milvus에서 분석기는 컬렉션 스키마에 VARCHAR 필드를 추가할 때 컬렉션 생성 중에 구성됩니다. 분석기가 생성한 토큰은 텍스트 매칭을 위한 인덱스를 구축하는 데 사용하거나 전체 텍스트 검색을 위해 스파스 임베딩으로 변환할 수 있습니다. 자세한 내용은 텍스트 일치 또는 전체 텍스트 검색을 참조하세요.

        분석기를 사용하면 성능에 영향을 미칠 수 있습니다.

        • 전체 텍스트 검색: 전체 텍스트 검색의 경우, 토큰화가 완료될 때까지 기다려야 하기 때문에 DataNode 및 QueryNode 채널은 데이터를 더 느리게 소비합니다. 따라서 새로 수집된 데이터를 검색에 사용할 수 있게 되는 데 시간이 더 오래 걸립니다.

        • -
        • 키워드 일치: 키워드 매칭의 경우, 인덱스를 구축하기 전에 토큰화가 완료되어야 하므로 인덱스 생성도 더 느려집니다.

        • +
        • 텍스트 일치: 텍스트 매칭의 경우, 인덱스를 구축하기 전에 토큰화가 완료되어야 하므로 인덱스 생성도 더 느려집니다.

        분석기의 구조

        Milvus의 분석기는 정확히 하나의 토큰화 도구와 0개 이상의 필터로 구성됩니다.

        +

        Milvus의 분석기는 정확히 하나의 토큰화기와 0개 이상의 필터로 구성됩니다.

        • 토큰화 도구: 토큰화기는 입력 텍스트를 토큰이라는 개별 단위로 분해합니다. 이러한 토큰은 토큰라이저 유형에 따라 단어 또는 구문일 수 있습니다.

        • 필터: 토큰에 필터를 적용하여 토큰을 소문자로 만들거나 일반적인 단어를 제거하는 등 토큰을 더욱 세분화할 수 있습니다(아래의 간단한 구성 예시 참조).

        • diff --git a/localization/v2.5.x/site/ko/userGuide/schema/sparse_vector.json b/localization/v2.5.x/site/ko/userGuide/schema/sparse_vector.json index 60b97c89c..30db626f4 100644 --- a/localization/v2.5.x/site/ko/userGuide/schema/sparse_vector.json +++ b/localization/v2.5.x/site/ko/userGuide/schema/sparse_vector.json @@ -1 +1 @@ -{"codeList":["from scipy.sparse import csr_matrix​\n​\n# Create a sparse matrix​\nrow = [0, 0, 1, 2, 2, 2]​\ncol = [0, 2, 2, 0, 1, 2]​\ndata = [1, 2, 3, 4, 5, 6]​\nsparse_matrix = csr_matrix((data, (row, col)), shape=(3, 3))​\n​\n# Represent sparse vector using the sparse matrix​\nsparse_vector = sparse_matrix.getrow(0)​\n\n","# Represent sparse vector using a dictionary​\nsparse_vector = [{1: 0.5, 100: 0.3, 500: 0.8, 1024: 0.2, 5000: 0.6}]​\n\n","SortedMap sparseVector = new TreeMap<>();​\nsparseVector.put(1L, 0.5f);​\nsparseVector.put(100L, 0.3f);​\nsparseVector.put(500L, 0.8f);​\nsparseVector.put(1024L, 0.2f);​\nsparseVector.put(5000L, 0.6f);​\n\n","# Represent sparse vector using a list of tuples​\nsparse_vector = [[(1, 0.5), (100, 0.3), (500, 0.8), (1024, 0.2), (5000, 0.6)]]​\n\n","from pymilvus import MilvusClient, DataType​\n​\nclient = MilvusClient(uri=\"http://localhost:19530\")​\n​\nclient.drop_collection(collection_name=\"my_sparse_collection\")​\n​\nschema = client.create_schema(​\n auto_id=True,​\n enable_dynamic_fields=True,​\n)​\n​\nschema.add_field(field_name=\"pk\", datatype=DataType.VARCHAR, is_primary=True, max_length=100)​\nschema.add_field(field_name=\"sparse_vector\", datatype=DataType.SPARSE_FLOAT_VECTOR)​\n\n","import io.milvus.v2.client.ConnectConfig;​\nimport io.milvus.v2.client.MilvusClientV2;​\n​\nimport io.milvus.v2.common.DataType;​\nimport io.milvus.v2.service.collection.request.AddFieldReq;​\nimport io.milvus.v2.service.collection.request.CreateCollectionReq;​\n​\nMilvusClientV2 client = new MilvusClientV2(ConnectConfig.builder()​\n .uri(\"http://localhost:19530\")​\n .build());​\n ​\nCreateCollectionReq.CollectionSchema schema = client.createSchema();​\nschema.setEnableDynamicField(true);​\nschema.addField(AddFieldReq.builder()​\n .fieldName(\"pk\")​\n .dataType(DataType.VarChar)​\n .isPrimaryKey(true)​\n .autoID(true)​\n .maxLength(100)​\n .build());​\n​\nschema.addField(AddFieldReq.builder()​\n .fieldName(\"sparse_vector\")​\n .dataType(DataType.SparseFloatVector)​\n .build());​\n\n","import { DataType } from \"@zilliz/milvus2-sdk-node\";​\n​\nconst schema = [​\n {​\n name: \"metadata\",​\n data_type: DataType.JSON,​\n },​\n {​\n name: \"pk\",​\n data_type: DataType.Int64,​\n is_primary_key: true,​\n },​\n {​\n name: \"sparse_vector\",​\n data_type: DataType.SparseFloatVector,​\n }​\n];​\n​\n\n","export primaryField='{​\n \"fieldName\": \"pk\",​\n \"dataType\": \"VarChar\",​\n \"isPrimary\": true,​\n \"elementTypeParams\": {​\n \"max_length\": 100​\n }​\n}'​\n​\nexport vectorField='{​\n \"fieldName\": \"sparse_vector\",​\n \"dataType\": \"SparseFloatVector\"​\n}'​\n​\nexport schema=\"{​\n \\\"autoID\\\": true,​\n \\\"fields\\\": [​\n $primaryField,​\n $vectorField​\n ]​\n}\"​\n\n","index_params = client.prepare_index_params()​\n​\nindex_params.add_index(​\n field_name=\"sparse_vector\",​\n index_name=\"sparse_inverted_index\",​\n index_type=\"SPARSE_INVERTED_INDEX\",​\n metric_type=\"IP\",​\n params={\"drop_ratio_build\": 0.2},​\n)​\n\n","import io.milvus.v2.common.IndexParam;​\nimport java.util.*;​\n​\nList indexes = new ArrayList<>();​\nMap extraParams = new HashMap<>();​\nextraParams.put(\"drop_ratio_build\", 
0.2);​\nindexes.add(IndexParam.builder()​\n .fieldName(\"sparse_vector\")​\n .indexName(\"sparse_inverted_index\")​\n .indexType(IndexParam.IndexType.SPARSE_INVERTED_INDEX)​\n .metricType(IndexParam.MetricType.IP)​\n .extraParams(extraParams)​\n .build());​\n\n","const indexParams = await client.createIndex({​\n index_name: 'sparse_inverted_index',​\n field_name: 'sparse_vector',​\n metric_type: MetricType.IP,​\n index_type: IndexType.SPARSE_WAND,​\n params: {​\n drop_ratio_build: 0.2,​\n },​\n});​\n\n","export indexParams='[​\n {​\n \"fieldName\": \"sparse_vector\",​\n \"metricType\": \"IP\",​\n \"indexName\": \"sparse_inverted_index\",​\n \"indexType\": \"SPARSE_INVERTED_INDEX\",​\n \"params\":{\"drop_ratio_build\": 0.2}​\n }​\n ]'​\n\n","client.create_collection(​\n collection_name=\"my_sparse_collection\",​\n schema=schema,​\n index_params=index_params​\n)​\n\n","import io.milvus.v2.client.ConnectConfig;​\nimport io.milvus.v2.client.MilvusClientV2;​\n​\nMilvusClientV2 client = new MilvusClientV2(ConnectConfig.builder()​\n .uri(\"http://localhost:19530\")​\n .build());​\n ​\nCreateCollectionReq requestCreate = CreateCollectionReq.builder()​\n .collectionName(\"my_sparse_collection\")​\n .collectionSchema(schema)​\n .indexParams(indexes)​\n .build();​\nclient.createCollection(requestCreate);​\n\n","import { MilvusClient } from \"@zilliz/milvus2-sdk-node\";​\n​\nconst client = new MilvusClient({​\n address: 'http://localhost:19530'​\n});​\n​\nawait client.createCollection({​\n collection_name: 'my_sparse_collection',​\n schema: schema,​\n index_params: indexParams​\n});​\n\n","curl --request POST \\​\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/collections/create\" \\​\n--header \"Authorization: Bearer ${TOKEN}\" \\​\n--header \"Content-Type: application/json\" \\​\n-d \"{​\n \\\"collectionName\\\": \\\"my_sparse_collection\\\",​\n \\\"schema\\\": $schema,​\n \\\"indexParams\\\": $indexParams​\n}\"​\n\n","sparse_vectors = [​\n {\"sparse_vector\": {1: 0.5, 100: 0.3, 500: 0.8}},​\n {\"sparse_vector\": {10: 0.1, 200: 0.7, 1000: 0.9}},​\n]​\n​\nclient.insert(​\n collection_name=\"my_sparse_collection\",​\n data=sparse_vectors​\n)​\n\n","import com.google.gson.Gson;​\nimport com.google.gson.JsonObject;​\nimport io.milvus.v2.service.vector.request.InsertReq;​\nimport io.milvus.v2.service.vector.response.InsertResp;​\n​\nList rows = new ArrayList<>();​\nGson gson = new Gson();​\n{​\n JsonObject row = new JsonObject();​\n SortedMap sparse = new TreeMap<>();​\n sparse.put(1L, 0.5f);​\n sparse.put(100L, 0.3f);​\n sparse.put(500L, 0.8f);​\n row.add(\"sparse_vector\", gson.toJsonTree(sparse));​\n rows.add(row);​\n}​\n{​\n JsonObject row = new JsonObject();​\n SortedMap sparse = new TreeMap<>();​\n sparse.put(10L, 0.1f);​\n sparse.put(200L, 0.7f);​\n sparse.put(1000L, 0.9f);​\n row.add(\"sparse_vector\", gson.toJsonTree(sparse));​\n rows.add(row);​\n}​\n​\nInsertResp insertR = client.insert(InsertReq.builder()​\n .collectionName(\"my_sparse_collection\")​\n .data(rows)​\n .build());​\n\n","const data = [​\n { sparse_vector: { \"1\": 0.5, \"100\": 0.3, \"500\": 0.8 } },​\n { sparse_vector: { \"10\": 0.1, \"200\": 0.7, \"1000\": 0.9 } },​\n];​\nclient.insert({​\n collection_name: \"my_sparse_collection\",​\n data: data,​\n});​\n​\n\n","curl --request POST \\​\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/entities/insert\" \\​\n--header \"Authorization: Bearer ${TOKEN}\" \\​\n--header \"Content-Type: application/json\" \\​\n-d '{​\n \"data\": [​\n {\"sparse_vector\": {\"1\": 0.5, \"100\": 0.3, \"500\": 0.8}},​\n 
{\"sparse_vector\": {\"10\": 0.1, \"200\": 0.7, \"1000\": 0.9}} ​\n ],​\n \"collectionName\": \"my_sparse_collection\"​\n}'​\n​\n## {\"code\":0,\"cost\":0,\"data\":{\"insertCount\":2,\"insertIds\":[\"453577185629572534\",\"453577185629572535\"]}}​\n\n","# Prepare search parameters​\nsearch_params = {​\n \"params\": {\"drop_ratio_search\": 0.2}, # Additional optional search parameters​\n}​\n​\n# Prepare the query vector​\nquery_vector = [{1: 0.2, 50: 0.4, 1000: 0.7}]​\n\n","res = client.search(​\n collection_name=\"my_sparse_collection\",​\n data=query_vector,​\n limit=3,​\n output_fields=[\"pk\"],​\n search_params=search_params,​\n)​\n​\nprint(res)​\n​\n# Output​\n# data: [\"[{'id': '453718927992172266', 'distance': 0.6299999952316284, 'entity': {'pk': '453718927992172266'}}, {'id': '453718927992172265', 'distance': 0.10000000149011612, 'entity': {'pk': '453718927992172265'}}]\"]​\n\n","import io.milvus.v2.service.vector.request.SearchReq;​\nimport io.milvus.v2.service.vector.request.data.SparseFloatVec;​\nimport io.milvus.v2.service.vector.response.SearchResp;​\n​\nMap searchParams = new HashMap<>();​\nsearchParams.put(\"drop_ratio_search\", 0.2);​\n​\nSortedMap sparse = new TreeMap<>();​\nsparse.put(10L, 0.1f);​\nsparse.put(200L, 0.7f);​\nsparse.put(1000L, 0.9f);​\n​\nSparseFloatVec queryVector = new SparseFloatVec(sparse);​\n​\nSearchResp searchR = client.search(SearchReq.builder()​\n .collectionName(\"my_sparse_collection\")​\n .data(Collections.singletonList(queryVector))​\n .annsField(\"sparse_vector\")​\n .searchParams(searchParams)​\n .topK(3)​\n .outputFields(Collections.singletonList(\"pk\"))​\n .build());​\n ​\nSystem.out.println(searchR.getSearchResults());​\n​\n// Output​\n//​\n// [[SearchResp.SearchResult(entity={pk=453444327741536759}, score=1.31, id=453444327741536759), SearchResp.SearchResult(entity={pk=453444327741536756}, score=1.31, id=453444327741536756), SearchResp.SearchResult(entity={pk=453444327741536753}, score=1.31, id=453444327741536753)]]​\n\n","client.search({​\n collection_name: 'my_sparse_collection',​\n data: {1: 0.2, 50: 0.4, 1000: 0.7},​\n limit: 3,​\n output_fields: ['pk'],​\n params: {​\n drop_ratio_search: 0.2​\n }​\n});​\n\n","curl --request POST \\​\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/entities/search\" \\​\n--header \"Authorization: Bearer ${TOKEN}\" \\​\n--header \"Content-Type: application/json\" \\​\n-d '{​\n \"collectionName\": \"my_sparse_collection\",​\n \"data\": [​\n {\"1\": 0.2, \"50\": 0.4, \"1000\": 0.7}​\n ],​\n \"annsField\": \"sparse_vector\",​\n \"limit\": 3,​\n \"searchParams\":{​\n \"params\":{\"drop_ratio_search\": 0.2}​\n },​\n \"outputFields\": [\"pk\"]​\n}'​\n​\n## {\"code\":0,\"cost\":0,\"data\":[{\"distance\":0.63,\"id\":\"453577185629572535\",\"pk\":\"453577185629572535\"},{\"distance\":0.1,\"id\":\"453577185629572534\",\"pk\":\"453577185629572534\"}]}​\n\n"],"headingContent":"Sparse Vector​","anchorList":[{"label":"스파스 벡터","href":"Sparse-Vector​","type":1,"isActive":false},{"label":"개요","href":"Overview​","type":2,"isActive":false},{"label":"Milvus에서 스파스 벡터 사용","href":"Use-sparse-vectors-in-Milvus​","type":2,"isActive":false}]} \ No newline at end of file +{"codeList":["from scipy.sparse import csr_matrix​\n​\n# Create a sparse matrix​\nrow = [0, 0, 1, 2, 2, 2]​\ncol = [0, 2, 2, 0, 1, 2]​\ndata = [1, 2, 3, 4, 5, 6]​\nsparse_matrix = csr_matrix((data, (row, col)), shape=(3, 3))​\n​\n# Represent sparse vector using the sparse matrix​\nsparse_vector = sparse_matrix.getrow(0)​\n\n","# Represent sparse vector using a 
dictionary​\nsparse_vector = [{1: 0.5, 100: 0.3, 500: 0.8, 1024: 0.2, 5000: 0.6}]​\n\n","SortedMap sparseVector = new TreeMap<>();​\nsparseVector.put(1L, 0.5f);​\nsparseVector.put(100L, 0.3f);​\nsparseVector.put(500L, 0.8f);​\nsparseVector.put(1024L, 0.2f);​\nsparseVector.put(5000L, 0.6f);​\n\n","# Represent sparse vector using a list of tuples​\nsparse_vector = [[(1, 0.5), (100, 0.3), (500, 0.8), (1024, 0.2), (5000, 0.6)]]​\n\n","from pymilvus import MilvusClient, DataType​\n​\nclient = MilvusClient(uri=\"http://localhost:19530\")​\n​\nclient.drop_collection(collection_name=\"my_sparse_collection\")​\n​\nschema = client.create_schema(​\n auto_id=True,​\n enable_dynamic_fields=True,​\n)​\n​\nschema.add_field(field_name=\"pk\", datatype=DataType.VARCHAR, is_primary=True, max_length=100)​\nschema.add_field(field_name=\"sparse_vector\", datatype=DataType.SPARSE_FLOAT_VECTOR)​\n\n","import io.milvus.v2.client.ConnectConfig;​\nimport io.milvus.v2.client.MilvusClientV2;​\n​\nimport io.milvus.v2.common.DataType;​\nimport io.milvus.v2.service.collection.request.AddFieldReq;​\nimport io.milvus.v2.service.collection.request.CreateCollectionReq;​\n​\nMilvusClientV2 client = new MilvusClientV2(ConnectConfig.builder()​\n .uri(\"http://localhost:19530\")​\n .build());​\n ​\nCreateCollectionReq.CollectionSchema schema = client.createSchema();​\nschema.setEnableDynamicField(true);​\nschema.addField(AddFieldReq.builder()​\n .fieldName(\"pk\")​\n .dataType(DataType.VarChar)​\n .isPrimaryKey(true)​\n .autoID(true)​\n .maxLength(100)​\n .build());​\n​\nschema.addField(AddFieldReq.builder()​\n .fieldName(\"sparse_vector\")​\n .dataType(DataType.SparseFloatVector)​\n .build());​\n\n","import { DataType } from \"@zilliz/milvus2-sdk-node\";​\n​\nconst schema = [​\n {​\n name: \"metadata\",​\n data_type: DataType.JSON,​\n },​\n {​\n name: \"pk\",​\n data_type: DataType.Int64,​\n is_primary_key: true,​\n },​\n {​\n name: \"sparse_vector\",​\n data_type: DataType.SparseFloatVector,​\n }​\n];​\n​\n\n","export primaryField='{​\n \"fieldName\": \"pk\",​\n \"dataType\": \"VarChar\",​\n \"isPrimary\": true,​\n \"elementTypeParams\": {​\n \"max_length\": 100​\n }​\n}'​\n​\nexport vectorField='{​\n \"fieldName\": \"sparse_vector\",​\n \"dataType\": \"SparseFloatVector\"​\n}'​\n​\nexport schema=\"{​\n \\\"autoID\\\": true,​\n \\\"fields\\\": [​\n $primaryField,​\n $vectorField​\n ]​\n}\"​\n\n","index_params = client.prepare_index_params()​\n​\nindex_params.add_index(​\n field_name=\"sparse_vector\",​\n index_name=\"sparse_inverted_index\",​\n index_type=\"SPARSE_INVERTED_INDEX\",​\n metric_type=\"IP\",​\n params={\"drop_ratio_build\": 0.2},​\n)​\n\n","import io.milvus.v2.common.IndexParam;​\nimport java.util.*;​\n​\nList indexes = new ArrayList<>();​\nMap extraParams = new HashMap<>();​\nextraParams.put(\"drop_ratio_build\", 0.2);​\nindexes.add(IndexParam.builder()​\n .fieldName(\"sparse_vector\")​\n .indexName(\"sparse_inverted_index\")​\n .indexType(IndexParam.IndexType.SPARSE_INVERTED_INDEX)​\n .metricType(IndexParam.MetricType.IP)​\n .extraParams(extraParams)​\n .build());​\n\n","const indexParams = await client.createIndex({​\n index_name: 'sparse_inverted_index',​\n field_name: 'sparse_vector',​\n metric_type: MetricType.IP,​\n index_type: IndexType.SPARSE_WAND,​\n params: {​\n drop_ratio_build: 0.2,​\n },​\n});​\n\n","export indexParams='[​\n {​\n \"fieldName\": \"sparse_vector\",​\n \"metricType\": \"IP\",​\n \"indexName\": \"sparse_inverted_index\",​\n \"indexType\": \"SPARSE_INVERTED_INDEX\",​\n 
\"params\":{\"drop_ratio_build\": 0.2}​\n }​\n ]'​\n\n","client.create_collection(​\n collection_name=\"my_sparse_collection\",​\n schema=schema,​\n index_params=index_params​\n)​\n\n","import io.milvus.v2.client.ConnectConfig;​\nimport io.milvus.v2.client.MilvusClientV2;​\n​\nMilvusClientV2 client = new MilvusClientV2(ConnectConfig.builder()​\n .uri(\"http://localhost:19530\")​\n .build());​\n ​\nCreateCollectionReq requestCreate = CreateCollectionReq.builder()​\n .collectionName(\"my_sparse_collection\")​\n .collectionSchema(schema)​\n .indexParams(indexes)​\n .build();​\nclient.createCollection(requestCreate);​\n\n","import { MilvusClient } from \"@zilliz/milvus2-sdk-node\";​\n​\nconst client = new MilvusClient({​\n address: 'http://localhost:19530'​\n});​\n​\nawait client.createCollection({​\n collection_name: 'my_sparse_collection',​\n schema: schema,​\n index_params: indexParams​\n});​\n\n","curl --request POST \\​\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/collections/create\" \\​\n--header \"Authorization: Bearer ${TOKEN}\" \\​\n--header \"Content-Type: application/json\" \\​\n-d \"{​\n \\\"collectionName\\\": \\\"my_sparse_collection\\\",​\n \\\"schema\\\": $schema,​\n \\\"indexParams\\\": $indexParams​\n}\"​\n\n","sparse_vectors = [​\n {\"sparse_vector\": {1: 0.5, 100: 0.3, 500: 0.8}},​\n {\"sparse_vector\": {10: 0.1, 200: 0.7, 1000: 0.9}},​\n]​\n​\nclient.insert(​\n collection_name=\"my_sparse_collection\",​\n data=sparse_vectors​\n)​\n\n","import com.google.gson.Gson;​\nimport com.google.gson.JsonObject;​\nimport io.milvus.v2.service.vector.request.InsertReq;​\nimport io.milvus.v2.service.vector.response.InsertResp;​\n​\nList rows = new ArrayList<>();​\nGson gson = new Gson();​\n{​\n JsonObject row = new JsonObject();​\n SortedMap sparse = new TreeMap<>();​\n sparse.put(1L, 0.5f);​\n sparse.put(100L, 0.3f);​\n sparse.put(500L, 0.8f);​\n row.add(\"sparse_vector\", gson.toJsonTree(sparse));​\n rows.add(row);​\n}​\n{​\n JsonObject row = new JsonObject();​\n SortedMap sparse = new TreeMap<>();​\n sparse.put(10L, 0.1f);​\n sparse.put(200L, 0.7f);​\n sparse.put(1000L, 0.9f);​\n row.add(\"sparse_vector\", gson.toJsonTree(sparse));​\n rows.add(row);​\n}​\n​\nInsertResp insertR = client.insert(InsertReq.builder()​\n .collectionName(\"my_sparse_collection\")​\n .data(rows)​\n .build());​\n\n","const data = [​\n { sparse_vector: { \"1\": 0.5, \"100\": 0.3, \"500\": 0.8 } },​\n { sparse_vector: { \"10\": 0.1, \"200\": 0.7, \"1000\": 0.9 } },​\n];​\nclient.insert({​\n collection_name: \"my_sparse_collection\",​\n data: data,​\n});​\n​\n\n","curl --request POST \\​\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/entities/insert\" \\​\n--header \"Authorization: Bearer ${TOKEN}\" \\​\n--header \"Content-Type: application/json\" \\​\n-d '{​\n \"data\": [​\n {\"sparse_vector\": {\"1\": 0.5, \"100\": 0.3, \"500\": 0.8}},​\n {\"sparse_vector\": {\"10\": 0.1, \"200\": 0.7, \"1000\": 0.9}} ​\n ],​\n \"collectionName\": \"my_sparse_collection\"​\n}'​\n​\n## {\"code\":0,\"cost\":0,\"data\":{\"insertCount\":2,\"insertIds\":[\"453577185629572534\",\"453577185629572535\"]}}​\n\n","# Prepare search parameters​\nsearch_params = {​\n \"params\": {\"drop_ratio_search\": 0.2}, # Additional optional search parameters​\n}​\n​\n# Prepare the query vector​\nquery_vector = [{1: 0.2, 50: 0.4, 1000: 0.7}]​\n\n","res = client.search(​\n collection_name=\"my_sparse_collection\",​\n data=query_vector,​\n limit=3,​\n output_fields=[\"pk\"],​\n search_params=search_params,​\n)​\n​\nprint(res)​\n​\n# Output​\n# data: [\"[{'id': 
'453718927992172266', 'distance': 0.6299999952316284, 'entity': {'pk': '453718927992172266'}}, {'id': '453718927992172265', 'distance': 0.10000000149011612, 'entity': {'pk': '453718927992172265'}}]\"]​\n\n","import io.milvus.v2.service.vector.request.SearchReq;​\nimport io.milvus.v2.service.vector.request.data.SparseFloatVec;​\nimport io.milvus.v2.service.vector.response.SearchResp;​\n​\nMap searchParams = new HashMap<>();​\nsearchParams.put(\"drop_ratio_search\", 0.2);​\n​\nSortedMap sparse = new TreeMap<>();​\nsparse.put(10L, 0.1f);​\nsparse.put(200L, 0.7f);​\nsparse.put(1000L, 0.9f);​\n​\nSparseFloatVec queryVector = new SparseFloatVec(sparse);​\n​\nSearchResp searchR = client.search(SearchReq.builder()​\n .collectionName(\"my_sparse_collection\")​\n .data(Collections.singletonList(queryVector))​\n .annsField(\"sparse_vector\")​\n .searchParams(searchParams)​\n .topK(3)​\n .outputFields(Collections.singletonList(\"pk\"))​\n .build());​\n ​\nSystem.out.println(searchR.getSearchResults());​\n​\n// Output​\n//​\n// [[SearchResp.SearchResult(entity={pk=453444327741536759}, score=1.31, id=453444327741536759), SearchResp.SearchResult(entity={pk=453444327741536756}, score=1.31, id=453444327741536756), SearchResp.SearchResult(entity={pk=453444327741536753}, score=1.31, id=453444327741536753)]]​\n\n","client.search({​\n collection_name: 'my_sparse_collection',​\n data: {1: 0.2, 50: 0.4, 1000: 0.7},​\n limit: 3,​\n output_fields: ['pk'],​\n params: {​\n drop_ratio_search: 0.2​\n }​\n});​\n\n","curl --request POST \\​\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/entities/search\" \\​\n--header \"Authorization: Bearer ${TOKEN}\" \\​\n--header \"Content-Type: application/json\" \\​\n-d '{​\n \"collectionName\": \"my_sparse_collection\",​\n \"data\": [​\n {\"1\": 0.2, \"50\": 0.4, \"1000\": 0.7}​\n ],​\n \"annsField\": \"sparse_vector\",​\n \"limit\": 3,​\n \"searchParams\":{​\n \"params\":{\"drop_ratio_search\": 0.2}​\n },​\n \"outputFields\": [\"pk\"]​\n}'​\n​\n## {\"code\":0,\"cost\":0,\"data\":[{\"distance\":0.63,\"id\":\"453577185629572535\",\"pk\":\"453577185629572535\"},{\"distance\":0.1,\"id\":\"453577185629572534\",\"pk\":\"453577185629572534\"}]}​\n\n"],"headingContent":"Sparse Vector​","anchorList":[{"label":"스파스 벡터","href":"Sparse-Vector​","type":1,"isActive":false},{"label":"개요","href":"Overview​","type":2,"isActive":false},{"label":"Milvus에서 스파스 벡터 사용","href":"Use-sparse-vectors-in-Milvus​","type":2,"isActive":false},{"label":"제한 사항","href":"Limits","type":2,"isActive":false},{"label":"FAQ","href":"FAQ","type":2,"isActive":false}]} \ No newline at end of file diff --git a/localization/v2.5.x/site/ko/userGuide/schema/sparse_vector.md b/localization/v2.5.x/site/ko/userGuide/schema/sparse_vector.md index 94191d8ce..942341468 100644 --- a/localization/v2.5.x/site/ko/userGuide/schema/sparse_vector.md +++ b/localization/v2.5.x/site/ko/userGuide/schema/sparse_vector.md @@ -40,7 +40,7 @@ summary: >-
          • 텍스트 분석: 각 차원이 단어에 해당하고 문서에 나타나는 단어만 0이 아닌 값을 갖는 단어 가방 벡터로 문서를 표현합니다.

          • 추천 시스템: 각 차원이 특정 항목에 대한 사용자의 평가를 나타내는 사용자-항목 상호 작용 행렬로 표현하며, 대부분의 사용자는 몇 개의 항목과만 상호 작용합니다.

          • -
          • 이미지 처리: 이미지의 핵심 포인트에만 초점을 맞춘 로컬 특징 표현으로, 고차원 스파스 벡터를 생성합니다.

          • +
          • 이미지 처리: 이미지의 주요 포인트에만 초점을 맞춘 로컬 특징 표현으로, 고차원 스파스 벡터를 생성합니다.

          아래 다이어그램에서 볼 수 있듯이 고밀도 벡터는 일반적으로 각 위치에 값이 있는 연속 배열로 표현됩니다(예: [0.3, 0.8, 0.2, 0.3, 0.1]). 이와 대조적으로 희소 벡터는 0이 아닌 요소와 그 인덱스만 저장하며, 키-값 쌍으로 표현되는 경우가 많습니다(예: [{2: 0.2}, ..., {9997: 0.5}, {9999: 0.7}]). 이 표현은 저장 공간을 크게 줄이고 계산 효율성을 높이며, 특히 매우 고차원적인 데이터(예: 10,000차원)를 다룰 때 유용합니다.

          @@ -395,7 +395,7 @@ search_params = {​ query_vector = [{1: 0.2, 50: 0.4, 1000: 0.7}]​

      -

      이 예에서 drop_ratio_search 은 희소 벡터를 위한 선택적 파라미터로, 검색 중에 쿼리 벡터의 작은 값을 미세 조정할 수 있습니다. 예를 들어 {"drop_ratio_search": 0.2} 을 사용하면 쿼리 벡터에서 가장 작은 20%의 값은 검색 중에 무시됩니다.

      +

      이 예에서 drop_ratio_search 은 희소 벡터를 위한 선택적 파라미터로, 검색 중에 쿼리 벡터의 작은 값을 미세 조정할 수 있습니다. 예를 들어 {"drop_ratio_search": 0.2} 을 사용하면 검색 중에 쿼리 벡터에서 가장 작은 20%의 값은 무시됩니다.

      그런 다음 search 메서드를 사용하여 유사도 검색을 실행합니다.

      @@ -475,3 +475,60 @@ sparse.put(1000L,

      유사도 검색 매개변수에 대한 자세한 내용은 기본 ANN 검색을 참조하세요.

      +

      제한 사항

      Milvus에서 스파스 벡터를 사용할 때는 다음과 같은 제한 사항을 고려하세요:

      +
        +
      • 현재 스파스 벡터에는 IP 거리 메트릭만 지원됩니다. 희소 벡터의 차원이 높기 때문에 L2 및 코사인 거리는 실용적이지 않습니다.

      • +
      • 스파스 벡터 필드의 경우, SPARSE_INVERTED_INDEXSPARSE_WAND 인덱스 유형만 지원됩니다.

      • +
      • 스파스 벡터에 지원되는 데이터 유형:

        +
          +
        • 차원 부분은 부호가 없는 32비트 정수여야 합니다;
        • +
        • 값 부분은 음수가 아닌 32비트 부동 소수점 숫자일 수 있습니다.
        • +
      • +
      • 스파스 벡터는 삽입 및 검색을 위해 다음 요구 사항을 충족해야 합니다:

        +
          +
        • 벡터의 값이 하나 이상 0이 아닐 것;
        • +
        • 벡터의 인덱스(차원)는 음수가 아니어야 합니다(아래의 간단한 검증 예시 참조).
        • +
      • +
      +
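The helper below is only an editorial restatement of the constraints above in plain Python, not Milvus' own validation logic; it can be handy for catching malformed sparse vectors on the client side before insertion.

```python
def check_sparse_vector(vec: dict) -> None:
    """Raise if `vec` violates the constraints listed above (rough client-side check)."""
    uint32_max = 2**32 - 1
    if not any(v != 0 for v in vec.values()):
        raise ValueError("a sparse vector needs at least one non-zero value")
    for dim, val in vec.items():
        if not (0 <= dim < uint32_max):
            raise ValueError(f"dimension {dim} must fit in [0, uint32 max)")
        if val < 0:
            raise ValueError(f"value at dimension {dim} must be non-negative")

check_sparse_vector({1: 0.5, 100: 0.3, 500: 0.8})   # passes silently
```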

      FAQ

        +
      • SPARSE_INVERTED_INDEX와 SPARSE_WAND의 차이점은 무엇이며, 둘 중 어떤 것을 선택해야 하나요?

        +

        SPARSE_INVERTED_INDEX는 기존의 반전 인덱스인 반면, SPARSE_WAND는 검색 시 전체 IP 거리 평가 횟수를 줄이기 위해 Weak-AND 알고리즘을 사용합니다. SPARSE_WAND는 일반적으로 더 빠르지만 벡터 밀도가 증가하면 성능이 저하될 수 있습니다. 이 중 하나를 선택하려면 특정 데이터 세트와 사용 사례에 따라 실험과 벤치마크를 수행하세요.

      • +
      • drop_ratio_build 및 drop_ratio_search 매개변수는 어떻게 선택해야 하나요?

        +

        drop_ratio_builddrop_ratio_search의 선택은 데이터의 특성과 검색 지연 시간/처리량 및 정확도에 대한 요구 사항에 따라 달라집니다.

      • +
      • 스파스 임베딩의 차원은 uint32 공간 내에서 임의의 불연속형 값이 될 수 있나요?

        +

        예, 한 가지 예외가 있습니다. 희소 임베딩의 차원은 [0, uint32 최대값) 범위의 모든 값을 사용할 수 있습니다. 즉, uint32의 최대값 자체는 사용할 수 없습니다.

      • +
      • 증가하는 세그먼트에 대한 검색은 인덱스를 통해 수행되나요 아니면 무차별 대입으로 수행되나요?

        +

        증가하는 세그먼트에 대한 검색은 봉인된 세그먼트 인덱스와 동일한 유형의 인덱스를 통해 수행됩니다. 인덱스가 구축되기 전에 새로 증가하는 세그먼트의 경우 무차별 대입 검색이 사용됩니다.

      • +
      • 하나의 컬렉션에 희소 벡터와 고밀도 벡터를 모두 포함할 수 있나요?

        +

        예, 여러 벡터 유형을 지원하므로 희소 벡터 열과 고밀도 벡터 열이 모두 포함된 컬렉션을 생성하고 하이브리드 검색을 수행할 수 있습니다(아래의 스키마 예시 참조).

      • +
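A sketch of the last FAQ answer: one schema carrying both a dense and a sparse vector field. The collection name, dimension, and index choices are illustrative; a hybrid search over these two fields would then typically use AnnSearchRequest objects plus a ranker.

```python
from pymilvus import MilvusClient, DataType

client = MilvusClient(uri="http://localhost:19530")

schema = client.create_schema(auto_id=True)
schema.add_field(field_name="pk", datatype=DataType.INT64, is_primary=True)
schema.add_field(field_name="dense_vector", datatype=DataType.FLOAT_VECTOR, dim=128)
schema.add_field(field_name="sparse_vector", datatype=DataType.SPARSE_FLOAT_VECTOR)

index_params = client.prepare_index_params()
index_params.add_index(field_name="dense_vector", index_type="AUTOINDEX", metric_type="IP")
index_params.add_index(field_name="sparse_vector", index_type="SPARSE_INVERTED_INDEX", metric_type="IP")

client.create_collection(
    collection_name="hybrid_demo",    # arbitrary example name
    schema=schema,
    index_params=index_params,
)
```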
      diff --git a/localization/v2.5.x/site/ko/userGuide/search-query-get/boolean.md b/localization/v2.5.x/site/ko/userGuide/search-query-get/boolean.md index 13f4a0396..b0a95df2a 100644 --- a/localization/v2.5.x/site/ko/userGuide/search-query-get/boolean.md +++ b/localization/v2.5.x/site/ko/userGuide/search-query-get/boolean.md @@ -697,9 +697,9 @@ curl --request POST \​

      Match operators​

      Match operators include:​

      • like: Match constants or prefixes (prefix%), infixes (%infix%), and suffixes (%suffix) within constants. It relies on a brute-force search mechanism using wildcards and does not involve text tokenization. While it can achieve exact matches, its query efficiency is relatively low, making it suitable for simple matching tasks or queries on smaller datasets.​

      • -
      • TEXT_MATCH: Match specific terms or keywords on VARCHAR fields, using tokenization and inverted index to enable efficient text search. Compared to like, TEXT_MATCH offers more advanced text tokenization and filtering capabilities. It is suited for large-scale datasets where higher query performance is required for complex text search scenarios.​ -

        -

        To use the TEXT_MATCH filter expression, you must enable text matching for the target VARCHAR field when creating the collection. For details, refer to ​Keyword Match.​

        +
      • TEXT_MATCH: Match specific terms or keywords on VARCHAR fields, using tokenization and inverted index to enable efficient text search. Compared to like, TEXT_MATCH offers more advanced text tokenization and filtering capabilities. It is suited for large-scale datasets where higher query performance is required for complex text search scenarios.​

        +

        +

        To use the TEXT_MATCH filter expression, you must enable text matching for the target VARCHAR field when creating the collection. For details, refer to Text Match.​
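As a reference point, a minimal pymilvus sketch of enabling text matching on a VARCHAR field at collection-creation time; the field names are examples only.

```python
from pymilvus import MilvusClient, DataType

client = MilvusClient(uri="http://localhost:19530")

schema = client.create_schema(auto_id=True)
schema.add_field(field_name="id", datatype=DataType.INT64, is_primary=True)
schema.add_field(
    field_name="description",
    datatype=DataType.VARCHAR,
    max_length=1000,
    enable_analyzer=True,   # tokenize the raw text
    enable_match=True,      # build the inverted index used by TEXT_MATCH
)
```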

      Example 1: Apply filter on scalar field​

      The following example demonstrates how to filter products whose color is red. In this case, you can quickly filter all red products by matching the prefix 'red%'. Similarly, you can use the expression color in ['red_7025', 'red_4794', 'red_9392'] to filter all red products. However, when the data is more complex, we recommend using the like operator for more efficient filtering.​
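A minimal pymilvus sketch of both expressions, assuming a collection named my_collection with a VARCHAR color field (names are illustrative):

```python
from pymilvus import MilvusClient

client = MilvusClient(uri="http://localhost:19530")

# Prefix match with the like operator
res = client.query(
    collection_name="my_collection",              # hypothetical collection
    filter='color like "red%"',
    output_fields=["color"],
    limit=10,
)

# The same intent expressed as an explicit value list
res = client.query(
    collection_name="my_collection",
    filter='color in ["red_7025", "red_4794", "red_9392"]',
    output_fields=["color"],
    limit=10,
)
print(res)
```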

      @@ -854,8 +854,8 @@ curl --request POST \​ ]​
      -

      Example 3: Keyword match on VARCHAR fields​

      The TEXT_MATCH expression is used for keyword match on VARCHAR fields. By default, it applies an OR logic, but you can combine it with other logical operators to create more complex query conditions. For details, refer to ​Keyword Match.​

      -

      The following example demonstrates how to use the TEXT_MATCH expression to filter products where the description field contains either the keyword "Apple" or "iPhone":​

      +

      Example 3: Text match on VARCHAR fields​

      The TEXT_MATCH expression is used for text match on VARCHAR fields. By default, it applies an OR logic, but you can combine it with other logical operators to create more complex query conditions. For details, refer to Text Match.​

      +

      The following example demonstrates how to use the TEXT_MATCH expression to filter products where the description field contains either the term "Apple" or "iPhone":​
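A minimal pymilvus sketch of that filter, assuming a collection named my_collection whose description field has text matching enabled (see the schema sketch earlier in this section):

```python
from pymilvus import MilvusClient

client = MilvusClient(uri="http://localhost:19530")

# TEXT_MATCH applies OR logic over the listed terms: entities whose
# description contains "Apple" or "iPhone" pass the filter.
res = client.query(
    collection_name="my_collection",              # hypothetical collection
    filter='TEXT_MATCH(description, "Apple iPhone")',
    output_fields=["description"],
    limit=10,
)
print(res)
```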

      Python Java diff --git a/localization/v2.5.x/site/ko/userGuide/search-query-get/full-text-search.json b/localization/v2.5.x/site/ko/userGuide/search-query-get/full-text-search.json index e69628490..849192ac2 100644 --- a/localization/v2.5.x/site/ko/userGuide/search-query-get/full-text-search.json +++ b/localization/v2.5.x/site/ko/userGuide/search-query-get/full-text-search.json @@ -1 +1 @@ -{"codeList":["from pymilvus import MilvusClient, DataType, Function, FunctionType​\n​\nschema = MilvusClient.create_schema()​\n​\nschema.add_field(field_name=\"id\", datatype=DataType.INT64, is_primary=True, auto_id=True)​\nschema.add_field(field_name=\"text\", datatype=DataType.VARCHAR, max_length=1000, enable_analyzer=True)​\nschema.add_field(field_name=\"sparse\", datatype=DataType.SPARSE_FLOAT_VECTOR)​\n\n","bm25_function = Function(​\n name=\"text_bm25_emb\", # Function name​\n input_field_names=[\"text\"], # Name of the VARCHAR field containing raw text data​\n output_field_names=[\"sparse\"], # Name of the SPARSE_FLOAT_VECTOR field reserved to store generated embeddings​\n function_type=FunctionType.BM25,​\n)​\n​\nschema.add_function(bm25_function)​\n\n","index_params = MilvusClient.prepare_index_params()​\n​\nindex_params.add_index(​\n field_name=\"sparse\",​\n index_type=\"AUTOINDEX\", ​\n metric_type=\"BM25\"​\n)​\n\n","MilvusClient.create_collection(​\n collection_name='demo', ​\n schema=schema, ​\n index_params=index_params​\n)​\n\n","MilvusClient.insert('demo', [​\n {'text': 'Artificial intelligence was founded as an academic discipline in 1956.'},​\n {'text': 'Alan Turing was the first person to conduct substantial research in AI.'},​\n {'text': 'Born in Maida Vale, London, Turing was raised in southern England.'},​\n])​\n\n","search_params = {​\n 'params': {'drop_ratio_search': 0.6},​\n}​\n​\nMilvusClient.search(​\n collection_name='demo', ​\n data=['Who started AI research?'],​\n anns_field='sparse',​\n limit=3,​\n search_params=search_params​\n)​\n\n"],"headingContent":"Full Text Search​","anchorList":[{"label":"전체 텍스트 검색","href":"Full-Text-Search​","type":1,"isActive":false},{"label":"개요","href":"Overview​","type":2,"isActive":false},{"label":"전체 텍스트 검색을 위한 컬렉션 만들기","href":"Create-a-collection-for-full-text-search​","type":2,"isActive":false},{"label":"텍스트 데이터 삽입","href":"Insert-text-data","type":2,"isActive":false},{"label":"전체 텍스트 검색 수행","href":"Perform-full-text-search","type":2,"isActive":false}]} \ No newline at end of file +{"codeList":["from pymilvus import MilvusClient, DataType, Function, FunctionType​\n​\nschema = MilvusClient.create_schema()​\n​\nschema.add_field(field_name=\"id\", datatype=DataType.INT64, is_primary=True, auto_id=True)​\nschema.add_field(field_name=\"text\", datatype=DataType.VARCHAR, max_length=1000, enable_analyzer=True)​\nschema.add_field(field_name=\"sparse\", datatype=DataType.SPARSE_FLOAT_VECTOR)​\n\n","import io.milvus.v2.common.DataType;\nimport io.milvus.v2.service.collection.request.AddFieldReq;\nimport io.milvus.v2.service.collection.request.CreateCollectionReq;\n\nCreateCollectionReq.CollectionSchema schema = CreateCollectionReq.CollectionSchema.builder()\n .build();\nschema.addField(AddFieldReq.builder()\n .fieldName(\"id\")\n .dataType(DataType.Int64)\n .isPrimaryKey(true)\n .autoID(true)\n .build());\nschema.addField(AddFieldReq.builder()\n .fieldName(\"text\")\n .dataType(DataType.VarChar)\n .maxLength(1000)\n .enableAnalyzer(true)\n .build());\nschema.addField(AddFieldReq.builder()\n .fieldName(\"sparse\")\n 
.dataType(DataType.SparseFloatVector)\n .build());\n","import { MilvusClient, DataType } from \"@zilliz/milvus2-sdk-node\";\n\nconst address = \"http://localhost:19530\";\nconst token = \"root:Milvus\";\nconst client = new MilvusClient({address, token});\nconst schema = [\n {\n name: \"id\",\n data_type: DataType.Int64,\n is_primary_key: true,\n },\n {\n name: \"text\",\n data_type: \"VarChar\",\n enable_analyzer: true,\n enable_match: true,\n max_length: 1000,\n },\n {\n name: \"sparse\",\n data_type: DataType.SparseFloatVector,\n },\n];\n\n\nconsole.log(res.results)\n","export schema='{\n \"autoId\": true,\n \"enabledDynamicField\": false,\n \"fields\": [\n {\n \"fieldName\": \"id\",\n \"dataType\": \"Int64\",\n \"isPrimary\": true\n },\n {\n \"fieldName\": \"text\",\n \"dataType\": \"VarChar\",\n \"elementTypeParams\": {\n \"max_length\": 1000,\n \"enable_analyzer\": true\n }\n },\n {\n \"fieldName\": \"sparse\",\n \"dataType\": \"SparseFloatVector\"\n }\n ]\n }'\n","bm25_function = Function(​\n name=\"text_bm25_emb\", # Function name​\n input_field_names=[\"text\"], # Name of the VARCHAR field containing raw text data​\n output_field_names=[\"sparse\"], # Name of the SPARSE_FLOAT_VECTOR field reserved to store generated embeddings​\n function_type=FunctionType.BM25,​\n)​\n​\nschema.add_function(bm25_function)​\n\n","import io.milvus.common.clientenum.FunctionType;\nimport io.milvus.v2.service.collection.request.CreateCollectionReq.Function;\n\nimport java.util.*;\n\nschema.addFunction(Function.builder()\n .functionType(FunctionType.BM25)\n .name(\"text_bm25_emb\")\n .inputFieldNames(Collections.singletonList(\"text\"))\n .outputFieldNames(Collections.singletonList(\"vector\"))\n .build());\n","const functions = [\n {\n name: 'text_bm25_emb',\n description: 'bm25 function',\n type: FunctionType.BM25,\n input_field_names: ['text'],\n output_field_names: ['vector'],\n params: {},\n },\n];\n","export schema='{\n \"autoId\": true,\n \"enabledDynamicField\": false,\n \"fields\": [\n {\n \"fieldName\": \"id\",\n \"dataType\": \"Int64\",\n \"isPrimary\": true\n },\n {\n \"fieldName\": \"text\",\n \"dataType\": \"VarChar\",\n \"elementTypeParams\": {\n \"max_length\": 1000,\n \"enable_analyzer\": true\n }\n },\n {\n \"fieldName\": \"sparse\",\n \"dataType\": \"SparseFloatVector\"\n }\n ],\n \"functions\": [\n {\n \"name\": \"text_bm25_emb\",\n \"type\": \"BM25\",\n \"inputFieldNames\": [\"text\"],\n \"outputFieldNames\": [\"sparse\"],\n \"params\": {}\n }\n ]\n }'\n","index_params = MilvusClient.prepare_index_params()​\n​\nindex_params.add_index(​\n field_name=\"sparse\",​\n index_type=\"AUTOINDEX\", ​\n metric_type=\"BM25\"​\n)​\n\n","import io.milvus.v2.common.IndexParam;\n\nList indexes = new ArrayList<>();\nindexes.add(IndexParam.builder()\n .fieldName(\"sparse\")\n .indexType(IndexParam.IndexType.SPARSE_INVERTED_INDEX)\n .metricType(IndexParam.MetricType.BM25)\n .build());\n","const index_params = [\n {\n fieldName: \"sparse\",\n metricType: \"BM25\",\n indexType: \"AUTOINDEX\",\n },\n];\n","export indexParams='[\n {\n \"fieldName\": \"sparse\",\n \"metricType\": \"BM25\",\n \"indexType\": \"AUTOINDEX\"\n }\n ]'\n","MilvusClient.create_collection(​\n collection_name='demo', ​\n schema=schema, ​\n index_params=index_params​\n)​\n\n","import io.milvus.v2.service.collection.request.CreateCollectionReq;\n\nCreateCollectionReq requestCreate = CreateCollectionReq.builder()\n .collectionName(\"demo\")\n .collectionSchema(schema)\n .indexParams(indexes)\n 
.build();\nclient.createCollection(requestCreate);\n","await client.create_collection(\n collection_name: 'demo', \n schema: schema, \n index_params: index_params\n);\n","export CLUSTER_ENDPOINT=\"http://localhost:19530\"\nexport TOKEN=\"root:Milvus\"\n\ncurl --request POST \\\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/collections/create\" \\\n--header \"Authorization: Bearer ${TOKEN}\" \\\n--header \"Content-Type: application/json\" \\\n-d \"{\n \\\"collectionName\\\": \\\"demo\\\",\n \\\"schema\\\": $schema,\n \\\"indexParams\\\": $indexParams\n}\"\n","client.insert('demo', [\n {'text': 'information retrieval is a field of study.'},\n {'text': 'information retrieval focuses on finding relevant information in large datasets.'},\n {'text': 'data mining and information retrieval overlap in research.'},\n])\n\n","import com.google.gson.Gson;\nimport com.google.gson.JsonObject;\n\nimport io.milvus.v2.service.vector.request.InsertReq;\n\nGson gson = new Gson();\nList rows = Arrays.asList(\n gson.fromJson(\"{\\\"text\\\": \\\"information retrieval is a field of study.\\\"}\", JsonObject.class),\n gson.fromJson(\"{\\\"text\\\": \\\"information retrieval focuses on finding relevant information in large datasets.\\\"}\", JsonObject.class),\n gson.fromJson(\"{\\\"text\\\": \\\"data mining and information retrieval overlap in research.\\\"}\", JsonObject.class)\n);\n\nclient.insert(InsertReq.builder()\n .collectionName(\"demo\")\n .data(rows)\n .build());\n","await client.insert({\ncollection_name: 'demo', \ndata: [\n {'text': 'information retrieval is a field of study.'},\n {'text': 'information retrieval focuses on finding relevant information in large datasets.'},\n {'text': 'data mining and information retrieval overlap in research.'},\n]);\n","curl --request POST \\\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/entities/insert\" \\\n--header \"Authorization: Bearer ${TOKEN}\" \\\n--header \"Content-Type: application/json\" \\\n-d '{\n \"data\": [\n {\"text\": \"information retrieval is a field of study.\"},\n {\"text\": \"information retrieval focuses on finding relevant information in large datasets.\"},\n {\"text\": \"data mining and information retrieval overlap in research.\"} \n ],\n \"collectionName\": \"demo\"\n}'\n","search_params = {​\n 'params': {'drop_ratio_search': 0.6},​\n}​\n​\nMilvusClient.search(​\n collection_name='demo', ​\n data=['whats the focus of information retrieval?'],​\n anns_field='sparse',​\n limit=3,​\n search_params=search_params​\n)​\n\n","import io.milvus.v2.service.vector.request.SearchReq;\nimport io.milvus.v2.service.vector.request.data.EmbeddedText;\nimport io.milvus.v2.service.vector.response.SearchResp;\n\nMap searchParams = new HashMap<>();\nsearchParams.put(\"drop_ratio_search\", 0.6);\nSearchResp searchResp = client.search(SearchReq.builder()\n .collectionName(\"demo\")\n .data(Collections.singletonList(new EmbeddedText(\"whats the focus of information retrieval?\")))\n .annsField(\"sparse\")\n .topK(3)\n .searchParams(searchParams)\n .outputFields(Collections.singletonList(\"text\"))\n .build());\n","await client.search(\n collection_name: 'demo', \n data: ['whats the focus of information retrieval?'],\n anns_field: 'sparse',\n limit: 3,\n params: {'drop_ratio_search': 0.6},\n)\n","curl --request POST \\\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/entities/search\" \\\n--header \"Authorization: Bearer ${TOKEN}\" \\\n--header \"Content-Type: application/json\" \\\n--data-raw '{\n \"collectionName\": \"demo\",\n \"data\": [\n \"whats the focus of information 
retrieval?\"\n ],\n \"annsField\": \"sparse\",\n \"limit\": 3,\n \"outputFields\": [\n \"text\"\n ],\n \"searchParams\":{\n \"params\":{\n \"drop_ratio_search\":0.6\n }\n }\n}'\n"],"headingContent":"Full Text Search​","anchorList":[{"label":"전체 텍스트 검색","href":"Full-Text-Search​","type":1,"isActive":false},{"label":"개요","href":"Overview​","type":2,"isActive":false},{"label":"전체 텍스트 검색을 위한 컬렉션 만들기","href":"Create-a-collection-for-full-text-search​","type":2,"isActive":false},{"label":"텍스트 데이터 삽입","href":"Insert-text-data","type":2,"isActive":false},{"label":"전체 텍스트 검색 수행","href":"Perform-full-text-search","type":2,"isActive":false}]} \ No newline at end of file diff --git a/localization/v2.5.x/site/ko/userGuide/search-query-get/full-text-search.md b/localization/v2.5.x/site/ko/userGuide/search-query-get/full-text-search.md index d55d3c76b..0775a2751 100644 --- a/localization/v2.5.x/site/ko/userGuide/search-query-get/full-text-search.md +++ b/localization/v2.5.x/site/ko/userGuide/search-query-get/full-text-search.md @@ -79,6 +79,8 @@ summary: 전체 텍스트 검색은 텍스트 데이터 세트에서 특정 용
    • VARCHAR 필드에 대해 Milvus가 자동으로 생성하는 스파스 임베딩을 저장하기 위해 예약된 SPARSE_FLOAT_VECTOR 필드.

    • 컬렉션 스키마 정의하기

      먼저 스키마를 생성하고 필요한 필드를 추가합니다.

      +
      from pymilvus import MilvusClient, DataType, Function, FunctionType​
       ​
       schema = MilvusClient.create_schema()​
      @@ -87,14 +89,90 @@ schema.add_field(field_name="id", dat
       schema.add_field(field_name="text", datatype=DataType.VARCHAR, max_length=1000, enable_analyzer=True)​
       schema.add_field(field_name="sparse", datatype=DataType.SPARSE_FLOAT_VECTOR)​
       
      +
      +
      import io.milvus.v2.common.DataType;
      +import io.milvus.v2.service.collection.request.AddFieldReq;
      +import io.milvus.v2.service.collection.request.CreateCollectionReq;
      +
      +CreateCollectionReq.CollectionSchema schema = CreateCollectionReq.CollectionSchema.builder()
      +        .build();
      +schema.addField(AddFieldReq.builder()
      +        .fieldName("id")
      +        .dataType(DataType.Int64)
      +        .isPrimaryKey(true)
      +        .autoID(true)
      +        .build());
      +schema.addField(AddFieldReq.builder()
      +        .fieldName("text")
      +        .dataType(DataType.VarChar)
      +        .maxLength(1000)
      +        .enableAnalyzer(true)
      +        .build());
      +schema.addField(AddFieldReq.builder()
      +        .fieldName("sparse")
      +        .dataType(DataType.SparseFloatVector)
      +        .build());
      +
      +
      import { MilvusClient, DataType } from "@zilliz/milvus2-sdk-node";
      +
      +const address = "http://localhost:19530";
      +const token = "root:Milvus";
      +const client = new MilvusClient({address, token});
      +const schema = [
      +  {
      +    name: "id",
      +    data_type: DataType.Int64,
      +    is_primary_key: true,
      +  },
      +  {
      +    name: "text",
      +    data_type: "VarChar",
      +    enable_analyzer: true,
      +    enable_match: true,
      +    max_length: 1000,
      +  },
      +  {
      +    name: "sparse",
      +    data_type: DataType.SparseFloatVector,
      +  },
      +];
      +
      +
      +
      +
      export schema='{
      +        "autoId": true,
      +        "enabledDynamicField": false,
      +        "fields": [
      +            {
      +                "fieldName": "id",
      +                "dataType": "Int64",
      +                "isPrimary": true
      +            },
      +            {
      +                "fieldName": "text",
      +                "dataType": "VarChar",
      +                "elementTypeParams": {
      +                    "max_length": 1000,
      +                    "enable_analyzer": true
      +                }
      +            },
      +            {
      +                "fieldName": "sparse",
      +                "dataType": "SparseFloatVector"
      +            }
      +        ]
      +    }'
       

      이 구성에서는

        -
      • id는 기본 키 역할을 하며 auto_id=True 로 자동 생성됩니다.

      • +
      • id은 기본 키로 사용되며 auto_id=True 으로 자동 생성됩니다.

• text는 전체 텍스트 검색 작업을 위한 원시 텍스트 데이터를 저장합니다. VARCHAR 는 Milvus의 텍스트 저장용 문자열 데이터 유형이므로 데이터 유형은 VARCHAR 이어야 합니다. enable_analyzer=True 를 설정하면 Milvus가 텍스트를 토큰화할 수 있습니다. 기본적으로 Milvus는 텍스트 분석에 표준 분석기를 사용합니다. 다른 분석기를 구성하려면 개요를 참조하세요.

      • sparse전체 텍스트 검색 작업을 위해 내부적으로 생성된 스파스 임베딩을 저장하기 위해 예약된 벡터 필드입니다. 데이터 유형은 SPARSE_FLOAT_VECTOR 여야 합니다.

      이제 텍스트를 스파스 벡터 표현으로 변환하는 함수를 정의한 다음 스키마에 추가합니다.

      +
      bm25_function = Function(​
           name="text_bm25_emb", # Function name​
           input_field_names=["text"], # Name of the VARCHAR field containing raw text data​
      @@ -104,6 +182,62 @@ schema.add_field(field_name="sparse",
       ​
       schema.add_function(bm25_function)​
       
      +
      +
      import io.milvus.common.clientenum.FunctionType;
      +import io.milvus.v2.service.collection.request.CreateCollectionReq.Function;
      +
      +import java.util.*;
      +
      +schema.addFunction(Function.builder()
      +        .functionType(FunctionType.BM25)
      +        .name("text_bm25_emb")
      +        .inputFieldNames(Collections.singletonList("text"))
+        .outputFieldNames(Collections.singletonList("sparse"))
      +        .build());
      +
      +
      const functions = [
      +    {
      +      name: 'text_bm25_emb',
      +      description: 'bm25 function',
      +      type: FunctionType.BM25,
      +      input_field_names: ['text'],
+      output_field_names: ['sparse'],
      +      params: {},
      +    },
      +];
      +
      +
      export schema='{
      +        "autoId": true,
      +        "enabledDynamicField": false,
      +        "fields": [
      +            {
      +                "fieldName": "id",
      +                "dataType": "Int64",
      +                "isPrimary": true
      +            },
      +            {
      +                "fieldName": "text",
      +                "dataType": "VarChar",
      +                "elementTypeParams": {
      +                    "max_length": 1000,
      +                    "enable_analyzer": true
      +                }
      +            },
      +            {
      +                "fieldName": "sparse",
      +                "dataType": "SparseFloatVector"
      +            }
      +        ],
      +        "functions": [
      +            {
      +                "name": "text_bm25_emb",
      +                "type": "BM25",
      +                "inputFieldNames": ["text"],
      +                "outputFieldNames": ["sparse"],
      +                "params": {}
      +            }
      +        ]
      +    }'
       

      매개변수

      설명

      @@ -120,7 +254,9 @@ schema.add_function(bm25_function)​

      텍스트에서 스파스 벡터로 변환해야 하는 VARCHAR 필드가 여러 개 있는 컬렉션의 경우 컬렉션 스키마에 별도의 함수를 추가하여 각 함수에 고유한 이름과 output_field_names 값을 갖도록 합니다.
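As a rough illustration (the field names title, content, sparse_title, and sparse_content are hypothetical), such a schema would register one BM25 function per VARCHAR field, each with a unique name and its own output field:

from pymilvus import Function, FunctionType

# Hypothetical example: "title" and "content" are VARCHAR fields;
# "sparse_title" and "sparse_content" are SPARSE_FLOAT_VECTOR fields reserved for their outputs.
schema.add_function(Function(
    name="title_bm25_emb",                  # unique function name
    input_field_names=["title"],
    output_field_names=["sparse_title"],    # each function writes to its own sparse field
    function_type=FunctionType.BM25,
))

schema.add_function(Function(
    name="content_bm25_emb",
    input_field_names=["content"],
    output_field_names=["sparse_content"],
    function_type=FunctionType.BM25,
))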

      -

      인덱스 구성

      필요한 필드와 기본 제공 함수로 스키마를 정의한 후에는 컬렉션의 색인을 설정하세요. 이 과정을 간소화하기 위해 AUTOINDEXindex_type 으로 사용하면 Milvus가 데이터 구조에 따라 가장 적합한 인덱스 유형을 선택하고 구성할 수 있습니다.

      +

      인덱스 구성

      필요한 필드와 기본 제공 함수로 스키마를 정의한 후 컬렉션의 색인을 설정하세요. 이 과정을 간소화하기 위해 AUTOINDEXindex_type 으로 사용하면 Milvus가 데이터 구조에 따라 가장 적합한 인덱스 유형을 선택하고 구성할 수 있는 옵션이 제공됩니다.

      +
      index_params = MilvusClient.prepare_index_params()​
       ​
       index_params.add_index(​
      @@ -129,6 +265,31 @@ index_params.add_index(​
           metric_type="BM25"​
       )​
       
      +
      +
      import io.milvus.v2.common.IndexParam;
      +
      +List<IndexParam> indexes = new ArrayList<>();
      +indexes.add(IndexParam.builder()
      +        .fieldName("sparse")
      +        .indexType(IndexParam.IndexType.SPARSE_INVERTED_INDEX)
      +        .metricType(IndexParam.MetricType.BM25)
      +        .build());
      +
      +
      const index_params = [
      +  {
      +    fieldName: "sparse",
      +    metricType: "BM25",
      +    indexType: "AUTOINDEX",
      +  },
      +];
      +
      +
      export indexParams='[
      +        {
      +            "fieldName": "sparse",
      +            "metricType": "BM25",
      +            "indexType": "AUTOINDEX"
      +        }
      +    ]'
       

      파라미터

      설명

      @@ -139,13 +300,43 @@ index_params.add_index(​

      metric_type

      특히 전체 텍스트 검색 기능을 사용하려면 이 매개변수의 값을 BM25 으로 설정해야 합니다.

      -

      컬렉션 만들기

      이제 정의된 스키마 및 인덱스 매개변수를 사용하여 컬렉션을 생성합니다.

      +

      컬렉션 만들기

      이제 정의한 스키마 및 인덱스 매개변수를 사용하여 컬렉션을 생성합니다.

      +
      MilvusClient.create_collection(​
           collection_name='demo', ​
           schema=schema, ​
           index_params=index_params​
       )​
       
      +
      +
      import io.milvus.v2.service.collection.request.CreateCollectionReq;
      +
      +CreateCollectionReq requestCreate = CreateCollectionReq.builder()
      +        .collectionName("demo")
      +        .collectionSchema(schema)
      +        .indexParams(indexes)
      +        .build();
      +client.createCollection(requestCreate);
      +
      +
+await client.createCollection({
+    collection_name: 'demo', 
+    schema: schema, 
+    index_params: index_params
+});
      +
      +
      export CLUSTER_ENDPOINT="http://localhost:19530"
      +export TOKEN="root:Milvus"
      +
      +curl --request POST \
      +--url "${CLUSTER_ENDPOINT}/v2/vectordb/collections/create" \
      +--header "Authorization: Bearer ${TOKEN}" \
      +--header "Content-Type: application/json" \
      +-d "{
      +    \"collectionName\": \"demo\",
      +    \"schema\": $schema,
      +    \"indexParams\": $indexParams
      +}"
       

      텍스트 데이터 삽입

      컬렉션과 인덱스를 설정했으면 텍스트 데이터를 삽입할 준비가 된 것입니다. 이 과정에서는 원시 텍스트만 제공하면 됩니다. 앞서 정의한 내장 함수가 각 텍스트 항목에 해당하는 스파스 벡터를 자동으로 생성합니다.

      -
      MilvusClient.insert('demo', [​
      -    {'text': 'Artificial intelligence was founded as an academic discipline in 1956.'},​
      -    {'text': 'Alan Turing was the first person to conduct substantial research in AI.'},​
      -    {'text': 'Born in Maida Vale, London, Turing was raised in southern England.'},​
      -])​
      +    

      컬렉션과 인덱스를 설정했으면 텍스트 데이터를 삽입할 준비가 되었습니다. 이 과정에서는 원시 텍스트만 제공하면 됩니다. 앞서 정의한 내장 함수가 각 텍스트 항목에 해당하는 스파스 벡터를 자동으로 생성합니다.

+
+
      client.insert('demo', [
      +    {'text': 'information retrieval is a field of study.'},
      +    {'text': 'information retrieval focuses on finding relevant information in large datasets.'},
      +    {'text': 'data mining and information retrieval overlap in research.'},
      +])
       
      +
      +
      import com.google.gson.Gson;
      +import com.google.gson.JsonObject;
      +
      +import io.milvus.v2.service.vector.request.InsertReq;
      +
      +Gson gson = new Gson();
      +List<JsonObject> rows = Arrays.asList(
      +        gson.fromJson("{\"text\": \"information retrieval is a field of study.\"}", JsonObject.class),
      +        gson.fromJson("{\"text\": \"information retrieval focuses on finding relevant information in large datasets.\"}", JsonObject.class),
      +        gson.fromJson("{\"text\": \"data mining and information retrieval overlap in research.\"}", JsonObject.class)
      +);
      +
      +client.insert(InsertReq.builder()
      +        .collectionName("demo")
      +        .data(rows)
      +        .build());
      +
      +
      await client.insert({
      +collection_name: 'demo', 
      +data: [
      +    {'text': 'information retrieval is a field of study.'},
      +    {'text': 'information retrieval focuses on finding relevant information in large datasets.'},
      +    {'text': 'data mining and information retrieval overlap in research.'},
+]});
      +
      +
      curl --request POST \
      +--url "${CLUSTER_ENDPOINT}/v2/vectordb/entities/insert" \
      +--header "Authorization: Bearer ${TOKEN}" \
      +--header "Content-Type: application/json" \
      +-d '{
      +    "data": [
      +        {"text": "information retrieval is a field of study."},
      +        {"text": "information retrieval focuses on finding relevant information in large datasets."},
      +        {"text": "data mining and information retrieval overlap in research."}       
      +    ],
      +    "collectionName": "demo"
      +}'
       

      컬렉션에 데이터를 삽입한 후에는 원시 텍스트 쿼리를 사용하여 전체 텍스트 검색을 수행할 수 있습니다. Milvus는 쿼리를 자동으로 스파스 벡터로 변환하고 BM25 알고리즘을 사용하여 일치하는 검색 결과의 순위를 매긴 다음 상위 K (limit) 결과를 반환합니다.

      +

      컬렉션에 데이터를 삽입한 후에는 원시 텍스트 쿼리를 사용하여 전체 텍스트 검색을 수행할 수 있습니다. Milvus는 자동으로 쿼리를 스파스 벡터로 변환하고 BM25 알고리즘을 사용하여 일치하는 검색 결과의 순위를 매긴 다음 상위 K (limit) 결과를 반환합니다.

      +
      search_params = {​
           'params': {'drop_ratio_search': 0.6},​
       }​
       ​
       MilvusClient.search(​
           collection_name='demo', ​
      -    data=['Who started AI research?'],​
      +    data=['whats the focus of information retrieval?'],​
           anns_field='sparse',​
           limit=3,​
           search_params=search_params​
       )​
       
      +
      +
      import io.milvus.v2.service.vector.request.SearchReq;
      +import io.milvus.v2.service.vector.request.data.EmbeddedText;
      +import io.milvus.v2.service.vector.response.SearchResp;
      +
      +Map<String,Object> searchParams = new HashMap<>();
      +searchParams.put("drop_ratio_search", 0.6);
      +SearchResp searchResp = client.search(SearchReq.builder()
      +        .collectionName("demo")
      +        .data(Collections.singletonList(new EmbeddedText("whats the focus of information retrieval?")))
      +        .annsField("sparse")
      +        .topK(3)
      +        .searchParams(searchParams)
      +        .outputFields(Collections.singletonList("text"))
      +        .build());
      +
      +
+await client.search({
+    collection_name: 'demo', 
+    data: ['whats the focus of information retrieval?'],
+    anns_field: 'sparse',
+    limit: 3,
+    params: {'drop_ratio_search': 0.6},
+});
      +
      +
      curl --request POST \
      +--url "${CLUSTER_ENDPOINT}/v2/vectordb/entities/search" \
      +--header "Authorization: Bearer ${TOKEN}" \
      +--header "Content-Type: application/json" \
      +--data-raw '{
      +    "collectionName": "demo",
      +    "data": [
      +        "whats the focus of information retrieval?"
      +    ],
      +    "annsField": "sparse",
      +    "limit": 3,
      +    "outputFields": [
      +        "text"
      +    ],
      +    "searchParams":{
      +        "params":{
      +            "drop_ratio_search":0.6
      +        }
      +    }
      +}'
       

      파라미터

      설명

      diff --git a/localization/v2.5.x/site/ko/userGuide/search-query-get/keyword-match.json b/localization/v2.5.x/site/ko/userGuide/search-query-get/keyword-match.json index 0bf292769..46ba355b2 100644 --- a/localization/v2.5.x/site/ko/userGuide/search-query-get/keyword-match.json +++ b/localization/v2.5.x/site/ko/userGuide/search-query-get/keyword-match.json @@ -1 +1 @@ -{"codeList":["from pymilvus import MilvusClient, DataType​\n​\nschema = MilvusClient.create_schema(auto_id=True, enable_dynamic_field=False)​\n​\nschema.add_field(​\n field_name='text', ​\n datatype=DataType.VARCHAR, ​\n max_length=1000, ​\n enable_analyzer=True, # Whether to enable text analysis for this field​\n enable_match=True # Whether to enable text match​\n)​\n\n","analyzer_params={​\n \"type\": \"english\"​\n}​\n​\nschema.add_field(​\n field_name='text', ​\n datatype=DataType.VARCHAR, ​\n max_length=200, ​\n enable_analyzer=True,​\n analyzer_params=analyzer_params,​\n enable_match=True, ​\n)​\n\n","TEXT_MATCH(field_name, text)​\n\n","filter = \"TEXT_MATCH(text, 'machine deep')\"​\n\n","filter = \"TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')\"​\n\n","# Match entities with `keyword1` or `keyword2`​\nfilter = \"TEXT_MATCH(text, 'keyword1 keyword2')\"​\n​\n# Assuming 'embeddings' is the vector field and 'text' is the VARCHAR field​\nresult = MilvusClient.search(​\n collection_name=\"YOUR_COLLECTION_NAME\", # Your collection name​\n anns_field=\"embeddings\", # Vector field name​\n data=[query_vector], # Query vector​\n filter=filter,​\n search_params={\"params\": {\"nprobe\": 10}},​\n limit=10, # Max. number of results to return​\n output_fields=[\"id\", \"text\"] # Fields to return​\n)​\n\n","# Match entities with both `keyword1` and `keyword2`​\nfilter = \"TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')\"​\n​\nresult = MilvusClient.query(​\n collection_name=\"YOUR_COLLECTION_NAME\",​\n filter=filter, ​\n output_fields=[\"id\", \"text\"]​\n)​\n\n"],"headingContent":"Keyword Match​","anchorList":[{"label":"키워드 검색","href":"Keyword-Match​","type":1,"isActive":false},{"label":"개요","href":"Overview","type":2,"isActive":false},{"label":"키워드 검색 사용","href":"Enable-keyword-match","type":2,"isActive":false},{"label":"키워드 검색 사용","href":"Use-keyword-match","type":2,"isActive":false},{"label":"고려 사항","href":"Considerations","type":2,"isActive":false}]} \ No newline at end of file +{"codeList":["from pymilvus import MilvusClient, DataType​\n​\nschema = MilvusClient.create_schema(auto_id=True, enable_dynamic_field=False)​\n​\nschema.add_field(​\n field_name='text', ​\n datatype=DataType.VARCHAR, ​\n max_length=1000, ​\n enable_analyzer=True, # Whether to enable text analysis for this field​\n enable_match=True # Whether to enable text match​\n)​\n\n","import io.milvus.v2.common.DataType;\nimport io.milvus.v2.service.collection.request.AddFieldReq;\nimport io.milvus.v2.service.collection.request.CreateCollectionReq;\n\nCreateCollectionReq.CollectionSchema schema = CreateCollectionReq.CollectionSchema.builder()\n .enableDynamicField(false)\n .build();\n\nschema.addField(AddFieldReq.builder()\n .fieldName(\"text\")\n .dataType(DataType.VarChar)\n .maxLength(1000)\n .enableAnalyzer(true)\n .enableMatch(true)\n .build());\n\n","const schema = [\n {\n name: \"id\",\n data_type: DataType.Int64,\n is_primary_key: true,\n },\n {\n name: \"text\",\n data_type: \"VarChar\",\n enable_analyzer: true,\n enable_match: true,\n max_length: 1000,\n },\n {\n name: \"sparse\",\n data_type: DataType.SparseFloatVector,\n 
},\n];\n\n","export schema='{\n \"autoId\": true,\n \"enabledDynamicField\": false,\n \"fields\": [\n {\n \"fieldName\": \"id\",\n \"dataType\": \"Int64\",\n \"isPrimary\": true\n },\n {\n \"fieldName\": \"text\",\n \"dataType\": \"VarChar\",\n \"elementTypeParams\": {\n \"max_length\": 1000,\n \"enable_analyzer\": true,\n \"enable_match\": true\n }\n },\n {\n \"fieldName\": \"sparse\",\n \"dataType\": \"SparseFloatVector\"\n }\n ]\n }'\n\n","analyzer_params={​\n \"type\": \"english\"​\n}​\n​\nschema.add_field(​\n field_name='text', ​\n datatype=DataType.VARCHAR, ​\n max_length=200, ​\n enable_analyzer=True,​\n analyzer_params=analyzer_params,​\n enable_match=True, ​\n)​\n\n","Map analyzerParams = new HashMap<>();\nanalyzerParams.put(\"type\", \"english\");\nschema.addField(AddFieldReq.builder()\n .fieldName(\"text\")\n .dataType(DataType.VarChar)\n .maxLength(200)\n .enableAnalyzer(true)\n .analyzerParams(analyzerParams)\n .enableMatch(true)\n .build());\n\n","const schema = [\n {\n name: \"id\",\n data_type: DataType.Int64,\n is_primary_key: true,\n },\n {\n name: \"text\",\n data_type: \"VarChar\",\n enable_analyzer: true,\n enable_match: true,\n max_length: 1000,\n analyzer_params: { type: 'english' },\n },\n {\n name: \"sparse\",\n data_type: DataType.SparseFloatVector,\n },\n];\n\n","export schema='{\n \"autoId\": true,\n \"enabledDynamicField\": false,\n \"fields\": [\n {\n \"fieldName\": \"id\",\n \"dataType\": \"Int64\",\n \"isPrimary\": true\n },\n {\n \"fieldName\": \"text\",\n \"dataType\": \"VarChar\",\n \"elementTypeParams\": {\n \"max_length\": 200,\n \"enable_analyzer\": true,\n \"enable_match\": true,\n \"analyzer_params\": {\"type\": \"english\"}\n }\n },\n {\n \"fieldName\": \"my_vector\",\n \"dataType\": \"FloatVector\",\n \"elementTypeParams\": {\n \"dim\": \"5\"\n }\n }\n ]\n }'\n\n","TEXT_MATCH(field_name, text)​\n\n","filter = \"TEXT_MATCH(text, 'machine deep')\"​\n","String filter = \"TEXT_MATCH(text, 'machine deep')\";\n","const filter = \"TEXT_MATCH(text, 'machine deep')\";\n","export filter=\"\\\"TEXT_MATCH(text, 'machine deep')\\\"\"\n","filter = \"TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')\"​\n","String filter = \"TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')\";\n","const filter = \"TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')\"\n","export filter=\"\\\"TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')\\\"\"\n","# Match entities with `keyword1` or `keyword2`​\nfilter = \"TEXT_MATCH(text, 'keyword1 keyword2')\"​\n​\n# Assuming 'embeddings' is the vector field and 'text' is the VARCHAR field​\nresult = MilvusClient.search(​\n collection_name=\"YOUR_COLLECTION_NAME\", # Your collection name​\n anns_field=\"embeddings\", # Vector field name​\n data=[query_vector], # Query vector​\n filter=filter,​\n search_params={\"params\": {\"nprobe\": 10}},​\n limit=10, # Max. 
number of results to return​\n output_fields=[\"id\", \"text\"] # Fields to return​\n)​\n\n","String filter = \"TEXT_MATCH(text, 'keyword1 keyword2')\";\n\nSearchResp searchResp = client.search(SearchReq.builder()\n .collectionName(\"YOUR_COLLECTION_NAME\")\n .annsField(\"embeddings\")\n .data(Collections.singletonList(queryVector)))\n .filter(filter)\n .topK(10)\n .outputFields(Arrays.asList(\"id\", \"text\"))\n .build());\n","// Match entities with `keyword1` or `keyword2`\nconst filter = \"TEXT_MATCH(text, 'keyword1 keyword2')\";\n\n// Assuming 'embeddings' is the vector field and 'text' is the VARCHAR field\nconst result = await client.search(\n collection_name: \"YOUR_COLLECTION_NAME\", // Your collection name\n anns_field: \"embeddings\", // Vector field name\n data: [query_vector], // Query vector\n filter: filter,\n params: {\"nprobe\": 10},\n limit: 10, // Max. number of results to return\n output_fields: [\"id\", \"text\"] //Fields to return\n);\n","export filter=\"\\\"TEXT_MATCH(text, 'keyword1 keyword2')\\\"\"\n\nexport CLUSTER_ENDPOINT=\"http://localhost:19530\"\nexport TOKEN=\"root:Milvus\"\n\ncurl --request POST \\\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/entities/search\" \\\n--header \"Authorization: Bearer ${TOKEN}\" \\\n--header \"Content-Type: application/json\" \\\n-d '{\n \"collectionName\": \"demo2\",\n \"annsField\": \"my_vector\",\n \"data\": [[0.19886812562848388, 0.06023560599112088, 0.6976963061752597, 0.2614474506242501, 0.838729485096104]],\n \"filter\": '\"$filter\"',\n \"searchParams\": {\n \"params\": {\n \"nprobe\": 10\n }\n },\n \"limit\": 3,\n \"outputFields\": [\"text\",\"id\"]\n}'\n","# Match entities with both `keyword1` and `keyword2`​\nfilter = \"TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')\"​\n​\nresult = MilvusClient.query(​\n collection_name=\"YOUR_COLLECTION_NAME\",​\n filter=filter, ​\n output_fields=[\"id\", \"text\"]​\n)​\n\n","String filter = \"TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')\";\n\nQueryResp queryResp = client.query(QueryReq.builder()\n .collectionName(\"YOUR_COLLECTION_NAME\")\n .filter(filter)\n .outputFields(Arrays.asList(\"id\", \"text\"))\n .build()\n);\n","// Match entities with both `keyword1` and `keyword2`\nconst filter = \"TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')\";\n\nconst result = await client.query(\n collection_name: \"YOUR_COLLECTION_NAME\",\n filter: filter, \n output_fields: [\"id\", \"text\"]\n)\n","export filter=\"\\\"TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')\\\"\"\n\nexport CLUSTER_ENDPOINT=\"http://localhost:19530\"\nexport TOKEN=\"root:Milvus\"\n\ncurl --request POST \\\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/entities/query\" \\\n--header \"Authorization: Bearer ${TOKEN}\" \\\n--header \"Content-Type: application/json\" \\\n-d '{\n \"collectionName\": \"demo2\",\n \"filter\": '\"$filter\"',\n \"outputFields\": [\"id\", \"text\"]\n}'\n"],"headingContent":"Text Match​","anchorList":[{"label":"텍스트 일치","href":"Text-Match​","type":1,"isActive":false},{"label":"개요","href":"Overview","type":2,"isActive":false},{"label":"텍스트 일치 사용","href":"Enable-text-match","type":2,"isActive":false},{"label":"텍스트 일치 사용","href":"Use-text-match","type":2,"isActive":false},{"label":"고려 사항","href":"Considerations","type":2,"isActive":false}]} \ No newline at end of file diff --git a/localization/v2.5.x/site/ko/userGuide/search-query-get/keyword-match.md b/localization/v2.5.x/site/ko/userGuide/search-query-get/keyword-match.md index 0a53122b8..4c6b75ef4 
100644 --- a/localization/v2.5.x/site/ko/userGuide/search-query-get/keyword-match.md +++ b/localization/v2.5.x/site/ko/userGuide/search-query-get/keyword-match.md @@ -1,11 +1,11 @@ --- id: keyword-match.md summary: >- - Milvus의 키워드 검색은 특정 용어를 기반으로 정확한 문서 검색을 가능하게 합니다. 이 기능은 주로 특정 조건을 충족하는 필터링 검색에 + Milvus의 텍스트 일치는 특정 용어를 기반으로 정확한 문서 검색을 가능하게 합니다. 이 기능은 주로 특정 조건을 충족하는 필터링 검색에 사용되며, 스칼라 필터링을 통합하여 쿼리 결과를 구체화함으로써 스칼라 기준을 충족하는 벡터 내에서 유사성 검색을 할 수 있습니다. -title: 키워드 검색 +title: 텍스트 일치 --- -

      키워드 검색

      Milvus의 키워드 검색은 특정 용어를 기반으로 정확한 문서 검색을 가능하게 합니다. 이 기능은 주로 특정 조건을 충족하는 필터링 검색에 사용되며, 스칼라 필터링을 통합하여 쿼리 결과를 구체화함으로써 스칼라 기준을 충족하는 벡터 내에서 유사성 검색을 할 수 있습니다.

      +

      Milvus의 텍스트 일치는 특정 용어를 기반으로 정확한 문서 검색을 가능하게 합니다. 이 기능은 주로 특정 조건을 충족하는 필터링 검색에 사용되며, 스칼라 필터링을 통합하여 쿼리 결과를 구체화함으로써 스칼라 기준을 충족하는 벡터 내에서 유사성 검색을 할 수 있습니다.

      -

      키워드 검색은 일치하는 문서의 관련성을 점수화하지 않고 쿼리 용어가 정확히 일치하는 것을 찾는 데 중점을 둡니다. 쿼리 용어의 의미론적 의미와 중요도에 따라 가장 관련성이 높은 문서를 검색하려면 전체 텍스트 검색을 사용하는 것이 좋습니다.

      +

      텍스트 일치는 일치하는 문서의 관련성에 점수를 매기지 않고 쿼리 용어의 정확한 표현을 찾는 데 중점을 둡니다. 쿼리 용어의 의미론적 의미와 중요도에 따라 가장 관련성이 높은 문서를 검색하려면 전체 텍스트 검색을 사용하는 것이 좋습니다.

      개요

      Milvus는 기본 반전 색인 및 키워드 검색을 강화하기 위해 Tantivy를 통합합니다. 각 텍스트 항목에 대해 Milvus는 절차에 따라 색인을 생성합니다.

      +

      Milvus는 기본 반전 색인 및 용어 기반 텍스트 검색을 강화하기 위해 Tantivy를 통합합니다. 각 텍스트 항목에 대해 Milvus는 절차에 따라 색인을 생성합니다.

      1. 분석기: 분석기는 입력 텍스트를 개별 단어 또는 토큰으로 토큰화한 다음 필요에 따라 필터를 적용하여 처리합니다. 이를 통해 Milvus는 이러한 토큰을 기반으로 색인을 구축할 수 있습니다.

      2. 인덱싱: 텍스트 분석 후, Milvus는 각 고유 토큰을 해당 토큰이 포함된 문서에 매핑하는 역 인덱스를 생성합니다.

      -

      사용자가 키워드 검색을 수행하면 반전된 색인을 사용해 해당 키워드가 포함된 모든 문서를 빠르게 검색합니다. 이는 각 문서를 개별적으로 스캔하는 것보다 훨씬 빠릅니다.

      +

      사용자가 텍스트 일치를 수행하면 반전된 색인을 사용하여 해당 용어가 포함된 모든 문서를 빠르게 검색합니다. 이는 각 문서를 개별적으로 스캔하는 것보다 훨씬 빠릅니다.

      - Keyword Match - 키워드 검색

      -

      키워드 검색 사용

      키워드 검색은 기본적으로 Milvus의 문자열 데이터 유형인 VARCHAR 필드 유형에서 작동합니다. 키워드 검색을 활성화하려면 enable_analyzerenable_match 을 모두 True 로 설정한 다음 수집 스키마를 정의할 때 텍스트 분석을 위한 분석기를 선택적으로 구성하세요.

      -

      enable_analyzerenable_match설정

      특정 VARCHAR 필드에 대해 키워드 검색을 사용하려면 필드 스키마를 정의할 때 enable_analyzerenable_match 매개 변수를 모두 True 으로 설정합니다. 이렇게 하면 Milvus가 텍스트를 토큰화하고 지정된 필드에 대한 반전 인덱스를 생성하도록 지시하여 빠르고 효율적인 키워드 매칭을 가능하게 합니다.

      +

      텍스트 일치는 기본적으로 Milvus의 문자열 데이터 유형인 VARCHAR 필드 유형에서 작동합니다. 텍스트 일치를 사용하려면 enable_analyzerenable_match 을 모두 True 로 설정한 다음 컬렉션 스키마를 정의할 때 텍스트 분석을 위한 분석기를 선택적으로 구성하세요.

      +

      enable_analyzerenable_match설정

      특정 VARCHAR 필드에 대해 텍스트 일치를 사용하려면 필드 스키마를 정의할 때 enable_analyzerenable_match 매개 변수를 모두 True 으로 설정합니다. 이렇게 하면 Milvus가 텍스트를 토큰화하고 지정된 필드에 대해 반전된 인덱스를 생성하도록 지시하여 빠르고 효율적인 텍스트 일치를 가능하게 합니다.

      +
      from pymilvus import MilvusClient, DataType​
       ​
       schema = MilvusClient.create_schema(auto_id=True, enable_dynamic_field=False)​
      @@ -79,9 +81,74 @@ schema.add_field(​
       )​
       
       
      -

      선택 사항입니다: 분석기 구성

      키워드 매칭의 성능과 정확도는 선택한 분석기에 따라 달라집니다. 다양한 분석기는 다양한 언어와 텍스트 구조에 맞춰져 있으므로 올바른 분석기를 선택하면 특정 사용 사례에 대한 검색 결과에 큰 영향을 미칠 수 있습니다.

      +
      import io.milvus.v2.common.DataType;
      +import io.milvus.v2.service.collection.request.AddFieldReq;
      +import io.milvus.v2.service.collection.request.CreateCollectionReq;
      +
      +CreateCollectionReq.CollectionSchema schema = CreateCollectionReq.CollectionSchema.builder()
      +        .enableDynamicField(false)
      +        .build();
      +
      +schema.addField(AddFieldReq.builder()
      +        .fieldName("text")
      +        .dataType(DataType.VarChar)
      +        .maxLength(1000)
      +        .enableAnalyzer(true)
      +        .enableMatch(true)
      +        .build());
      +
      +
      +
      const schema = [
      +  {
      +    name: "id",
      +    data_type: DataType.Int64,
      +    is_primary_key: true,
      +  },
      +  {
      +    name: "text",
      +    data_type: "VarChar",
      +    enable_analyzer: true,
      +    enable_match: true,
      +    max_length: 1000,
      +  },
      +  {
      +    name: "sparse",
      +    data_type: DataType.SparseFloatVector,
      +  },
      +];
      +
      +
      +
      export schema='{
      +        "autoId": true,
      +        "enabledDynamicField": false,
      +        "fields": [
      +            {
      +                "fieldName": "id",
      +                "dataType": "Int64",
      +                "isPrimary": true
      +            },
      +            {
      +                "fieldName": "text",
      +                "dataType": "VarChar",
      +                "elementTypeParams": {
      +                    "max_length": 1000,
      +                    "enable_analyzer": true,
      +                    "enable_match": true
      +                }
      +            },
      +            {
      +                "fieldName": "sparse",
      +                "dataType": "SparseFloatVector"
      +            }
      +        ]
      +    }'
      +
      +
      +

      선택 사항입니다: 분석기 구성

      텍스트 매칭의 성능과 정확도는 선택한 분석기에 따라 달라집니다. 다양한 분석기는 다양한 언어와 텍스트 구조에 맞춰져 있으므로 올바른 분석기를 선택하면 특정 사용 사례에 대한 검색 결과에 큰 영향을 미칠 수 있습니다.

      기본적으로 Milvus는 공백과 구두점을 기준으로 텍스트를 토큰화하고, 40자 이상의 토큰을 제거하며, 텍스트를 소문자로 변환하는 standard 분석기를 사용합니다. 이 기본 설정을 적용하는 데는 추가 매개변수가 필요하지 않습니다. 자세한 내용은 표준을 참조하세요.
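As a minimal sketch (reusing the text field from the example above), explicitly selecting the standard analyzer behaves the same as the default:

analyzer_params = {"type": "standard"}  # equivalent to omitting analyzer_params

schema.add_field(
    field_name='text',
    datatype=DataType.VARCHAR,
    max_length=1000,
    enable_analyzer=True,
    analyzer_params=analyzer_params,
    enable_match=True,
)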

      -

      다른 분석기가 필요한 경우에는 analyzer_params 매개변수를 사용하여 분석기를 구성할 수 있습니다. 예를 들어, 영어 텍스트 처리를 위해 english 분석기를 적용하는 것입니다.

      +

      다른 분석기가 필요한 경우에는 analyzer_params 매개변수를 사용하여 분석기를 구성할 수 있습니다. 예를 들어 영어 텍스트 처리를 위해 english 분석기를 적용합니다.

      +
      analyzer_params={​
           "type": "english"​
       }​
      @@ -96,8 +163,71 @@ schema.add_field(​
       )​
       
       
      -

      Milvus는 다른 언어 및 시나리오에 적합한 다양한 분석기도 제공합니다. 자세한 내용은 개요를 참조하세요.

      -

키워드 검색 사용
+
const schema = [
      +  {
      +    name: "id",
      +    data_type: DataType.Int64,
      +    is_primary_key: true,
      +  },
      +  {
      +    name: "text",
      +    data_type: "VarChar",
      +    enable_analyzer: true,
      +    enable_match: true,
      +    max_length: 1000,
      +    analyzer_params: { type: 'english' },
      +  },
      +  {
      +    name: "sparse",
      +    data_type: DataType.SparseFloatVector,
      +  },
      +];
      +
      +
      +
      export schema='{
      +        "autoId": true,
      +        "enabledDynamicField": false,
      +        "fields": [
      +            {
      +                "fieldName": "id",
      +                "dataType": "Int64",
      +                "isPrimary": true
      +            },
      +            {
      +                "fieldName": "text",
      +                "dataType": "VarChar",
      +                "elementTypeParams": {
      +                    "max_length": 200,
      +                    "enable_analyzer": true,
      +                    "enable_match": true,
      +                    "analyzer_params": {"type": "english"}
      +                }
      +            },
      +            {
      +                "fieldName": "my_vector",
      +                "dataType": "FloatVector",
      +                "elementTypeParams": {
      +                    "dim": "5"
      +                }
      +            }
      +        ]
      +    }'
      +
      +
      +

      Milvus는 그 외에도 다양한 언어와 시나리오에 적합한 다양한 분석기를 제공합니다. 자세한 내용은 개요를 참조하세요.
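Beyond the built-in types, an analyzer can also be assembled from a tokenizer and a filter chain; the following is a hedged sketch using component names described in the analyzer overview:

# A custom analyzer sketch: tokenize with the standard tokenizer, then lowercase tokens.
analyzer_params = {
    "tokenizer": "standard",
    "filter": ["lowercase"],   # add language-specific filters as needed
}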

      +

      텍스트 일치 사용

      컬렉션 스키마에서 VARCHAR 필드에 대해 키워드 일치를 활성화한 후에는 TEXT_MATCH 표현식을 사용하여 키워드 일치를 수행할 수 있습니다.

      -

      TEXT_MATCH 표현식 구문

      TEXT_MATCH 표현식은 검색할 필드와 키워드를 지정하는 데 사용됩니다. 구문은 다음과 같습니다.

      -
      TEXT_MATCH(field_name, text)​
      +    

      컬렉션 스키마에서 VARCHAR 필드에 대해 텍스트 일치를 사용하도록 설정한 후에는 TEXT_MATCH 표현식을 사용하여 텍스트 일치를 수행할 수 있습니다.

      +

      TEXT_MATCH 표현식 구문

      TEXT_MATCH 표현식은 검색할 필드와 용어를 지정하는 데 사용됩니다. 구문은 다음과 같습니다.

      +
      TEXT_MATCH(field_name, text)​
       
       
      • field_name: 검색할 VARCHAR 필드의 이름입니다.

      • -
      • text: 검색할 키워드입니다. 여러 키워드는 언어 및 구성된 분석기에 따라 공백이나 기타 적절한 구분 기호로 구분할 수 있습니다.

      • +
      • text: 검색할 용어입니다. 언어 및 구성된 분석기에 따라 여러 용어를 공백이나 기타 적절한 구분 기호로 구분할 수 있습니다.

      -

      기본적으로 TEXT_MATCHOR 일치 로직을 사용하므로 지정된 키워드가 포함된 문서를 반환합니다. 예를 들어 text 필드에 machine 또는 deep 키워드가 포함된 문서를 검색하려면 다음 표현식을 사용합니다.

      +

      기본적으로 TEXT_MATCHOR 일치 논리를 사용하므로 지정된 용어가 포함된 문서를 반환합니다. 예를 들어 text 필드에 machine 또는 deep 이라는 용어가 포함된 문서를 검색하려면 다음 표현식을 사용합니다.

      +
      filter = "TEXT_MATCH(text, 'machine deep')"​
      -
       
      -

      논리 연산자를 사용하여 여러 개의 TEXT_MATCH 표현식을 결합하여 AND 일치를 수행할 수도 있습니다. 예를 들어 text 필드에 machinedeep 키워드가 모두 포함된 문서를 검색하려면 다음 표현식을 사용합니다.

      +
      String filter = "TEXT_MATCH(text, 'machine deep')";
      +
      +
      const filter = "TEXT_MATCH(text, 'machine deep')";
      +
      +
      export filter="\"TEXT_MATCH(text, 'machine deep')\""
      +
      +

      논리 연산자를 사용하여 여러 개의 TEXT_MATCH 표현식을 결합하여 AND 일치를 수행할 수도 있습니다. 예를 들어 text 필드에 machinedeep 이 모두 포함된 문서를 검색하려면 다음 표현식을 사용합니다.

      +
      filter = "TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')"​
      -
       
      -

      키워드 일치로 검색

      키워드 일치를 벡터 유사도 검색과 함께 사용하면 검색 범위를 좁히고 검색 성능을 향상시킬 수 있습니다. 벡터 유사도 검색 전에 키워드 일치를 사용하여 컬렉션을 필터링하면 검색해야 하는 문서 수를 줄여 쿼리 시간을 단축할 수 있습니다.

      -

      이 예에서 filter 표현식은 지정된 키워드 keyword1 또는 keyword2 와 일치하는 문서만 포함하도록 검색 결과를 필터링합니다. 그런 다음 이 필터링된 문서 하위 집합에 대해 벡터 유사도 검색이 수행됩니다.

      +
      String filter = "TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')";
      +
      +
      const filter = "TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')"
      +
      +
      export filter="\"TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')\""
      +
      +

      텍스트 일치로 검색

      텍스트 일치를 벡터 유사도 검색과 함께 사용하면 검색 범위를 좁히고 검색 성능을 향상시킬 수 있습니다. 벡터 유사도 검색 전에 텍스트 일치를 사용하여 컬렉션을 필터링하면 검색해야 하는 문서 수를 줄여 쿼리 시간을 단축할 수 있습니다.

      +

      이 예에서 filter 표현식은 지정된 용어 keyword1 또는 keyword2 와 일치하는 문서만 포함하도록 검색 결과를 필터링합니다. 그런 다음 이 필터링된 문서의 하위 집합에서 벡터 유사성 검색이 수행됩니다.

      +
      # Match entities with `keyword1` or `keyword2`​
       filter = "TEXT_MATCH(text, 'keyword1 keyword2')"​
       ​
      @@ -146,8 +292,58 @@ result = MilvusClient.search(​
       )​
       
       
      -

      키워드 일치를 사용한 쿼리

      키워드 검색은 쿼리 작업에서 스칼라 필터링에도 사용할 수 있습니다. query() 메서드의 expr 파라미터에 TEXT_MATCH 표현식을 지정하면 주어진 키워드와 일치하는 문서를 검색할 수 있습니다.

      -

      아래 예는 text 필드에 keyword1keyword2 키워드를 모두 포함하는 문서를 검색합니다.

      +
      String filter = "TEXT_MATCH(text, 'keyword1 keyword2')";
      +
      +SearchResp searchResp = client.search(SearchReq.builder()
      +        .collectionName("YOUR_COLLECTION_NAME")
      +        .annsField("embeddings")
+        .data(Collections.singletonList(queryVector))
      +        .filter(filter)
      +        .topK(10)
      +        .outputFields(Arrays.asList("id", "text"))
      +        .build());
      +
      +
      // Match entities with `keyword1` or `keyword2`
      +const filter = "TEXT_MATCH(text, 'keyword1 keyword2')";
      +
      +// Assuming 'embeddings' is the vector field and 'text' is the VARCHAR field
+const result = await client.search({
      +    collection_name: "YOUR_COLLECTION_NAME", // Your collection name
      +    anns_field: "embeddings", // Vector field name
      +    data: [query_vector], // Query vector
      +    filter: filter,
      +    params: {"nprobe": 10},
      +    limit: 10, // Max. number of results to return
      +    output_fields: ["id", "text"] //Fields to return
+});
      +
      +
      export filter="\"TEXT_MATCH(text, 'keyword1 keyword2')\""
      +
      +export CLUSTER_ENDPOINT="http://localhost:19530"
      +export TOKEN="root:Milvus"
      +
      +curl --request POST \
      +--url "${CLUSTER_ENDPOINT}/v2/vectordb/entities/search" \
      +--header "Authorization: Bearer ${TOKEN}" \
      +--header "Content-Type: application/json" \
      +-d '{
      +    "collectionName": "demo2",
      +    "annsField": "my_vector",
      +    "data": [[0.19886812562848388, 0.06023560599112088, 0.6976963061752597, 0.2614474506242501, 0.838729485096104]],
      +    "filter": '"$filter"',
      +    "searchParams": {
      +        "params": {
      +            "nprobe": 10
      +        }
      +    },
      +    "limit": 3,
      +    "outputFields": ["text","id"]
      +}'
      +
      +

      텍스트 일치를 사용한 쿼리

      텍스트 일치는 쿼리 작업에서 스칼라 필터링에도 사용할 수 있습니다. query() 메서드의 expr 매개변수에 TEXT_MATCH 표현식을 지정하면 주어진 용어와 일치하는 문서를 검색할 수 있습니다.

      +

      아래 예는 text 필드에 keyword1keyword2 라는 용어가 모두 포함된 문서를 검색합니다.

      +
      # Match entities with both `keyword1` and `keyword2`​
       filter = "TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')"​
       ​
      @@ -157,6 +353,39 @@ result = MilvusClient.query(​
           output_fields=["id", "text"]​
       )​
       
      +
      +
      String filter = "TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')";
      +
      +QueryResp queryResp = client.query(QueryReq.builder()
      +        .collectionName("YOUR_COLLECTION_NAME")
      +        .filter(filter)
      +        .outputFields(Arrays.asList("id", "text"))
      +        .build()
      +);
      +
      +
      // Match entities with both `keyword1` and `keyword2`
      +const filter = "TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')";
      +
+const result = await client.query({
      +    collection_name: "YOUR_COLLECTION_NAME",
      +    filter: filter, 
      +    output_fields: ["id", "text"]
+});
      +
      +
      export filter="\"TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')\""
      +
      +export CLUSTER_ENDPOINT="http://localhost:19530"
      +export TOKEN="root:Milvus"
      +
      +curl --request POST \
      +--url "${CLUSTER_ENDPOINT}/v2/vectordb/entities/query" \
      +--header "Authorization: Bearer ${TOKEN}" \
      +--header "Content-Type: application/json" \
      +-d '{
      +    "collectionName": "demo2",
      +    "filter": '"$filter"',
      +    "outputFields": ["id", "text"]
      +}'
       

      고려 사항

        -
      • 필드에 키워드 검색을 사용하도록 설정하면 역 인덱스가 생성되어 스토리지 리소스를 소모합니다. 이 기능을 사용하도록 설정할 때는 텍스트 크기, 고유 토큰 및 사용되는 분석기에 따라 달라지므로 스토리지 영향을 고려하세요.

      • +
      • 필드에 대해 텍스트 일치를 활성화하면 반전 인덱스가 생성되어 스토리지 리소스를 소모합니다. 이 기능을 사용하도록 설정할 때는 텍스트 크기, 고유 토큰 및 사용되는 분석기에 따라 달라지므로 스토리지 영향을 고려하세요.

      • 스키마에서 분석기를 정의하면 해당 컬렉션에 대해 해당 설정이 영구적으로 적용됩니다. 다른 분석기가 필요에 더 적합하다고 판단되면 기존 컬렉션을 삭제하고 원하는 분석기 구성으로 새 컬렉션을 만들 수 있습니다.
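A rough sketch of that rebuild workflow (the collection name and variables below are placeholders; dropping a collection permanently deletes its data):

# Placeholder names; ensure the data can be re-ingested before dropping the collection.
client.drop_collection(collection_name="YOUR_COLLECTION_NAME")

# Redefine the schema with the desired analyzer_params, then recreate the collection.
client.create_collection(
    collection_name="YOUR_COLLECTION_NAME",
    schema=new_schema,          # schema rebuilt with the new analyzer settings
    index_params=index_params,
)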

      diff --git a/localization/v2.5.x/site/ko/userGuide/search-query-get/multi-vector-search.md b/localization/v2.5.x/site/ko/userGuide/search-query-get/multi-vector-search.md index 7291fe43a..1cb6a534e 100644 --- a/localization/v2.5.x/site/ko/userGuide/search-query-get/multi-vector-search.md +++ b/localization/v2.5.x/site/ko/userGuide/search-query-get/multi-vector-search.md @@ -39,11 +39,11 @@ title: 하이브리드 검색

      하이브리드 검색은 다음 두 가지 시나리오에 적합합니다.

      희소 밀도 벡터 검색

      다양한 유형의 벡터는 서로 다른 정보를 나타낼 수 있으며, 다양한 임베딩 모델을 사용하면 데이터의 다양한 특징과 측면을 보다 포괄적으로 나타낼 수 있습니다. 예를 들어, 같은 문장에 대해 서로 다른 임베딩 모델을 사용하면 의미적 의미를 나타내는 고밀도 벡터와 문장의 단어 빈도를 나타내는 스파스 벡터를 생성할 수 있습니다.

        -
      • 스파스 벡터: 스파스 벡터는 벡터 차원이 높고 0이 아닌 값이 거의 없다는 특징이 있습니다. 이러한 구조는 기존의 정보 검색 애플리케이션에 특히 적합합니다. 대부분의 경우, 스파스 벡터에 사용되는 차원 수는 하나 이상의 언어에 걸쳐 서로 다른 토큰에 해당합니다. 각 차원에는 문서 내에서 해당 토큰의 상대적 중요성을 나타내는 값이 할당됩니다. 이 레이아웃은 키워드 매칭과 관련된 작업에 유리합니다.

      • -
      • 고밀도 벡터: 고밀도 벡터는 신경망에서 파생된 임베딩입니다. 정렬된 배열로 배열된 이 벡터는 입력 텍스트의 의미적 본질을 포착합니다. 고밀도 벡터는 텍스트 처리에만 국한되지 않고 컴퓨터 비전에서도 시각적 데이터의 의미를 표현하는 데 광범위하게 사용됩니다. 일반적으로 텍스트 임베딩 모델에 의해 생성되는 이러한 고밀도 벡터는 대부분 또는 모든 요소가 0이 아닌 것이 특징입니다. 따라서 고밀도 벡터는 정확한 키워드가 일치하지 않더라도 벡터 거리를 기반으로 가장 유사한 결과를 반환할 수 있기 때문에 시맨틱 검색 애플리케이션에 특히 효과적입니다. 이 기능을 사용하면 키워드 기반 접근 방식으로는 놓칠 수 있는 개념 간의 관계를 포착하여 보다 미묘하고 문맥을 인식하는 검색 결과를 얻을 수 있습니다.

      • +
      • 스파스 벡터: 스파스 벡터는 벡터 차원이 높고 0이 아닌 값이 거의 없다는 특징이 있습니다. 이러한 구조는 기존의 정보 검색 애플리케이션에 특히 적합합니다. 대부분의 경우, 스파스 벡터에 사용되는 차원 수는 하나 이상의 언어에 걸쳐 서로 다른 토큰에 해당합니다. 각 차원에는 문서 내에서 해당 토큰의 상대적 중요성을 나타내는 값이 할당됩니다. 이 레이아웃은 텍스트 매칭과 관련된 작업에 유리합니다.

      • +
      • 고밀도 벡터: 고밀도 벡터는 신경망에서 파생된 임베딩입니다. 이러한 벡터를 정렬된 배열로 배열하면 입력 텍스트의 의미적 본질을 포착할 수 있습니다. 고밀도 벡터는 텍스트 처리에만 국한되지 않고 컴퓨터 비전에서도 시각적 데이터의 의미를 표현하는 데 광범위하게 사용됩니다. 일반적으로 텍스트 임베딩 모델에 의해 생성되는 이러한 고밀도 벡터는 대부분 또는 모든 요소가 0이 아닌 것이 특징입니다. 따라서 고밀도 벡터는 정확한 텍스트가 일치하지 않더라도 벡터 거리를 기반으로 가장 유사한 결과를 반환할 수 있기 때문에 시맨틱 검색 애플리케이션에 특히 효과적입니다. 이 기능을 사용하면 키워드 기반 접근 방식에서는 놓칠 수 있는 개념 간의 관계를 포착하여 보다 미묘하고 문맥을 인식하는 검색 결과를 얻을 수 있습니다.

      자세한 내용은 스파스 벡터밀도 벡터를 참조하세요.
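To make the contrast concrete, here is a small illustrative sketch (values are made up) of how the two kinds of vectors are typically represented when inserting data into Milvus:

# Sparse: only non-zero dimensions are stored, keyed by dimension index.
sparse_vector = {17: 0.32, 4096: 0.71, 80001: 0.12}

# Dense: a fixed-length list in which most elements are non-zero.
dense_vector = [0.021, -0.134, 0.448, 0.072, 0.931]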

      -

      멀티모달 검색

      멀티모달 검색은 이미지, 동영상, 오디오, 텍스트 등 여러 양식에 걸쳐 비정형 데이터의 유사성 검색을 말합니다. 예를 들어 지문, 음성, 얼굴 특징 등 다양한 양식의 데이터를 사용하여 사람을 표현할 수 있습니다. 하이브리드 검색은 여러 검색을 동시에 지원합니다. 예를 들어 지문과 음성 지문이 비슷한 사람을 검색할 수 있습니다.

      +

      멀티모달 검색

      멀티모달 검색은 이미지, 동영상, 오디오, 텍스트 등 여러 양식에 걸쳐 비정형 데이터의 유사성 검색을 말합니다. 예를 들어, 지문, 음성, 얼굴 특징 등 다양한 양식의 데이터를 사용하여 사람을 표현할 수 있습니다. 하이브리드 검색은 여러 검색을 동시에 지원합니다. 예를 들어 지문과 음성 지문이 비슷한 사람을 검색할 수 있습니다.

      워크플로

      ANN과 kNN(k-Nearest Neighbors) 검색은 벡터 유사도 검색의 일반적인 방법입니다. kNN 검색에서는 벡터 공간의 모든 벡터를 검색 요청에 포함된 쿼리 벡터와 비교하여 가장 유사한 벡터를 찾아내야 하므로 시간과 리소스가 많이 소요됩니다.

      kNN 검색과 달리 ANN 검색 알고리즘은 벡터 임베딩의 정렬된 순서를 기록하는 인덱스 파일을 요청합니다. 검색 요청이 들어오면 인덱스 파일을 참조로 사용해 쿼리 벡터와 가장 유사한 벡터 임베딩이 포함된 하위 그룹을 빠르게 찾을 수 있습니다. 그런 다음 지정된 메트릭 유형을 사용하여 쿼리 벡터와 하위 그룹의 벡터 간의 유사성을 측정하고, 쿼리 벡터와의 유사성을 기준으로 그룹 구성원을 정렬하고, 상위 K 개의 그룹 구성원을 파악할 수 있습니다.

      ANN 검색은 미리 구축된 인덱스에 따라 달라지며, 검색 처리량, 메모리 사용량 및 검색 정확도는 선택한 인덱스 유형에 따라 달라질 수 있습니다. 검색 성능과 정확성 간의 균형을 맞춰야 합니다.

      -

      학습 곡선을 줄이기 위해 Milvus는 AUTOINDEX를 제공합니다. 자동 인덱스는 인덱스를 구축하는 동안 컬렉션 내의 데이터 분포를 분석하고 분석 결과에 따라 가장 최적화된 인덱스 파라미터를 설정하여 검색 성능과 정확성 간의 균형을 맞출 수 있습니다.

      +

      학습 곡선을 줄이기 위해 Milvus는 AUTOINDEX를 제공합니다. 자동 인덱스는 인덱스를 구축하는 동안 컬렉션 내의 데이터 분포를 분석하고 분석 결과에 따라 가장 최적화된 인덱스 매개변수를 설정하여 검색 성능과 정확성 간의 균형을 맞출 수 있습니다.
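As a rough Python sketch (the field name and metric type are assumptions), delegating index selection to AUTOINDEX looks like this:

index_params = MilvusClient.prepare_index_params()

index_params.add_index(
    field_name="vector",      # hypothetical dense vector field
    index_type="AUTOINDEX",   # let Milvus analyze the data and choose index parameters
    metric_type="COSINE",     # assumed metric; pick one suited to your embeddings
)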

      자동 인덱스 및 적용 가능한 메트릭 유형에 대한 자세한 내용은 자동 인덱스메트릭 유형을 참조하세요. 이 섹션에서는 다음 주제에 대한 자세한 정보를 확인할 수 있습니다.

      • 단일 벡터 검색

      • @@ -888,7 +888,7 @@ curl --request POST \​

        검색 요청에 필터링 조건을 포함하면 Milvus가 ANN 검색을 수행하기 전에 메타데이터 필터링을 수행하여 검색 범위를 전체 컬렉션에서 지정된 필터링 조건과 일치하는 엔티티로만 축소할 수 있습니다.

        메타데이터 필터링 및 필터링 조건에 대한 자세한 내용은 필터링된 검색메타데이터 필터링을 참조하세요.

      • 범위 검색

        -

        특정 범위 내에서 반환되는 엔티티의 거리 또는 점수를 제한하여 검색 결과 관련성을 향상시킬 수 있습니다. Milvus에서 범위 검색은 쿼리 벡터와 가장 유사한 벡터가 포함된 동심원을 중심으로 두 개의 동심원을 그리는 방식으로 이루어집니다. 검색 요청은 두 원의 반지름을 지정하고 Milvus는 바깥쪽 원에 속하지만 안쪽 원에 속하지 않는 모든 벡터 임베딩을 반환합니다.

        +

        특정 범위 내에서 반환되는 엔티티의 거리 또는 점수를 제한하여 검색 결과 관련성을 향상시킬 수 있습니다. Milvus에서 범위 검색은 쿼리 벡터와 가장 유사한 벡터가 포함된 동심원을 중심으로 두 개의 동심원을 그리는 방식으로 이루어집니다. 검색 요청은 두 원의 반지름을 지정하며, Milvus는 바깥쪽 원에 속하지만 안쪽 원에 속하지 않는 모든 벡터 임베딩을 반환합니다.

        범위 검색에 대한 자세한 내용은 범위 검색을 참조하세요.

      • 그룹 검색

        반환된 엔티티가 특정 필드에서 동일한 값을 갖는 경우 검색 결과가 벡터 공간에 있는 모든 벡터 임베딩의 분포를 나타내지 않을 수 있습니다. 검색 결과를 다양화하려면 그룹화 검색을 사용해 보세요.

        @@ -902,9 +902,9 @@ curl --request POST \​
      • 전체 텍스트 검색

        전체 텍스트 검색은 텍스트 데이터 세트에서 특정 용어나 구문이 포함된 문서를 검색한 다음 관련성에 따라 결과의 순위를 매기는 기능입니다. 이 기능은 정확한 용어를 놓칠 수 있는 시맨틱 검색의 한계를 극복하여 가장 정확하고 문맥과 연관성이 높은 결과를 얻을 수 있도록 해줍니다. 또한, 원시 텍스트 입력을 받아 벡터 임베딩을 수동으로 생성할 필요 없이 텍스트 데이터를 스파스 임베딩으로 자동 변환하여 벡터 검색을 간소화합니다.

        전체 텍스트 검색에 대한 자세한 내용은 전체 텍스트 검색을 참조하세요.

      • -
      • 키워드 검색

        -

        Milvus의 키워드 검색은 특정 용어를 기반으로 정확한 문서 검색을 가능하게 합니다. 이 기능은 주로 특정 조건을 충족하는 필터링 검색에 사용되며, 스칼라 필터링을 통합하여 쿼리 결과를 구체화함으로써 스칼라 기준을 충족하는 벡터 내에서 유사성 검색을 할 수 있습니다.

        -

        키워드 검색에 대한 자세한 내용은 키워드 검색을 참조하세요.

      • +
      • 텍스트 일치

        +

        Milvus의 텍스트 일치를 사용하면 특정 용어를 기반으로 정확한 문서 검색이 가능합니다. 이 기능은 주로 특정 조건을 충족하는 필터링 검색에 사용되며, 스칼라 필터링을 통합하여 쿼리 결과를 구체화함으로써 스칼라 기준을 충족하는 벡터 내에서 유사성 검색을 할 수 있습니다.

        +

        텍스트 일치에 대한 자세한 내용은 텍스트 일치를 참조하세요.

      • 파티션 키 사용

        메타데이터 필터링에 여러 개의 스칼라 필드를 포함시키고 다소 복잡한 필터링 조건을 사용하면 검색 효율성에 영향을 미칠 수 있습니다. 스칼라 필드를 파티션 키로 설정하고 검색 요청에 파티션 키와 관련된 필터링 조건을 사용하면 지정된 파티션 키 값에 해당하는 파티션 내에서 검색 범위를 제한하는 데 도움이 될 수 있습니다.

        파티션 키에 대한 자세한 내용은 파티션 키 사용을 참조하세요.

      • diff --git a/localization/v2.5.x/site/pt/adminGuide/upgrade-pulsar-v3.json b/localization/v2.5.x/site/pt/adminGuide/upgrade-pulsar-v3.json index 1283941a2..697f3144c 100644 --- a/localization/v2.5.x/site/pt/adminGuide/upgrade-pulsar-v3.json +++ b/localization/v2.5.x/site/pt/adminGuide/upgrade-pulsar-v3.json @@ -1 +1 @@ -{"codeList":["kubectl -n default port-forward deploy/my-release-milvus-proxy 9091:9091 &​\n","[1] 8116​\nForwarding from 127.0.0.1:9091 -> 9091​\n\n","pid=8116​\n\n","curl 127.0.0.1:9091/api/v1/collections \\​\n|curl 127.0.0.1:9091/api/v1/persist -d @/dev/stdin\\​\n|jq '.flush_coll_segIDs'| jq '[.[] | .data[]]' | jq '{segmentIDs: (.)}' \\​\n> flushing_segments.json​\ncat flushing_segments.json​\n\n","{​\n\"segmentIDs\": [​\n 454097953998181000,​\n 454097953999383600,​\n 454097953998180800​\n]​\n}​\n\n","cat flushing_segments.json| curl -X GET 127.0.0.1:9091/api/v1/persist/state -d @/dev/stdin ​\n\n","{\"status\":{},\"flushed\":true}​\n\n","kill $pid​\n\n","[1] + 8116 terminated kubectl -n default port-forward deploy/my-release-milvus-proxy 9091:9091 ​\n\n","helm -n default get values my-release -o yaml > values.yaml​\ncat values.yaml​\n\n","helm -n default uninstall my-release​\n\n","These resources were kept due to the resource policy:​\n[PersistentVolumeClaim] my-release-minio​\n​\nrelease \"my-release\" uninstalled​\n\n","kubectl -n default get pvc -lapp=pulsar,release=my-release |grep -v NAME |awk '{print $1}' > pulsar-pvcs.txt​\nkubectl -n default get pvc -lapp=pulsar,release=my-release -o custom-columns=VOL:.spec.volumeName|grep -v VOL > pulsar-pvs.txt​\necho \"Volume Claims:\"​\ncat pulsar-pvcs.txt​\necho \"Volumes:\"​\ncat pulsar-pvs.txt​\n\n","Volume Claims:​\nmy-release-pulsar-bookie-journal-my-release-pulsar-bookie-0​\nmy-release-pulsar-bookie-journal-my-release-pulsar-bookie-1​\nmy-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-0​\nmy-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-1​\nmy-release-pulsar-zookeeper-data-my-release-pulsar-zookeeper-0​\nVolumes:​\npvc-f590a4de-df31-4ca8-a424-007eac3c619a​\npvc-17b0e215-3e14-4d14-901e-1a1dda9ff5a3​\npvc-72f83c25-6ea1-45ee-9559-0b783f2c530b​\npvc-60dcb6e4-760d-46c7-af1a-d1fc153b0caf​\npvc-2da33f64-c053-42b9-bb72-c5d50779aa0a​\n\n","cat pulsar-pvcs.txt |xargs -I {} kubectl -n default delete pvc {} --wait=false​\n\n","persistentvolumeclaim \"my-release-pulsar-bookie-journal-my-release-pulsar-bookie-0\" deleted​\npersistentvolumeclaim \"my-release-pulsar-bookie-journal-my-release-pulsar-bookie-1\" deleted​\npersistentvolumeclaim \"my-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-0\" deleted​\npersistentvolumeclaim \"my-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-1\" deleted​\npersistentvolumeclaim \"my-release-pulsar-zookeeper-data-my-release-pulsar-zookeeper-0\" deleted​\n\n","cat pulsar-pvs.txt |xargs -I {} kubectl -n default delete pvc {} --wait=false​\n\n","Error from server (NotFound): persistentvolumeclaims \"pvc-f590a4de-df31-4ca8-a424-007eac3c619a\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-17b0e215-3e14-4d14-901e-1a1dda9ff5a3\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-72f83c25-6ea1-45ee-9559-0b783f2c530b\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-60dcb6e4-760d-46c7-af1a-d1fc153b0caf\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-2da33f64-c053-42b9-bb72-c5d50779aa0a\" not found​\n\n","kubectl -n default get milvus my-release -o yaml > milvus.yaml​\nhead 
milvus.yaml -n 20​\n\n","apiVersion: milvus.io/v1beta1​\nkind: Milvus​\nmetadata:​\nannotations:​\n milvus.io/dependency-values-merged: \"true\"​\n milvus.io/pod-service-label-added: \"true\"​\n milvus.io/querynode-current-group-id: \"0\"​\ncreationTimestamp: \"2024-11-22T08:06:59Z\"​\nfinalizers:​\n- milvus.milvus.io/finalizer​\ngeneration: 3​\nlabels:​\n app: milvus​\n milvus.io/operator-version: 1.1.2​\nname: my-release​\nnamespace: default​\nresourceVersion: \"692217324\"​\nuid: 7a469ed0-9df1-494e-bd9a-340fac4305b5​\nspec:​\ncomponents:​\n\n","# a patch to retain etcd & storage data and delete pulsar data while delete milvus​\nspec:​\ndependencies:​\n etcd:​\n inCluster:​\n deletionPolicy: Retain​\n pvcDeletion: false​\n storage:​\n inCluster:​\n deletionPolicy: Retain​\n pvcDeletion: false​\n pulsar:​\n inCluster:​\n deletionPolicy: Delete​\n pvcDeletion: true​\n\n","kubectl -n default patch milvus my-release --patch-file patch.yaml --type=merge​\n\n","milvus.milvus.io/my-release patched​\n\n","kubectl -n default delete milvus my-release --wait=false​\nkubectl -n default get milvus my-release​\nkubectl -n default delete milvus my-release --wait=true​\n\n","milvus.milvus.io \"my-release\" deleted​\nNAME MODE STATUS UPDATED AGE​\nmy-release cluster Deleting True 41m​\nmilvus.milvus.io \"my-release\" deleted​\n\n","kubectl -n default get milvus my-release​\n\n","No resources found in default namespace.​\n\n","# change the following:​\npulsar:​\nenabled: false # set to false​\n# you may also clean up rest fields under pulsar field​\n# it's ok to keep them though.​\npulsarv3:​\nenabled: true​\n# append other values for pulsar v3 chart if needs​\n\n","helm repo add zilliztech https://zilliztech.github.io/milvus-helm​\nhelm repo update zilliztech​\n\n","\"zilliztech\" already exists with the same configuration, skipping​\nHang tight while we grab the latest from your chart repositories...​\n...Successfully got an update from the \"zilliztech\" chart repository​\nUpdate Complete. 
⎈Happy Helming!⎈​\n\n","helm -n default install my-release zilliztech/milvus --reset-values -f values.yaml​\n\n","NAME: my-release​\nLAST DEPLOYED: Fri Nov 22 15:31:27 2024​\nNAMESPACE: default​\nSTATUS: deployed​\nREVISION: 1​\nTEST SUITE: None​\n\n","NAME READY STATUS RESTARTS AGE​\nmy-release-etcd-0 1/1 Running 0 4m3s​\nmy-release-milvus-datanode-56487bc4bc-s6mbd 1/1 Running 0 4m5s​\nmy-release-milvus-indexnode-6476894d6-rv85d 1/1 Running 0 4m5s​\nmy-release-milvus-mixcoord-6d8875cb9c-67fcq 1/1 Running 0 4m4s​\nmy-release-milvus-proxy-7bc45d57c5-2qf8m 1/1 Running 0 4m4s​\nmy-release-milvus-querynode-77465747b-kt7f4 1/1 Running 0 4m4s​\nmy-release-minio-684ff4f5df-pnc97 1/1 Running 0 4m5s​\nmy-release-pulsarv3-bookie-0 1/1 Running 0 4m3s​\nmy-release-pulsarv3-bookie-1 1/1 Running 0 4m3s​\nmy-release-pulsarv3-bookie-2 1/1 Running 0 4m3s​\nmy-release-pulsarv3-bookie-init-6z4tk 0/1 Completed 0 4m1s​\nmy-release-pulsarv3-broker-0 1/1 Running 0 4m2s​\nmy-release-pulsarv3-broker-1 1/1 Running 0 4m2s​\nmy-release-pulsarv3-proxy-0 1/1 Running 0 4m2s​\nmy-release-pulsarv3-proxy-1 1/1 Running 0 4m2s​\nmy-release-pulsarv3-pulsar-init-wvqpc 0/1 Completed 0 4m1s​\nmy-release-pulsarv3-recovery-0 1/1 Running 0 4m3s​\nmy-release-pulsarv3-zookeeper-0 1/1 Running 0 4m2s​\nmy-release-pulsarv3-zookeeper-1 1/1 Running 0 4m2s​\nmy-release-pulsarv3-zookeeper-2 1/1 Running 0 4m2s​\n\n","# change the followings fields:​\napiVersion: milvus.io/v1beta1​\nkind: Milvus​\nmetadata:​\nannotations: null # this field should be removed or set to null​\nresourceVersion: null # this field should be removed or set to null​\nuid: null # this field should be removed or set to null​\nspec:​\ndependencies:​\n pulsar:​\n inCluster:​\n chartVersion: pulsar-v3​\n # delete all previous values for pulsar v2 and set it to null.​\n # you may add additional values here for pulsar v3 if you're sure about it.​\n values: null​\n\n","helm repo add milvus-operator https://zilliztech.github.io/milvus-operator​\nhelm repo update milvus-operator​\nhelm -n milvus-operator upgrade milvus-operator milvus-operator/milvus-operator​\n\n","kubectl create -f milvus.yaml​\n\n","milvus.milvus.io/my-release created​\n\n","NAME READY STATUS RESTARTS AGE​\nmy-release-etcd-0 1/1 Running 0 65m​\nmy-release-milvus-datanode-57fd59ff58-5mdrk 1/1 Running 0 93s​\nmy-release-milvus-indexnode-67867c6b9b-4wsbw 1/1 Running 0 93s​\nmy-release-milvus-mixcoord-797849f9bb-sf8z5 1/1 Running 0 93s​\nmy-release-milvus-proxy-5d5bf98445-c55m6 1/1 Running 0 93s​\nmy-release-milvus-querynode-0-64797f5c9-lw4rh 1/1 Running 0 92s​\nmy-release-minio-79476ccb49-zvt2h 1/1 Running 0 65m​\nmy-release-pulsar-bookie-0 1/1 Running 0 5m10s​\nmy-release-pulsar-bookie-1 1/1 Running 0 5m10s​\nmy-release-pulsar-bookie-2 1/1 Running 0 5m10s​\nmy-release-pulsar-bookie-init-v8fdj 0/1 Completed 0 5m11s​\nmy-release-pulsar-broker-0 1/1 Running 0 5m11s​\nmy-release-pulsar-broker-1 1/1 Running 0 5m10s​\nmy-release-pulsar-proxy-0 1/1 Running 0 5m11s​\nmy-release-pulsar-proxy-1 1/1 Running 0 5m10s​\nmy-release-pulsar-pulsar-init-5lhx7 0/1 Completed 0 5m11s​\nmy-release-pulsar-recovery-0 1/1 Running 0 5m11s​\nmy-release-pulsar-zookeeper-0 1/1 Running 0 5m11s​\nmy-release-pulsar-zookeeper-1 1/1 Running 0 5m10s​\nmy-release-pulsar-zookeeper-2 1/1 Running 0 5m10s​\n\n"],"headingContent":"Upgrading Pulsar ​","anchorList":[{"label":"Atualizando o 
Pulsar","href":"Upgrading-Pulsar-​","type":1,"isActive":false},{"label":"Roteiro","href":"Roadmap","type":2,"isActive":false},{"label":"Procedimentos","href":"Procedures","type":2,"isActive":false}]} \ No newline at end of file +{"codeList":["kubectl -n default port-forward deploy/my-release-milvus-proxy 9091:9091 &​\n","[1] 8116​\nForwarding from 127.0.0.1:9091 -> 9091​\n\n","pid=8116​\n\n","curl 127.0.0.1:9091/api/v1/collections \\​\n|curl 127.0.0.1:9091/api/v1/persist -d @/dev/stdin\\​\n|jq '.flush_coll_segIDs'| jq '[.[] | .data[]]' | jq '{segmentIDs: (.)}' \\​\n> flushing_segments.json​\ncat flushing_segments.json​\n\n","{​\n \"segmentIDs\": [​\n 454097953998181000,​\n 454097953999383600,​\n 454097953998180800​\n ]​\n}​\n\n","cat flushing_segments.json| curl -X GET 127.0.0.1:9091/api/v1/persist/state -d @/dev/stdin ​\n\n","{\"status\":{},\"flushed\":true}​\n\n","kill $pid​\n\n","[1] + 8116 terminated kubectl -n default port-forward deploy/my-release-milvus-proxy 9091:9091 ​\n\n","helm -n default get values my-release -o yaml > values.yaml​\ncat values.yaml​\n\n","helm -n default uninstall my-release​\n\n","These resources were kept due to the resource policy:​\n[PersistentVolumeClaim] my-release-minio​\n​\nrelease \"my-release\" uninstalled​\n\n","kubectl -n default get pvc -lapp=pulsar,release=my-release |grep -v NAME |awk '{print $1}' > pulsar-pvcs.txt​\nkubectl -n default get pvc -lapp=pulsar,release=my-release -o custom-columns=VOL:.spec.volumeName|grep -v VOL > pulsar-pvs.txt​\necho \"Volume Claims:\"​\ncat pulsar-pvcs.txt​\necho \"Volumes:\"​\ncat pulsar-pvs.txt​\n\n","Volume Claims:​\nmy-release-pulsar-bookie-journal-my-release-pulsar-bookie-0​\nmy-release-pulsar-bookie-journal-my-release-pulsar-bookie-1​\nmy-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-0​\nmy-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-1​\nmy-release-pulsar-zookeeper-data-my-release-pulsar-zookeeper-0​\nVolumes:​\npvc-f590a4de-df31-4ca8-a424-007eac3c619a​\npvc-17b0e215-3e14-4d14-901e-1a1dda9ff5a3​\npvc-72f83c25-6ea1-45ee-9559-0b783f2c530b​\npvc-60dcb6e4-760d-46c7-af1a-d1fc153b0caf​\npvc-2da33f64-c053-42b9-bb72-c5d50779aa0a​\n\n","cat pulsar-pvcs.txt |xargs -I {} kubectl -n default delete pvc {} --wait=false​\n\n","persistentvolumeclaim \"my-release-pulsar-bookie-journal-my-release-pulsar-bookie-0\" deleted​\npersistentvolumeclaim \"my-release-pulsar-bookie-journal-my-release-pulsar-bookie-1\" deleted​\npersistentvolumeclaim \"my-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-0\" deleted​\npersistentvolumeclaim \"my-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-1\" deleted​\npersistentvolumeclaim \"my-release-pulsar-zookeeper-data-my-release-pulsar-zookeeper-0\" deleted​\n\n","cat pulsar-pvs.txt |xargs -I {} kubectl -n default delete pvc {} --wait=false​\n\n","Error from server (NotFound): persistentvolumeclaims \"pvc-f590a4de-df31-4ca8-a424-007eac3c619a\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-17b0e215-3e14-4d14-901e-1a1dda9ff5a3\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-72f83c25-6ea1-45ee-9559-0b783f2c530b\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-60dcb6e4-760d-46c7-af1a-d1fc153b0caf\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-2da33f64-c053-42b9-bb72-c5d50779aa0a\" not found​\n\n","kubectl -n default get milvus my-release -o yaml > milvus.yaml​\nhead milvus.yaml -n 20​\n\n","apiVersion: milvus.io/v1beta1​\nkind: Milvus​\nmetadata:​\n annotations:​\n 
milvus.io/dependency-values-merged: \"true\"​\n milvus.io/pod-service-label-added: \"true\"​\n milvus.io/querynode-current-group-id: \"0\"​\n creationTimestamp: \"2024-11-22T08:06:59Z\"​\n finalizers:​\n - milvus.milvus.io/finalizer​\n generation: 3​\n labels:​\n app: milvus​\n milvus.io/operator-version: 1.1.2​\nname: my-release​\nnamespace: default​\nresourceVersion: \"692217324\"​\nuid: 7a469ed0-9df1-494e-bd9a-340fac4305b5​\nspec:​\n components:​\n\n","# a patch to retain etcd & storage data and delete pulsar data while delete milvus​\nspec:​\n dependencies:​\n etcd:​\n inCluster:​\n deletionPolicy: Retain​\n pvcDeletion: false​\n storage:​\n inCluster:​\n deletionPolicy: Retain​\n pvcDeletion: false​\n pulsar:​\n inCluster:​\n deletionPolicy: Delete​\n pvcDeletion: true​\n\n","kubectl -n default patch milvus my-release --patch-file patch.yaml --type=merge​\n\n","milvus.milvus.io/my-release patched​\n\n","kubectl -n default delete milvus my-release --wait=false​\nkubectl -n default get milvus my-release​\nkubectl -n default delete milvus my-release --wait=true​\n\n","milvus.milvus.io \"my-release\" deleted​\nNAME MODE STATUS UPDATED AGE​\nmy-release cluster Deleting True 41m​\nmilvus.milvus.io \"my-release\" deleted​\n\n","kubectl -n default get milvus my-release​\n\n","No resources found in default namespace.​\n\n","# change the following:​\npulsar:​\n enabled: false # set to false​\n # you may also clean up rest fields under pulsar field​\n # it's ok to keep them though.​\npulsarv3:​\n enabled: true​\n # append other values for pulsar v3 chart if needs​\n\n","helm repo add zilliztech https://zilliztech.github.io/milvus-helm​\nhelm repo update zilliztech​\n\n","\"zilliztech\" already exists with the same configuration, skipping​\nHang tight while we grab the latest from your chart repositories...​\n...Successfully got an update from the \"zilliztech\" chart repository​\nUpdate Complete. 
⎈Happy Helming!⎈​\n\n","helm -n default install my-release zilliztech/milvus --reset-values -f values.yaml​\n\n","NAME: my-release​\nLAST DEPLOYED: Fri Nov 22 15:31:27 2024​\nNAMESPACE: default​\nSTATUS: deployed​\nREVISION: 1​\nTEST SUITE: None​\n\n","NAME READY STATUS RESTARTS AGE​\nmy-release-etcd-0 1/1 Running 0 4m3s​\nmy-release-milvus-datanode-56487bc4bc-s6mbd 1/1 Running 0 4m5s​\nmy-release-milvus-indexnode-6476894d6-rv85d 1/1 Running 0 4m5s​\nmy-release-milvus-mixcoord-6d8875cb9c-67fcq 1/1 Running 0 4m4s​\nmy-release-milvus-proxy-7bc45d57c5-2qf8m 1/1 Running 0 4m4s​\nmy-release-milvus-querynode-77465747b-kt7f4 1/1 Running 0 4m4s​\nmy-release-minio-684ff4f5df-pnc97 1/1 Running 0 4m5s​\nmy-release-pulsarv3-bookie-0 1/1 Running 0 4m3s​\nmy-release-pulsarv3-bookie-1 1/1 Running 0 4m3s​\nmy-release-pulsarv3-bookie-2 1/1 Running 0 4m3s​\nmy-release-pulsarv3-bookie-init-6z4tk 0/1 Completed 0 4m1s​\nmy-release-pulsarv3-broker-0 1/1 Running 0 4m2s​\nmy-release-pulsarv3-broker-1 1/1 Running 0 4m2s​\nmy-release-pulsarv3-proxy-0 1/1 Running 0 4m2s​\nmy-release-pulsarv3-proxy-1 1/1 Running 0 4m2s​\nmy-release-pulsarv3-pulsar-init-wvqpc 0/1 Completed 0 4m1s​\nmy-release-pulsarv3-recovery-0 1/1 Running 0 4m3s​\nmy-release-pulsarv3-zookeeper-0 1/1 Running 0 4m2s​\nmy-release-pulsarv3-zookeeper-1 1/1 Running 0 4m2s​\nmy-release-pulsarv3-zookeeper-2 1/1 Running 0 4m2s​\n\n","# change the followings fields:​\napiVersion: milvus.io/v1beta1​\nkind: Milvus​\nmetadata:​\n annotations: null # this field should be removed or set to null​\n resourceVersion: null # this field should be removed or set to null​\n uid: null # this field should be removed or set to null​\nspec:​\n dependencies:​\n pulsar:​\n inCluster:​\n chartVersion: pulsar-v3​\n # delete all previous values for pulsar v2 and set it to null.​\n # you may add additional values here for pulsar v3 if you're sure about it.​\n values: null​\n\n","helm repo add milvus-operator https://zilliztech.github.io/milvus-operator​\nhelm repo update milvus-operator​\nhelm -n milvus-operator upgrade milvus-operator milvus-operator/milvus-operator​\n\n","kubectl create -f milvus.yaml​\n\n","milvus.milvus.io/my-release created​\n\n","NAME READY STATUS RESTARTS AGE​\nmy-release-etcd-0 1/1 Running 0 65m​\nmy-release-milvus-datanode-57fd59ff58-5mdrk 1/1 Running 0 93s​\nmy-release-milvus-indexnode-67867c6b9b-4wsbw 1/1 Running 0 93s​\nmy-release-milvus-mixcoord-797849f9bb-sf8z5 1/1 Running 0 93s​\nmy-release-milvus-proxy-5d5bf98445-c55m6 1/1 Running 0 93s​\nmy-release-milvus-querynode-0-64797f5c9-lw4rh 1/1 Running 0 92s​\nmy-release-minio-79476ccb49-zvt2h 1/1 Running 0 65m​\nmy-release-pulsar-bookie-0 1/1 Running 0 5m10s​\nmy-release-pulsar-bookie-1 1/1 Running 0 5m10s​\nmy-release-pulsar-bookie-2 1/1 Running 0 5m10s​\nmy-release-pulsar-bookie-init-v8fdj 0/1 Completed 0 5m11s​\nmy-release-pulsar-broker-0 1/1 Running 0 5m11s​\nmy-release-pulsar-broker-1 1/1 Running 0 5m10s​\nmy-release-pulsar-proxy-0 1/1 Running 0 5m11s​\nmy-release-pulsar-proxy-1 1/1 Running 0 5m10s​\nmy-release-pulsar-pulsar-init-5lhx7 0/1 Completed 0 5m11s​\nmy-release-pulsar-recovery-0 1/1 Running 0 5m11s​\nmy-release-pulsar-zookeeper-0 1/1 Running 0 5m11s​\nmy-release-pulsar-zookeeper-1 1/1 Running 0 5m10s​\nmy-release-pulsar-zookeeper-2 1/1 Running 0 5m10s​\n\n"],"headingContent":"Upgrading Pulsar ​","anchorList":[{"label":"Atualizando o 
Pulsar","href":"Upgrading-Pulsar-​","type":1,"isActive":false},{"label":"Roteiro","href":"Roadmap","type":2,"isActive":false},{"label":"Procedimentos","href":"Procedures","type":2,"isActive":false}]} \ No newline at end of file diff --git a/localization/v2.5.x/site/pt/adminGuide/upgrade-pulsar-v3.md b/localization/v2.5.x/site/pt/adminGuide/upgrade-pulsar-v3.md index 758da47e1..593d09faf 100644 --- a/localization/v2.5.x/site/pt/adminGuide/upgrade-pulsar-v3.md +++ b/localization/v2.5.x/site/pt/adminGuide/upgrade-pulsar-v3.md @@ -26,7 +26,7 @@ title: Atualizar a Pulsar em Milvus de V2 para V3
        1. O processo de atualização requer uma breve interrupção do serviço (normalmente de alguns minutos a mais de dez minutos, dependendo da quantidade de dados).

        2. Antes da operação, é necessário impedir que todos os clientes em execução escrevam dados no Milvus. Caso contrário, os dados escritos podem perder-se.

        3. -
        4. Este artigo assume que o Milvus está instalado no espaço de nomes default e tem o nome my-release. Por favor, altere os parâmetros para o seu próprio espaço de nomes e nome de lançamento enquanto executa os comandos copiados desta página.

        5. +
        6. Este artigo pressupõe que o Milvus está instalado no espaço de nomes default e tem o nome my-release. Por favor, altere os parâmetros para o seu próprio espaço de nomes e nome de lançamento enquanto executa os comandos copiados desta página.

        7. Certifique-se de que o seu ambiente de trabalho tem permissões no namespace acima mencionado no cluster Kubernetes e que os seguintes comandos estão instalados.

          a. kubectl >= 1.20

          b. helm >= 3.14.0
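          Antes da janela de manutenção, pode forçar a persistência dos dados em buffer diretamente a partir de um cliente. O esboço abaixo é meramente ilustrativo (assume pymilvus >= 2.4, que expõe MilvusClient.flush e MilvusClient.list_collections, e um endpoint acessível localmente; ajuste o URI ao seu próprio ambiente):

          from pymilvus import MilvusClient

          # Illustrative endpoint; point this at your own Milvus proxy.
          client = MilvusClient(uri="http://127.0.0.1:19530")

          # Persist any buffered inserts so nothing is lost while Pulsar is replaced.
          for name in client.list_collections():
              client.flush(collection_name=name)
              print(f"flushed: {name}")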

          @@ -113,11 +113,11 @@ Forwarding from 127.

      Saída.

      {​
      -"segmentIDs": [​
      +  "segmentIDs": [​
           454097953998181000,​
           454097953999383600,​
           454097953998180800​
      -]​
      +  ]​
       }​
       
       
      @@ -223,15 +223,15 @@ head milvus.yaml -n 20
      apiVersion: milvus.io/v1beta1​
       kind: Milvus​
       metadata:​
      -annotations:​
      +  annotations:​
           milvus.io/dependency-values-merged: "true"​
           milvus.io/pod-service-label-added: "true"​
           milvus.io/querynode-current-group-id: "0"​
      -creationTimestamp: "2024-11-22T08:06:59Z"​
      -finalizers:​
      -- milvus.milvus.io/finalizer​
      -generation: 3​
      -labels:​
      +  creationTimestamp: "2024-11-22T08:06:59Z"​
      +  finalizers:​
      +  - milvus.milvus.io/finalizer​
      +  generation: 3​
      +  labels:​
           app: milvus​
           milvus.io/operator-version: 1.1.2​
       name: my-release​
      @@ -239,23 +239,23 @@ namespace: default​
       resourceVersion: "692217324"​
       uid: 7a469ed0-9df1-494e-bd9a-340fac4305b5​
       spec:​
      -components:​
      +  components:​
       
       
    • Crie um arquivo patch.yaml com o seguinte conteúdo.

      # a patch to retain etcd & storage data and delete pulsar data while delete milvus​
       spec:​
      -dependencies:​
      +  dependencies:​
           etcd:​
      -    inCluster:​
      +      inCluster:​
               deletionPolicy: Retain​
               pvcDeletion: false​
           storage:​
      -    inCluster:​
      +      inCluster:​
               deletionPolicy: Retain​
               pvcDeletion: false​
           pulsar:​
      -    inCluster:​
      +      inCluster:​
               deletionPolicy: Delete​
               pvcDeletion: true​
       
      @@ -274,7 +274,7 @@ kubectl -n default get milvus my-release​
       kubectl -n default delete milvus my-release --wait=true
      -

      Saída: Observe que pode levar alguns minutos para o milvus parar graciosamente e para o operador excluir os volumes do pulsar.

      +

      Saída: Note que pode levar alguns minutos para que o milvus pare graciosamente e para que o operador exclua os volumes do pulsar.

      milvus.milvus.io "my-release" deleted​
       NAME         MODE      STATUS     UPDATED   AGE​
       my-release   cluster   Deleting   True      41m​
      @@ -291,10 +291,10 @@ milvus.milvus.io "my-release" deleted
       
       
    • -

      Iniciar Pulsar V3 e Milvus

      Nesta etapa, você precisa iniciar os pods Pulsar V3 e Milvus. Há duas seções separadas disponíveis:

      +

      Iniciar o Pulsar V3 e o Milvus

      Nesta etapa, você precisa iniciar os pods Pulsar V3 e Milvus. Há duas seções separadas disponíveis:

      • Para o utilizador do Helm

        -

        Se instalou o Milvus usando a tabela do Milvus Helm, vá para For Helm User.

      • +

        Se instalou o Milvus usando o gráfico do Milvus Helm, vá para For Helm User.

      • Para utilizadores do Milvus Operator

        Se instalou o Milvus usando o Milvus Operator, vá para Para o utilizador do Milvus Operator.

      @@ -302,12 +302,12 @@ milvus.milvus.io "my-release" deleted
    • Edite o values.yaml salvo no Passo anterior.

      # change the following:​
       pulsar:​
      -enabled: false # set to false​
      -# you may also clean up rest fields under pulsar field​
      -# it's ok to keep them though.​
      +  enabled: false # set to false​
      +  # you may also clean up rest fields under pulsar field​
      +  # it's ok to keep them though.​
       pulsarv3:​
      -enabled: true​
      -# append other values for pulsar v3 chart if needs​
      +  enabled: true​
      +  # append other values for pulsar v3 chart if needs​
       
       
    • Atualize seu repositório helm local

      @@ -368,13 +368,13 @@ my-release-pulsarv3-zookeeper-2 < apiVersion: milvus.io/v1beta1​ kind: Milvus​ metadata:​ -annotations: null # this field should be removed or set to null​ -resourceVersion: null # this field should be removed or set to null​ -uid: null # this field should be removed or set to null​ + annotations: null # this field should be removed or set to null​ + resourceVersion: null # this field should be removed or set to null​ + uid: null # this field should be removed or set to null​ spec:​ -dependencies:​ + dependencies:​ pulsar:​ - inCluster:​ + inCluster:​ chartVersion: pulsar-v3​ # delete all previous values for pulsar v2 and set it to null.​ # you may add additional values here for pulsar v3 if you're sure about it.​ diff --git a/localization/v2.5.x/site/pt/getstarted/run-milvus-k8s/install_cluster-helm.json b/localization/v2.5.x/site/pt/getstarted/run-milvus-k8s/install_cluster-helm.json index 65ce6cfec..efea2ba53 100644 --- a/localization/v2.5.x/site/pt/getstarted/run-milvus-k8s/install_cluster-helm.json +++ b/localization/v2.5.x/site/pt/getstarted/run-milvus-k8s/install_cluster-helm.json @@ -1 +1 @@ -{"codeList":["$ kubectl get sc\n\nNAME PROVISIONER RECLAIMPOLICY VOLUMEBIINDINGMODE ALLOWVOLUMEEXPANSION AGE\nstandard (default) k8s.io/minikube-hostpath Delete Immediate false \n","$ helm repo add milvus https://github.com/zilliztech/milvus-helm\n","helm repo add zilliztech https://github.com/zilliztech/milvus-helm\nhelm repo update\n# upgrade existing helm release\nhelm upgrade my-release zilliztech/milvus\n","$ helm repo update\n","$ helm install my-release milvus/milvus\n","$ kubectl get pods\n","NAME READY STATUS RESTARTS AGE\nmy-release-etcd-0 1/1 Running 0 3m23s\nmy-release-etcd-1 1/1 Running 0 3m23s\nmy-release-etcd-2 1/1 Running 0 3m23s\nmy-release-milvus-datanode-68cb87dcbd-4khpm 1/1 Running 0 3m23s\nmy-release-milvus-indexnode-5c5f7b5bd9-l8hjg 1/1 Running 0 3m24s\nmy-release-milvus-mixcoord-7fb9488465-dmbbj 1/1 Running 0 3m23s\nmy-release-milvus-proxy-6bd7f5587-ds2xv 1/1 Running 0 3m24s\nmy-release-milvus-querynode-5cd8fff495-k6gtg 1/1 Running 0 3m24s\nmy-release-minio-0 1/1 Running 0 3m23s\nmy-release-minio-1 1/1 Running 0 3m23s\nmy-release-minio-2 1/1 Running 0 3m23s\nmy-release-minio-3 1/1 Running 0 3m23s\nmy-release-pulsar-autorecovery-86f5dbdf77-lchpc 1/1 Running 0 3m24s\nmy-release-pulsar-bookkeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-bookkeeper-1 1/1 Running 0 98s\nmy-release-pulsar-broker-556ff89d4c-2m29m 1/1 Running 0 3m23s\nmy-release-pulsar-proxy-6fbd75db75-nhg4v 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-metadata-98zbr 0/1 Completed 0 3m24s\n","$ kubectl get pod my-release-milvus-proxy-6bd7f5587-ds2xv --template\n='{{(index (index .spec.containers 0).ports 0).containerPort}}{{\"\\n\"}}'\n19530\n","$ kubectl port-forward service/my-release-milvus 27017:19530\nForwarding from 127.0.0.1:27017 -> 19530\n","$ kubectl port-forward --address 0.0.0.0 service/my-release-milvus 27017:19530\nForwarding from 0.0.0.0:27017 -> 19530\n","$ helm template my-release milvus/milvus > milvus_manifest.yaml\n","$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/requirements.txt\n$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/save_image.py\n","$ pip3 install -r requirements.txt\n$ python3 save_image.py --manifest milvus_manifest.yaml\n","$ for image in $(find . 
-type f -name \"*.tar.gz\") ; do gunzip -c $image | docker load; done\n","$ kubectl apply -f milvus_manifest.yaml\n","$ helm repo update\n$ helm upgrade my-release zilliztech/milvus\n","$ helm uninstall my-release\n"],"headingContent":"Run Milvus in Kubernetes with Helm","anchorList":[{"label":"Executar o Milvus no Kubernetes com o Helm","href":"Run-Milvus-in-Kubernetes-with-Helm","type":1,"isActive":false},{"label":"Visão geral","href":"Overview","type":2,"isActive":false},{"label":"Pré-requisitos","href":"Prerequisites","type":2,"isActive":false},{"label":"Instalar o Milvus Helm Chart","href":"Install-Milvus-Helm-Chart","type":2,"isActive":false},{"label":"Instalação online","href":"Online-install","type":2,"isActive":false},{"label":"Instalação offline","href":"Offline-install","type":2,"isActive":false},{"label":"Atualizar o cluster do Milvus em execução","href":"Upgrade-running-Milvus-cluster","type":2,"isActive":false},{"label":"Desinstalar o Milvus","href":"Uninstall-Milvus","type":2,"isActive":false},{"label":"O que vem a seguir","href":"Whats-next","type":2,"isActive":false}]} \ No newline at end of file +{"codeList":["$ kubectl get sc\n\nNAME PROVISIONER RECLAIMPOLICY VOLUMEBIINDINGMODE ALLOWVOLUMEEXPANSION AGE\nstandard (default) k8s.io/minikube-hostpath Delete Immediate false \n","$ helm repo add milvus https://zilliztech.github.io/milvus-helm/\n","helm repo add zilliztech https://zilliztech.github.io/milvus-helm/\nhelm repo update\n# upgrade existing helm release\nhelm upgrade my-release zilliztech/milvus\n","$ helm repo update\n","$ helm install my-release milvus/milvus\n","$ kubectl get pods\n","NAME READY STATUS RESTARTS AGE\nmy-release-etcd-0 1/1 Running 0 3m23s\nmy-release-etcd-1 1/1 Running 0 3m23s\nmy-release-etcd-2 1/1 Running 0 3m23s\nmy-release-milvus-datanode-68cb87dcbd-4khpm 1/1 Running 0 3m23s\nmy-release-milvus-indexnode-5c5f7b5bd9-l8hjg 1/1 Running 0 3m24s\nmy-release-milvus-mixcoord-7fb9488465-dmbbj 1/1 Running 0 3m23s\nmy-release-milvus-proxy-6bd7f5587-ds2xv 1/1 Running 0 3m24s\nmy-release-milvus-querynode-5cd8fff495-k6gtg 1/1 Running 0 3m24s\nmy-release-minio-0 1/1 Running 0 3m23s\nmy-release-minio-1 1/1 Running 0 3m23s\nmy-release-minio-2 1/1 Running 0 3m23s\nmy-release-minio-3 1/1 Running 0 3m23s\nmy-release-pulsar-autorecovery-86f5dbdf77-lchpc 1/1 Running 0 3m24s\nmy-release-pulsar-bookkeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-bookkeeper-1 1/1 Running 0 98s\nmy-release-pulsar-broker-556ff89d4c-2m29m 1/1 Running 0 3m23s\nmy-release-pulsar-proxy-6fbd75db75-nhg4v 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-metadata-98zbr 0/1 Completed 0 3m24s\n","$ kubectl get pod my-release-milvus-proxy-6bd7f5587-ds2xv --template\n='{{(index (index .spec.containers 0).ports 0).containerPort}}{{\"\\n\"}}'\n19530\n","$ kubectl port-forward service/my-release-milvus 27017:19530\nForwarding from 127.0.0.1:27017 -> 19530\n","$ kubectl port-forward --address 0.0.0.0 service/my-release-milvus 27017:19530\nForwarding from 0.0.0.0:27017 -> 19530\n","$ helm template my-release milvus/milvus > milvus_manifest.yaml\n","$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/requirements.txt\n$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/save_image.py\n","$ pip3 install -r requirements.txt\n$ python3 save_image.py --manifest milvus_manifest.yaml\n","$ for image in $(find . 
-type f -name \"*.tar.gz\") ; do gunzip -c $image | docker load; done\n","$ kubectl apply -f milvus_manifest.yaml\n","$ helm repo update\n$ helm upgrade my-release zilliztech/milvus\n","$ helm uninstall my-release\n"],"headingContent":"Run Milvus in Kubernetes with Helm","anchorList":[{"label":"Executar o Milvus no Kubernetes com o Helm","href":"Run-Milvus-in-Kubernetes-with-Helm","type":1,"isActive":false},{"label":"Visão geral","href":"Overview","type":2,"isActive":false},{"label":"Pré-requisitos","href":"Prerequisites","type":2,"isActive":false},{"label":"Instalar o Milvus Helm Chart","href":"Install-Milvus-Helm-Chart","type":2,"isActive":false},{"label":"Instalação online","href":"Online-install","type":2,"isActive":false},{"label":"Instalação offline","href":"Offline-install","type":2,"isActive":false},{"label":"Atualizar o cluster do Milvus em execução","href":"Upgrade-running-Milvus-cluster","type":2,"isActive":false},{"label":"Desinstalar o Milvus","href":"Uninstall-Milvus","type":2,"isActive":false},{"label":"O que vem a seguir","href":"Whats-next","type":2,"isActive":false}]} \ No newline at end of file diff --git a/localization/v2.5.x/site/pt/getstarted/run-milvus-k8s/install_cluster-helm.md b/localization/v2.5.x/site/pt/getstarted/run-milvus-k8s/install_cluster-helm.md index fb2bd7ff9..faa2f8fb9 100644 --- a/localization/v2.5.x/site/pt/getstarted/run-milvus-k8s/install_cluster-helm.md +++ b/localization/v2.5.x/site/pt/getstarted/run-milvus-k8s/install_cluster-helm.md @@ -62,7 +62,7 @@ NAME PROVISIONER RECLAIMPOLICY VOLUMEBIINDI standard (default) k8s.io/minikube-hostpath Delete Immediate false
    • Verifique os requisitos de hardware e software antes da instalação.

    • -
    • Antes de instalar o Milvus, recomenda-se a utilização da Milvus Sizing Tool para estimar os requisitos de hardware com base no tamanho dos dados. Isso ajuda a garantir o desempenho ideal e a alocação de recursos para a instalação do Milvus.

    • +
    • Antes de instalar o Milvus, é recomendável usar a Milvus Sizing Tool para estimar os requisitos de hardware com base no tamanho dos seus dados. Isso ajuda a garantir o desempenho ideal e a alocação de recursos para a instalação do Milvus.

    • Se encontrar algum problema ao puxar a imagem, contacte-nos em community@zilliz.com com detalhes sobre o problema, e nós forneceremos o suporte necessário.

      @@ -83,11 +83,11 @@ NAME PROVISIONER RECLAIMPOLICY VOLUMEBIINDI >

      Antes de instalar o Milvus Helm Charts, é necessário adicionar o repositório Milvus Helm.

      -
      $ helm repo add milvus https://github.com/zilliztech/milvus-helm
      +
      $ helm repo add milvus https://zilliztech.github.io/milvus-helm/
       

      O repositório do Milvus Helm Charts em https://github.com/milvus-io/milvus-helm foi arquivado e pode obter mais actualizações em https://github.com/zilliztech/milvus-helm da seguinte forma:

      -
      helm repo add zilliztech https://github.com/zilliztech/milvus-helm
      +
      helm repo add zilliztech https://zilliztech.github.io/milvus-helm/
       helm repo update
       # upgrade existing helm release
       helm upgrade my-release zilliztech/milvus
      @@ -122,7 +122,7 @@ helm upgrade my-release zilliztech/milvus
         
      • O nome da versão deve conter apenas letras, números e traços. Não são permitidos pontos no nome da versão.
      • A linha de comando padrão instala a versão de cluster do Milvus durante a instalação do Milvus com o Helm. São necessárias outras definições durante a instalação do Milvus autónomo.
      • -
      • De acordo com o guia de migração de APIs obsoletas do Kubernetes, a versão da API policy/v1beta1 do PodDisruptionBudget não é mais servida a partir da versão 1.25. Sugere-se que você migre manifestos e clientes de API para usar a versão de API policy/v1.
        Como solução alternativa para usuários que ainda usam a versão da API policy/v1beta1 do PodDisruptionBudget no Kubernetes v1.25 e posterior, você pode executar o seguinte comando para instalar o Milvus:
        helm install my-release milvus/milvus --set pulsar.bookkeeper.pdb.usePolicy=false,pulsar.broker.pdb.usePolicy=false,pulsar.proxy.pdb.usePolicy=false,pulsar.zookeeper.pdb.usePolicy=false
      • +
      • De acordo com o guia de migração de APIs obsoletas do Kubernetes, a versão da API policy/v1beta1 do PodDisruptionBudget não é mais servida a partir da versão 1.25. Sugere-se que você migre manifestos e clientes de API para usar a versão de API policy/v1.
        Como solução alternativa para os usuários que ainda usam a versão da API policy/v1beta1 do PodDisruptionBudget no Kubernetes v1.25 e posterior, você pode executar o seguinte comando para instalar o Milvus:
        helm install my-release milvus/milvus --set pulsar.bookkeeper.pdb.usePolicy=false,pulsar.broker.pdb.usePolicy=false,pulsar.proxy.pdb.usePolicy=false,pulsar.zookeeper.pdb.usePolicy=false
      • Consulte Gráfico de Helm do Milvus e Helm para obter mais informações.
      @@ -275,8 +275,8 @@ $ helm upgrade my-release zilliztech/milvus
    • Pesquisa híbrida
    • Atualizar o Milvus usando o Helm Chart.

    • -
    • Escalar o cluster do Milvus.

    • -
    • Implante seu cluster Milvus em nuvens:

      +
    • Escalar seu cluster Milvus.

    • +
    • Implantar seu cluster Milvus em nuvens:

      • Amazon EKS
      • Google Cloud
      • diff --git a/localization/v2.5.x/site/pt/home/home.md b/localization/v2.5.x/site/pt/home/home.md index 89b15e62e..95beb5b58 100644 --- a/localization/v2.5.x/site/pt/home/home.md +++ b/localization/v2.5.x/site/pt/home/home.md @@ -113,8 +113,8 @@ Aqui aprenderá o que é o Milvus e como instalar, utilizar e implementar o Milv

        Nov 2024 - Lançamento do Milvus 2.5.0

          -
        • Adicionadas orientações sobre como efetuar uma pesquisa de texto completo.
        • -
        • Adicionada orientação sobre como conduzir a correspondência de palavras-chave.
        • +
        • Adicionada orientação sobre como conduzir uma pesquisa de texto completo.
        • +
        • Adicionada orientação sobre como conduzir a correspondência de texto.
        • Adicionada orientação sobre como ativar valores anuláveis e predefinidos.
        • Adicionadas descrições de analisadores.
        • Adicionadas descrições de índices bitmap.
        • diff --git a/localization/v2.5.x/site/pt/menuStructure/pt.json b/localization/v2.5.x/site/pt/menuStructure/pt.json index 0067d77fe..568ec4e85 100644 --- a/localization/v2.5.x/site/pt/menuStructure/pt.json +++ b/localization/v2.5.x/site/pt/menuStructure/pt.json @@ -286,6 +286,18 @@ "order": 8, "children": [] }, + { + "label": "Tipos métricos", + "id": "metric.md", + "order": 9, + "children": [] + }, + { + "label": "Nível de consistência", + "id": "consistency.md", + "order": 10, + "children": [] + }, { "label": "Réplica na memória", "id": "replica.md", @@ -602,31 +614,39 @@ ] }, { - "label": "Gerir índices", + "label": "Índices", "id": "manage_indexes", "order": 4, "isMenu": true, "children": [ { - "label": "Campos de vectores de índice", + "label": "Índices vectoriais", "id": "index-vector-fields.md", "order": 0, "children": [] }, { - "label": "Campos escalares de índice", - "id": "index-scalar-fields.md", + "label": "Índices escalares", + "id": "scalar-index", "order": 1, - "children": [] - }, - { - "label": "Índice BITMAP", - "id": "bitmap.md", - "order": 2, - "children": [] + "isMenu": true, + "children": [ + { + "label": "Campos escalares de índice", + "id": "index-scalar-fields.md", + "order": 1, + "children": [] + }, + { + "label": "Índice de bitmap", + "id": "bitmap.md", + "order": 2, + "children": [] + } + ] }, { - "label": "Índice com GPU", + "label": "Índices activados por GPU", "id": "index-with-gpu.md", "order": 3, "children": [] @@ -682,7 +702,7 @@ "children": [] }, { - "label": "Correspondência de palavras-chave", + "label": "Correspondência de texto", "id": "keyword-match.md", "order": 7, "children": [] @@ -699,30 +719,6 @@ "order": 9, "children": [] }, - { - "label": "Utilizar o mmap", - "id": "mmap.md", - "order": 10, - "children": [] - }, - { - "label": "Compactação de clusters", - "id": "clustering-compaction.md", - "order": 11, - "children": [] - }, - { - "label": "Nível de consistência", - "id": "consistency.md", - "order": 12, - "children": [] - }, - { - "label": "Tipos métricos", - "id": "metric.md", - "order": 13, - "children": [] - }, { "label": "Filtragem de metadados", "id": "boolean.md", @@ -736,26 +732,6 @@ "children": [] } ] - }, - { - "label": "Importação de dados", - "id": "data_import", - "order": 6, - "isMenu": true, - "children": [ - { - "label": "Preparar dados de origem", - "id": "prepare-source-data.md", - "order": 0, - "children": [] - }, - { - "label": "Importar dados", - "id": "import-data.md", - "order": 1, - "children": [] - } - ] } ] }, @@ -897,11 +873,31 @@ } ] }, + { + "label": "Importação de dados", + "id": "data_import", + "order": 5, + "isMenu": true, + "children": [ + { + "label": "Preparar dados de origem", + "id": "prepare-source-data.md", + "order": 0, + "children": [] + }, + { + "label": "Importar dados", + "id": "import-data.md", + "order": 1, + "children": [] + } + ] + }, { "label": "Migração de Milvus", "id": "milvus_migration", "isMenu": true, - "order": 5, + "order": 6, "children": [ { "label": "Visão geral", @@ -1321,10 +1317,30 @@ } ] }, + { + "label": "Otimização do armazenamento", + "id": "storage_optimization", + "order": 10, + "isMenu": true, + "children": [ + { + "label": "Utilizar o mmap", + "id": "mmap.md", + "order": 0, + "children": [] + }, + { + "label": "Compactação de clusters", + "id": "clustering-compaction.md", + "order": 1, + "children": [] + } + ] + }, { "label": "Segurança", "id": "security", - "order": 10, + "order": 11, "isMenu": true, "children": [ { diff --git 
a/localization/v2.5.x/site/pt/release_notes.md b/localization/v2.5.x/site/pt/release_notes.md index e753c808f..cb20c808e 100644 --- a/localization/v2.5.x/site/pt/release_notes.md +++ b/localization/v2.5.x/site/pt/release_notes.md @@ -50,17 +50,17 @@ title: Notas de lançamento

          WebUI de gestão de clusters (Beta)

          Para melhor suportar dados massivos e funcionalidades ricas, o design sofisticado do Milvus inclui várias dependências, numerosas funções de nó, estruturas de dados complexas e muito mais. Estes aspectos podem representar desafios para a utilização e manutenção.

          Milvus 2.5 introduz uma WebUI de Gerenciamento de Cluster embutida, reduzindo a dificuldade de manutenção do sistema através da visualização de informações complexas do ambiente de tempo de execução do Milvus. Isso inclui detalhes de bancos de dados e coleções, segmentos, canais, dependências, estado de saúde do nó, informações de tarefas, consultas lentas e muito mais.

          Correspondência de texto

          O Milvus 2.5 aproveita os analisadores e a indexação do Tantivy para o pré-processamento de texto e a construção de índices, suportando a correspondência precisa de linguagem natural de dados de texto com base em termos específicos. Esta funcionalidade é utilizada principalmente para pesquisa filtrada para satisfazer condições específicas e pode incorporar filtragem escalar para refinar os resultados da consulta, permitindo pesquisas de semelhança dentro de vectores que satisfaçam critérios escalares.

          -

          Para obter detalhes, consulte Correspondência de palavras-chave.

          +

          Para obter detalhes, consulte Correspondência de texto.
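          Um esboço mínimo e hipotético em Python (pymilvus 2.5) do fluxo descrito acima; os nomes da coleção e dos campos são ilustrativos e o filtro TEXT_MATCH segue a sintaxe documentada:

          import random
          from pymilvus import MilvusClient, DataType

          client = MilvusClient(uri="http://localhost:19530")

          schema = client.create_schema(auto_id=True)
          schema.add_field("id", DataType.INT64, is_primary=True)
          schema.add_field("text", DataType.VARCHAR, max_length=1000,
                           enable_analyzer=True,   # tokenize the raw text
                           enable_match=True)      # build the term index used by TEXT_MATCH
          schema.add_field("vector", DataType.FLOAT_VECTOR, dim=4)

          index_params = client.prepare_index_params()
          index_params.add_index(field_name="vector", index_type="AUTOINDEX", metric_type="L2")
          client.create_collection("demo_text_match", schema=schema, index_params=index_params)

          client.insert("demo_text_match", [
              {"text": "Milvus é uma base de dados vetorial", "vector": [random.random() for _ in range(4)]},
              {"text": "Pulsar fornece o registo de mensagens", "vector": [random.random() for _ in range(4)]},
          ])

          # Term-level matching on the tokenized text, usable alone or combined with scalar filters.
          res = client.query(collection_name="demo_text_match",
                             filter="TEXT_MATCH(text, 'Pulsar')",
                             output_fields=["text"])
          print(res)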

          Índice Bitmap

          Um novo índice de dados escalar foi adicionado à família Milvus. O índice BitMap utiliza uma matriz de bits, de comprimento igual ao número de linhas, para representar a existência de valores e acelerar as pesquisas.

          Os índices Bitmap têm sido tradicionalmente eficazes para campos de baixa cardinalidade, que têm um número modesto de valores distintos - por exemplo, uma coluna que contém informações de género com apenas dois valores possíveis: masculino e feminino.

          Para obter detalhes, consulte Índice de bitmap.
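          Um esboço hipotético de criação de um índice BITMAP num campo escalar de baixa cardinalidade (o nome da coleção e do campo são meramente ilustrativos):

          from pymilvus import MilvusClient

          client = MilvusClient(uri="http://localhost:19530")

          # BITMAP suits low-cardinality scalar fields such as a gender column.
          index_params = client.prepare_index_params()
          index_params.add_index(field_name="gender", index_type="BITMAP")

          client.create_index(collection_name="my_collection", index_params=index_params)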

          -

          Nullable & Valor Padrão

          Milvus agora suporta a configuração de propriedades anuláveis e valores padrão para campos escalares que não sejam o campo de chave primária. Para campos escalares marcados como nullable=True, os utilizadores podem omitir o campo ao inserir dados; o sistema irá tratá-lo como um valor nulo ou valor por defeito (se definido) sem lançar um erro.

          +

          Nullable & Valor Padrão

          Milvus agora suporta a configuração de propriedades anuláveis e valores padrão para campos escalares que não sejam o campo de chave primária. Para campos escalares marcados como nullable=True, os utilizadores podem omitir o campo ao inserir dados; o sistema irá tratá-lo como um valor nulo ou valor padrão (se definido) sem lançar um erro.

          Os valores por defeito e as propriedades anuláveis proporcionam uma maior flexibilidade ao Milvus. Os utilizadores podem utilizar esta funcionalidade para campos com valores incertos ao criar colecções. Também simplifica a migração de dados de outros sistemas de base de dados para o Milvus, permitindo a manipulação de conjuntos de dados que contêm valores nulos, preservando as definições originais de valores por defeito.

          -

          Para mais pormenores, consulte Nullable & Default Value.

          +

          Para mais informações, consulte Nullable & Default Value.
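          Um esboço mínimo (pymilvus 2.5) de como declarar estas propriedades no esquema; os nomes dos campos são ilustrativos:

          from pymilvus import MilvusClient, DataType

          client = MilvusClient(uri="http://localhost:19530")
          schema = client.create_schema(auto_id=True)

          schema.add_field("id", DataType.INT64, is_primary=True)
          schema.add_field("vector", DataType.FLOAT_VECTOR, dim=4)
          # Scalar field that may be omitted at insert time and stored as null.
          schema.add_field("age", DataType.INT64, nullable=True)
          # Scalar field that falls back to a default value when omitted.
          schema.add_field("status", DataType.VARCHAR, max_length=32, default_value="active")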

          HNSW SQ/PQ/PRQ baseado em Faiss

          Através de uma estreita colaboração com a comunidade Faiss, o algoritmo HNSW em Faiss registou melhorias significativas tanto na funcionalidade como no desempenho. Por questões de estabilidade e manutenção, o Milvus 2.5 migrou oficialmente seu suporte ao HNSW da hnswlib para o Faiss.

          Baseado em Faiss, Milvus 2.5 suporta múltiplos métodos de quantização em HNSW para atender às necessidades de diferentes cenários: SQ (Scalar Quantizers), PQ (Product Quantizer), e PRQ (Product Residual Quantizer). SQ e PQ são mais comuns; SQ oferece um bom desempenho de consulta e velocidade de construção, enquanto PQ oferece uma melhor recuperação com o mesmo rácio de compressão. Muitas bases de dados vectoriais utilizam normalmente a quantização binária, que é uma forma simples de quantização SQ.

          PRQ é uma fusão de PQ e AQ (Quantizador Aditivo). Em comparação com o PQ, requer tempos de construção mais longos para proporcionar uma melhor recuperação, especialmente a taxas de compressão elevadas, como, por exemplo, a compressão binária.
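          Esboço hipotético de criação de um índice HNSW quantizado, assumindo o tipo de índice HNSW_SQ e o parâmetro sq_type tal como descritos na referência de índices da versão 2.5 (nomes e valores meramente ilustrativos):

          from pymilvus import MilvusClient

          client = MilvusClient(uri="http://localhost:19530")

          # Collection and field names are illustrative; check the 2.5 index
          # reference for the exact HNSW_SQ options supported by your build.
          index_params = client.prepare_index_params()
          index_params.add_index(
              field_name="vector",
              index_type="HNSW_SQ",            # Faiss-based HNSW with scalar quantization
              metric_type="L2",
              params={"M": 30, "efConstruction": 360, "sq_type": "SQ8"},
          )
          client.create_index(collection_name="my_collection", index_params=index_params)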

          -

          Compactação por agrupamento (Beta)

          O Milvus 2.5 introduz o Clustering Compaction para acelerar as pesquisas e reduzir os custos em grandes colecções. Ao especificar um campo escalar como uma chave de agrupamento, os dados são redistribuídos por intervalo para otimizar o armazenamento e a recuperação. Agindo como um índice global, esse recurso permite que o Milvus remova eficientemente os dados durante as consultas com base em metadados de agrupamento, melhorando o desempenho da pesquisa quando filtros escalares são aplicados.

          +

          Compactação por agrupamento (Beta)

          O Milvus 2.5 introduz o Clustering Compaction para acelerar as pesquisas e reduzir os custos em grandes colecções. Ao especificar um campo escalar como uma chave de agrupamento, os dados são redistribuídos por intervalo para otimizar o armazenamento e a recuperação. Agindo como um índice global, este recurso permite que o Milvus retire eficientemente os dados durante as consultas baseadas em metadados de agrupamento, melhorando o desempenho da pesquisa quando filtros escalares são aplicados.

          Para obter detalhes, consulte Compactação de clustering.
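          Um esboço mínimo de como designar a chave de agrupamento ao definir o esquema (assumindo o parâmetro is_clustering_key exposto pelo pymilvus; nomes ilustrativos):

          from pymilvus import MilvusClient, DataType

          client = MilvusClient(uri="http://localhost:19530")
          schema = client.create_schema(auto_id=True)

          schema.add_field("id", DataType.INT64, is_primary=True)
          schema.add_field("vector", DataType.FLOAT_VECTOR, dim=4)
          # Scalar field used to redistribute data by range during clustering compaction.
          schema.add_field("region", DataType.VARCHAR, max_length=64, is_clustering_key=True)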

          Outros recursos

          Nó de fluxo contínuo (Beta)

          O Milvus 2.5 introduz um novo componente chamado nó de streaming, que fornece serviços de Write-Ahead Logging (WAL). Isto permite ao Milvus obter consenso antes e depois de ler e escrever canais, desbloqueando novas caraterísticas, funcionalidades e optimizações. Esta funcionalidade está desactivada por defeito no Milvus 2.5 e estará oficialmente disponível na versão 3.0.

          Suporte a IPv6

          Milvus agora suporta IPv6, permitindo maior conetividade e compatibilidade de rede.

          @@ -76,7 +76,7 @@ title: Notas de lançamento

        Melhorias

        Otimização da eliminação

        Melhoria da velocidade e redução da utilização de memória para eliminações em grande escala, optimizando a utilização de bloqueios e a gestão de memória.

        Atualização de dependências

        Atualizada para ETCD 3.5.16 e Pulsar 3.0.7 LTS, corrigindo CVEs existentes e melhorando a segurança. Nota: A atualização para o Pulsar 3.x não é compatível com as versões anteriores 2.x.

        -

        Para os utilizadores que já têm uma implementação Milvus em funcionamento, é necessário atualizar os componentes ETCD e Pulsar antes de poder utilizar as novas caraterísticas e funções. Para obter detalhes, consulte Atualizar a Pulsar de 2.x para 3.x

        +

        Para os utilizadores que já têm uma implementação Milvus a funcionar, é necessário atualizar os componentes ETCD e Pulsar antes de poder utilizar as novas caraterísticas e funções. Para obter detalhes, consulte Atualizar a Pulsar de 2.x para 3.x

        Armazenamento local V2

        Introduziu um novo formato de arquivo local no Milvus 2.5, melhorando a eficiência de carregamento e consulta para dados escalares, reduzindo a sobrecarga de memória e estabelecendo as bases para futuras otimizações.

        Otimização da análise de expressões

        Melhoria da análise de expressões através da implementação de cache para expressões repetidas, atualização do ANTLR e otimização do desempenho das cláusulas NOT IN.

        Desempenho aprimorado da simultaneidade de DDL

        Otimizado o desempenho de simultaneidade das operações da Linguagem de Definição de Dados (DDL).

        diff --git a/localization/v2.5.x/site/pt/tutorials/hybrid_search_with_milvus.md b/localization/v2.5.x/site/pt/tutorials/hybrid_search_with_milvus.md index a2ff58629..d8cce8497 100644 --- a/localization/v2.5.x/site/pt/tutorials/hybrid_search_with_milvus.md +++ b/localization/v2.5.x/site/pt/tutorials/hybrid_search_with_milvus.md @@ -25,7 +25,7 @@ title: Pesquisa híbrida com Milvus

        O Milvus suporta métodos de recuperação densos, esparsos e híbridos:

        • Recuperação Densa: Utiliza o contexto semântico para entender o significado por trás das consultas.
        • -
        • Recuperação esparsa: Dá ênfase à correspondência de palavras-chave para encontrar resultados com base em termos específicos, equivalente à pesquisa de texto completo.
        • +
        • Recuperação esparsa: Dá ênfase à correspondência de texto para encontrar resultados com base em termos específicos, equivalente à pesquisa de texto completo.
        • Recuperação híbrida: Combina as abordagens Densa e Esparsa, capturando todo o contexto e palavras-chave específicas para obter resultados de pesquisa abrangentes.

        Ao integrar estes métodos, a Pesquisa Híbrida Milvus equilibra as semelhanças semânticas e lexicais, melhorando a relevância geral dos resultados da pesquisa. Este bloco de notas irá percorrer o processo de configuração e utilização destas estratégias de recuperação, realçando a sua eficácia em vários cenários de pesquisa.
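        Um esboço mínimo e hipotético de uma pesquisa híbrida densa + esparsa (assume pymilvus >= 2.4 com MilvusClient.hybrid_search; a coleção, os campos e os vectores de consulta são ilustrativos e devem já existir com índices carregados):

        from pymilvus import MilvusClient, AnnSearchRequest, RRFRanker

        client = MilvusClient(uri="http://localhost:19530")

        # "hybrid_demo", "dense" and "sparse" are illustrative names.
        dense_req = AnnSearchRequest(
            data=[[0.1, 0.2, 0.3, 0.4]], anns_field="dense",
            param={"metric_type": "IP"}, limit=10,
        )
        sparse_req = AnnSearchRequest(
            data=[{1: 0.4, 100: 0.7}], anns_field="sparse",
            param={"metric_type": "IP"}, limit=10,
        )

        # Reciprocal Rank Fusion merges the two result lists into one ranking.
        results = client.hybrid_search(
            collection_name="hybrid_demo",
            reqs=[dense_req, sparse_req],
            ranker=RRFRanker(60),
            limit=5,
        )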

        @@ -71,7 +71,7 @@ Inference Embeddings: 100%|██████████| 32/32 [01:59<00:00

        Configurar a coleção e o índice Milvus

        Vamos configurar a coleção Milvus e criar índices para os campos vectoriais.

          -
        • Definir o uri como um ficheiro local, por exemplo, "./milvus.db", é o método mais conveniente, uma vez que utiliza automaticamente o Milvus Lite para armazenar todos os dados neste ficheiro.
        • +
        • Definir o uri como um ficheiro local, por exemplo "./milvus.db", é o método mais conveniente, uma vez que utiliza automaticamente o Milvus Lite para armazenar todos os dados neste ficheiro.
        • Se tiver uma grande escala de dados, digamos mais de um milhão de vectores, pode configurar um servidor Milvus mais eficiente em Docker ou Kubernetes. Nesta configuração, utilize o uri do servidor, por exemplo, http://localhost:19530, como o seu uri.
        • Se pretender utilizar o Zilliz Cloud, o serviço de nuvem totalmente gerido para o Milvus, ajuste o uri e o token, que correspondem ao Public Endpoint e à chave API no Zilliz Cloud.
        diff --git a/localization/v2.5.x/site/pt/userGuide/collections/manage-collections.md b/localization/v2.5.x/site/pt/userGuide/collections/manage-collections.md index 507f83f78..a402ed13c 100644 --- a/localization/v2.5.x/site/pt/userGuide/collections/manage-collections.md +++ b/localization/v2.5.x/site/pt/userGuide/collections/manage-collections.md @@ -17,7 +17,7 @@ title: Explicação da coleção d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z" > -

        No Milvus, pode criar várias colecções para gerir os seus dados e inserir os seus dados como entidades nas colecções. A coleção e a entidade são semelhantes às tabelas e registos nas bases de dados relacionais. Esta página ajuda-o a aprender sobre a coleção e conceitos relacionados.

        +

        No Milvus, pode criar várias colecções para gerir os seus dados e inserir os seus dados como entidades nas colecções. A coleção e a entidade são semelhantes a tabelas e registos em bases de dados relacionais. Esta página ajuda-o a aprender sobre a coleção e conceitos relacionados.

        Coleção

        À semelhança do campo primário numa base de dados relacional, uma coleção tem um campo primário para distinguir uma entidade de outras. Cada valor no campo primário é globalmente único e corresponde a uma entidade específica.

        +

        À semelhança do campo primário numa base de dados relacional, uma coleção tem um campo primário para distinguir uma entidade das outras. Cada valor no campo primário é globalmente único e corresponde a uma entidade específica.

        Como mostrado no gráfico acima, o campo denominado id serve como campo primário, e o primeiro ID 0 corresponde a uma entidade intitulada "A taxa de mortalidade do coronavírus não é importante". Não haverá nenhuma outra entidade que tenha o campo primário 0.

        Um campo primário aceita apenas números inteiros ou cadeias de caracteres. Ao inserir entidades, você deve incluir os valores do campo primário por padrão. No entanto, se tiver ativado o AutoId aquando da criação da coleção, o Milvus irá gerar esses valores aquando da inserção de dados. Nesse caso, exclua os valores do campo primário das entidades a serem inseridas.

        Para mais informações, consulte Primary Field & AutoID.

        @@ -130,7 +130,7 @@ title: Explicação da coleção d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z" > -

        Carregar uma coleção é o pré-requisito para realizar pesquisas e consultas por similaridade nas coleções. Ao carregar uma coleção, o Milvus carrega todos os arquivos de índice e os dados brutos em cada campo na memória para uma resposta rápida às pesquisas e consultas.

        +

        Carregar uma coleção é o pré-requisito para realizar pesquisas e consultas por similaridade nas coleções. Quando você carrega uma coleção, o Milvus carrega todos os arquivos de índice e os dados brutos em cada campo na memória para uma resposta rápida às pesquisas e consultas.

        As pesquisas e consultas são operações que consomem muita memória. Para poupar custos, é aconselhável libertar as colecções que não estão a ser utilizadas.

        Para obter mais detalhes, consulte Carregar e liberar.
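        Um esboço mínimo do ciclo carregar/libertar com o MilvusClient (o nome da coleção é ilustrativo):

        from pymilvus import MilvusClient

        client = MilvusClient(uri="http://localhost:19530")

        client.load_collection("my_collection")          # bring index files and raw data into memory
        print(client.get_load_state("my_collection"))    # verify the collection is loaded

        client.release_collection("my_collection")       # free memory when the collection is idle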

        Pesquisa e consulta

        Depois de criar índices e carregar a coleção, pode iniciar uma pesquisa por semelhança alimentando um ou vários vectores de consulta. Por exemplo, ao receber a representação vetorial da sua consulta transportada num pedido de pesquisa, o Milvus utiliza o tipo de métrica especificado para medir a semelhança entre o vetor de consulta e os vectores da coleção de destino antes de devolver os que são semanticamente semelhantes à consulta.

        Também é possível incluir a filtragem de metadados nas pesquisas e consultas para melhorar a relevância dos resultados. Note que as condições de filtragem de metadados são obrigatórias nas consultas, mas opcionais nas pesquisas.

        Para obter detalhes sobre os tipos de métricas aplicáveis, consulte Tipos de métricas.
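        A título de ilustração, um esboço em Python de uma pesquisa por semelhança com filtragem de metadados e de uma consulta; os nomes, a expressão de filtro e a métrica são meramente ilustrativos:

        from pymilvus import MilvusClient

        client = MilvusClient(uri="http://localhost:19530")

        # Similarity search where the metadata filter is optional.
        results = client.search(
            collection_name="my_collection",
            data=[[0.1, 0.2, 0.3, 0.4]],          # query vector(s)
            limit=5,
            filter='color == "red"',
            output_fields=["color"],
            search_params={"metric_type": "L2"},
        )

        # A query, by contrast, always requires a filter expression.
        rows = client.query(
            collection_name="my_collection",
            filter='color == "red"',
            output_fields=["color"],
        )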

        -

        Para obter mais informações sobre pesquisas e consultas, consulte os artigos do capítulo Pesquisa e classificação, entre os quais se encontram as funcionalidades básicas.

        +

        Para obter mais informações sobre pesquisas e consultas, consulte os artigos no capítulo Pesquisa e classificação, entre os quais se encontram as funcionalidades básicas.

        -

        Além disso, o Milvus também fornece melhorias para melhorar o desempenho e a eficiência da pesquisa. Estão desactivadas por defeito e pode activá-las e utilizá-las de acordo com os seus requisitos de serviço. São elas

        +

        Além disso, o Milvus também fornece melhorias para melhorar o desempenho e a eficiência da pesquisa. Estas estão desactivadas por defeito e pode activá-las e utilizá-las de acordo com os seus requisitos de serviço. São elas

        • Usar chave de partição

        • Usar mmap

        • @@ -236,7 +236,7 @@ title: Explicação da coleção d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z" > -

          É possível definir funções para o Milvus para derivar campos na criação da coleção. Por exemplo, a função de pesquisa de texto completo utiliza a função definida pelo utilizador para derivar um campo de vetor esparso de um campo varchar específico. Para obter mais informações sobre a pesquisa de texto completo, consulte Pesquisa de texto completo.

          +

          É possível definir funções para o Milvus derivar campos aquando da criação da coleção. Por exemplo, a função de pesquisa de texto completo utiliza a função definida pelo utilizador para derivar um campo de vetor esparso de um campo varchar específico. Para obter mais informações sobre a pesquisa de texto completo, consulte Pesquisa de texto completo.
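          Um esboço mínimo (pymilvus 2.5) de uma função BM25 que deriva um campo de vetor esparso a partir de um campo varchar; os nomes dos campos e da função são ilustrativos:

          from pymilvus import MilvusClient, DataType, Function, FunctionType

          client = MilvusClient(uri="http://localhost:19530")
          schema = client.create_schema(auto_id=True)

          schema.add_field("id", DataType.INT64, is_primary=True)
          schema.add_field("text", DataType.VARCHAR, max_length=1000, enable_analyzer=True)
          schema.add_field("sparse", DataType.SPARSE_FLOAT_VECTOR)

          # Derive the sparse field from the varchar field at insert time (full-text search).
          schema.add_function(Function(
              name="text_to_sparse",
              function_type=FunctionType.BM25,
              input_field_names=["text"],
              output_field_names=["sparse"],
          ))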

          Nível de consistência

          Depois de criar seu índice GPU, a próxima etapa é preparar os parâmetros de pesquisa antes de realizar uma pesquisa.

          +

          Depois de criar o índice GPU, a próxima etapa é preparar os parâmetros de pesquisa antes de realizar uma pesquisa.

          Preparar parâmetros de pesquisa

          Abaixo estão exemplos de configurações para diferentes tipos de índice:

          • Índice GPU_BRUTE_FORCE

            @@ -162,7 +162,7 @@ collection.create_index(
    • Os principais parâmetros de pesquisa incluem:

        -
      • itopk_size: Determina o tamanho dos resultados intermediários mantidos durante a pesquisa. Um valor maior pode melhorar a recuperação às custas do desempenho da pesquisa. Ele deve ser pelo menos igual ao valor final do top-k(limite) e é tipicamente uma potência de 2 (por exemplo, 16, 32, 64, 128).

      • +
      • itopk_size: Determina o tamanho dos resultados intermediários mantidos durante a pesquisa. Um valor maior pode melhorar a recuperação à custa do desempenho da pesquisa. Ele deve ser pelo menos igual ao valor final do top-k(limite) e é tipicamente uma potência de 2 (por exemplo, 16, 32, 64, 128).

      • search_width: Especifica o número de pontos de entrada no gráfico CAGRA durante a pesquisa. O aumento deste valor pode melhorar a recuperação, mas pode afetar o desempenho da pesquisa.

      • min_iterations / max_iterations: Estes parâmetros controlam o processo de iteração da pesquisa. Por padrão, eles são definidos como 0, e o CAGRA determina automaticamente o número de iterações com base em itopk_size e search_width. O ajuste manual desses valores pode ajudar a equilibrar o desempenho e a precisão.

      • team_size: Especifica o número de threads CUDA usadas para calcular a distância métrica na GPU. Os valores comuns são uma potência de 2 até 32 (por exemplo, 2, 4, 8, 16, 32). Tem um impacto menor no desempenho da pesquisa. O valor predefinido é 0, em que o Milvus seleciona automaticamente o team_size com base na dimensão do vetor.
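      A título de exemplo, um esboço hipotético destes parâmetros numa pesquisa com a API ORM do pymilvus (nomes e valores meramente ilustrativos; pressupõe uma coleção existente com um índice GPU_CAGRA já criado):

      from pymilvus import connections, Collection

      connections.connect(uri="http://localhost:19530")
      collection = Collection("my_gpu_collection")   # illustrative name; GPU_CAGRA index assumed
      collection.load()

      search_params = {
          "metric_type": "L2",
          "params": {
              "itopk_size": 128,    # intermediate result size; >= limit and a power of 2
              "search_width": 4,    # entry points into the CAGRA graph
              "team_size": 0,       # 0 lets Milvus pick a value from the vector dimension
          },
      }

      results = collection.search(
          data=[[0.1] * 128],       # query vector; dimension must match the schema
          anns_field="vector",
          param=search_params,
          limit=10,
      )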

      • @@ -203,7 +203,7 @@ collection.search(

        Ao usar índices de GPU, esteja ciente de certas restrições:

          -
        • Para GPU_IVF_FLAT, o valor máximo para o limite é 256.

        • +
        • Para GPU_IVF_FLAT, o valor máximo para o limite é 1024.

        • Para GPU_IVF_PQ e GPU_CAGRA, o valor máximo para o limite é 1024.

        • Embora não exista um limite definido para o limite em GPU_BRUTE_FORCE, recomenda-se que não exceda 4096 para evitar potenciais problemas de desempenho.

        • Atualmente, os índices GPU não suportam a distância COSINE. Se a distância COSINE for necessária, os dados devem ser normalizados primeiro e, em seguida, a distância do produto interno (IP) pode ser usada como um substituto.
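        Por exemplo, um pequeno esboço em Python da normalização referida acima (após a normalização L2, o produto interno ordena os resultados tal como a semelhança do cosseno):

        import numpy as np

        # COSINE is not supported by GPU indexes: L2-normalize the vectors first,
        # then inner product (IP) ranks them identically to cosine similarity.
        vectors = np.random.random((100, 128)).astype(np.float32)
        normalized = vectors / np.linalg.norm(vectors, axis=1, keepdims=True)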

        • diff --git a/localization/v2.5.x/site/pt/userGuide/schema/analyzer/analyzer-overview.md b/localization/v2.5.x/site/pt/userGuide/schema/analyzer/analyzer-overview.md index 5bc299e58..7dc476ccd 100644 --- a/localization/v2.5.x/site/pt/userGuide/schema/analyzer/analyzer-overview.md +++ b/localization/v2.5.x/site/pt/userGuide/schema/analyzer/analyzer-overview.md @@ -24,12 +24,12 @@ summary: >- >

          No processamento de texto, um analisador é um componente crucial que converte o texto em bruto num formato estruturado e pesquisável. Cada analisador é normalmente composto por dois elementos principais: tokenizador e filtro. Juntos, eles transformam o texto de entrada em tokens, refinam esses tokens e preparam-nos para uma indexação e recuperação eficientes.

          -

          No Milvus, os analisadores são configurados durante a criação da coleção quando se adicionam os campos VARCHAR ao esquema da coleção. Os símbolos produzidos por um analisador podem ser utilizados para construir um índice para correspondência de palavras-chave ou convertidos em embeddings esparsos para pesquisa de texto completo. Para obter mais informações, consulte Correspondência de palavras-chave ou Pesquisa de texto completo.

          +

          No Milvus, os analisadores são configurados durante a criação da coleção quando se adicionam os campos VARCHAR ao esquema da coleção. Os tokens produzidos por um analisador podem ser usados para construir um índice para correspondência de texto ou convertidos em embeddings esparsos para pesquisa de texto completo. Para obter mais informações, consulte Correspondência de texto ou Pesquisa de texto completo.

          A utilização de analisadores pode afetar o desempenho.

          • Pesquisa de texto completo: Para a pesquisa de texto completo, os canais DataNode e QueryNode consomem dados mais lentamente porque precisam aguardar a conclusão da tokenização. Como resultado, os dados recém-ingressados levam mais tempo para ficarem disponíveis para pesquisa.

          • -
          • Correspondência de palavras-chave: Para a correspondência de palavras-chave, a criação de índices também é mais lenta, pois a tokenização precisa ser concluída antes que um índice possa ser criado.

          • +
          • Correspondência de texto: Para a correspondência de texto, a criação de índices também é mais lenta, pois a tokenização precisa ser concluída antes que um índice possa ser criado.

          Anatomia de um analisador

          Um analisador no Milvus consiste exatamente num tokenizador e em zero ou mais filtros.

            -
          • Tokenizador: O tokenizador divide o texto de entrada em unidades discretas chamadas tokens. Estes tokens podem ser palavras ou frases, dependendo do tipo de tokenizador.

          • +
          • Tokenizador: O tokenizador divide o texto de entrada em unidades discretas chamadas tokens. Esses tokens podem ser palavras ou frases, dependendo do tipo de tokenizador.

          • Filtros: Os filtros podem ser aplicados aos tokens para refiná-los ainda mais, por exemplo, tornando-os minúsculos ou removendo palavras comuns.

          O fluxo de trabalho abaixo mostra como um analisador processa o texto.

          @@ -101,7 +101,7 @@ summary: >-

          O Milvus oferece os seguintes analisadores incorporados, cada um dos quais pode ser utilizado diretamente especificando o seu nome como parâmetro type.

          • standard: Adequado para processamento de texto de uso geral, aplicando tokenização padrão e filtragem de letras minúsculas.

          • -
          • english: Optimizado para texto em inglês, com suporte para stop words em inglês.

          • +
          • english: Optimizado para texto em língua inglesa, com suporte para palavras de paragem em inglês.

          • chinese: Especializado para o processamento de texto chinês, incluindo tokenização adaptada às estruturas da língua chinesa.
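          Um esboço mínimo de utilização de um analisador incorporado, selecionado pelo parâmetro type (esquema e nomes de campos ilustrativos):

          from pymilvus import MilvusClient, DataType

          client = MilvusClient(uri="http://localhost:19530")
          schema = client.create_schema(auto_id=True)
          schema.add_field("id", DataType.INT64, is_primary=True)

          # Select a built-in analyzer by name; no tokenizer/filter chain is needed.
          schema.add_field("text", DataType.VARCHAR, max_length=1000,
                           enable_analyzer=True,
                           analyzer_params={"type": "english"})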

          Analisador personalizado

          Para um processamento de texto mais avançado, os analisadores personalizados no Milvus permitem-lhe construir um pipeline de tratamento de texto personalizado, especificando tanto um tokenizador como filtros. Esta configuração é ideal para casos de utilização especializados em que é necessário um controlo preciso.
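          Um esboço hipotético de um analisador personalizado que combina um tokenizador com uma cadeia de filtros, seguindo o formato documentado de analyzer_params (nomes de campos ilustrativos):

          from pymilvus import MilvusClient, DataType

          client = MilvusClient(uri="http://localhost:19530")
          schema = client.create_schema(auto_id=True)

          # Custom pipeline: one tokenizer plus a chain of filters.
          analyzer_params = {
              "tokenizer": "standard",
              "filter": ["lowercase", "asciifolding"],
          }

          schema.add_field("id", DataType.INT64, is_primary=True)
          schema.add_field("text", DataType.VARCHAR, max_length=1000,
                           enable_analyzer=True,
                           analyzer_params=analyzer_params)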

          @@ -122,7 +122,7 @@ summary: >-
        • Filtros incorporados: Pré-configurados pelo Milvus, requerem uma configuração mínima. Pode utilizar estes filtros imediatamente, especificando os seus nomes. Os filtros abaixo são integrados para uso direto.

          • lowercase: Converte o texto em minúsculas, garantindo uma correspondência sem distinção entre maiúsculas e minúsculas. Para obter detalhes, consulte Minúsculas.

          • -
          • asciifolding: Converte caracteres não-ASCII em equivalentes ASCII, simplificando o manuseamento de texto multilingue. Para mais pormenores, consulte Dobragem ASCII.

          • +
          • asciifolding: Converte caracteres não-ASCII em equivalentes ASCII, simplificando o tratamento de texto multilingue. Para mais pormenores, consulte Dobragem ASCII.

          • alphanumonly: Mantém apenas os caracteres alfanuméricos, removendo os outros. Para mais pormenores, consulte Apenas alfanuméricos.

          • cnalphanumonly: Remove tokens que contêm quaisquer caracteres que não sejam caracteres chineses, letras inglesas ou dígitos. Para mais pormenores, consulte Cnalphanumonly.

          • cncharonly: Remove tokens que contêm quaisquer caracteres não chineses. Para mais pormenores, consulte Cncharonly.
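Como esboço (assumindo pymilvus), vários filtros incorporados podem ser encadeados num analisador personalizado, sendo aplicados pela ordem em que aparecem na lista:

analyzer_params = {
    "tokenizer": "standard",
    "filter": [
        "lowercase",      # converte os tokens para minúsculas
        "asciifolding",   # converte caracteres não-ASCII em equivalentes ASCII
        "alphanumonly",   # mantém apenas caracteres alfanuméricos
    ],
}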

          • diff --git a/localization/v2.5.x/site/pt/userGuide/schema/sparse_vector.json b/localization/v2.5.x/site/pt/userGuide/schema/sparse_vector.json index 20403822a..919443567 100644 --- a/localization/v2.5.x/site/pt/userGuide/schema/sparse_vector.json +++ b/localization/v2.5.x/site/pt/userGuide/schema/sparse_vector.json @@ -1 +1 @@ -{"codeList":["from scipy.sparse import csr_matrix​\n​\n# Create a sparse matrix​\nrow = [0, 0, 1, 2, 2, 2]​\ncol = [0, 2, 2, 0, 1, 2]​\ndata = [1, 2, 3, 4, 5, 6]​\nsparse_matrix = csr_matrix((data, (row, col)), shape=(3, 3))​\n​\n# Represent sparse vector using the sparse matrix​\nsparse_vector = sparse_matrix.getrow(0)​\n\n","# Represent sparse vector using a dictionary​\nsparse_vector = [{1: 0.5, 100: 0.3, 500: 0.8, 1024: 0.2, 5000: 0.6}]​\n\n","SortedMap sparseVector = new TreeMap<>();​\nsparseVector.put(1L, 0.5f);​\nsparseVector.put(100L, 0.3f);​\nsparseVector.put(500L, 0.8f);​\nsparseVector.put(1024L, 0.2f);​\nsparseVector.put(5000L, 0.6f);​\n\n","# Represent sparse vector using a list of tuples​\nsparse_vector = [[(1, 0.5), (100, 0.3), (500, 0.8), (1024, 0.2), (5000, 0.6)]]​\n\n","from pymilvus import MilvusClient, DataType​\n​\nclient = MilvusClient(uri=\"http://localhost:19530\")​\n​\nclient.drop_collection(collection_name=\"my_sparse_collection\")​\n​\nschema = client.create_schema(​\n auto_id=True,​\n enable_dynamic_fields=True,​\n)​\n​\nschema.add_field(field_name=\"pk\", datatype=DataType.VARCHAR, is_primary=True, max_length=100)​\nschema.add_field(field_name=\"sparse_vector\", datatype=DataType.SPARSE_FLOAT_VECTOR)​\n\n","import io.milvus.v2.client.ConnectConfig;​\nimport io.milvus.v2.client.MilvusClientV2;​\n​\nimport io.milvus.v2.common.DataType;​\nimport io.milvus.v2.service.collection.request.AddFieldReq;​\nimport io.milvus.v2.service.collection.request.CreateCollectionReq;​\n​\nMilvusClientV2 client = new MilvusClientV2(ConnectConfig.builder()​\n .uri(\"http://localhost:19530\")​\n .build());​\n ​\nCreateCollectionReq.CollectionSchema schema = client.createSchema();​\nschema.setEnableDynamicField(true);​\nschema.addField(AddFieldReq.builder()​\n .fieldName(\"pk\")​\n .dataType(DataType.VarChar)​\n .isPrimaryKey(true)​\n .autoID(true)​\n .maxLength(100)​\n .build());​\n​\nschema.addField(AddFieldReq.builder()​\n .fieldName(\"sparse_vector\")​\n .dataType(DataType.SparseFloatVector)​\n .build());​\n\n","import { DataType } from \"@zilliz/milvus2-sdk-node\";​\n​\nconst schema = [​\n {​\n name: \"metadata\",​\n data_type: DataType.JSON,​\n },​\n {​\n name: \"pk\",​\n data_type: DataType.Int64,​\n is_primary_key: true,​\n },​\n {​\n name: \"sparse_vector\",​\n data_type: DataType.SparseFloatVector,​\n }​\n];​\n​\n\n","export primaryField='{​\n \"fieldName\": \"pk\",​\n \"dataType\": \"VarChar\",​\n \"isPrimary\": true,​\n \"elementTypeParams\": {​\n \"max_length\": 100​\n }​\n}'​\n​\nexport vectorField='{​\n \"fieldName\": \"sparse_vector\",​\n \"dataType\": \"SparseFloatVector\"​\n}'​\n​\nexport schema=\"{​\n \\\"autoID\\\": true,​\n \\\"fields\\\": [​\n $primaryField,​\n $vectorField​\n ]​\n}\"​\n\n","index_params = client.prepare_index_params()​\n​\nindex_params.add_index(​\n field_name=\"sparse_vector\",​\n index_name=\"sparse_inverted_index\",​\n index_type=\"SPARSE_INVERTED_INDEX\",​\n metric_type=\"IP\",​\n params={\"drop_ratio_build\": 0.2},​\n)​\n\n","import io.milvus.v2.common.IndexParam;​\nimport java.util.*;​\n​\nList indexes = new ArrayList<>();​\nMap extraParams = new HashMap<>();​\nextraParams.put(\"drop_ratio_build\", 
0.2);​\nindexes.add(IndexParam.builder()​\n .fieldName(\"sparse_vector\")​\n .indexName(\"sparse_inverted_index\")​\n .indexType(IndexParam.IndexType.SPARSE_INVERTED_INDEX)​\n .metricType(IndexParam.MetricType.IP)​\n .extraParams(extraParams)​\n .build());​\n\n","const indexParams = await client.createIndex({​\n index_name: 'sparse_inverted_index',​\n field_name: 'sparse_vector',​\n metric_type: MetricType.IP,​\n index_type: IndexType.SPARSE_WAND,​\n params: {​\n drop_ratio_build: 0.2,​\n },​\n});​\n\n","export indexParams='[​\n {​\n \"fieldName\": \"sparse_vector\",​\n \"metricType\": \"IP\",​\n \"indexName\": \"sparse_inverted_index\",​\n \"indexType\": \"SPARSE_INVERTED_INDEX\",​\n \"params\":{\"drop_ratio_build\": 0.2}​\n }​\n ]'​\n\n","client.create_collection(​\n collection_name=\"my_sparse_collection\",​\n schema=schema,​\n index_params=index_params​\n)​\n\n","import io.milvus.v2.client.ConnectConfig;​\nimport io.milvus.v2.client.MilvusClientV2;​\n​\nMilvusClientV2 client = new MilvusClientV2(ConnectConfig.builder()​\n .uri(\"http://localhost:19530\")​\n .build());​\n ​\nCreateCollectionReq requestCreate = CreateCollectionReq.builder()​\n .collectionName(\"my_sparse_collection\")​\n .collectionSchema(schema)​\n .indexParams(indexes)​\n .build();​\nclient.createCollection(requestCreate);​\n\n","import { MilvusClient } from \"@zilliz/milvus2-sdk-node\";​\n​\nconst client = new MilvusClient({​\n address: 'http://localhost:19530'​\n});​\n​\nawait client.createCollection({​\n collection_name: 'my_sparse_collection',​\n schema: schema,​\n index_params: indexParams​\n});​\n\n","curl --request POST \\​\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/collections/create\" \\​\n--header \"Authorization: Bearer ${TOKEN}\" \\​\n--header \"Content-Type: application/json\" \\​\n-d \"{​\n \\\"collectionName\\\": \\\"my_sparse_collection\\\",​\n \\\"schema\\\": $schema,​\n \\\"indexParams\\\": $indexParams​\n}\"​\n\n","sparse_vectors = [​\n {\"sparse_vector\": {1: 0.5, 100: 0.3, 500: 0.8}},​\n {\"sparse_vector\": {10: 0.1, 200: 0.7, 1000: 0.9}},​\n]​\n​\nclient.insert(​\n collection_name=\"my_sparse_collection\",​\n data=sparse_vectors​\n)​\n\n","import com.google.gson.Gson;​\nimport com.google.gson.JsonObject;​\nimport io.milvus.v2.service.vector.request.InsertReq;​\nimport io.milvus.v2.service.vector.response.InsertResp;​\n​\nList rows = new ArrayList<>();​\nGson gson = new Gson();​\n{​\n JsonObject row = new JsonObject();​\n SortedMap sparse = new TreeMap<>();​\n sparse.put(1L, 0.5f);​\n sparse.put(100L, 0.3f);​\n sparse.put(500L, 0.8f);​\n row.add(\"sparse_vector\", gson.toJsonTree(sparse));​\n rows.add(row);​\n}​\n{​\n JsonObject row = new JsonObject();​\n SortedMap sparse = new TreeMap<>();​\n sparse.put(10L, 0.1f);​\n sparse.put(200L, 0.7f);​\n sparse.put(1000L, 0.9f);​\n row.add(\"sparse_vector\", gson.toJsonTree(sparse));​\n rows.add(row);​\n}​\n​\nInsertResp insertR = client.insert(InsertReq.builder()​\n .collectionName(\"my_sparse_collection\")​\n .data(rows)​\n .build());​\n\n","const data = [​\n { sparse_vector: { \"1\": 0.5, \"100\": 0.3, \"500\": 0.8 } },​\n { sparse_vector: { \"10\": 0.1, \"200\": 0.7, \"1000\": 0.9 } },​\n];​\nclient.insert({​\n collection_name: \"my_sparse_collection\",​\n data: data,​\n});​\n​\n\n","curl --request POST \\​\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/entities/insert\" \\​\n--header \"Authorization: Bearer ${TOKEN}\" \\​\n--header \"Content-Type: application/json\" \\​\n-d '{​\n \"data\": [​\n {\"sparse_vector\": {\"1\": 0.5, \"100\": 0.3, \"500\": 0.8}},​\n 
{\"sparse_vector\": {\"10\": 0.1, \"200\": 0.7, \"1000\": 0.9}} ​\n ],​\n \"collectionName\": \"my_sparse_collection\"​\n}'​\n​\n## {\"code\":0,\"cost\":0,\"data\":{\"insertCount\":2,\"insertIds\":[\"453577185629572534\",\"453577185629572535\"]}}​\n\n","# Prepare search parameters​\nsearch_params = {​\n \"params\": {\"drop_ratio_search\": 0.2}, # Additional optional search parameters​\n}​\n​\n# Prepare the query vector​\nquery_vector = [{1: 0.2, 50: 0.4, 1000: 0.7}]​\n\n","res = client.search(​\n collection_name=\"my_sparse_collection\",​\n data=query_vector,​\n limit=3,​\n output_fields=[\"pk\"],​\n search_params=search_params,​\n)​\n​\nprint(res)​\n​\n# Output​\n# data: [\"[{'id': '453718927992172266', 'distance': 0.6299999952316284, 'entity': {'pk': '453718927992172266'}}, {'id': '453718927992172265', 'distance': 0.10000000149011612, 'entity': {'pk': '453718927992172265'}}]\"]​\n\n","import io.milvus.v2.service.vector.request.SearchReq;​\nimport io.milvus.v2.service.vector.request.data.SparseFloatVec;​\nimport io.milvus.v2.service.vector.response.SearchResp;​\n​\nMap searchParams = new HashMap<>();​\nsearchParams.put(\"drop_ratio_search\", 0.2);​\n​\nSortedMap sparse = new TreeMap<>();​\nsparse.put(10L, 0.1f);​\nsparse.put(200L, 0.7f);​\nsparse.put(1000L, 0.9f);​\n​\nSparseFloatVec queryVector = new SparseFloatVec(sparse);​\n​\nSearchResp searchR = client.search(SearchReq.builder()​\n .collectionName(\"my_sparse_collection\")​\n .data(Collections.singletonList(queryVector))​\n .annsField(\"sparse_vector\")​\n .searchParams(searchParams)​\n .topK(3)​\n .outputFields(Collections.singletonList(\"pk\"))​\n .build());​\n ​\nSystem.out.println(searchR.getSearchResults());​\n​\n// Output​\n//​\n// [[SearchResp.SearchResult(entity={pk=453444327741536759}, score=1.31, id=453444327741536759), SearchResp.SearchResult(entity={pk=453444327741536756}, score=1.31, id=453444327741536756), SearchResp.SearchResult(entity={pk=453444327741536753}, score=1.31, id=453444327741536753)]]​\n\n","client.search({​\n collection_name: 'my_sparse_collection',​\n data: {1: 0.2, 50: 0.4, 1000: 0.7},​\n limit: 3,​\n output_fields: ['pk'],​\n params: {​\n drop_ratio_search: 0.2​\n }​\n});​\n\n","curl --request POST \\​\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/entities/search\" \\​\n--header \"Authorization: Bearer ${TOKEN}\" \\​\n--header \"Content-Type: application/json\" \\​\n-d '{​\n \"collectionName\": \"my_sparse_collection\",​\n \"data\": [​\n {\"1\": 0.2, \"50\": 0.4, \"1000\": 0.7}​\n ],​\n \"annsField\": \"sparse_vector\",​\n \"limit\": 3,​\n \"searchParams\":{​\n \"params\":{\"drop_ratio_search\": 0.2}​\n },​\n \"outputFields\": [\"pk\"]​\n}'​\n​\n## {\"code\":0,\"cost\":0,\"data\":[{\"distance\":0.63,\"id\":\"453577185629572535\",\"pk\":\"453577185629572535\"},{\"distance\":0.1,\"id\":\"453577185629572534\",\"pk\":\"453577185629572534\"}]}​\n\n"],"headingContent":"Sparse Vector​","anchorList":[{"label":"Vetor esparso","href":"Sparse-Vector​","type":1,"isActive":false},{"label":"Visão geral","href":"Overview​","type":2,"isActive":false},{"label":"Usar vetores esparsos no Milvus","href":"Use-sparse-vectors-in-Milvus​","type":2,"isActive":false}]} \ No newline at end of file +{"codeList":["from scipy.sparse import csr_matrix​\n​\n# Create a sparse matrix​\nrow = [0, 0, 1, 2, 2, 2]​\ncol = [0, 2, 2, 0, 1, 2]​\ndata = [1, 2, 3, 4, 5, 6]​\nsparse_matrix = csr_matrix((data, (row, col)), shape=(3, 3))​\n​\n# Represent sparse vector using the sparse matrix​\nsparse_vector = sparse_matrix.getrow(0)​\n\n","# Represent 
sparse vector using a dictionary​\nsparse_vector = [{1: 0.5, 100: 0.3, 500: 0.8, 1024: 0.2, 5000: 0.6}]​\n\n","SortedMap sparseVector = new TreeMap<>();​\nsparseVector.put(1L, 0.5f);​\nsparseVector.put(100L, 0.3f);​\nsparseVector.put(500L, 0.8f);​\nsparseVector.put(1024L, 0.2f);​\nsparseVector.put(5000L, 0.6f);​\n\n","# Represent sparse vector using a list of tuples​\nsparse_vector = [[(1, 0.5), (100, 0.3), (500, 0.8), (1024, 0.2), (5000, 0.6)]]​\n\n","from pymilvus import MilvusClient, DataType​\n​\nclient = MilvusClient(uri=\"http://localhost:19530\")​\n​\nclient.drop_collection(collection_name=\"my_sparse_collection\")​\n​\nschema = client.create_schema(​\n auto_id=True,​\n enable_dynamic_fields=True,​\n)​\n​\nschema.add_field(field_name=\"pk\", datatype=DataType.VARCHAR, is_primary=True, max_length=100)​\nschema.add_field(field_name=\"sparse_vector\", datatype=DataType.SPARSE_FLOAT_VECTOR)​\n\n","import io.milvus.v2.client.ConnectConfig;​\nimport io.milvus.v2.client.MilvusClientV2;​\n​\nimport io.milvus.v2.common.DataType;​\nimport io.milvus.v2.service.collection.request.AddFieldReq;​\nimport io.milvus.v2.service.collection.request.CreateCollectionReq;​\n​\nMilvusClientV2 client = new MilvusClientV2(ConnectConfig.builder()​\n .uri(\"http://localhost:19530\")​\n .build());​\n ​\nCreateCollectionReq.CollectionSchema schema = client.createSchema();​\nschema.setEnableDynamicField(true);​\nschema.addField(AddFieldReq.builder()​\n .fieldName(\"pk\")​\n .dataType(DataType.VarChar)​\n .isPrimaryKey(true)​\n .autoID(true)​\n .maxLength(100)​\n .build());​\n​\nschema.addField(AddFieldReq.builder()​\n .fieldName(\"sparse_vector\")​\n .dataType(DataType.SparseFloatVector)​\n .build());​\n\n","import { DataType } from \"@zilliz/milvus2-sdk-node\";​\n​\nconst schema = [​\n {​\n name: \"metadata\",​\n data_type: DataType.JSON,​\n },​\n {​\n name: \"pk\",​\n data_type: DataType.Int64,​\n is_primary_key: true,​\n },​\n {​\n name: \"sparse_vector\",​\n data_type: DataType.SparseFloatVector,​\n }​\n];​\n​\n\n","export primaryField='{​\n \"fieldName\": \"pk\",​\n \"dataType\": \"VarChar\",​\n \"isPrimary\": true,​\n \"elementTypeParams\": {​\n \"max_length\": 100​\n }​\n}'​\n​\nexport vectorField='{​\n \"fieldName\": \"sparse_vector\",​\n \"dataType\": \"SparseFloatVector\"​\n}'​\n​\nexport schema=\"{​\n \\\"autoID\\\": true,​\n \\\"fields\\\": [​\n $primaryField,​\n $vectorField​\n ]​\n}\"​\n\n","index_params = client.prepare_index_params()​\n​\nindex_params.add_index(​\n field_name=\"sparse_vector\",​\n index_name=\"sparse_inverted_index\",​\n index_type=\"SPARSE_INVERTED_INDEX\",​\n metric_type=\"IP\",​\n params={\"drop_ratio_build\": 0.2},​\n)​\n\n","import io.milvus.v2.common.IndexParam;​\nimport java.util.*;​\n​\nList indexes = new ArrayList<>();​\nMap extraParams = new HashMap<>();​\nextraParams.put(\"drop_ratio_build\", 0.2);​\nindexes.add(IndexParam.builder()​\n .fieldName(\"sparse_vector\")​\n .indexName(\"sparse_inverted_index\")​\n .indexType(IndexParam.IndexType.SPARSE_INVERTED_INDEX)​\n .metricType(IndexParam.MetricType.IP)​\n .extraParams(extraParams)​\n .build());​\n\n","const indexParams = await client.createIndex({​\n index_name: 'sparse_inverted_index',​\n field_name: 'sparse_vector',​\n metric_type: MetricType.IP,​\n index_type: IndexType.SPARSE_WAND,​\n params: {​\n drop_ratio_build: 0.2,​\n },​\n});​\n\n","export indexParams='[​\n {​\n \"fieldName\": \"sparse_vector\",​\n \"metricType\": \"IP\",​\n \"indexName\": \"sparse_inverted_index\",​\n \"indexType\": 
\"SPARSE_INVERTED_INDEX\",​\n \"params\":{\"drop_ratio_build\": 0.2}​\n }​\n ]'​\n\n","client.create_collection(​\n collection_name=\"my_sparse_collection\",​\n schema=schema,​\n index_params=index_params​\n)​\n\n","import io.milvus.v2.client.ConnectConfig;​\nimport io.milvus.v2.client.MilvusClientV2;​\n​\nMilvusClientV2 client = new MilvusClientV2(ConnectConfig.builder()​\n .uri(\"http://localhost:19530\")​\n .build());​\n ​\nCreateCollectionReq requestCreate = CreateCollectionReq.builder()​\n .collectionName(\"my_sparse_collection\")​\n .collectionSchema(schema)​\n .indexParams(indexes)​\n .build();​\nclient.createCollection(requestCreate);​\n\n","import { MilvusClient } from \"@zilliz/milvus2-sdk-node\";​\n​\nconst client = new MilvusClient({​\n address: 'http://localhost:19530'​\n});​\n​\nawait client.createCollection({​\n collection_name: 'my_sparse_collection',​\n schema: schema,​\n index_params: indexParams​\n});​\n\n","curl --request POST \\​\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/collections/create\" \\​\n--header \"Authorization: Bearer ${TOKEN}\" \\​\n--header \"Content-Type: application/json\" \\​\n-d \"{​\n \\\"collectionName\\\": \\\"my_sparse_collection\\\",​\n \\\"schema\\\": $schema,​\n \\\"indexParams\\\": $indexParams​\n}\"​\n\n","sparse_vectors = [​\n {\"sparse_vector\": {1: 0.5, 100: 0.3, 500: 0.8}},​\n {\"sparse_vector\": {10: 0.1, 200: 0.7, 1000: 0.9}},​\n]​\n​\nclient.insert(​\n collection_name=\"my_sparse_collection\",​\n data=sparse_vectors​\n)​\n\n","import com.google.gson.Gson;​\nimport com.google.gson.JsonObject;​\nimport io.milvus.v2.service.vector.request.InsertReq;​\nimport io.milvus.v2.service.vector.response.InsertResp;​\n​\nList rows = new ArrayList<>();​\nGson gson = new Gson();​\n{​\n JsonObject row = new JsonObject();​\n SortedMap sparse = new TreeMap<>();​\n sparse.put(1L, 0.5f);​\n sparse.put(100L, 0.3f);​\n sparse.put(500L, 0.8f);​\n row.add(\"sparse_vector\", gson.toJsonTree(sparse));​\n rows.add(row);​\n}​\n{​\n JsonObject row = new JsonObject();​\n SortedMap sparse = new TreeMap<>();​\n sparse.put(10L, 0.1f);​\n sparse.put(200L, 0.7f);​\n sparse.put(1000L, 0.9f);​\n row.add(\"sparse_vector\", gson.toJsonTree(sparse));​\n rows.add(row);​\n}​\n​\nInsertResp insertR = client.insert(InsertReq.builder()​\n .collectionName(\"my_sparse_collection\")​\n .data(rows)​\n .build());​\n\n","const data = [​\n { sparse_vector: { \"1\": 0.5, \"100\": 0.3, \"500\": 0.8 } },​\n { sparse_vector: { \"10\": 0.1, \"200\": 0.7, \"1000\": 0.9 } },​\n];​\nclient.insert({​\n collection_name: \"my_sparse_collection\",​\n data: data,​\n});​\n​\n\n","curl --request POST \\​\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/entities/insert\" \\​\n--header \"Authorization: Bearer ${TOKEN}\" \\​\n--header \"Content-Type: application/json\" \\​\n-d '{​\n \"data\": [​\n {\"sparse_vector\": {\"1\": 0.5, \"100\": 0.3, \"500\": 0.8}},​\n {\"sparse_vector\": {\"10\": 0.1, \"200\": 0.7, \"1000\": 0.9}} ​\n ],​\n \"collectionName\": \"my_sparse_collection\"​\n}'​\n​\n## {\"code\":0,\"cost\":0,\"data\":{\"insertCount\":2,\"insertIds\":[\"453577185629572534\",\"453577185629572535\"]}}​\n\n","# Prepare search parameters​\nsearch_params = {​\n \"params\": {\"drop_ratio_search\": 0.2}, # Additional optional search parameters​\n}​\n​\n# Prepare the query vector​\nquery_vector = [{1: 0.2, 50: 0.4, 1000: 0.7}]​\n\n","res = client.search(​\n collection_name=\"my_sparse_collection\",​\n data=query_vector,​\n limit=3,​\n output_fields=[\"pk\"],​\n search_params=search_params,​\n)​\n​\nprint(res)​\n​\n# 
Output​\n# data: [\"[{'id': '453718927992172266', 'distance': 0.6299999952316284, 'entity': {'pk': '453718927992172266'}}, {'id': '453718927992172265', 'distance': 0.10000000149011612, 'entity': {'pk': '453718927992172265'}}]\"]​\n\n","import io.milvus.v2.service.vector.request.SearchReq;​\nimport io.milvus.v2.service.vector.request.data.SparseFloatVec;​\nimport io.milvus.v2.service.vector.response.SearchResp;​\n​\nMap searchParams = new HashMap<>();​\nsearchParams.put(\"drop_ratio_search\", 0.2);​\n​\nSortedMap sparse = new TreeMap<>();​\nsparse.put(10L, 0.1f);​\nsparse.put(200L, 0.7f);​\nsparse.put(1000L, 0.9f);​\n​\nSparseFloatVec queryVector = new SparseFloatVec(sparse);​\n​\nSearchResp searchR = client.search(SearchReq.builder()​\n .collectionName(\"my_sparse_collection\")​\n .data(Collections.singletonList(queryVector))​\n .annsField(\"sparse_vector\")​\n .searchParams(searchParams)​\n .topK(3)​\n .outputFields(Collections.singletonList(\"pk\"))​\n .build());​\n ​\nSystem.out.println(searchR.getSearchResults());​\n​\n// Output​\n//​\n// [[SearchResp.SearchResult(entity={pk=453444327741536759}, score=1.31, id=453444327741536759), SearchResp.SearchResult(entity={pk=453444327741536756}, score=1.31, id=453444327741536756), SearchResp.SearchResult(entity={pk=453444327741536753}, score=1.31, id=453444327741536753)]]​\n\n","client.search({​\n collection_name: 'my_sparse_collection',​\n data: {1: 0.2, 50: 0.4, 1000: 0.7},​\n limit: 3,​\n output_fields: ['pk'],​\n params: {​\n drop_ratio_search: 0.2​\n }​\n});​\n\n","curl --request POST \\​\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/entities/search\" \\​\n--header \"Authorization: Bearer ${TOKEN}\" \\​\n--header \"Content-Type: application/json\" \\​\n-d '{​\n \"collectionName\": \"my_sparse_collection\",​\n \"data\": [​\n {\"1\": 0.2, \"50\": 0.4, \"1000\": 0.7}​\n ],​\n \"annsField\": \"sparse_vector\",​\n \"limit\": 3,​\n \"searchParams\":{​\n \"params\":{\"drop_ratio_search\": 0.2}​\n },​\n \"outputFields\": [\"pk\"]​\n}'​\n​\n## {\"code\":0,\"cost\":0,\"data\":[{\"distance\":0.63,\"id\":\"453577185629572535\",\"pk\":\"453577185629572535\"},{\"distance\":0.1,\"id\":\"453577185629572534\",\"pk\":\"453577185629572534\"}]}​\n\n"],"headingContent":"Sparse Vector​","anchorList":[{"label":"Vetor esparso","href":"Sparse-Vector​","type":1,"isActive":false},{"label":"Visão geral","href":"Overview​","type":2,"isActive":false},{"label":"Usar vetores esparsos no Milvus","href":"Use-sparse-vectors-in-Milvus​","type":2,"isActive":false},{"label":"Limites","href":"Limits","type":2,"isActive":false},{"label":"PERGUNTAS FREQUENTES","href":"FAQ","type":2,"isActive":false}]} \ No newline at end of file diff --git a/localization/v2.5.x/site/pt/userGuide/schema/sparse_vector.md b/localization/v2.5.x/site/pt/userGuide/schema/sparse_vector.md index 9b8ad970d..d8e3bd4ef 100644 --- a/localization/v2.5.x/site/pt/userGuide/schema/sparse_vector.md +++ b/localization/v2.5.x/site/pt/userGuide/schema/sparse_vector.md @@ -52,7 +52,7 @@ summary: >- Spare vector representation Representação de vectores esparsos

            Os vectores esparsos podem ser gerados utilizando vários métodos, como o TF-IDF (Term Frequency-Inverse Document Frequency) e o BM25 no processamento de texto. Além disso, o Milvus oferece métodos convenientes para ajudar a gerar e processar vectores esparsos. Para obter detalhes, consulte Embeddings.
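A título de ilustração, um vetor esparso pode ser representado em Python como um dicionário que mapeia dimensões (índices) para valores diferentes de zero, como no esboço abaixo:

# Representação de um vetor esparso como dicionário {dimensão: valor}
sparse_vector = [{1: 0.5, 100: 0.3, 500: 0.8, 1024: 0.2, 5000: 0.6}]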

            -

            Para dados de texto, o Milvus também fornece capacidades de pesquisa de texto completo, permitindo-lhe efetuar pesquisas vectoriais diretamente em dados de texto em bruto sem utilizar modelos de incorporação externos para gerar vectores esparsos. Para obter mais informações, consulte Pesquisa de texto completo.

            +

            Para dados de texto, Milvus também fornece capacidades de pesquisa de texto completo, permitindo-lhe realizar pesquisas vectoriais diretamente em dados de texto em bruto sem utilizar modelos de incorporação externos para gerar vectores esparsos. Para obter mais informações, consulte Pesquisa de texto completo.

            Após a vectorização, os dados podem ser armazenados no Milvus para gestão e recuperação de vectores. O diagrama abaixo ilustra o processo básico.

            @@ -116,7 +116,7 @@ sparse_vector = [[(1,

          -

          Adicionar campo vetorial

          Para utilizar vectores esparsos em Milvus, defina um campo para armazenar vectores esparsos ao criar uma coleção. Este processo inclui.

          +

          Adicionar campo vetorial

Para utilizar vectores esparsos no Milvus, defina um campo para os armazenar ao criar uma coleção. Este processo inclui:

          1. Definir datatype como o tipo de dados de vetor esparso suportado, SPARSE_FLOAT_VECTOR.

          2. Não é necessário especificar a dimensão.

@@ -208,7 +208,7 @@ schema.addField(AddFieldReq.builder() }"
            -

            Neste exemplo, é adicionado um campo de vetor chamado sparse_vector para armazenar vectores esparsos. O tipo de dados deste campo é SPARSE_FLOAT_VECTOR.

            +

            Neste exemplo, um campo de vetor chamado sparse_vector é adicionado para armazenar vectores esparsos. O tipo de dados deste campo é SPARSE_FLOAT_VECTOR.
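Um esboço mínimo em Python (pymilvus) do passo descrito acima; note que não é necessário especificar a dimensão para o campo esparso:

from pymilvus import MilvusClient, DataType

client = MilvusClient(uri="http://localhost:19530")

schema = client.create_schema(auto_id=True)
schema.add_field(field_name="pk", datatype=DataType.VARCHAR, is_primary=True, max_length=100)
schema.add_field(field_name="sparse_vector", datatype=DataType.SPARSE_FLOAT_VECTOR)  # sem especificar a dimensão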

            Definir parâmetros de índice para o campo de vetor

            O processo de criação de um índice para vectores esparsos é semelhante ao dos vectores densos, mas com diferenças no tipo de índice especificado (index_type), na métrica de distância (metric_type) e nos parâmetros de índice (params).

            @@ -262,9 +262,9 @@ indexes.add(

            No exemplo acima.

              -
            • Um índice do tipo SPARSE_INVERTED_INDEX é criado para o vetor esparso. Para vectores esparsos, pode especificar SPARSE_INVERTED_INDEX ou SPARSE_WAND. Para obter detalhes, consulte Índices de vetor esparso.

            • +
            • Um índice do tipo SPARSE_INVERTED_INDEX é criado para o vetor esparso. Para vectores esparsos, pode especificar SPARSE_INVERTED_INDEX ou SPARSE_WAND. Para obter detalhes, consulte Índices de vetor esparso.

            • Para vectores esparsos, metric_type suporta apenas IP (Inner Product), utilizado para medir a semelhança entre dois vectores esparsos. Para obter mais informações sobre similaridade, consulte Tipos de métrica.

            • -
            • drop_ratio_build é um parâmetro de índice opcional específico para vectores esparsos. Controla a proporção de pequenos valores de vetor excluídos durante a construção do índice. Por exemplo, com {"drop_ratio_build": 0.2}, os 20% mais pequenos dos valores de vetor serão excluídos durante a criação do índice, reduzindo o esforço computacional durante as pesquisas.

            • +
            • drop_ratio_build é um parâmetro de índice opcional especificamente para vectores esparsos. Controla a proporção de pequenos valores de vetor excluídos durante a construção do índice. Por exemplo, com {"drop_ratio_build": 0.2}, os 20% mais pequenos dos valores de vetor serão excluídos durante a criação do índice, reduzindo o esforço computacional durante as pesquisas.
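Um esboço (pymilvus, reutilizando o client do passo anterior) que reúne estes parâmetros de índice para o campo sparse_vector:

index_params = client.prepare_index_params()

index_params.add_index(
    field_name="sparse_vector",
    index_name="sparse_inverted_index",
    index_type="SPARSE_INVERTED_INDEX",   # ou SPARSE_WAND
    metric_type="IP",                     # única métrica suportada para vetores esparsos
    params={"drop_ratio_build": 0.2},     # exclui os 20% de valores mais pequenos na construção
)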

            Criar coleção

            Quando as definições do vetor esparso e do índice estiverem concluídas, pode criar uma coleção que contenha vectores esparsos. O exemplo abaixo utiliza o método create_collection para criar uma coleção denominada my_sparse_collection.

            @@ -479,3 +479,60 @@ sparse.put(1000L,

            Para obter mais informações sobre os parâmetros de pesquisa de similaridade, consulte Pesquisa ANN básica.

            +

            Limites

            Ao usar vetores esparsos no Milvus, considere os seguintes limites:

            +
              +
• Atualmente, apenas a métrica de distância IP é suportada para vetores esparsos. A elevada dimensionalidade dos vectores esparsos torna impraticáveis as distâncias L2 e de cosseno.

            • +
            • Para campos de vectores esparsos, apenas são suportados os tipos de índice SPARSE_INVERTED_INDEX e SPARSE_WAND.

            • +
            • Os tipos de dados suportados para vectores esparsos:

              +
                +
              • A parte da dimensão deve ser um inteiro de 32 bits sem sinal;
              • +
              • A parte do valor pode ser um número de ponto flutuante de 32 bits não negativo.
              • +
            • +
            • Os vectores esparsos têm de cumprir os seguintes requisitos para inserção e pesquisa:

              +
                +
              • Pelo menos um valor no vetor é diferente de zero;
              • +
              • Os índices do vetor são não-negativos.
              • +
            • +
            +

            PERGUNTAS FREQUENTES

              +
            • Pode explicar a diferença entre SPARSE_INVERTED_INDEX e SPARSE_WAND, e como posso escolher entre eles?

              +

              O SPARSE_INVERTED_INDEX é um índice invertido tradicional, enquanto o SPARSE_WAND utiliza o algoritmo Weak-AND para reduzir o número de avaliações de distância IP completas durante a pesquisa. O SPARSE_WAND é normalmente mais rápido, mas o seu desempenho pode diminuir com o aumento da densidade do vetor. Para escolher entre eles, realize experiências e benchmarks com base no seu conjunto de dados e caso de utilização específicos.

            • +
            • Como devo escolher os parâmetros drop_ratio_build e drop_ratio_search?

              +

              A escolha de drop_ratio_build e drop_ratio_search depende das caraterísticas dos seus dados e dos seus requisitos de latência/rendimento e precisão da pesquisa.

            • +
• A dimensão de um embedding esparso pode ser qualquer valor discreto dentro do espaço uint32?

              +

Sim, com uma exceção. A dimensão de um embedding esparso pode ser qualquer valor no intervalo [0, máximo de uint32), o que significa que não pode utilizar o valor máximo de uint32.

            • +
            • As pesquisas em segmentos crescentes são conduzidas através de um índice ou por força bruta?

              +

              As pesquisas em segmentos crescentes são realizadas através de um índice do mesmo tipo que o índice do segmento selado. Para novos segmentos crescentes antes de o índice ser construído, é usada uma pesquisa de força bruta.

            • +
            • É possível ter vetores esparsos e densos em uma única coleção?

              +

              Sim, com suporte a vários tipos de vetores, é possível criar coleções com colunas de vetores esparsos e densos e executar pesquisas híbridas nelas.
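Como esboço mínimo (assumindo pymilvus e uma dimensão densa hipotética de 768), um esquema pode combinar um campo denso e um campo esparso na mesma coleção, sobre os quais é depois possível executar pesquisas híbridas:

from pymilvus import MilvusClient, DataType

schema = MilvusClient.create_schema(auto_id=True)
schema.add_field(field_name="pk", datatype=DataType.INT64, is_primary=True)
schema.add_field(field_name="dense_vector", datatype=DataType.FLOAT_VECTOR, dim=768)   # vetor denso
schema.add_field(field_name="sparse_vector", datatype=DataType.SPARSE_FLOAT_VECTOR)    # vetor esparso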

            • +
            diff --git a/localization/v2.5.x/site/pt/userGuide/search-query-get/boolean.md b/localization/v2.5.x/site/pt/userGuide/search-query-get/boolean.md index cae55a725..e9ee0c70f 100644 --- a/localization/v2.5.x/site/pt/userGuide/search-query-get/boolean.md +++ b/localization/v2.5.x/site/pt/userGuide/search-query-get/boolean.md @@ -700,9 +700,9 @@ curl --request POST \​

            Match operators​

            Match operators include:​

            • like: Match constants or prefixes (prefix%), infixes (%infix%), and suffixes (%suffix) within constants. It relies on a brute-force search mechanism using wildcards and does not involve text tokenization. While it can achieve exact matches, its query efficiency is relatively low, making it suitable for simple matching tasks or queries on smaller datasets.​

            • -
            • TEXT_MATCH: Match specific terms or keywords on VARCHAR fields, using tokenization and inverted index to enable efficient text search. Compared to like, TEXT_MATCH offers more advanced text tokenization and filtering capabilities. It is suited for large-scale datasets where higher query performance is required for complex text search scenarios.​ -

              -

              To use the TEXT_MATCH filter expression, you must enable text matching for the target VARCHAR field when creating the collection. For details, refer to ​Keyword Match.​

              +
            • TEXT_MATCH: Match specific terms or keywords on VARCHAR fields, using tokenization and inverted index to enable efficient text search. Compared to like, TEXT_MATCH offers more advanced text tokenization and filtering capabilities. It is suited for large-scale datasets where higher query performance is required for complex text search scenarios.​

              +

              +

              To use the TEXT_MATCH filter expression, you must enable text matching for the target VARCHAR field when creating the collection. For details, refer to Text Match.​
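As a minimal sketch (assuming pymilvus and a hypothetical description field), text matching is enabled on the VARCHAR field when the schema is defined:

from pymilvus import MilvusClient, DataType

schema = MilvusClient.create_schema()
schema.add_field(field_name="id", datatype=DataType.INT64, is_primary=True, auto_id=True)
schema.add_field(
    field_name="description",
    datatype=DataType.VARCHAR,
    max_length=1000,
    enable_analyzer=True,  # tokenize the text
    enable_match=True,     # build the inverted index required by TEXT_MATCH
)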

            Example 1: Apply filter on scalar field​

The following example demonstrates how to filter products whose color is red. In this case, you can quickly filter all red products by matching the prefix 'red%'. Similarly, you can use the expression color in ['red_7025', 'red_4794', 'red_9392'] to filter all red products. However, when the data is more complex, we recommend using the like operator for more efficient filtering.​
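A sketch of both filters in Python (the collection name and fields here are assumptions for illustration, and client is an existing MilvusClient instance):

# Prefix match with the like operator
res = client.query(
    collection_name="my_collection",
    filter='color like "red%"',
    output_fields=["color"],
)

# Equivalent enumeration with the in operator
res = client.query(
    collection_name="my_collection",
    filter='color in ["red_7025", "red_4794", "red_9392"]',
    output_fields=["color"],
)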

            @@ -857,8 +857,8 @@ curl --request POST \​ ]​ -

            Example 3: Keyword match on VARCHAR fields​

            The TEXT_MATCH expression is used for keyword match on VARCHAR fields. By default, it applies an OR logic, but you can combine it with other logical operators to create more complex query conditions. For details, refer to ​Keyword Match.​

            -

            The following example demonstrates how to use the TEXT_MATCH expression to filter products where the description field contains either the keyword "Apple" or "iPhone":​

            +

            Example 3: Text match on VARCHAR fields​

            The TEXT_MATCH expression is used for text match on VARCHAR fields. By default, it applies an OR logic, but you can combine it with other logical operators to create more complex query conditions. For details, refer to Text Match.​

            +

            The following example demonstrates how to use the TEXT_MATCH expression to filter products where the description field contains either the term "Apple" or "iPhone":​
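A minimal sketch of such a filter (assuming a collection with a description field that has text matching enabled, and an existing MilvusClient instance named client):

# OR logic by default: matches entities whose description contains "Apple" or "iPhone"
filter = "TEXT_MATCH(description, 'Apple iPhone')"

res = client.query(
    collection_name="my_collection",
    filter=filter,
    output_fields=["id", "description"],
)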

            Python Java diff --git a/localization/v2.5.x/site/pt/userGuide/search-query-get/full-text-search.json b/localization/v2.5.x/site/pt/userGuide/search-query-get/full-text-search.json index f854c9aa0..11a17456a 100644 --- a/localization/v2.5.x/site/pt/userGuide/search-query-get/full-text-search.json +++ b/localization/v2.5.x/site/pt/userGuide/search-query-get/full-text-search.json @@ -1 +1 @@ -{"codeList":["from pymilvus import MilvusClient, DataType, Function, FunctionType​\n​\nschema = MilvusClient.create_schema()​\n​\nschema.add_field(field_name=\"id\", datatype=DataType.INT64, is_primary=True, auto_id=True)​\nschema.add_field(field_name=\"text\", datatype=DataType.VARCHAR, max_length=1000, enable_analyzer=True)​\nschema.add_field(field_name=\"sparse\", datatype=DataType.SPARSE_FLOAT_VECTOR)​\n\n","bm25_function = Function(​\n name=\"text_bm25_emb\", # Function name​\n input_field_names=[\"text\"], # Name of the VARCHAR field containing raw text data​\n output_field_names=[\"sparse\"], # Name of the SPARSE_FLOAT_VECTOR field reserved to store generated embeddings​\n function_type=FunctionType.BM25,​\n)​\n​\nschema.add_function(bm25_function)​\n\n","index_params = MilvusClient.prepare_index_params()​\n​\nindex_params.add_index(​\n field_name=\"sparse\",​\n index_type=\"AUTOINDEX\", ​\n metric_type=\"BM25\"​\n)​\n\n","MilvusClient.create_collection(​\n collection_name='demo', ​\n schema=schema, ​\n index_params=index_params​\n)​\n\n","MilvusClient.insert('demo', [​\n {'text': 'Artificial intelligence was founded as an academic discipline in 1956.'},​\n {'text': 'Alan Turing was the first person to conduct substantial research in AI.'},​\n {'text': 'Born in Maida Vale, London, Turing was raised in southern England.'},​\n])​\n\n","search_params = {​\n 'params': {'drop_ratio_search': 0.6},​\n}​\n​\nMilvusClient.search(​\n collection_name='demo', ​\n data=['Who started AI research?'],​\n anns_field='sparse',​\n limit=3,​\n search_params=search_params​\n)​\n\n"],"headingContent":"Full Text Search​","anchorList":[{"label":"Pesquisa de texto integral","href":"Full-Text-Search​","type":1,"isActive":false},{"label":"Visão geral","href":"Overview​","type":2,"isActive":false},{"label":"Criar uma coleção para pesquisa de texto integral","href":"Create-a-collection-for-full-text-search​","type":2,"isActive":false},{"label":"Inserir dados de texto","href":"Insert-text-data","type":2,"isActive":false},{"label":"Efetuar uma pesquisa de texto completo","href":"Perform-full-text-search","type":2,"isActive":false}]} \ No newline at end of file +{"codeList":["from pymilvus import MilvusClient, DataType, Function, FunctionType​\n​\nschema = MilvusClient.create_schema()​\n​\nschema.add_field(field_name=\"id\", datatype=DataType.INT64, is_primary=True, auto_id=True)​\nschema.add_field(field_name=\"text\", datatype=DataType.VARCHAR, max_length=1000, enable_analyzer=True)​\nschema.add_field(field_name=\"sparse\", datatype=DataType.SPARSE_FLOAT_VECTOR)​\n\n","import io.milvus.v2.common.DataType;\nimport io.milvus.v2.service.collection.request.AddFieldReq;\nimport io.milvus.v2.service.collection.request.CreateCollectionReq;\n\nCreateCollectionReq.CollectionSchema schema = CreateCollectionReq.CollectionSchema.builder()\n .build();\nschema.addField(AddFieldReq.builder()\n .fieldName(\"id\")\n .dataType(DataType.Int64)\n .isPrimaryKey(true)\n .autoID(true)\n .build());\nschema.addField(AddFieldReq.builder()\n .fieldName(\"text\")\n .dataType(DataType.VarChar)\n .maxLength(1000)\n .enableAnalyzer(true)\n 
.build());\nschema.addField(AddFieldReq.builder()\n .fieldName(\"sparse\")\n .dataType(DataType.SparseFloatVector)\n .build());\n","import { MilvusClient, DataType } from \"@zilliz/milvus2-sdk-node\";\n\nconst address = \"http://localhost:19530\";\nconst token = \"root:Milvus\";\nconst client = new MilvusClient({address, token});\nconst schema = [\n {\n name: \"id\",\n data_type: DataType.Int64,\n is_primary_key: true,\n },\n {\n name: \"text\",\n data_type: \"VarChar\",\n enable_analyzer: true,\n enable_match: true,\n max_length: 1000,\n },\n {\n name: \"sparse\",\n data_type: DataType.SparseFloatVector,\n },\n];\n\n\nconsole.log(res.results)\n","export schema='{\n \"autoId\": true,\n \"enabledDynamicField\": false,\n \"fields\": [\n {\n \"fieldName\": \"id\",\n \"dataType\": \"Int64\",\n \"isPrimary\": true\n },\n {\n \"fieldName\": \"text\",\n \"dataType\": \"VarChar\",\n \"elementTypeParams\": {\n \"max_length\": 1000,\n \"enable_analyzer\": true\n }\n },\n {\n \"fieldName\": \"sparse\",\n \"dataType\": \"SparseFloatVector\"\n }\n ]\n }'\n","bm25_function = Function(​\n name=\"text_bm25_emb\", # Function name​\n input_field_names=[\"text\"], # Name of the VARCHAR field containing raw text data​\n output_field_names=[\"sparse\"], # Name of the SPARSE_FLOAT_VECTOR field reserved to store generated embeddings​\n function_type=FunctionType.BM25,​\n)​\n​\nschema.add_function(bm25_function)​\n\n","import io.milvus.common.clientenum.FunctionType;\nimport io.milvus.v2.service.collection.request.CreateCollectionReq.Function;\n\nimport java.util.*;\n\nschema.addFunction(Function.builder()\n .functionType(FunctionType.BM25)\n .name(\"text_bm25_emb\")\n .inputFieldNames(Collections.singletonList(\"text\"))\n .outputFieldNames(Collections.singletonList(\"vector\"))\n .build());\n","const functions = [\n {\n name: 'text_bm25_emb',\n description: 'bm25 function',\n type: FunctionType.BM25,\n input_field_names: ['text'],\n output_field_names: ['vector'],\n params: {},\n },\n];\n","export schema='{\n \"autoId\": true,\n \"enabledDynamicField\": false,\n \"fields\": [\n {\n \"fieldName\": \"id\",\n \"dataType\": \"Int64\",\n \"isPrimary\": true\n },\n {\n \"fieldName\": \"text\",\n \"dataType\": \"VarChar\",\n \"elementTypeParams\": {\n \"max_length\": 1000,\n \"enable_analyzer\": true\n }\n },\n {\n \"fieldName\": \"sparse\",\n \"dataType\": \"SparseFloatVector\"\n }\n ],\n \"functions\": [\n {\n \"name\": \"text_bm25_emb\",\n \"type\": \"BM25\",\n \"inputFieldNames\": [\"text\"],\n \"outputFieldNames\": [\"sparse\"],\n \"params\": {}\n }\n ]\n }'\n","index_params = MilvusClient.prepare_index_params()​\n​\nindex_params.add_index(​\n field_name=\"sparse\",​\n index_type=\"AUTOINDEX\", ​\n metric_type=\"BM25\"​\n)​\n\n","import io.milvus.v2.common.IndexParam;\n\nList indexes = new ArrayList<>();\nindexes.add(IndexParam.builder()\n .fieldName(\"sparse\")\n .indexType(IndexParam.IndexType.SPARSE_INVERTED_INDEX)\n .metricType(IndexParam.MetricType.BM25)\n .build());\n","const index_params = [\n {\n fieldName: \"sparse\",\n metricType: \"BM25\",\n indexType: \"AUTOINDEX\",\n },\n];\n","export indexParams='[\n {\n \"fieldName\": \"sparse\",\n \"metricType\": \"BM25\",\n \"indexType\": \"AUTOINDEX\"\n }\n ]'\n","MilvusClient.create_collection(​\n collection_name='demo', ​\n schema=schema, ​\n index_params=index_params​\n)​\n\n","import io.milvus.v2.service.collection.request.CreateCollectionReq;\n\nCreateCollectionReq requestCreate = CreateCollectionReq.builder()\n .collectionName(\"demo\")\n 
.collectionSchema(schema)\n .indexParams(indexes)\n .build();\nclient.createCollection(requestCreate);\n","await client.create_collection(\n collection_name: 'demo', \n schema: schema, \n index_params: index_params\n);\n","export CLUSTER_ENDPOINT=\"http://localhost:19530\"\nexport TOKEN=\"root:Milvus\"\n\ncurl --request POST \\\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/collections/create\" \\\n--header \"Authorization: Bearer ${TOKEN}\" \\\n--header \"Content-Type: application/json\" \\\n-d \"{\n \\\"collectionName\\\": \\\"demo\\\",\n \\\"schema\\\": $schema,\n \\\"indexParams\\\": $indexParams\n}\"\n","client.insert('demo', [\n {'text': 'information retrieval is a field of study.'},\n {'text': 'information retrieval focuses on finding relevant information in large datasets.'},\n {'text': 'data mining and information retrieval overlap in research.'},\n])\n\n","import com.google.gson.Gson;\nimport com.google.gson.JsonObject;\n\nimport io.milvus.v2.service.vector.request.InsertReq;\n\nGson gson = new Gson();\nList rows = Arrays.asList(\n gson.fromJson(\"{\\\"text\\\": \\\"information retrieval is a field of study.\\\"}\", JsonObject.class),\n gson.fromJson(\"{\\\"text\\\": \\\"information retrieval focuses on finding relevant information in large datasets.\\\"}\", JsonObject.class),\n gson.fromJson(\"{\\\"text\\\": \\\"data mining and information retrieval overlap in research.\\\"}\", JsonObject.class)\n);\n\nclient.insert(InsertReq.builder()\n .collectionName(\"demo\")\n .data(rows)\n .build());\n","await client.insert({\ncollection_name: 'demo', \ndata: [\n {'text': 'information retrieval is a field of study.'},\n {'text': 'information retrieval focuses on finding relevant information in large datasets.'},\n {'text': 'data mining and information retrieval overlap in research.'},\n]);\n","curl --request POST \\\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/entities/insert\" \\\n--header \"Authorization: Bearer ${TOKEN}\" \\\n--header \"Content-Type: application/json\" \\\n-d '{\n \"data\": [\n {\"text\": \"information retrieval is a field of study.\"},\n {\"text\": \"information retrieval focuses on finding relevant information in large datasets.\"},\n {\"text\": \"data mining and information retrieval overlap in research.\"} \n ],\n \"collectionName\": \"demo\"\n}'\n","search_params = {​\n 'params': {'drop_ratio_search': 0.6},​\n}​\n​\nMilvusClient.search(​\n collection_name='demo', ​\n data=['whats the focus of information retrieval?'],​\n anns_field='sparse',​\n limit=3,​\n search_params=search_params​\n)​\n\n","import io.milvus.v2.service.vector.request.SearchReq;\nimport io.milvus.v2.service.vector.request.data.EmbeddedText;\nimport io.milvus.v2.service.vector.response.SearchResp;\n\nMap searchParams = new HashMap<>();\nsearchParams.put(\"drop_ratio_search\", 0.6);\nSearchResp searchResp = client.search(SearchReq.builder()\n .collectionName(\"demo\")\n .data(Collections.singletonList(new EmbeddedText(\"whats the focus of information retrieval?\")))\n .annsField(\"sparse\")\n .topK(3)\n .searchParams(searchParams)\n .outputFields(Collections.singletonList(\"text\"))\n .build());\n","await client.search(\n collection_name: 'demo', \n data: ['whats the focus of information retrieval?'],\n anns_field: 'sparse',\n limit: 3,\n params: {'drop_ratio_search': 0.6},\n)\n","curl --request POST \\\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/entities/search\" \\\n--header \"Authorization: Bearer ${TOKEN}\" \\\n--header \"Content-Type: application/json\" \\\n--data-raw '{\n \"collectionName\": \"demo\",\n 
\"data\": [\n \"whats the focus of information retrieval?\"\n ],\n \"annsField\": \"sparse\",\n \"limit\": 3,\n \"outputFields\": [\n \"text\"\n ],\n \"searchParams\":{\n \"params\":{\n \"drop_ratio_search\":0.6\n }\n }\n}'\n"],"headingContent":"Full Text Search​","anchorList":[{"label":"Pesquisa de texto integral","href":"Full-Text-Search​","type":1,"isActive":false},{"label":"Visão geral","href":"Overview​","type":2,"isActive":false},{"label":"Criar uma coleção para pesquisa de texto integral","href":"Create-a-collection-for-full-text-search​","type":2,"isActive":false},{"label":"Inserir dados de texto","href":"Insert-text-data","type":2,"isActive":false},{"label":"Efetuar uma pesquisa de texto completo","href":"Perform-full-text-search","type":2,"isActive":false}]} \ No newline at end of file diff --git a/localization/v2.5.x/site/pt/userGuide/search-query-get/full-text-search.md b/localization/v2.5.x/site/pt/userGuide/search-query-get/full-text-search.md index 6c86aae32..9f77ddcd0 100644 --- a/localization/v2.5.x/site/pt/userGuide/search-query-get/full-text-search.md +++ b/localization/v2.5.x/site/pt/userGuide/search-query-get/full-text-search.md @@ -82,6 +82,8 @@ summary: >-
          4. Um campo SPARSE_FLOAT_VECTOR reservado para armazenar embeddings esparsos que o Milvus irá gerar automaticamente para o campo VARCHAR.

        Definir o esquema da coleção

        Em primeiro lugar, crie o esquema e adicione os campos necessários.

        +
        from pymilvus import MilvusClient, DataType, Function, FunctionType​
         ​
         schema = MilvusClient.create_schema()​
        @@ -90,14 +92,90 @@ schema.add_field(field_name="id", dat
         schema.add_field(field_name="text", datatype=DataType.VARCHAR, max_length=1000, enable_analyzer=True)​
         schema.add_field(field_name="sparse", datatype=DataType.SPARSE_FLOAT_VECTOR)​
         
        +
        +
        import io.milvus.v2.common.DataType;
        +import io.milvus.v2.service.collection.request.AddFieldReq;
        +import io.milvus.v2.service.collection.request.CreateCollectionReq;
        +
        +CreateCollectionReq.CollectionSchema schema = CreateCollectionReq.CollectionSchema.builder()
        +        .build();
        +schema.addField(AddFieldReq.builder()
        +        .fieldName("id")
        +        .dataType(DataType.Int64)
        +        .isPrimaryKey(true)
        +        .autoID(true)
        +        .build());
        +schema.addField(AddFieldReq.builder()
        +        .fieldName("text")
        +        .dataType(DataType.VarChar)
        +        .maxLength(1000)
        +        .enableAnalyzer(true)
        +        .build());
        +schema.addField(AddFieldReq.builder()
        +        .fieldName("sparse")
        +        .dataType(DataType.SparseFloatVector)
        +        .build());
        +
        +
        import { MilvusClient, DataType } from "@zilliz/milvus2-sdk-node";
        +
        +const address = "http://localhost:19530";
        +const token = "root:Milvus";
        +const client = new MilvusClient({address, token});
        +const schema = [
        +  {
        +    name: "id",
        +    data_type: DataType.Int64,
        +    is_primary_key: true,
        +  },
        +  {
        +    name: "text",
        +    data_type: "VarChar",
        +    enable_analyzer: true,
        +    enable_match: true,
        +    max_length: 1000,
        +  },
        +  {
        +    name: "sparse",
        +    data_type: DataType.SparseFloatVector,
        +  },
        +];
        +
        +
        +
        +
        export schema='{
        +        "autoId": true,
        +        "enabledDynamicField": false,
        +        "fields": [
        +            {
        +                "fieldName": "id",
        +                "dataType": "Int64",
        +                "isPrimary": true
        +            },
        +            {
        +                "fieldName": "text",
        +                "dataType": "VarChar",
        +                "elementTypeParams": {
        +                    "max_length": 1000,
        +                    "enable_analyzer": true
        +                }
        +            },
        +            {
        +                "fieldName": "sparse",
        +                "dataType": "SparseFloatVector"
        +            }
        +        ]
        +    }'
         

Nesta configuração:

          -
        • id: serve de chave primária e é gerado automaticamente com auto_id=True.

        • +
        • id: serve como chave primária e é gerado automaticamente com auto_id=True.

        • text: armazena os seus dados de texto em bruto para operações de pesquisa de texto completo. O tipo de dados deve ser VARCHAR, uma vez que VARCHAR é o tipo de dados de cadeia de caracteres do Milvus para armazenamento de texto. Defina enable_analyzer=True para permitir que o Milvus tokenize o texto. Por defeito, o Milvus utiliza o analisador padrão para a análise de texto. Para configurar um analisador diferente, consulte Visão geral.

        • -
        • sparseCampo vetorial: um campo vetorial reservado para armazenar as incorporações esparsas geradas internamente para operações de pesquisa de texto completo. O tipo de dados deve ser SPARSE_FLOAT_VECTOR.

        • +
        • sparseCampo vetorial: um campo vetorial reservado para armazenar as incorporações esparsas geradas internamente para operações de pesquisa de texto completo. O tipo de dados tem de ser SPARSE_FLOAT_VECTOR.

        -

        Agora, defina uma função que converta o seu texto em representações vectoriais esparsas e, em seguida, adicione-a ao esquema.

        +

        Agora, defina uma função que converterá o seu texto em representações vectoriais esparsas e, em seguida, adicione-a ao esquema.

        +
        bm25_function = Function(​
             name="text_bm25_emb", # Function name​
             input_field_names=["text"], # Name of the VARCHAR field containing raw text data​
        @@ -107,6 +185,62 @@ schema.add_field(field_name="sparse",
         ​
         schema.add_function(bm25_function)​
         
        +
        +
        import io.milvus.common.clientenum.FunctionType;
        +import io.milvus.v2.service.collection.request.CreateCollectionReq.Function;
        +
        +import java.util.*;
        +
        +schema.addFunction(Function.builder()
        +        .functionType(FunctionType.BM25)
        +        .name("text_bm25_emb")
        +        .inputFieldNames(Collections.singletonList("text"))
+        .outputFieldNames(Collections.singletonList("sparse"))
        +        .build());
        +
        +
        const functions = [
        +    {
        +      name: 'text_bm25_emb',
        +      description: 'bm25 function',
        +      type: FunctionType.BM25,
        +      input_field_names: ['text'],
+      output_field_names: ['sparse'],
        +      params: {},
        +    },
        +];
        +
        +
        export schema='{
        +        "autoId": true,
        +        "enabledDynamicField": false,
        +        "fields": [
        +            {
        +                "fieldName": "id",
        +                "dataType": "Int64",
        +                "isPrimary": true
        +            },
        +            {
        +                "fieldName": "text",
        +                "dataType": "VarChar",
        +                "elementTypeParams": {
        +                    "max_length": 1000,
        +                    "enable_analyzer": true
        +                }
        +            },
        +            {
        +                "fieldName": "sparse",
        +                "dataType": "SparseFloatVector"
        +            }
        +        ],
        +        "functions": [
        +            {
        +                "name": "text_bm25_emb",
        +                "type": "BM25",
        +                "inputFieldNames": ["text"],
        +                "outputFieldNames": ["sparse"],
        +                "params": {}
        +            }
        +        ]
        +    }'
         

        Parâmetro

        Descrição

        @@ -124,6 +258,8 @@ schema.add_function(bm25_function)​

Para colecções com vários campos VARCHAR que requerem conversão de texto em vetor esparso, adicione funções separadas ao esquema da coleção, assegurando que cada função tem um nome exclusivo e um valor de output_field_names distinto, como no esboço abaixo.
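Um esboço em Python (reutilizando o schema e as importações do exemplo acima; os nomes title e title_sparse são hipotéticos) de uma segunda função BM25 para um campo VARCHAR adicional:

schema.add_field(field_name="title", datatype=DataType.VARCHAR, max_length=500, enable_analyzer=True)
schema.add_field(field_name="title_sparse", datatype=DataType.SPARSE_FLOAT_VECTOR)

schema.add_function(Function(
    name="title_bm25_emb",                 # nome único da função
    input_field_names=["title"],           # campo VARCHAR de entrada
    output_field_names=["title_sparse"],   # campo esparso de saída exclusivo desta função
    function_type=FunctionType.BM25,
))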

        Configurar o índice

        Depois de definir o esquema com os campos necessários e a função incorporada, configure o índice para a sua coleção. Para simplificar este processo, utilize AUTOINDEX como index_type, uma opção que permite ao Milvus escolher e configurar o tipo de índice mais adequado com base na estrutura dos seus dados.

        +
        index_params = MilvusClient.prepare_index_params()​
         ​
         index_params.add_index(​
        @@ -132,23 +268,78 @@ index_params.add_index(​
             metric_type="BM25"​
         )​
         
        +
        +
        import io.milvus.v2.common.IndexParam;
        +
        +List<IndexParam> indexes = new ArrayList<>();
        +indexes.add(IndexParam.builder()
        +        .fieldName("sparse")
        +        .indexType(IndexParam.IndexType.SPARSE_INVERTED_INDEX)
        +        .metricType(IndexParam.MetricType.BM25)
        +        .build());
        +
        +
        const index_params = [
        +  {
        +    fieldName: "sparse",
        +    metricType: "BM25",
        +    indexType: "AUTOINDEX",
        +  },
        +];
        +
        +
        export indexParams='[
        +        {
        +            "fieldName": "sparse",
        +            "metricType": "BM25",
        +            "indexType": "AUTOINDEX"
        +        }
        +    ]'
         

        Parâmetro

        Descrição

        field_name

        -

        O nome do campo vetorial a indexar. Para a pesquisa de texto integral, este deve ser o campo que armazena os vectores esparsos gerados. Neste exemplo, defina o valor para sparse.

        +

        O nome do campo vetorial a indexar. Para pesquisa de texto completo, este deve ser o campo que armazena os vectores esparsos gerados. Neste exemplo, defina o valor para sparse.

        index_type

        O tipo de índice a criar. AUTOINDEX permite ao Milvus otimizar automaticamente as definições do índice. Se precisar de mais controlo sobre as definições do índice, pode escolher entre vários tipos de índices disponíveis para vectores esparsos no Milvus. Para mais informações, consulte Índices suportados no Milvus.

        metric_type

        O valor deste parâmetro deve ser definido como BM25 especificamente para a funcionalidade de pesquisa de texto completo.

        Criar a coleção

        Crie agora a coleção utilizando o esquema e os parâmetros de índice definidos.

        +
        MilvusClient.create_collection(​
             collection_name='demo', ​
             schema=schema, ​
             index_params=index_params​
         )​
         
        +
        +
        import io.milvus.v2.service.collection.request.CreateCollectionReq;
        +
        +CreateCollectionReq requestCreate = CreateCollectionReq.builder()
        +        .collectionName("demo")
        +        .collectionSchema(schema)
        +        .indexParams(indexes)
        +        .build();
        +client.createCollection(requestCreate);
        +
        +
        await client.create_collection(
        +    collection_name: 'demo', 
        +    schema: schema, 
        +    index_params: index_params
        +);
        +
        +
        export CLUSTER_ENDPOINT="http://localhost:19530"
        +export TOKEN="root:Milvus"
        +
        +curl --request POST \
        +--url "${CLUSTER_ENDPOINT}/v2/vectordb/collections/create" \
        +--header "Authorization: Bearer ${TOKEN}" \
        +--header "Content-Type: application/json" \
        +-d "{
        +    \"collectionName\": \"demo\",
        +    \"schema\": $schema,
        +    \"indexParams\": $indexParams
        +}"
         

        Inserir dados de texto

        Depois de configurar a coleção e o índice, está pronto para inserir dados de texto. Neste processo, só precisa de fornecer o texto em bruto. A função incorporada que definimos anteriormente gera automaticamente o vetor esparso correspondente para cada entrada de texto.

        -
        MilvusClient.insert('demo', [​
        -    {'text': 'Artificial intelligence was founded as an academic discipline in 1956.'},​
        -    {'text': 'Alan Turing was the first person to conduct substantial research in AI.'},​
        -    {'text': 'Born in Maida Vale, London, Turing was raised in southern England.'},​
        -])​
        +
        +
        client.insert('demo', [
        +    {'text': 'information retrieval is a field of study.'},
        +    {'text': 'information retrieval focuses on finding relevant information in large datasets.'},
        +    {'text': 'data mining and information retrieval overlap in research.'},
        +])
         
        +
        +
        import com.google.gson.Gson;
        +import com.google.gson.JsonObject;
        +
        +import io.milvus.v2.service.vector.request.InsertReq;
        +
        +Gson gson = new Gson();
        +List<JsonObject> rows = Arrays.asList(
        +        gson.fromJson("{\"text\": \"information retrieval is a field of study.\"}", JsonObject.class),
        +        gson.fromJson("{\"text\": \"information retrieval focuses on finding relevant information in large datasets.\"}", JsonObject.class),
        +        gson.fromJson("{\"text\": \"data mining and information retrieval overlap in research.\"}", JsonObject.class)
        +);
        +
        +client.insert(InsertReq.builder()
        +        .collectionName("demo")
        +        .data(rows)
        +        .build());
        +
        +
await client.insert({
+    collection_name: 'demo', 
+    data: [
+        {'text': 'information retrieval is a field of study.'},
+        {'text': 'information retrieval focuses on finding relevant information in large datasets.'},
+        {'text': 'data mining and information retrieval overlap in research.'},
+    ]
+});
        +
        +
        curl --request POST \
        +--url "${CLUSTER_ENDPOINT}/v2/vectordb/entities/insert" \
        +--header "Authorization: Bearer ${TOKEN}" \
        +--header "Content-Type: application/json" \
        +-d '{
        +    "data": [
        +        {"text": "information retrieval is a field of study."},
        +        {"text": "information retrieval focuses on finding relevant information in large datasets."},
        +        {"text": "data mining and information retrieval overlap in research."}       
        +    ],
        +    "collectionName": "demo"
        +}'
         

        Uma vez inseridos os dados na sua coleção, pode efetuar pesquisas de texto completo utilizando consultas de texto em bruto. Milvus converte automaticamente a sua consulta num vetor esparso e classifica os resultados de pesquisa correspondentes utilizando o algoritmo BM25, e depois devolve os resultados topK (limit).

        +

        Depois de inserir os dados na sua coleção, pode efetuar pesquisas de texto completo utilizando consultas de texto em bruto. Milvus converte automaticamente a sua consulta num vetor esparso e classifica os resultados de pesquisa correspondentes utilizando o algoritmo BM25, devolvendo depois os resultados topK (limit).

        +
        search_params = {​
             'params': {'drop_ratio_search': 0.6},​
         }​
         ​
 client.search(
             collection_name='demo', ​
        -    data=['Who started AI research?'],​
        +    data=['whats the focus of information retrieval?'],​
             anns_field='sparse',​
             limit=3,​
             search_params=search_params​
         )​
         
        +
        +
        import io.milvus.v2.service.vector.request.SearchReq;
        +import io.milvus.v2.service.vector.request.data.EmbeddedText;
        +import io.milvus.v2.service.vector.response.SearchResp;
        +
        +Map<String,Object> searchParams = new HashMap<>();
        +searchParams.put("drop_ratio_search", 0.6);
        +SearchResp searchResp = client.search(SearchReq.builder()
        +        .collectionName("demo")
        +        .data(Collections.singletonList(new EmbeddedText("whats the focus of information retrieval?")))
        +        .annsField("sparse")
        +        .topK(3)
        +        .searchParams(searchParams)
        +        .outputFields(Collections.singletonList("text"))
        +        .build());
        +
        +
await client.search({
+    collection_name: 'demo', 
+    data: ['whats the focus of information retrieval?'],
+    anns_field: 'sparse',
+    limit: 3,
+    params: {'drop_ratio_search': 0.6},
+});
        +
        +
        curl --request POST \
        +--url "${CLUSTER_ENDPOINT}/v2/vectordb/entities/search" \
        +--header "Authorization: Bearer ${TOKEN}" \
        +--header "Content-Type: application/json" \
        +--data-raw '{
        +    "collectionName": "demo",
        +    "data": [
        +        "whats the focus of information retrieval?"
        +    ],
        +    "annsField": "sparse",
        +    "limit": 3,
        +    "outputFields": [
        +        "text"
        +    ],
        +    "searchParams":{
        +        "params":{
        +            "drop_ratio_search":0.6
        +        }
        +    }
        +}'
         

        Parâmetro

        Descrição

        diff --git a/localization/v2.5.x/site/pt/userGuide/search-query-get/keyword-match.json b/localization/v2.5.x/site/pt/userGuide/search-query-get/keyword-match.json index 94a3bdc03..33c92ad5d 100644 --- a/localization/v2.5.x/site/pt/userGuide/search-query-get/keyword-match.json +++ b/localization/v2.5.x/site/pt/userGuide/search-query-get/keyword-match.json @@ -1 +1 @@ -{"codeList":["from pymilvus import MilvusClient, DataType​\n​\nschema = MilvusClient.create_schema(auto_id=True, enable_dynamic_field=False)​\n​\nschema.add_field(​\n field_name='text', ​\n datatype=DataType.VARCHAR, ​\n max_length=1000, ​\n enable_analyzer=True, # Whether to enable text analysis for this field​\n enable_match=True # Whether to enable text match​\n)​\n\n","analyzer_params={​\n \"type\": \"english\"​\n}​\n​\nschema.add_field(​\n field_name='text', ​\n datatype=DataType.VARCHAR, ​\n max_length=200, ​\n enable_analyzer=True,​\n analyzer_params=analyzer_params,​\n enable_match=True, ​\n)​\n\n","TEXT_MATCH(field_name, text)​\n\n","filter = \"TEXT_MATCH(text, 'machine deep')\"​\n\n","filter = \"TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')\"​\n\n","# Match entities with `keyword1` or `keyword2`​\nfilter = \"TEXT_MATCH(text, 'keyword1 keyword2')\"​\n​\n# Assuming 'embeddings' is the vector field and 'text' is the VARCHAR field​\nresult = MilvusClient.search(​\n collection_name=\"YOUR_COLLECTION_NAME\", # Your collection name​\n anns_field=\"embeddings\", # Vector field name​\n data=[query_vector], # Query vector​\n filter=filter,​\n search_params={\"params\": {\"nprobe\": 10}},​\n limit=10, # Max. number of results to return​\n output_fields=[\"id\", \"text\"] # Fields to return​\n)​\n\n","# Match entities with both `keyword1` and `keyword2`​\nfilter = \"TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')\"​\n​\nresult = MilvusClient.query(​\n collection_name=\"YOUR_COLLECTION_NAME\",​\n filter=filter, ​\n output_fields=[\"id\", \"text\"]​\n)​\n\n"],"headingContent":"Keyword Match​","anchorList":[{"label":"Correspondência de palavras-chave","href":"Keyword-Match​","type":1,"isActive":false},{"label":"Visão geral","href":"Overview","type":2,"isActive":false},{"label":"Ativar a correspondência de palavras-chave","href":"Enable-keyword-match","type":2,"isActive":false},{"label":"Utilizar a correspondência de palavras-chave","href":"Use-keyword-match","type":2,"isActive":false},{"label":"Considerações","href":"Considerations","type":2,"isActive":false}]} \ No newline at end of file +{"codeList":["from pymilvus import MilvusClient, DataType​\n​\nschema = MilvusClient.create_schema(auto_id=True, enable_dynamic_field=False)​\n​\nschema.add_field(​\n field_name='text', ​\n datatype=DataType.VARCHAR, ​\n max_length=1000, ​\n enable_analyzer=True, # Whether to enable text analysis for this field​\n enable_match=True # Whether to enable text match​\n)​\n\n","import io.milvus.v2.common.DataType;\nimport io.milvus.v2.service.collection.request.AddFieldReq;\nimport io.milvus.v2.service.collection.request.CreateCollectionReq;\n\nCreateCollectionReq.CollectionSchema schema = CreateCollectionReq.CollectionSchema.builder()\n .enableDynamicField(false)\n .build();\n\nschema.addField(AddFieldReq.builder()\n .fieldName(\"text\")\n .dataType(DataType.VarChar)\n .maxLength(1000)\n .enableAnalyzer(true)\n .enableMatch(true)\n .build());\n\n","const schema = [\n {\n name: \"id\",\n data_type: DataType.Int64,\n is_primary_key: true,\n },\n {\n name: \"text\",\n data_type: \"VarChar\",\n enable_analyzer: true,\n 
enable_match: true,\n max_length: 1000,\n },\n {\n name: \"sparse\",\n data_type: DataType.SparseFloatVector,\n },\n];\n\n","export schema='{\n \"autoId\": true,\n \"enabledDynamicField\": false,\n \"fields\": [\n {\n \"fieldName\": \"id\",\n \"dataType\": \"Int64\",\n \"isPrimary\": true\n },\n {\n \"fieldName\": \"text\",\n \"dataType\": \"VarChar\",\n \"elementTypeParams\": {\n \"max_length\": 1000,\n \"enable_analyzer\": true,\n \"enable_match\": true\n }\n },\n {\n \"fieldName\": \"sparse\",\n \"dataType\": \"SparseFloatVector\"\n }\n ]\n }'\n\n","analyzer_params={​\n \"type\": \"english\"​\n}​\n​\nschema.add_field(​\n field_name='text', ​\n datatype=DataType.VARCHAR, ​\n max_length=200, ​\n enable_analyzer=True,​\n analyzer_params=analyzer_params,​\n enable_match=True, ​\n)​\n\n","Map analyzerParams = new HashMap<>();\nanalyzerParams.put(\"type\", \"english\");\nschema.addField(AddFieldReq.builder()\n .fieldName(\"text\")\n .dataType(DataType.VarChar)\n .maxLength(200)\n .enableAnalyzer(true)\n .analyzerParams(analyzerParams)\n .enableMatch(true)\n .build());\n\n","const schema = [\n {\n name: \"id\",\n data_type: DataType.Int64,\n is_primary_key: true,\n },\n {\n name: \"text\",\n data_type: \"VarChar\",\n enable_analyzer: true,\n enable_match: true,\n max_length: 1000,\n analyzer_params: { type: 'english' },\n },\n {\n name: \"sparse\",\n data_type: DataType.SparseFloatVector,\n },\n];\n\n","export schema='{\n \"autoId\": true,\n \"enabledDynamicField\": false,\n \"fields\": [\n {\n \"fieldName\": \"id\",\n \"dataType\": \"Int64\",\n \"isPrimary\": true\n },\n {\n \"fieldName\": \"text\",\n \"dataType\": \"VarChar\",\n \"elementTypeParams\": {\n \"max_length\": 200,\n \"enable_analyzer\": true,\n \"enable_match\": true,\n \"analyzer_params\": {\"type\": \"english\"}\n }\n },\n {\n \"fieldName\": \"my_vector\",\n \"dataType\": \"FloatVector\",\n \"elementTypeParams\": {\n \"dim\": \"5\"\n }\n }\n ]\n }'\n\n","TEXT_MATCH(field_name, text)​\n\n","filter = \"TEXT_MATCH(text, 'machine deep')\"​\n","String filter = \"TEXT_MATCH(text, 'machine deep')\";\n","const filter = \"TEXT_MATCH(text, 'machine deep')\";\n","export filter=\"\\\"TEXT_MATCH(text, 'machine deep')\\\"\"\n","filter = \"TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')\"​\n","String filter = \"TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')\";\n","const filter = \"TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')\"\n","export filter=\"\\\"TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')\\\"\"\n","# Match entities with `keyword1` or `keyword2`​\nfilter = \"TEXT_MATCH(text, 'keyword1 keyword2')\"​\n​\n# Assuming 'embeddings' is the vector field and 'text' is the VARCHAR field​\nresult = MilvusClient.search(​\n collection_name=\"YOUR_COLLECTION_NAME\", # Your collection name​\n anns_field=\"embeddings\", # Vector field name​\n data=[query_vector], # Query vector​\n filter=filter,​\n search_params={\"params\": {\"nprobe\": 10}},​\n limit=10, # Max. 
number of results to return​\n output_fields=[\"id\", \"text\"] # Fields to return​\n)​\n\n","String filter = \"TEXT_MATCH(text, 'keyword1 keyword2')\";\n\nSearchResp searchResp = client.search(SearchReq.builder()\n .collectionName(\"YOUR_COLLECTION_NAME\")\n .annsField(\"embeddings\")\n .data(Collections.singletonList(queryVector)))\n .filter(filter)\n .topK(10)\n .outputFields(Arrays.asList(\"id\", \"text\"))\n .build());\n","// Match entities with `keyword1` or `keyword2`\nconst filter = \"TEXT_MATCH(text, 'keyword1 keyword2')\";\n\n// Assuming 'embeddings' is the vector field and 'text' is the VARCHAR field\nconst result = await client.search(\n collection_name: \"YOUR_COLLECTION_NAME\", // Your collection name\n anns_field: \"embeddings\", // Vector field name\n data: [query_vector], // Query vector\n filter: filter,\n params: {\"nprobe\": 10},\n limit: 10, // Max. number of results to return\n output_fields: [\"id\", \"text\"] //Fields to return\n);\n","export filter=\"\\\"TEXT_MATCH(text, 'keyword1 keyword2')\\\"\"\n\nexport CLUSTER_ENDPOINT=\"http://localhost:19530\"\nexport TOKEN=\"root:Milvus\"\n\ncurl --request POST \\\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/entities/search\" \\\n--header \"Authorization: Bearer ${TOKEN}\" \\\n--header \"Content-Type: application/json\" \\\n-d '{\n \"collectionName\": \"demo2\",\n \"annsField\": \"my_vector\",\n \"data\": [[0.19886812562848388, 0.06023560599112088, 0.6976963061752597, 0.2614474506242501, 0.838729485096104]],\n \"filter\": '\"$filter\"',\n \"searchParams\": {\n \"params\": {\n \"nprobe\": 10\n }\n },\n \"limit\": 3,\n \"outputFields\": [\"text\",\"id\"]\n}'\n","# Match entities with both `keyword1` and `keyword2`​\nfilter = \"TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')\"​\n​\nresult = MilvusClient.query(​\n collection_name=\"YOUR_COLLECTION_NAME\",​\n filter=filter, ​\n output_fields=[\"id\", \"text\"]​\n)​\n\n","String filter = \"TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')\";\n\nQueryResp queryResp = client.query(QueryReq.builder()\n .collectionName(\"YOUR_COLLECTION_NAME\")\n .filter(filter)\n .outputFields(Arrays.asList(\"id\", \"text\"))\n .build()\n);\n","// Match entities with both `keyword1` and `keyword2`\nconst filter = \"TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')\";\n\nconst result = await client.query(\n collection_name: \"YOUR_COLLECTION_NAME\",\n filter: filter, \n output_fields: [\"id\", \"text\"]\n)\n","export filter=\"\\\"TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')\\\"\"\n\nexport CLUSTER_ENDPOINT=\"http://localhost:19530\"\nexport TOKEN=\"root:Milvus\"\n\ncurl --request POST \\\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/entities/query\" \\\n--header \"Authorization: Bearer ${TOKEN}\" \\\n--header \"Content-Type: application/json\" \\\n-d '{\n \"collectionName\": \"demo2\",\n \"filter\": '\"$filter\"',\n \"outputFields\": [\"id\", \"text\"]\n}'\n"],"headingContent":"Text Match​","anchorList":[{"label":"Correspondência de texto","href":"Text-Match​","type":1,"isActive":false},{"label":"Visão geral","href":"Overview","type":2,"isActive":false},{"label":"Ativar a correspondência de texto","href":"Enable-text-match","type":2,"isActive":false},{"label":"Utilizar a correspondência de texto","href":"Use-text-match","type":2,"isActive":false},{"label":"Considerações","href":"Considerations","type":2,"isActive":false}]} \ No newline at end of file diff --git a/localization/v2.5.x/site/pt/userGuide/search-query-get/keyword-match.md 
b/localization/v2.5.x/site/pt/userGuide/search-query-get/keyword-match.md index c3d08a19e..8a2f860fc 100644 --- a/localization/v2.5.x/site/pt/userGuide/search-query-get/keyword-match.md +++ b/localization/v2.5.x/site/pt/userGuide/search-query-get/keyword-match.md @@ -1,15 +1,15 @@ --- id: keyword-match.md summary: >- - A correspondência de palavras-chave no Milvus permite a recuperação precisa de + A correspondência de texto no Milvus permite a recuperação precisa de documentos com base em termos específicos. Esta funcionalidade é utilizada principalmente para pesquisa filtrada para satisfazer condições específicas e pode incorporar filtragem escalar para refinar os resultados da consulta, permitindo pesquisas por semelhança dentro de vectores que satisfaçam critérios escalares. -title: Correspondência de palavras-chave +title: Correspondência de texto --- -

        Correspondência de palavras-chave

        A correspondência de palavras-chave no Milvus permite a recuperação precisa de documentos com base em termos específicos. Esta funcionalidade é utilizada principalmente para pesquisa filtrada para satisfazer condições específicas e pode incorporar filtragem escalar para refinar os resultados da consulta, permitindo pesquisas por semelhança dentro de vectores que satisfaçam critérios escalares.

        +

        A correspondência de texto em Milvus permite a recuperação precisa de documentos com base em termos específicos. Esta funcionalidade é utilizada principalmente para pesquisa filtrada para satisfazer condições específicas e pode incorporar filtragem escalar para refinar os resultados da consulta, permitindo pesquisas de semelhança dentro de vectores que satisfaçam critérios escalares.

        -

        A correspondência de palavras-chave centra-se na procura de ocorrências exactas dos termos de consulta, sem pontuar a relevância dos documentos correspondentes. Se pretender obter os documentos mais relevantes com base no significado semântico e na importância dos termos de consulta, recomendamos que utilize a Pesquisa de texto integral.

        +

        A correspondência de texto concentra-se em encontrar ocorrências exactas dos termos de consulta, sem pontuar a relevância dos documentos correspondentes. Se pretender obter os documentos mais relevantes com base no significado semântico e na importância dos termos de consulta, recomendamos que utilize a Pesquisa de texto integral.

        Visão geral

        O Milvus integra o Tantivy para alimentar o seu índice invertido subjacente e a pesquisa por palavras-chave. Para cada entrada de texto, Milvus indexa-o seguindo o procedimento.

        +

O Milvus integra o Tantivy para alimentar o seu índice invertido subjacente e a pesquisa de texto baseada em termos. Para cada entrada de texto, o Milvus indexa-a seguindo o procedimento descrito abaixo.

        1. Analisador: O analisador processa o texto de entrada, transformando-o em palavras individuais, ou tokens, e aplicando filtros conforme necessário. Isto permite ao Milvus construir um índice baseado nestes tokens.

        2. Indexação: Após a análise do texto, o Milvus cria um índice invertido que mapeia cada token único para os documentos que o contêm.

        -

        Quando um utilizador efectua uma correspondência de palavras-chave, o índice invertido é utilizado para recuperar rapidamente todos os documentos que contêm as palavras-chave. Isto é muito mais rápido do que analisar cada documento individualmente.

        +

        Quando um utilizador efectua uma correspondência de texto, o índice invertido é utilizado para recuperar rapidamente todos os documentos que contêm os termos. Isto é muito mais rápido do que analisar cada documento individualmente.
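Para tornar a ideia mais concreta, segue-se um esboço meramente ilustrativo em Python (não é o código interno do Milvus nem do Tantivy) que mostra como um índice invertido mapeia cada token para os documentos que o contêm, permitindo recuperar candidatos sem percorrer todos os documentos.

```python
from collections import defaultdict

docs = {
    1: "information retrieval is a field of study",
    2: "data mining and information retrieval overlap in research",
}

# Construir um índice invertido de brinquedo: token -> conjunto de IDs de documentos.
inverted_index = defaultdict(set)
for doc_id, text in docs.items():
    for token in text.lower().split():   # "analisador" trivial: minúsculas + separação por espaços
        inverted_index[token].add(doc_id)

# A pesquisa é um acesso ao dicionário, em vez de percorrer todos os documentos.
print(inverted_index["retrieval"])       # {1, 2}
print(inverted_index["mining"])          # {2}
```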

        - Keyword Match - Correspondência de palavras-chave

        -

        Ativar a correspondência de palavras-chave

        A correspondência de palavras-chave funciona no tipo de campo VARCHAR, que é essencialmente o tipo de dados de cadeia em Milvus. Para ativar a correspondência de palavras-chave, defina enable_analyzer e enable_match para True e, em seguida, configure opcionalmente um analisador para análise de texto ao definir o esquema da coleção.

        -

        Definir enable_analyzer e enable_match

        Para ativar a correspondência de palavras-chave para um campo VARCHAR específico, defina os parâmetros enable_analyzer e enable_match para True ao definir o esquema do campo. Isto instrui o Milvus a tokenizar o texto e a criar um índice invertido para o campo especificado, permitindo correspondências de palavras-chave rápidas e eficientes.

        +

        A correspondência de texto funciona no tipo de campo VARCHAR, que é essencialmente o tipo de dados de cadeia em Milvus. Para ativar a correspondência de texto, defina enable_analyzer e enable_match para True e, em seguida, configure opcionalmente um analisador para análise de texto ao definir o esquema da coleção.

        +

        Definir enable_analyzer e enable_match

        Para ativar a correspondência de texto para um campo VARCHAR específico, defina os parâmetros enable_analyzer e enable_match para True ao definir o esquema do campo. Isto instrui o Milvus a tokenizar o texto e a criar um índice invertido para o campo especificado, permitindo correspondências de texto rápidas e eficientes.

        +
        from pymilvus import MilvusClient, DataType​
         ​
         schema = MilvusClient.create_schema(auto_id=True, enable_dynamic_field=False)​
        @@ -83,9 +85,74 @@ schema.add_field(​
         )​
         
         
        -

        Opcional: Configurar um analisador

        O desempenho e a precisão da correspondência de palavras-chave dependem do analisador selecionado. Diferentes analisadores são adaptados a várias línguas e estruturas de texto, por isso escolher o correto pode ter um impacto significativo nos resultados de pesquisa para o seu caso de utilização específico.

        -

        Por predefinição, o Milvus utiliza o analisador standard, que tokeniza o texto com base em espaços em branco e pontuação, remove tokens com mais de 40 caracteres e converte o texto para minúsculas. Não são necessários parâmetros adicionais para aplicar essa configuração padrão. Para obter mais informações, consulte Padrão.

        +
        import io.milvus.v2.common.DataType;
        +import io.milvus.v2.service.collection.request.AddFieldReq;
        +import io.milvus.v2.service.collection.request.CreateCollectionReq;
        +
        +CreateCollectionReq.CollectionSchema schema = CreateCollectionReq.CollectionSchema.builder()
        +        .enableDynamicField(false)
        +        .build();
        +
        +schema.addField(AddFieldReq.builder()
        +        .fieldName("text")
        +        .dataType(DataType.VarChar)
        +        .maxLength(1000)
        +        .enableAnalyzer(true)
        +        .enableMatch(true)
        +        .build());
        +
        +
        +
        const schema = [
        +  {
        +    name: "id",
        +    data_type: DataType.Int64,
        +    is_primary_key: true,
        +  },
        +  {
        +    name: "text",
        +    data_type: "VarChar",
        +    enable_analyzer: true,
        +    enable_match: true,
        +    max_length: 1000,
        +  },
        +  {
        +    name: "sparse",
        +    data_type: DataType.SparseFloatVector,
        +  },
        +];
        +
        +
        +
        export schema='{
        +        "autoId": true,
        +        "enabledDynamicField": false,
        +        "fields": [
        +            {
        +                "fieldName": "id",
        +                "dataType": "Int64",
        +                "isPrimary": true
        +            },
        +            {
        +                "fieldName": "text",
        +                "dataType": "VarChar",
        +                "elementTypeParams": {
        +                    "max_length": 1000,
        +                    "enable_analyzer": true,
        +                    "enable_match": true
        +                }
        +            },
        +            {
        +                "fieldName": "sparse",
        +                "dataType": "SparseFloatVector"
        +            }
        +        ]
        +    }'
        +
        +
        +

        Opcional: Configurar um analisador

        O desempenho e a precisão da correspondência de texto dependem do analisador selecionado. Diferentes analisadores são adaptados a várias línguas e estruturas de texto, pelo que a escolha do analisador correto pode ter um impacto significativo nos resultados da pesquisa para o seu caso de utilização específico.

        +

Por predefinição, o Milvus utiliza o analisador standard, que tokeniza o texto com base em espaços em branco e pontuação, remove os tokens com mais de 40 caracteres e converte o texto em minúsculas. Não são necessários parâmetros adicionais para aplicar essa configuração padrão. Para obter mais informações, consulte Padrão.

        Nos casos em que um analisador diferente é necessário, você pode configurá-lo usando o parâmetro analyzer_params. Por exemplo, para aplicar o analisador english para processar texto em inglês.

        +
        analyzer_params={​
             "type": "english"​
         }​
        @@ -99,9 +166,72 @@ schema.add_field(​
             enable_match=True, ​
         )​
         
        +
        +
        Map<String, Object> analyzerParams = new HashMap<>();
        +analyzerParams.put("type", "english");
        +schema.addField(AddFieldReq.builder()
        +        .fieldName("text")
        +        .dataType(DataType.VarChar)
        +        .maxLength(200)
        +        .enableAnalyzer(true)
        +        .analyzerParams(analyzerParams)
        +        .enableMatch(true)
        +        .build());
        +
        +
        +
        const schema = [
        +  {
        +    name: "id",
        +    data_type: DataType.Int64,
        +    is_primary_key: true,
        +  },
        +  {
        +    name: "text",
        +    data_type: "VarChar",
        +    enable_analyzer: true,
        +    enable_match: true,
        +    max_length: 1000,
        +    analyzer_params: { type: 'english' },
        +  },
        +  {
        +    name: "sparse",
        +    data_type: DataType.SparseFloatVector,
        +  },
        +];
        +
        +
        +
        export schema='{
        +        "autoId": true,
        +        "enabledDynamicField": false,
        +        "fields": [
        +            {
        +                "fieldName": "id",
        +                "dataType": "Int64",
        +                "isPrimary": true
        +            },
        +            {
        +                "fieldName": "text",
        +                "dataType": "VarChar",
        +                "elementTypeParams": {
        +                    "max_length": 200,
        +                    "enable_analyzer": true,
        +                    "enable_match": true,
        +                    "analyzer_params": {"type": "english"}
        +                }
        +            },
        +            {
        +                "fieldName": "my_vector",
        +                "dataType": "FloatVector",
        +                "elementTypeParams": {
        +                    "dim": "5"
        +                }
        +            }
        +        ]
        +    }'
        +
         

        O Milvus também fornece vários outros analisadores adequados a diferentes idiomas e cenários. Para obter mais detalhes, consulte Visão geral.

        -

        Utilizar a correspondência de palavras-chave

        Depois de ativar a correspondência de palavras-chave para um campo VARCHAR no seu esquema de coleção, pode efetuar correspondências de palavras-chave utilizando a expressão TEXT_MATCH.

        -

        Sintaxe da expressão TEXT_MATCH

        A expressão TEXT_MATCH é utilizada para especificar o campo e as palavras-chave a pesquisar. A sua sintaxe é a seguinte.

        -
        TEXT_MATCH(field_name, text)​
        +    

        Depois de ativar a correspondência de texto para um campo VARCHAR no seu esquema de coleção, pode efetuar correspondências de texto utilizando a expressão TEXT_MATCH.

        +

        Sintaxe da expressão TEXT_MATCH

        A expressão TEXT_MATCH é utilizada para especificar o campo e os termos a pesquisar. A sua sintaxe é a seguinte.

        +
        TEXT_MATCH(field_name, text)​
         
         
        • field_name: O nome do campo VARCHAR a ser pesquisado.

        • -
        • text: As palavras-chave a pesquisar. Várias palavras-chave podem ser separadas por espaços ou outros delimitadores apropriados com base no idioma e no analisador configurado.

        • +
        • text: Os termos a serem pesquisados. Vários termos podem ser separados por espaços ou outros delimitadores apropriados com base no idioma e no analisador configurado.

        -

        Por padrão, TEXT_MATCH usa a lógica de correspondência OR, o que significa que ele retornará documentos que contêm qualquer uma das palavras-chave especificadas. Por exemplo, para pesquisar documentos que contenham as palavras-chave machine ou deep no campo text, use a seguinte expressão.

        +

        Por padrão, TEXT_MATCH usa a lógica de correspondência OR, o que significa que ele retornará documentos que contêm qualquer um dos termos especificados. Por exemplo, para pesquisar documentos que contenham o termo machine ou deep no campo text, use a seguinte expressão.

        +
        filter = "TEXT_MATCH(text, 'machine deep')"​
        -
         
        -

        Também é possível combinar várias expressões TEXT_MATCH usando operadores lógicos para realizar a correspondência AND. Por exemplo, para pesquisar documentos que contenham machine e deep no campo text, use a seguinte expressão.

        +
        String filter = "TEXT_MATCH(text, 'machine deep')";
        +
        +
        const filter = "TEXT_MATCH(text, 'machine deep')";
        +
        +
        export filter="\"TEXT_MATCH(text, 'machine deep')\""
        +
        +

        Também pode combinar várias expressões TEXT_MATCH utilizando operadores lógicos para efetuar a correspondência AND. Por exemplo, para procurar documentos que contenham machine e deep no campo text, utilize a seguinte expressão.

        +
        filter = "TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')"​
        -
         
        -

        Pesquisar com correspondência de palavras-chave

        A correspondência de palavras-chave pode ser utilizada em combinação com a pesquisa de semelhança de vectores para limitar o âmbito da pesquisa e melhorar o desempenho da mesma. Ao filtrar a coleção utilizando a correspondência de palavras-chave antes da pesquisa de semelhança de vectores, pode reduzir o número de documentos que precisam de ser pesquisados, resultando em tempos de consulta mais rápidos.

        -

        Neste exemplo, a expressão filter filtra os resultados da pesquisa para incluir apenas documentos que correspondam às palavras-chave especificadas keyword1 ou keyword2. A pesquisa de similaridade de vetor é então executada neste subconjunto filtrado de documentos.

        +
        String filter = "TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')";
        +
        +
        const filter = "TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')"
        +
        +
        export filter="\"TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')\""
        +
        +

        Pesquisa com correspondência de texto

        A correspondência de texto pode ser usada em combinação com a pesquisa de similaridade de vetor para restringir o escopo da pesquisa e melhorar o desempenho da pesquisa. Ao filtrar a coleção utilizando a correspondência de texto antes da pesquisa de semelhança vetorial, pode reduzir o número de documentos que precisam de ser pesquisados, resultando em tempos de consulta mais rápidos.

        +

        Neste exemplo, a expressão filter filtra os resultados da pesquisa para incluir apenas documentos que correspondam ao termo especificado keyword1 ou keyword2. A pesquisa de similaridade de vetor é então executada neste subconjunto filtrado de documentos.

        +
        # Match entities with `keyword1` or `keyword2`​
         filter = "TEXT_MATCH(text, 'keyword1 keyword2')"​
         ​
        @@ -150,8 +296,58 @@ result = MilvusClient.search(​
         )​
         
         
        -

        Consulta com correspondência de palavras-chave

        A correspondência de palavras-chave também pode ser utilizada para filtragem escalar em operações de consulta. Especificando uma expressão TEXT_MATCH no parâmetro expr do método query(), pode obter documentos que correspondem às palavras-chave fornecidas.

        -

        O exemplo abaixo recupera documentos em que o campo text contém as palavras-chave keyword1 e keyword2.

        +
        String filter = "TEXT_MATCH(text, 'keyword1 keyword2')";
        +
        +SearchResp searchResp = client.search(SearchReq.builder()
        +        .collectionName("YOUR_COLLECTION_NAME")
        +        .annsField("embeddings")
+        .data(Collections.singletonList(queryVector))
        +        .filter(filter)
        +        .topK(10)
        +        .outputFields(Arrays.asList("id", "text"))
        +        .build());
        +
        +
        // Match entities with `keyword1` or `keyword2`
        +const filter = "TEXT_MATCH(text, 'keyword1 keyword2')";
        +
        +// Assuming 'embeddings' is the vector field and 'text' is the VARCHAR field
+const result = await client.search({
+    collection_name: "YOUR_COLLECTION_NAME", // Your collection name
+    anns_field: "embeddings", // Vector field name
+    data: [query_vector], // Query vector
+    filter: filter,
+    params: {"nprobe": 10},
+    limit: 10, // Max. number of results to return
+    output_fields: ["id", "text"] // Fields to return
+});
        +
        +
        export filter="\"TEXT_MATCH(text, 'keyword1 keyword2')\""
        +
        +export CLUSTER_ENDPOINT="http://localhost:19530"
        +export TOKEN="root:Milvus"
        +
        +curl --request POST \
        +--url "${CLUSTER_ENDPOINT}/v2/vectordb/entities/search" \
        +--header "Authorization: Bearer ${TOKEN}" \
        +--header "Content-Type: application/json" \
        +-d '{
        +    "collectionName": "demo2",
        +    "annsField": "my_vector",
        +    "data": [[0.19886812562848388, 0.06023560599112088, 0.6976963061752597, 0.2614474506242501, 0.838729485096104]],
        +    "filter": '"$filter"',
        +    "searchParams": {
        +        "params": {
        +            "nprobe": 10
        +        }
        +    },
        +    "limit": 3,
        +    "outputFields": ["text","id"]
        +}'
        +
        +

        Consulta com correspondência de texto

        A correspondência de texto também pode ser usada para filtragem escalar em operações de consulta. Ao especificar uma expressão TEXT_MATCH no parâmetro expr do método query(), pode obter documentos que correspondem aos termos fornecidos.

        +

        O exemplo abaixo recupera documentos em que o campo text contém os termos keyword1 e keyword2.

        +
        # Match entities with both `keyword1` and `keyword2`​
         filter = "TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')"​
         ​
        @@ -161,6 +357,39 @@ result = MilvusClient.query(​
             output_fields=["id", "text"]​
         )​
         
        +
        +
        String filter = "TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')";
        +
        +QueryResp queryResp = client.query(QueryReq.builder()
        +        .collectionName("YOUR_COLLECTION_NAME")
        +        .filter(filter)
        +        .outputFields(Arrays.asList("id", "text"))
        +        .build()
        +);
        +
        +
        // Match entities with both `keyword1` and `keyword2`
        +const filter = "TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')";
        +
+const result = await client.query({
+    collection_name: "YOUR_COLLECTION_NAME",
+    filter: filter, 
+    output_fields: ["id", "text"]
+});
        +
        +
        export filter="\"TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')\""
        +
        +export CLUSTER_ENDPOINT="http://localhost:19530"
        +export TOKEN="root:Milvus"
        +
        +curl --request POST \
        +--url "${CLUSTER_ENDPOINT}/v2/vectordb/entities/query" \
        +--header "Authorization: Bearer ${TOKEN}" \
        +--header "Content-Type: application/json" \
        +-d '{
        +    "collectionName": "demo2",
        +    "filter": '"$filter"',
        +    "outputFields": ["id", "text"]
        +}'
         

        Considerações

          -
        • A ativação da correspondência de palavras-chave para um campo desencadeia a criação de um índice invertido, que consome recursos de armazenamento. Considere o impacto no armazenamento ao decidir ativar esta funcionalidade, uma vez que varia com base no tamanho do texto, nos tokens únicos e no analisador utilizado.

        • +
        • A ativação da correspondência de texto para um campo desencadeia a criação de um índice invertido, que consome recursos de armazenamento. Considere o impacto no armazenamento ao decidir ativar esse recurso, pois ele varia de acordo com o tamanho do texto, tokens exclusivos e o analisador usado.

        • Depois de definir um analisador no seu esquema, as suas definições tornam-se permanentes para essa coleção. Se decidir que um analisador diferente se adequa melhor às suas necessidades, pode considerar eliminar a coleção existente e criar uma nova com a configuração de analisador pretendida.
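Como referência, segue-se um esboço hipotético em Python (pymilvus) do fluxo descrito no último ponto: eliminar a coleção existente e recriá-la com uma configuração de analisador diferente. O nome da coleção (`demo2`), os campos e a dimensão são meramente ilustrativos.

```python
from pymilvus import MilvusClient, DataType

client = MilvusClient(uri="http://localhost:19530")  # suposição: instância local de Milvus

# Eliminar a coleção cuja configuração de analisador já não serve.
client.drop_collection(collection_name="demo2")

# Recriá-la com um analisador diferente (aqui: english) no campo VARCHAR.
schema = MilvusClient.create_schema(auto_id=True, enable_dynamic_field=False)
schema.add_field(field_name="id", datatype=DataType.INT64, is_primary=True)
schema.add_field(
    field_name="text",
    datatype=DataType.VARCHAR,
    max_length=200,
    enable_analyzer=True,
    analyzer_params={"type": "english"},
    enable_match=True,
)
schema.add_field(field_name="my_vector", datatype=DataType.FLOAT_VECTOR, dim=5)

# Os índices podem ser adicionados depois, antes de carregar a coleção.
client.create_collection(collection_name="demo2", schema=schema)
```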

        diff --git a/localization/v2.5.x/site/pt/userGuide/search-query-get/multi-vector-search.md b/localization/v2.5.x/site/pt/userGuide/search-query-get/multi-vector-search.md index b1257d7ee..82d4a1a20 100644 --- a/localization/v2.5.x/site/pt/userGuide/search-query-get/multi-vector-search.md +++ b/localization/v2.5.x/site/pt/userGuide/search-query-get/multi-vector-search.md @@ -22,7 +22,7 @@ title: Pesquisa híbrida >

        A pesquisa híbrida refere-se a um método de pesquisa que efectua várias pesquisas ANN em simultâneo, reordena vários conjuntos de resultados dessas pesquisas ANN e, por fim, devolve um único conjunto de resultados. A utilização da Pesquisa Híbrida pode aumentar a precisão da pesquisa. O Zilliz suporta a realização de Pesquisa Híbrida numa coleção com vários campos vectoriais.

        -

        A Pesquisa híbrida é mais comumente usada em cenários que incluem pesquisas vetoriais esparso-densas e pesquisas multimodais. Este guia demonstrará como realizar uma Pesquisa Híbrida no Zilliz com um exemplo específico.

        +

        A Pesquisa híbrida é mais comumente usada em cenários que incluem pesquisas vetoriais esparso-densas e pesquisas multimodais. Este guia demonstrará como efetuar uma Pesquisa Híbrida no Zilliz com um exemplo específico.

        Cenários

        A Pesquisa Híbrida é adequada para os dois cenários seguintes.

        Pesquisa de vectores esparso-densos

        Diferentes tipos de vectores podem representar diferentes informações, e a utilização de vários modelos de incorporação pode representar de forma mais abrangente diferentes caraterísticas e aspectos dos dados. Por exemplo, a utilização de diferentes modelos de incorporação para a mesma frase pode gerar um vetor denso para representar o significado semântico e um vetor esparso para representar a frequência de palavras na frase.

          -
        • Vectores esparsos: Os vectores esparsos são caracterizados pela sua elevada dimensionalidade vetorial e pela presença de poucos valores não nulos. Esta estrutura torna-os particularmente adequados para aplicações tradicionais de recuperação de informação. Na maioria dos casos, o número de dimensões utilizadas nos vectores esparsos corresponde a diferentes tokens numa ou mais línguas. A cada dimensão é atribuído um valor que indica a importância relativa desse token no documento. Esta disposição revela-se vantajosa para tarefas que envolvem a correspondência de palavras-chave.

        • -
        • Vectores densos: Os vectores densos são incorporados a partir de redes neuronais. Quando dispostos numa matriz ordenada, estes vectores captam a essência semântica do texto de entrada. Note-se que os vectores densos não se limitam ao processamento de texto; são também amplamente utilizados na visão computacional para representar a semântica dos dados visuais. Estes vectores densos, normalmente gerados por modelos de incorporação de texto, caracterizam-se pelo facto de a maioria ou todos os elementos serem diferentes de zero. Assim, os vectores densos são particularmente eficazes para aplicações de pesquisa semântica, uma vez que podem devolver os resultados mais semelhantes com base na distância vetorial, mesmo na ausência de correspondências exactas de palavras-chave. Esta capacidade permite resultados de pesquisa mais matizados e conscientes do contexto, capturando frequentemente relações entre conceitos que podem ser perdidas por abordagens baseadas em palavras-chave.

        • +
        • Vectores esparsos: Os vectores esparsos são caracterizados pela sua elevada dimensionalidade vetorial e pela presença de poucos valores não nulos. Esta estrutura torna-os particularmente adequados para aplicações tradicionais de recuperação de informação. Na maioria dos casos, o número de dimensões utilizadas nos vectores esparsos corresponde a diferentes tokens numa ou mais línguas. A cada dimensão é atribuído um valor que indica a importância relativa desse token no documento. Esta disposição revela-se vantajosa para tarefas que envolvem correspondência de texto.

        • +
        • Vectores densos: Os vectores densos são incorporados a partir de redes neuronais. Quando dispostos numa matriz ordenada, estes vectores captam a essência semântica do texto de entrada. Note-se que os vectores densos não se limitam ao processamento de texto; são também amplamente utilizados na visão computacional para representar a semântica dos dados visuais. Estes vectores densos, normalmente gerados por modelos de incorporação de texto, caracterizam-se pelo facto de a maioria ou todos os elementos serem diferentes de zero. Assim, os vectores densos são particularmente eficazes para aplicações de pesquisa semântica, uma vez que podem devolver os resultados mais semelhantes com base na distância vetorial, mesmo na ausência de correspondências exactas de texto. Esta capacidade permite resultados de pesquisa mais matizados e conscientes do contexto, capturando frequentemente relações entre conceitos que podem ser perdidas por abordagens baseadas em palavras-chave.

        Para obter mais detalhes, consulte Vetor esparso e Vetor denso.
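Para concretizar, segue-se um esboço em Python (pymilvus) de um esquema de coleção com um campo de vetor denso e um campo de vetor esparso, como o utilizado na Pesquisa Híbrida; os nomes dos campos e a dimensão são suposições ilustrativas.

```python
from pymilvus import MilvusClient, DataType

schema = MilvusClient.create_schema(auto_id=True, enable_dynamic_field=False)
schema.add_field(field_name="id", datatype=DataType.INT64, is_primary=True)
schema.add_field(field_name="text", datatype=DataType.VARCHAR, max_length=1000)
# Campo de vetor denso (por exemplo, gerado por um modelo do tipo BERT).
schema.add_field(field_name="dense", datatype=DataType.FLOAT_VECTOR, dim=768)
# Campo de vetor esparso (por exemplo, gerado por BM25, BGE-M3 ou SPLADE).
schema.add_field(field_name="sparse", datatype=DataType.SPARSE_FLOAT_VECTOR)
```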

        Pesquisa multimodal

        A pesquisa multimodal refere-se à pesquisa por semelhança de dados não estruturados em várias modalidades (como imagens, vídeos, áudio, texto, etc.). Por exemplo, uma pessoa pode ser representada utilizando várias modalidades de dados, como impressões digitais, impressões de voz e caraterísticas faciais. A pesquisa híbrida suporta várias pesquisas em simultâneo. Por exemplo, procurar uma pessoa com impressões digitais e impressões de voz semelhantes.

        @@ -64,8 +64,8 @@ title: Pesquisa híbrida

        O fluxo de trabalho principal para efetuar uma Pesquisa Híbrida é o seguinte.

1. Gerar vectores densos através de modelos de incorporação como BERT e Transformers.

2. Gerar vectores esparsos através de modelos de incorporação como BM25, BGE-M3, SPLADE, etc.

3. Criar uma coleção no Zilliz e definir o esquema da coleção que inclui campos vectoriais densos e esparsos.

4. Insira os vectores esparsos-densos na coleção criada no passo anterior.

5. Efectue uma pesquisa híbrida: a pesquisa ANN em vectores densos devolverá um conjunto de resultados top-K mais semelhantes e a correspondência de texto em vectores esparsos também devolverá um conjunto de resultados top-K.

6. Normalização: normalize as pontuações dos dois conjuntos de resultados top-K, convertendo as pontuações para um intervalo entre [0,1]. Um esboço de código que ilustra este fluxo é apresentado a seguir.
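A título ilustrativo, segue-se um esboço mínimo em Python (pymilvus) deste fluxo de Pesquisa Híbrida, assumindo uma coleção já criada com os campos `dense` e `sparse` e utilizando o reranker RRF (uma das estratégias de reordenação disponíveis); os vetores de consulta e o nome da coleção são fictícios.

```python
from pymilvus import MilvusClient, AnnSearchRequest, RRFRanker

client = MilvusClient(uri="http://localhost:19530")  # suposição: instância local de Milvus

dense_query = [0.1] * 768            # vetor de consulta denso fictício
sparse_query = {17: 0.4, 102: 0.9}   # vetor de consulta esparso fictício (índice: peso)

# Um pedido ANN por cada campo vetorial.
dense_req = AnnSearchRequest(
    data=[dense_query], anns_field="dense",
    param={"metric_type": "IP"}, limit=10,
)
sparse_req = AnnSearchRequest(
    data=[sparse_query], anns_field="sparse",
    param={"metric_type": "IP"}, limit=10,
)

# Reordenar os dois conjuntos top-K com Reciprocal Rank Fusion e devolver uma única lista.
results = client.hybrid_search(
    collection_name="hybrid_demo",   # nome de coleção hipotético
    reqs=[dense_req, sparse_req],
    ranker=RRFRanker(),
    limit=10,
    output_fields=["text"],
)
```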

diff --git a/localization/v2.5.x/site/pt/userGuide/search-query-get/single-vector-search.md b/localization/v2.5.x/site/pt/userGuide/search-query-get/single-vector-search.md index 769fa981d..6c822be06 100644 --- a/localization/v2.5.x/site/pt/userGuide/search-query-get/single-vector-search.md +++ b/localization/v2.5.x/site/pt/userGuide/search-query-get/single-vector-search.md @@ -37,7 +37,7 @@ title: Pesquisa ANN básica

          A pesquisa ANN e a pesquisa k-Nearest Neighbors (kNN) são os métodos habituais nas pesquisas de semelhança de vectores. Numa pesquisa kNN, é necessário comparar todos os vectores de um espaço vetorial com o vetor de consulta transportado no pedido de pesquisa antes de descobrir os mais semelhantes, o que consome muito tempo e recursos.

          +

          A pesquisa ANN e a pesquisa k-Nearest Neighbors (kNN) são os métodos habituais nas pesquisas de semelhança de vectores. Numa pesquisa kNN, é necessário comparar todos os vectores num espaço vetorial com o vetor de consulta transportado no pedido de pesquisa antes de descobrir os mais semelhantes, o que consome muito tempo e recursos.

          Ao contrário das pesquisas kNN, um algoritmo de pesquisa ANN pede um ficheiro de índice que regista a ordem ordenada das incorporações vectoriais. Quando chega um pedido de pesquisa, pode utilizar o ficheiro de índice como referência para localizar rapidamente um subgrupo que provavelmente contém as incorporações vectoriais mais semelhantes ao vetor de consulta. Em seguida, pode utilizar o tipo de métrica especificado para medir a semelhança entre o vetor de consulta e os do subgrupo, ordenar os membros do grupo com base na semelhança com o vetor de consulta e descobrir os membros do grupo K de topo.

          As pesquisas ANN dependem de índices pré-construídos, e o rendimento da pesquisa, a utilização de memória e a correção da pesquisa podem variar com os tipos de índice escolhidos. É necessário equilibrar o desempenho e a correção da pesquisa.

          Para reduzir a curva de aprendizagem, Milvus fornece AUTOINDEX. Com o AUTOINDEX, o Milvus pode analisar a distribuição de dados dentro da sua coleção enquanto constrói o índice e define os parâmetros de índice mais optimizados com base na análise para atingir um equilíbrio entre o desempenho da pesquisa e a correção.
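Como referência rápida, segue-se um esboço em Python (pymilvus) de uma pesquisa ANN básica; o nome da coleção (`quick_setup`), o campo vetorial (`vector`) e o vetor de consulta são suposições ilustrativas.

```python
from pymilvus import MilvusClient

client = MilvusClient(uri="http://localhost:19530")  # suposição: instância local de Milvus

query_vector = [0.05, -0.12, 0.33, 0.08, -0.27]  # vetor de consulta fictício de 5 dimensões

res = client.search(
    collection_name="quick_setup",   # coleção hipotética já indexada e carregada
    anns_field="vector",             # campo vetorial a pesquisar
    data=[query_vector],
    limit=3,                         # número de resultados top-K a devolver
    output_fields=["id"],
)
print(res)
```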

          @@ -255,7 +255,7 @@ curl --request POST \​

        O Milvus classifica os resultados da pesquisa pelas suas pontuações de semelhança com o vetor de consulta, por ordem descendente. A pontuação de semelhança é também designada por distância ao vetor de consulta e os seus intervalos de valores variam consoante os tipos de métricas utilizados.

        -

        A tabela a seguir lista os tipos de métrica aplicáveis e os intervalos de distância correspondentes.

        +

        A tabela seguinte lista os tipos de métricas aplicáveis e os intervalos de distância correspondentes.

        Tipo de métrica

        Caraterísticas

        Intervalo de distância

@@ -626,7 +626,7 @@ curl --request POST \

        Num resultado de pesquisa, o Milvus inclui os valores do campo primário e as distâncias/pontuações de similaridade das entidades que contêm os top-K vetor embeddings por defeito. Você pode incluir os nomes dos campos de destino em uma solicitação de pesquisa como os campos de saída para fazer com que os resultados da pesquisa carreguem os valores de outros campos nessas entidades.

        +

        Num resultado de pesquisa, o Milvus inclui os valores do campo primário e as distâncias/pontuações de similaridade das entidades que contêm os vectores de incorporação top-K por defeito. Você pode incluir os nomes dos campos de destino em uma solicitação de pesquisa como os campos de saída para fazer com que os resultados da pesquisa carreguem os valores de outros campos nessas entidades.

        # 4. Single vector search​
        @@ -778,7 +778,7 @@ curl --request POST \​
         

        A tabela abaixo descreve como definir os parâmetros Limite e Deslocamento para consultas paginadas ao retornar 100 Entidades de cada vez.

        Consultas

        Entidades a devolver por consulta

        -

        Entidades já devolvidas no total

        +

        Entidades já retornadas no total

        A **1ª** consulta

        100

        0

        @@ -891,22 +891,22 @@ curl --request POST \​

        Para obter mais informações sobre filtragem de metadados e condições de filtragem, consulte Pesquisa filtrada e Filtragem de metadados.

      • Pesquisa de intervalo

        É possível melhorar a relevância dos resultados da pesquisa restringindo a distância ou a pontuação das entidades retornadas dentro de um intervalo específico. No Milvus, uma pesquisa de intervalo envolve o desenho de dois círculos concêntricos com a incorporação de vetor mais semelhante ao vetor de consulta como o centro. O pedido de pesquisa especifica o raio de ambos os círculos, e o Milvus devolve todas as incorporações vectoriais que se enquadram no círculo exterior mas não no círculo interior.

        -

        Para saber mais sobre a pesquisa de intervalo, consulte Pesquisa de intervalo.

      • +

        Para obter mais informações sobre a pesquisa de intervalo, consulte Pesquisa de intervalo.

      • Pesquisa de agrupamento

        Se as entidades retornadas tiverem o mesmo valor em um campo específico, os resultados da pesquisa podem não representar a distribuição de todas as incorporações vetoriais no espaço vetorial. Para diversificar os resultados da pesquisa, considere o uso da pesquisa de agrupamento.

        Para saber mais sobre a pesquisa de agrupamento, consulte Pesquisa de agrupamento.

      • Pesquisa híbrida

        -

        Uma coleção pode incluir até quatro campos vectoriais para guardar as incorporações vectoriais geradas utilizando diferentes modelos de incorporação. Ao fazer isso, é possível usar uma pesquisa híbrida para ranquear novamente os resultados de pesquisa desses campos vetoriais, melhorando a taxa de recuperação.

        +

        Uma coleção pode incluir até quatro campos vectoriais para guardar as incorporações vectoriais geradas utilizando diferentes modelos de incorporação. Ao fazer isso, é possível usar uma pesquisa híbrida para ranquear novamente os resultados da pesquisa desses campos vetoriais, melhorando a taxa de recuperação.

        Para saber mais sobre a pesquisa híbrida, consulte Pesquisa híbrida.

      • Iterador de pesquisa

        -

        Uma única pesquisa ANN retorna um máximo de 16.384 entidades. Considere o uso de iteradores de pesquisa se precisar de mais entidades para retornar em uma única pesquisa.

        +

        Uma única pesquisa ANN retorna um máximo de 16.384 entidades. Considere usar iteradores de pesquisa se precisar de mais entidades para retornar em uma única pesquisa.

        Para obter detalhes sobre iteradores de pesquisa, consulte Iterador de pesquisa.

      • Pesquisa de texto completo

        A pesquisa de texto completo é um recurso que recupera documentos que contêm termos ou frases específicos em conjuntos de dados de texto e, em seguida, classifica os resultados com base na relevância. Esta funcionalidade ultrapassa as limitações da pesquisa semântica, que pode ignorar termos precisos, garantindo que recebe os resultados mais exactos e contextualmente relevantes. Além disso, simplifica as pesquisas vectoriais ao aceitar a entrada de texto em bruto, convertendo automaticamente os seus dados de texto em embeddings esparsos sem a necessidade de gerar manualmente embeddings vectoriais.

        -

        Para obter detalhes sobre a pesquisa de texto integral, consulte Pesquisa de texto integral.

      • -
      • Correspondência de palavras-chave

        -

        A correspondência de palavras-chave no Milvus permite a recuperação precisa de documentos com base em termos específicos. Esta funcionalidade é utilizada principalmente para pesquisa filtrada para satisfazer condições específicas e pode incorporar filtragem escalar para refinar os resultados da consulta, permitindo pesquisas de semelhança dentro de vectores que satisfaçam critérios escalares.

        -

        Para obter detalhes sobre a correspondência de palavras-chave, consulte Correspondência de palavras-chave.

      • +

        Para mais informações sobre a pesquisa de texto integral, consulte Pesquisa de texto integral.

        +
      • Correspondência de texto

        +

        A correspondência de texto em Milvus permite a recuperação precisa de documentos com base em termos específicos. Esta funcionalidade é utilizada principalmente para pesquisa filtrada para satisfazer condições específicas e pode incorporar filtragem escalar para refinar os resultados da consulta, permitindo pesquisas de semelhança dentro de vectores que satisfaçam critérios escalares.

        +

        Para obter detalhes sobre a correspondência de texto, consulte Correspondência de texto.

      • Usar chave de partição

        O envolvimento de vários campos escalares na filtragem de metadados e a utilização de uma condição de filtragem bastante complicada pode afetar a eficiência da pesquisa. Depois de definir um campo escalar como chave de partição e utilizar uma condição de filtragem que envolva a chave de partição no pedido de pesquisa, pode ajudar a restringir o âmbito da pesquisa dentro das partições correspondentes aos valores de chave de partição especificados.

        Para obter detalhes sobre a chave de partição, consulte Usar chave de partição.

      • @@ -915,5 +915,5 @@ curl --request POST \​

        Para obter detalhes sobre as configurações de mmap, consulte Usar mmap.

      • Compactação de clustering

        A compactação de clustering foi concebida para melhorar o desempenho da pesquisa e reduzir os custos em grandes colecções. Este guia ajudará a entender a compactação de clustering e como esse recurso pode melhorar o desempenho da pesquisa.

        -

        Para obter detalhes sobre compactações de clustering, consulte Compactação de clustering.

      • +

        Para obter detalhes sobre compactações de agrupamento, consulte Compactação de agrupamento.

        diff --git a/localization/v2.5.x/site/zh/adminGuide/upgrade-pulsar-v3.json b/localization/v2.5.x/site/zh/adminGuide/upgrade-pulsar-v3.json index f9e3dc2e7..25657ee4e 100644 --- a/localization/v2.5.x/site/zh/adminGuide/upgrade-pulsar-v3.json +++ b/localization/v2.5.x/site/zh/adminGuide/upgrade-pulsar-v3.json @@ -1 +1 @@ -{"codeList":["kubectl -n default port-forward deploy/my-release-milvus-proxy 9091:9091 &​\n","[1] 8116​\nForwarding from 127.0.0.1:9091 -> 9091​\n\n","pid=8116​\n\n","curl 127.0.0.1:9091/api/v1/collections \\​\n|curl 127.0.0.1:9091/api/v1/persist -d @/dev/stdin\\​\n|jq '.flush_coll_segIDs'| jq '[.[] | .data[]]' | jq '{segmentIDs: (.)}' \\​\n> flushing_segments.json​\ncat flushing_segments.json​\n\n","{​\n\"segmentIDs\": [​\n 454097953998181000,​\n 454097953999383600,​\n 454097953998180800​\n]​\n}​\n\n","cat flushing_segments.json| curl -X GET 127.0.0.1:9091/api/v1/persist/state -d @/dev/stdin ​\n\n","{\"status\":{},\"flushed\":true}​\n\n","kill $pid​\n\n","[1] + 8116 terminated kubectl -n default port-forward deploy/my-release-milvus-proxy 9091:9091 ​\n\n","helm -n default get values my-release -o yaml > values.yaml​\ncat values.yaml​\n\n","helm -n default uninstall my-release​\n\n","These resources were kept due to the resource policy:​\n[PersistentVolumeClaim] my-release-minio​\n​\nrelease \"my-release\" uninstalled​\n\n","kubectl -n default get pvc -lapp=pulsar,release=my-release |grep -v NAME |awk '{print $1}' > pulsar-pvcs.txt​\nkubectl -n default get pvc -lapp=pulsar,release=my-release -o custom-columns=VOL:.spec.volumeName|grep -v VOL > pulsar-pvs.txt​\necho \"Volume Claims:\"​\ncat pulsar-pvcs.txt​\necho \"Volumes:\"​\ncat pulsar-pvs.txt​\n\n","Volume Claims:​\nmy-release-pulsar-bookie-journal-my-release-pulsar-bookie-0​\nmy-release-pulsar-bookie-journal-my-release-pulsar-bookie-1​\nmy-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-0​\nmy-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-1​\nmy-release-pulsar-zookeeper-data-my-release-pulsar-zookeeper-0​\nVolumes:​\npvc-f590a4de-df31-4ca8-a424-007eac3c619a​\npvc-17b0e215-3e14-4d14-901e-1a1dda9ff5a3​\npvc-72f83c25-6ea1-45ee-9559-0b783f2c530b​\npvc-60dcb6e4-760d-46c7-af1a-d1fc153b0caf​\npvc-2da33f64-c053-42b9-bb72-c5d50779aa0a​\n\n","cat pulsar-pvcs.txt |xargs -I {} kubectl -n default delete pvc {} --wait=false​\n\n","persistentvolumeclaim \"my-release-pulsar-bookie-journal-my-release-pulsar-bookie-0\" deleted​\npersistentvolumeclaim \"my-release-pulsar-bookie-journal-my-release-pulsar-bookie-1\" deleted​\npersistentvolumeclaim \"my-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-0\" deleted​\npersistentvolumeclaim \"my-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-1\" deleted​\npersistentvolumeclaim \"my-release-pulsar-zookeeper-data-my-release-pulsar-zookeeper-0\" deleted​\n\n","cat pulsar-pvs.txt |xargs -I {} kubectl -n default delete pvc {} --wait=false​\n\n","Error from server (NotFound): persistentvolumeclaims \"pvc-f590a4de-df31-4ca8-a424-007eac3c619a\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-17b0e215-3e14-4d14-901e-1a1dda9ff5a3\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-72f83c25-6ea1-45ee-9559-0b783f2c530b\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-60dcb6e4-760d-46c7-af1a-d1fc153b0caf\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-2da33f64-c053-42b9-bb72-c5d50779aa0a\" not found​\n\n","kubectl -n default get milvus my-release -o yaml > milvus.yaml​\nhead 
milvus.yaml -n 20​\n\n","apiVersion: milvus.io/v1beta1​\nkind: Milvus​\nmetadata:​\nannotations:​\n milvus.io/dependency-values-merged: \"true\"​\n milvus.io/pod-service-label-added: \"true\"​\n milvus.io/querynode-current-group-id: \"0\"​\ncreationTimestamp: \"2024-11-22T08:06:59Z\"​\nfinalizers:​\n- milvus.milvus.io/finalizer​\ngeneration: 3​\nlabels:​\n app: milvus​\n milvus.io/operator-version: 1.1.2​\nname: my-release​\nnamespace: default​\nresourceVersion: \"692217324\"​\nuid: 7a469ed0-9df1-494e-bd9a-340fac4305b5​\nspec:​\ncomponents:​\n\n","# a patch to retain etcd & storage data and delete pulsar data while delete milvus​\nspec:​\ndependencies:​\n etcd:​\n inCluster:​\n deletionPolicy: Retain​\n pvcDeletion: false​\n storage:​\n inCluster:​\n deletionPolicy: Retain​\n pvcDeletion: false​\n pulsar:​\n inCluster:​\n deletionPolicy: Delete​\n pvcDeletion: true​\n\n","kubectl -n default patch milvus my-release --patch-file patch.yaml --type=merge​\n\n","milvus.milvus.io/my-release patched​\n\n","kubectl -n default delete milvus my-release --wait=false​\nkubectl -n default get milvus my-release​\nkubectl -n default delete milvus my-release --wait=true​\n\n","milvus.milvus.io \"my-release\" deleted​\nNAME MODE STATUS UPDATED AGE​\nmy-release cluster Deleting True 41m​\nmilvus.milvus.io \"my-release\" deleted​\n\n","kubectl -n default get milvus my-release​\n\n","No resources found in default namespace.​\n\n","# change the following:​\npulsar:​\nenabled: false # set to false​\n# you may also clean up rest fields under pulsar field​\n# it's ok to keep them though.​\npulsarv3:​\nenabled: true​\n# append other values for pulsar v3 chart if needs​\n\n","helm repo add zilliztech https://zilliztech.github.io/milvus-helm​\nhelm repo update zilliztech​\n\n","\"zilliztech\" already exists with the same configuration, skipping​\nHang tight while we grab the latest from your chart repositories...​\n...Successfully got an update from the \"zilliztech\" chart repository​\nUpdate Complete. 
⎈Happy Helming!⎈​\n\n","helm -n default install my-release zilliztech/milvus --reset-values -f values.yaml​\n\n","NAME: my-release​\nLAST DEPLOYED: Fri Nov 22 15:31:27 2024​\nNAMESPACE: default​\nSTATUS: deployed​\nREVISION: 1​\nTEST SUITE: None​\n\n","NAME READY STATUS RESTARTS AGE​\nmy-release-etcd-0 1/1 Running 0 4m3s​\nmy-release-milvus-datanode-56487bc4bc-s6mbd 1/1 Running 0 4m5s​\nmy-release-milvus-indexnode-6476894d6-rv85d 1/1 Running 0 4m5s​\nmy-release-milvus-mixcoord-6d8875cb9c-67fcq 1/1 Running 0 4m4s​\nmy-release-milvus-proxy-7bc45d57c5-2qf8m 1/1 Running 0 4m4s​\nmy-release-milvus-querynode-77465747b-kt7f4 1/1 Running 0 4m4s​\nmy-release-minio-684ff4f5df-pnc97 1/1 Running 0 4m5s​\nmy-release-pulsarv3-bookie-0 1/1 Running 0 4m3s​\nmy-release-pulsarv3-bookie-1 1/1 Running 0 4m3s​\nmy-release-pulsarv3-bookie-2 1/1 Running 0 4m3s​\nmy-release-pulsarv3-bookie-init-6z4tk 0/1 Completed 0 4m1s​\nmy-release-pulsarv3-broker-0 1/1 Running 0 4m2s​\nmy-release-pulsarv3-broker-1 1/1 Running 0 4m2s​\nmy-release-pulsarv3-proxy-0 1/1 Running 0 4m2s​\nmy-release-pulsarv3-proxy-1 1/1 Running 0 4m2s​\nmy-release-pulsarv3-pulsar-init-wvqpc 0/1 Completed 0 4m1s​\nmy-release-pulsarv3-recovery-0 1/1 Running 0 4m3s​\nmy-release-pulsarv3-zookeeper-0 1/1 Running 0 4m2s​\nmy-release-pulsarv3-zookeeper-1 1/1 Running 0 4m2s​\nmy-release-pulsarv3-zookeeper-2 1/1 Running 0 4m2s​\n\n","# change the followings fields:​\napiVersion: milvus.io/v1beta1​\nkind: Milvus​\nmetadata:​\nannotations: null # this field should be removed or set to null​\nresourceVersion: null # this field should be removed or set to null​\nuid: null # this field should be removed or set to null​\nspec:​\ndependencies:​\n pulsar:​\n inCluster:​\n chartVersion: pulsar-v3​\n # delete all previous values for pulsar v2 and set it to null.​\n # you may add additional values here for pulsar v3 if you're sure about it.​\n values: null​\n\n","helm repo add milvus-operator https://zilliztech.github.io/milvus-operator​\nhelm repo update milvus-operator​\nhelm -n milvus-operator upgrade milvus-operator milvus-operator/milvus-operator​\n\n","kubectl create -f milvus.yaml​\n\n","milvus.milvus.io/my-release created​\n\n","NAME READY STATUS RESTARTS AGE​\nmy-release-etcd-0 1/1 Running 0 65m​\nmy-release-milvus-datanode-57fd59ff58-5mdrk 1/1 Running 0 93s​\nmy-release-milvus-indexnode-67867c6b9b-4wsbw 1/1 Running 0 93s​\nmy-release-milvus-mixcoord-797849f9bb-sf8z5 1/1 Running 0 93s​\nmy-release-milvus-proxy-5d5bf98445-c55m6 1/1 Running 0 93s​\nmy-release-milvus-querynode-0-64797f5c9-lw4rh 1/1 Running 0 92s​\nmy-release-minio-79476ccb49-zvt2h 1/1 Running 0 65m​\nmy-release-pulsar-bookie-0 1/1 Running 0 5m10s​\nmy-release-pulsar-bookie-1 1/1 Running 0 5m10s​\nmy-release-pulsar-bookie-2 1/1 Running 0 5m10s​\nmy-release-pulsar-bookie-init-v8fdj 0/1 Completed 0 5m11s​\nmy-release-pulsar-broker-0 1/1 Running 0 5m11s​\nmy-release-pulsar-broker-1 1/1 Running 0 5m10s​\nmy-release-pulsar-proxy-0 1/1 Running 0 5m11s​\nmy-release-pulsar-proxy-1 1/1 Running 0 5m10s​\nmy-release-pulsar-pulsar-init-5lhx7 0/1 Completed 0 5m11s​\nmy-release-pulsar-recovery-0 1/1 Running 0 5m11s​\nmy-release-pulsar-zookeeper-0 1/1 Running 0 5m11s​\nmy-release-pulsar-zookeeper-1 1/1 Running 0 5m10s​\nmy-release-pulsar-zookeeper-2 1/1 Running 0 5m10s​\n\n"],"headingContent":"Upgrading Pulsar ​","anchorList":[{"label":"升级 
Pulsar","href":"Upgrading-Pulsar-​","type":1,"isActive":false},{"label":"路线图","href":"Roadmap","type":2,"isActive":false},{"label":"步骤","href":"Procedures","type":2,"isActive":false}]} \ No newline at end of file +{"codeList":["kubectl -n default port-forward deploy/my-release-milvus-proxy 9091:9091 &​\n","[1] 8116​\nForwarding from 127.0.0.1:9091 -> 9091​\n\n","pid=8116​\n\n","curl 127.0.0.1:9091/api/v1/collections \\​\n|curl 127.0.0.1:9091/api/v1/persist -d @/dev/stdin\\​\n|jq '.flush_coll_segIDs'| jq '[.[] | .data[]]' | jq '{segmentIDs: (.)}' \\​\n> flushing_segments.json​\ncat flushing_segments.json​\n\n","{​\n \"segmentIDs\": [​\n 454097953998181000,​\n 454097953999383600,​\n 454097953998180800​\n ]​\n}​\n\n","cat flushing_segments.json| curl -X GET 127.0.0.1:9091/api/v1/persist/state -d @/dev/stdin ​\n\n","{\"status\":{},\"flushed\":true}​\n\n","kill $pid​\n\n","[1] + 8116 terminated kubectl -n default port-forward deploy/my-release-milvus-proxy 9091:9091 ​\n\n","helm -n default get values my-release -o yaml > values.yaml​\ncat values.yaml​\n\n","helm -n default uninstall my-release​\n\n","These resources were kept due to the resource policy:​\n[PersistentVolumeClaim] my-release-minio​\n​\nrelease \"my-release\" uninstalled​\n\n","kubectl -n default get pvc -lapp=pulsar,release=my-release |grep -v NAME |awk '{print $1}' > pulsar-pvcs.txt​\nkubectl -n default get pvc -lapp=pulsar,release=my-release -o custom-columns=VOL:.spec.volumeName|grep -v VOL > pulsar-pvs.txt​\necho \"Volume Claims:\"​\ncat pulsar-pvcs.txt​\necho \"Volumes:\"​\ncat pulsar-pvs.txt​\n\n","Volume Claims:​\nmy-release-pulsar-bookie-journal-my-release-pulsar-bookie-0​\nmy-release-pulsar-bookie-journal-my-release-pulsar-bookie-1​\nmy-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-0​\nmy-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-1​\nmy-release-pulsar-zookeeper-data-my-release-pulsar-zookeeper-0​\nVolumes:​\npvc-f590a4de-df31-4ca8-a424-007eac3c619a​\npvc-17b0e215-3e14-4d14-901e-1a1dda9ff5a3​\npvc-72f83c25-6ea1-45ee-9559-0b783f2c530b​\npvc-60dcb6e4-760d-46c7-af1a-d1fc153b0caf​\npvc-2da33f64-c053-42b9-bb72-c5d50779aa0a​\n\n","cat pulsar-pvcs.txt |xargs -I {} kubectl -n default delete pvc {} --wait=false​\n\n","persistentvolumeclaim \"my-release-pulsar-bookie-journal-my-release-pulsar-bookie-0\" deleted​\npersistentvolumeclaim \"my-release-pulsar-bookie-journal-my-release-pulsar-bookie-1\" deleted​\npersistentvolumeclaim \"my-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-0\" deleted​\npersistentvolumeclaim \"my-release-pulsar-bookie-ledgers-my-release-pulsar-bookie-1\" deleted​\npersistentvolumeclaim \"my-release-pulsar-zookeeper-data-my-release-pulsar-zookeeper-0\" deleted​\n\n","cat pulsar-pvs.txt |xargs -I {} kubectl -n default delete pvc {} --wait=false​\n\n","Error from server (NotFound): persistentvolumeclaims \"pvc-f590a4de-df31-4ca8-a424-007eac3c619a\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-17b0e215-3e14-4d14-901e-1a1dda9ff5a3\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-72f83c25-6ea1-45ee-9559-0b783f2c530b\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-60dcb6e4-760d-46c7-af1a-d1fc153b0caf\" not found​\nError from server (NotFound): persistentvolumeclaims \"pvc-2da33f64-c053-42b9-bb72-c5d50779aa0a\" not found​\n\n","kubectl -n default get milvus my-release -o yaml > milvus.yaml​\nhead milvus.yaml -n 20​\n\n","apiVersion: milvus.io/v1beta1​\nkind: Milvus​\nmetadata:​\n annotations:​\n 
milvus.io/dependency-values-merged: \"true\"​\n milvus.io/pod-service-label-added: \"true\"​\n milvus.io/querynode-current-group-id: \"0\"​\n creationTimestamp: \"2024-11-22T08:06:59Z\"​\n finalizers:​\n - milvus.milvus.io/finalizer​\n generation: 3​\n labels:​\n app: milvus​\n milvus.io/operator-version: 1.1.2​\nname: my-release​\nnamespace: default​\nresourceVersion: \"692217324\"​\nuid: 7a469ed0-9df1-494e-bd9a-340fac4305b5​\nspec:​\n components:​\n\n","# a patch to retain etcd & storage data and delete pulsar data while delete milvus​\nspec:​\n dependencies:​\n etcd:​\n inCluster:​\n deletionPolicy: Retain​\n pvcDeletion: false​\n storage:​\n inCluster:​\n deletionPolicy: Retain​\n pvcDeletion: false​\n pulsar:​\n inCluster:​\n deletionPolicy: Delete​\n pvcDeletion: true​\n\n","kubectl -n default patch milvus my-release --patch-file patch.yaml --type=merge​\n\n","milvus.milvus.io/my-release patched​\n\n","kubectl -n default delete milvus my-release --wait=false​\nkubectl -n default get milvus my-release​\nkubectl -n default delete milvus my-release --wait=true​\n\n","milvus.milvus.io \"my-release\" deleted​\nNAME MODE STATUS UPDATED AGE​\nmy-release cluster Deleting True 41m​\nmilvus.milvus.io \"my-release\" deleted​\n\n","kubectl -n default get milvus my-release​\n\n","No resources found in default namespace.​\n\n","# change the following:​\npulsar:​\n enabled: false # set to false​\n # you may also clean up rest fields under pulsar field​\n # it's ok to keep them though.​\npulsarv3:​\n enabled: true​\n # append other values for pulsar v3 chart if needs​\n\n","helm repo add zilliztech https://zilliztech.github.io/milvus-helm​\nhelm repo update zilliztech​\n\n","\"zilliztech\" already exists with the same configuration, skipping​\nHang tight while we grab the latest from your chart repositories...​\n...Successfully got an update from the \"zilliztech\" chart repository​\nUpdate Complete. 
⎈Happy Helming!⎈​\n\n","helm -n default install my-release zilliztech/milvus --reset-values -f values.yaml​\n\n","NAME: my-release​\nLAST DEPLOYED: Fri Nov 22 15:31:27 2024​\nNAMESPACE: default​\nSTATUS: deployed​\nREVISION: 1​\nTEST SUITE: None​\n\n","NAME READY STATUS RESTARTS AGE​\nmy-release-etcd-0 1/1 Running 0 4m3s​\nmy-release-milvus-datanode-56487bc4bc-s6mbd 1/1 Running 0 4m5s​\nmy-release-milvus-indexnode-6476894d6-rv85d 1/1 Running 0 4m5s​\nmy-release-milvus-mixcoord-6d8875cb9c-67fcq 1/1 Running 0 4m4s​\nmy-release-milvus-proxy-7bc45d57c5-2qf8m 1/1 Running 0 4m4s​\nmy-release-milvus-querynode-77465747b-kt7f4 1/1 Running 0 4m4s​\nmy-release-minio-684ff4f5df-pnc97 1/1 Running 0 4m5s​\nmy-release-pulsarv3-bookie-0 1/1 Running 0 4m3s​\nmy-release-pulsarv3-bookie-1 1/1 Running 0 4m3s​\nmy-release-pulsarv3-bookie-2 1/1 Running 0 4m3s​\nmy-release-pulsarv3-bookie-init-6z4tk 0/1 Completed 0 4m1s​\nmy-release-pulsarv3-broker-0 1/1 Running 0 4m2s​\nmy-release-pulsarv3-broker-1 1/1 Running 0 4m2s​\nmy-release-pulsarv3-proxy-0 1/1 Running 0 4m2s​\nmy-release-pulsarv3-proxy-1 1/1 Running 0 4m2s​\nmy-release-pulsarv3-pulsar-init-wvqpc 0/1 Completed 0 4m1s​\nmy-release-pulsarv3-recovery-0 1/1 Running 0 4m3s​\nmy-release-pulsarv3-zookeeper-0 1/1 Running 0 4m2s​\nmy-release-pulsarv3-zookeeper-1 1/1 Running 0 4m2s​\nmy-release-pulsarv3-zookeeper-2 1/1 Running 0 4m2s​\n\n","# change the followings fields:​\napiVersion: milvus.io/v1beta1​\nkind: Milvus​\nmetadata:​\n annotations: null # this field should be removed or set to null​\n resourceVersion: null # this field should be removed or set to null​\n uid: null # this field should be removed or set to null​\nspec:​\n dependencies:​\n pulsar:​\n inCluster:​\n chartVersion: pulsar-v3​\n # delete all previous values for pulsar v2 and set it to null.​\n # you may add additional values here for pulsar v3 if you're sure about it.​\n values: null​\n\n","helm repo add milvus-operator https://zilliztech.github.io/milvus-operator​\nhelm repo update milvus-operator​\nhelm -n milvus-operator upgrade milvus-operator milvus-operator/milvus-operator​\n\n","kubectl create -f milvus.yaml​\n\n","milvus.milvus.io/my-release created​\n\n","NAME READY STATUS RESTARTS AGE​\nmy-release-etcd-0 1/1 Running 0 65m​\nmy-release-milvus-datanode-57fd59ff58-5mdrk 1/1 Running 0 93s​\nmy-release-milvus-indexnode-67867c6b9b-4wsbw 1/1 Running 0 93s​\nmy-release-milvus-mixcoord-797849f9bb-sf8z5 1/1 Running 0 93s​\nmy-release-milvus-proxy-5d5bf98445-c55m6 1/1 Running 0 93s​\nmy-release-milvus-querynode-0-64797f5c9-lw4rh 1/1 Running 0 92s​\nmy-release-minio-79476ccb49-zvt2h 1/1 Running 0 65m​\nmy-release-pulsar-bookie-0 1/1 Running 0 5m10s​\nmy-release-pulsar-bookie-1 1/1 Running 0 5m10s​\nmy-release-pulsar-bookie-2 1/1 Running 0 5m10s​\nmy-release-pulsar-bookie-init-v8fdj 0/1 Completed 0 5m11s​\nmy-release-pulsar-broker-0 1/1 Running 0 5m11s​\nmy-release-pulsar-broker-1 1/1 Running 0 5m10s​\nmy-release-pulsar-proxy-0 1/1 Running 0 5m11s​\nmy-release-pulsar-proxy-1 1/1 Running 0 5m10s​\nmy-release-pulsar-pulsar-init-5lhx7 0/1 Completed 0 5m11s​\nmy-release-pulsar-recovery-0 1/1 Running 0 5m11s​\nmy-release-pulsar-zookeeper-0 1/1 Running 0 5m11s​\nmy-release-pulsar-zookeeper-1 1/1 Running 0 5m10s​\nmy-release-pulsar-zookeeper-2 1/1 Running 0 5m10s​\n\n"],"headingContent":"Upgrading Pulsar ​","anchorList":[{"label":"升级 
Pulsar","href":"Upgrading-Pulsar-​","type":1,"isActive":false},{"label":"路线图","href":"Roadmap","type":2,"isActive":false},{"label":"步骤","href":"Procedures","type":2,"isActive":false}]} \ No newline at end of file diff --git a/localization/v2.5.x/site/zh/adminGuide/upgrade-pulsar-v3.md b/localization/v2.5.x/site/zh/adminGuide/upgrade-pulsar-v3.md index c244bacee..c561a9ddc 100644 --- a/localization/v2.5.x/site/zh/adminGuide/upgrade-pulsar-v3.md +++ b/localization/v2.5.x/site/zh/adminGuide/upgrade-pulsar-v3.md @@ -24,7 +24,7 @@ title: 将 Milvus 的脉冲星从 V2 升级到 V3
        1. 升级过程需要短暂的服务中断(通常需要几分钟到十多分钟,视数据量而定)。

        2. 操作前,需要停止所有正在运行的客户端向 Milvus 写入数据。否则,写入的数据可能会丢失。

3. -

   本文假设 Milvus 安装在命名空间default 并命名为my-release 。在执行从本页复制的命令时,请将参数更改为您自己的命名空间和发布名称。

   +

   本文假定 Milvus 安装在命名空间default 并命名为my-release 。在执行从本页复制的命令时,请将参数更改为您自己的命名空间和发布名称。

4. 确保您的工作环境在 Kubernetes 集群的上述命名空间下拥有权限,并安装了以下命令。

   a. kubectl >= 1.20

   b. helm >= 3.14.0

          @@ -69,7 +69,7 @@ title: 将 Milvus 的脉冲星从 V2 升级到 V3 >

          本节提供在 Milvus 中将 Pulsar 从 V2 升级到 V3 的详细步骤。

          -

          保留 Pulsar 中未消耗的数据

          在这一步中,需要确保 Pulsar 中的现有数据已持久化到对象存储服务。 有两种方法可供选择,你可以根据自己的需要来选择。

          +

          保留 Pulsar 中未消耗的数据

          在这一步中,需要确保 Pulsar 中的现有数据已持久化到对象存储服务中。 有两种方法可供选择,你可以根据自己的需要进行选择。

          方法 1:使用 Attu

          如果你的工作 Milvus 部署中只有少量的 Collections,且分段不多,你可以使用 Attu 将数据持久化到对象存储服务。

          1. 选择所有数据库中的每个 Collections,进入Segments 面板,点击Flush 按钮
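   除了在 Attu 界面中逐个点击 Flush,也可以改用 pymilvus 以编程方式触发 Flush(这不是原文描述的 Attu 操作,仅作补充示意;集合名称 YOUR_COLLECTION 为占位符,请替换为实际名称,并对每个数据库中的每个 Collections 重复执行):

   from pymilvus import connections, Collection

   # Connect to the Milvus proxy; adjust the URI to your own deployment
   connections.connect(uri="http://localhost:19530")

   # Flush a collection so its data in Pulsar is persisted to object storage
   Collection("YOUR_COLLECTION").flush()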

            @@ -111,11 +111,11 @@ Forwarding from 127.

            输出。

            {​
            -"segmentIDs": [​
            +  "segmentIDs": [​
                 454097953998181000,​
                 454097953999383600,​
                 454097953998180800​
            -]​
            +  ]​
             }​
             
             
          2. @@ -210,7 +210,7 @@ persistentvolumeclaim "my-release-pulsar-zookeepe
          -

          使用 Milvus 操作符删除 Pulsar V2

          如果使用 Milvus 操作符安装了 Milvus,请按照以下步骤停止 Milvus pod 并删除 Pulsar V2 部署。

          +

          使用 Milvus 操作符删除 Pulsar V2

          如果使用 Milvus Operator 安装了 Milvus,请按照以下步骤停止 Milvus pod 并删除 Pulsar V2 部署。

          1. 将当前 Milvus Manifest 保存到milvus.yaml 以备后用。

            kubectl -n default get milvus my-release -o yaml > milvus.yaml​
            @@ -221,15 +221,15 @@ head milvus.yaml -n 20
            apiVersion: milvus.io/v1beta1​
             kind: Milvus​
             metadata:​
            -annotations:​
            +  annotations:​
                 milvus.io/dependency-values-merged: "true"​
                 milvus.io/pod-service-label-added: "true"​
                 milvus.io/querynode-current-group-id: "0"​
            -creationTimestamp: "2024-11-22T08:06:59Z"​
            -finalizers:​
            -- milvus.milvus.io/finalizer​
            -generation: 3​
            -labels:​
            +  creationTimestamp: "2024-11-22T08:06:59Z"​
            +  finalizers:​
            +  - milvus.milvus.io/finalizer​
            +  generation: 3​
            +  labels:​
                 app: milvus​
                 milvus.io/operator-version: 1.1.2​
             name: my-release​
            @@ -237,23 +237,23 @@ namespace: default​
             resourceVersion: "692217324"​
             uid: 7a469ed0-9df1-494e-bd9a-340fac4305b5​
             spec:​
            -components:​
            +  components:​
             
             
          2. 创建包含以下内容的patch.yaml 文件。

            # a patch to retain etcd & storage data and delete pulsar data while delete milvus​
             spec:​
            -dependencies:​
            +  dependencies:​
                 etcd:​
            -    inCluster:​
            +      inCluster:​
                     deletionPolicy: Retain​
                     pvcDeletion: false​
                 storage:​
            -    inCluster:​
            +      inCluster:​
                     deletionPolicy: Retain​
                     pvcDeletion: false​
                 pulsar:​
            -    inCluster:​
            +      inCluster:​
                     deletionPolicy: Delete​
                     pvcDeletion: true​
             
            @@ -300,12 +300,12 @@ milvus.milvus.io "my-release" deleted
             
          3. 编辑上一步保存的values.yaml

            # change the following:​
             pulsar:​
            -enabled: false # set to false​
            -# you may also clean up rest fields under pulsar field​
            -# it's ok to keep them though.​
            +  enabled: false # set to false​
            +  # you may also clean up rest fields under pulsar field​
            +  # it's ok to keep them though.​
             pulsarv3:​
            -enabled: true​
            -# append other values for pulsar v3 chart if needs​
            +  enabled: true​
            +  # append other values for pulsar v3 chart if needs​
             
             
          4. 更新本地 Helm repo

            @@ -366,13 +366,13 @@ my-release-pulsarv3-zookeeper-2 < apiVersion: milvus.io/v1beta1​ kind: Milvus​ metadata:​ -annotations: null # this field should be removed or set to null​ -resourceVersion: null # this field should be removed or set to null​ -uid: null # this field should be removed or set to null​ + annotations: null # this field should be removed or set to null​ + resourceVersion: null # this field should be removed or set to null​ + uid: null # this field should be removed or set to null​ spec:​ -dependencies:​ + dependencies:​ pulsar:​ - inCluster:​ + inCluster:​ chartVersion: pulsar-v3​ # delete all previous values for pulsar v2 and set it to null.​ # you may add additional values here for pulsar v3 if you're sure about it.​ diff --git a/localization/v2.5.x/site/zh/getstarted/run-milvus-k8s/install_cluster-helm.json b/localization/v2.5.x/site/zh/getstarted/run-milvus-k8s/install_cluster-helm.json index c26ab5ee3..f07b3539c 100644 --- a/localization/v2.5.x/site/zh/getstarted/run-milvus-k8s/install_cluster-helm.json +++ b/localization/v2.5.x/site/zh/getstarted/run-milvus-k8s/install_cluster-helm.json @@ -1 +1 @@ -{"codeList":["$ kubectl get sc\n\nNAME PROVISIONER RECLAIMPOLICY VOLUMEBIINDINGMODE ALLOWVOLUMEEXPANSION AGE\nstandard (default) k8s.io/minikube-hostpath Delete Immediate false \n","$ helm repo add milvus https://github.com/zilliztech/milvus-helm\n","helm repo add zilliztech https://github.com/zilliztech/milvus-helm\nhelm repo update\n# upgrade existing helm release\nhelm upgrade my-release zilliztech/milvus\n","$ helm repo update\n","$ helm install my-release milvus/milvus\n","$ kubectl get pods\n","NAME READY STATUS RESTARTS AGE\nmy-release-etcd-0 1/1 Running 0 3m23s\nmy-release-etcd-1 1/1 Running 0 3m23s\nmy-release-etcd-2 1/1 Running 0 3m23s\nmy-release-milvus-datanode-68cb87dcbd-4khpm 1/1 Running 0 3m23s\nmy-release-milvus-indexnode-5c5f7b5bd9-l8hjg 1/1 Running 0 3m24s\nmy-release-milvus-mixcoord-7fb9488465-dmbbj 1/1 Running 0 3m23s\nmy-release-milvus-proxy-6bd7f5587-ds2xv 1/1 Running 0 3m24s\nmy-release-milvus-querynode-5cd8fff495-k6gtg 1/1 Running 0 3m24s\nmy-release-minio-0 1/1 Running 0 3m23s\nmy-release-minio-1 1/1 Running 0 3m23s\nmy-release-minio-2 1/1 Running 0 3m23s\nmy-release-minio-3 1/1 Running 0 3m23s\nmy-release-pulsar-autorecovery-86f5dbdf77-lchpc 1/1 Running 0 3m24s\nmy-release-pulsar-bookkeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-bookkeeper-1 1/1 Running 0 98s\nmy-release-pulsar-broker-556ff89d4c-2m29m 1/1 Running 0 3m23s\nmy-release-pulsar-proxy-6fbd75db75-nhg4v 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-metadata-98zbr 0/1 Completed 0 3m24s\n","$ kubectl get pod my-release-milvus-proxy-6bd7f5587-ds2xv --template\n='{{(index (index .spec.containers 0).ports 0).containerPort}}{{\"\\n\"}}'\n19530\n","$ kubectl port-forward service/my-release-milvus 27017:19530\nForwarding from 127.0.0.1:27017 -> 19530\n","$ kubectl port-forward --address 0.0.0.0 service/my-release-milvus 27017:19530\nForwarding from 0.0.0.0:27017 -> 19530\n","$ helm template my-release milvus/milvus > milvus_manifest.yaml\n","$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/requirements.txt\n$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/save_image.py\n","$ pip3 install -r requirements.txt\n$ python3 save_image.py --manifest milvus_manifest.yaml\n","$ for image in $(find . 
-type f -name \"*.tar.gz\") ; do gunzip -c $image | docker load; done\n","$ kubectl apply -f milvus_manifest.yaml\n","$ helm repo update\n$ helm upgrade my-release zilliztech/milvus\n","$ helm uninstall my-release\n"],"headingContent":"Run Milvus in Kubernetes with Helm","anchorList":[{"label":"使用 Helm 在 Kubernetes 中运行 Milvus","href":"Run-Milvus-in-Kubernetes-with-Helm","type":1,"isActive":false},{"label":"概述","href":"Overview","type":2,"isActive":false},{"label":"前提条件","href":"Prerequisites","type":2,"isActive":false},{"label":"安装 Milvus Helm 图表","href":"Install-Milvus-Helm-Chart","type":2,"isActive":false},{"label":"在线安装","href":"Online-install","type":2,"isActive":false},{"label":"离线安装","href":"Offline-install","type":2,"isActive":false},{"label":"升级运行中的 Milvus 群集","href":"Upgrade-running-Milvus-cluster","type":2,"isActive":false},{"label":"卸载 Milvus","href":"Uninstall-Milvus","type":2,"isActive":false},{"label":"下一步","href":"Whats-next","type":2,"isActive":false}]} \ No newline at end of file +{"codeList":["$ kubectl get sc\n\nNAME PROVISIONER RECLAIMPOLICY VOLUMEBIINDINGMODE ALLOWVOLUMEEXPANSION AGE\nstandard (default) k8s.io/minikube-hostpath Delete Immediate false \n","$ helm repo add milvus https://zilliztech.github.io/milvus-helm/\n","helm repo add zilliztech https://zilliztech.github.io/milvus-helm/\nhelm repo update\n# upgrade existing helm release\nhelm upgrade my-release zilliztech/milvus\n","$ helm repo update\n","$ helm install my-release milvus/milvus\n","$ kubectl get pods\n","NAME READY STATUS RESTARTS AGE\nmy-release-etcd-0 1/1 Running 0 3m23s\nmy-release-etcd-1 1/1 Running 0 3m23s\nmy-release-etcd-2 1/1 Running 0 3m23s\nmy-release-milvus-datanode-68cb87dcbd-4khpm 1/1 Running 0 3m23s\nmy-release-milvus-indexnode-5c5f7b5bd9-l8hjg 1/1 Running 0 3m24s\nmy-release-milvus-mixcoord-7fb9488465-dmbbj 1/1 Running 0 3m23s\nmy-release-milvus-proxy-6bd7f5587-ds2xv 1/1 Running 0 3m24s\nmy-release-milvus-querynode-5cd8fff495-k6gtg 1/1 Running 0 3m24s\nmy-release-minio-0 1/1 Running 0 3m23s\nmy-release-minio-1 1/1 Running 0 3m23s\nmy-release-minio-2 1/1 Running 0 3m23s\nmy-release-minio-3 1/1 Running 0 3m23s\nmy-release-pulsar-autorecovery-86f5dbdf77-lchpc 1/1 Running 0 3m24s\nmy-release-pulsar-bookkeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-bookkeeper-1 1/1 Running 0 98s\nmy-release-pulsar-broker-556ff89d4c-2m29m 1/1 Running 0 3m23s\nmy-release-pulsar-proxy-6fbd75db75-nhg4v 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-0 1/1 Running 0 3m23s\nmy-release-pulsar-zookeeper-metadata-98zbr 0/1 Completed 0 3m24s\n","$ kubectl get pod my-release-milvus-proxy-6bd7f5587-ds2xv --template\n='{{(index (index .spec.containers 0).ports 0).containerPort}}{{\"\\n\"}}'\n19530\n","$ kubectl port-forward service/my-release-milvus 27017:19530\nForwarding from 127.0.0.1:27017 -> 19530\n","$ kubectl port-forward --address 0.0.0.0 service/my-release-milvus 27017:19530\nForwarding from 0.0.0.0:27017 -> 19530\n","$ helm template my-release milvus/milvus > milvus_manifest.yaml\n","$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/requirements.txt\n$ wget https://raw.githubusercontent.com/milvus-io/milvus/master/deployments/offline/save_image.py\n","$ pip3 install -r requirements.txt\n$ python3 save_image.py --manifest milvus_manifest.yaml\n","$ for image in $(find . 
-type f -name \"*.tar.gz\") ; do gunzip -c $image | docker load; done\n","$ kubectl apply -f milvus_manifest.yaml\n","$ helm repo update\n$ helm upgrade my-release zilliztech/milvus\n","$ helm uninstall my-release\n"],"headingContent":"Run Milvus in Kubernetes with Helm","anchorList":[{"label":"使用 Helm 在 Kubernetes 中运行 Milvus","href":"Run-Milvus-in-Kubernetes-with-Helm","type":1,"isActive":false},{"label":"概述","href":"Overview","type":2,"isActive":false},{"label":"前提条件","href":"Prerequisites","type":2,"isActive":false},{"label":"安装 Milvus Helm 图表","href":"Install-Milvus-Helm-Chart","type":2,"isActive":false},{"label":"在线安装","href":"Online-install","type":2,"isActive":false},{"label":"离线安装","href":"Offline-install","type":2,"isActive":false},{"label":"升级运行中的 Milvus 群集","href":"Upgrade-running-Milvus-cluster","type":2,"isActive":false},{"label":"卸载 Milvus","href":"Uninstall-Milvus","type":2,"isActive":false},{"label":"下一步","href":"Whats-next","type":2,"isActive":false}]} \ No newline at end of file diff --git a/localization/v2.5.x/site/zh/getstarted/run-milvus-k8s/install_cluster-helm.md b/localization/v2.5.x/site/zh/getstarted/run-milvus-k8s/install_cluster-helm.md index a07941f33..8d1efc698 100644 --- a/localization/v2.5.x/site/zh/getstarted/run-milvus-k8s/install_cluster-helm.md +++ b/localization/v2.5.x/site/zh/getstarted/run-milvus-k8s/install_cluster-helm.md @@ -65,7 +65,7 @@ NAME PROVISIONER RECLAIMPOLICY VOLUMEBIINDI
          5. 安装 Milvus 之前,建议使用Milvus 大小工具,根据数据大小估算硬件需求。这有助于确保 Milvus 安装的最佳性能和资源分配。

          6. -

            如果您在绘制图像时遇到任何问题,请通过community@zilliz.com联系我们,并提供有关问题的详细信息,我们将为您提供必要的支持。

            +

            如果您在拉动图像时遇到任何问题,请通过community@zilliz.com联系我们,并提供有关问题的详细信息,我们将为您提供必要的支持。

            安装 Milvus Helm 图表

            在安装 Milvus Helm 图表之前,您需要添加 Milvus Helm 资源库。

            -
            $ helm repo add milvus https://github.com/zilliztech/milvus-helm
            +
            $ helm repo add milvus https://zilliztech.github.io/milvus-helm/
             

            位于https://github.com/milvus-io/milvus-helm 的 Milvus Helm Charts 软件仓库已经存档,您可以从https://github.com/zilliztech/milvus-helm 获取进一步更新,具体如下:

            -
            helm repo add zilliztech https://github.com/zilliztech/milvus-helm
            +
            helm repo add zilliztech https://zilliztech.github.io/milvus-helm/
             helm repo update
             # upgrade existing helm release
             helm upgrade my-release zilliztech/milvus
             
            -

            归档软件源仍可用于 4.0.31 之前的图表。对于以后的版本,请使用新版本库。

            +

            存档软件源仍可用于 4.0.31 之前的图表。对于后续版本,请使用新版本库。

            然后从软件源中获取 Milvus 图表,如下所示:

            $ helm repo update
            @@ -156,7 +156,7 @@ my-release-pulsar-zookeeper-metadata-98zbr       0/1   Completed  0        3m24s
             ='{{(index (index .spec.containers 0).ports 0).containerPort}}{{"\n"}}'
             19530
             
            -

            输出结果显示,Milvus 实例的默认端口为19530

            +

            输出结果显示,Milvus 实例的默认服务端口为19530

            如果以独立模式部署了 Milvus,请将 pod 名称从my-release-milvus-proxy-xxxxxxxxxx-xxxxx 更改为my-release-milvus-xxxxxxxxxx-xxxxx

@@ -184,7 +184,7 @@ my-release-pulsar-zookeeper-metadata-98zbr 0/1 Completed 0 3m24s
-

            如果处于网络受限的环境,请按照本节的步骤启动 Milvus 群集。

            +

            如果您处于网络受限的环境,请按照本节的步骤启动 Milvus 群集。

1. 获取 Milvus 清单

            运行以下命令获取 Milvus 清单。

            $ helm template my-release milvus/milvus > milvus_manifest.yaml
             
            diff --git a/localization/v2.5.x/site/zh/home/home.md b/localization/v2.5.x/site/zh/home/home.md index edf7c0efb..523b30f1d 100644 --- a/localization/v2.5.x/site/zh/home/home.md +++ b/localization/v2.5.x/site/zh/home/home.md @@ -114,7 +114,7 @@ id: home.md

            2024 年 11 月 - Milvus 2.5.0 发布

            • 添加了如何进行全文搜索的指导。
            • -
            • 添加了如何进行关键字匹配的指导。
            • +
            • 添加了如何进行文本匹配的指导。
            • 添加了如何启用可空值和默认值的指导。
            • 已添加分析器说明。
            • 已添加位图索引的说明。
            • diff --git a/localization/v2.5.x/site/zh/menuStructure/zh.json b/localization/v2.5.x/site/zh/menuStructure/zh.json index 48059ebea..786683572 100644 --- a/localization/v2.5.x/site/zh/menuStructure/zh.json +++ b/localization/v2.5.x/site/zh/menuStructure/zh.json @@ -286,6 +286,18 @@ "order": 8, "children": [] }, + { + "label": "公制类型", + "id": "metric.md", + "order": 9, + "children": [] + }, + { + "label": "一致性等级", + "id": "consistency.md", + "order": 10, + "children": [] + }, { "label": "内存复制", "id": "replica.md", @@ -602,31 +614,39 @@ ] }, { - "label": "管理索引", + "label": "索引", "id": "manage_indexes", "order": 4, "isMenu": true, "children": [ { - "label": "索引向量场", + "label": "向量索引", "id": "index-vector-fields.md", "order": 0, "children": [] }, { - "label": "索引标量字段", - "id": "index-scalar-fields.md", + "label": "标量索引", + "id": "scalar-index", "order": 1, - "children": [] - }, - { - "label": "BITMAP 指数", - "id": "bitmap.md", - "order": 2, - "children": [] + "isMenu": true, + "children": [ + { + "label": "索引标量字段", + "id": "index-scalar-fields.md", + "order": 1, + "children": [] + }, + { + "label": "位图索引", + "id": "bitmap.md", + "order": 2, + "children": [] + } + ] }, { - "label": "使用 GPU 的索引", + "label": "支持 GPU 的索引", "id": "index-with-gpu.md", "order": 3, "children": [] @@ -682,7 +702,7 @@ "children": [] }, { - "label": "关键词匹配", + "label": "文本匹配", "id": "keyword-match.md", "order": 7, "children": [] @@ -699,30 +719,6 @@ "order": 9, "children": [] }, - { - "label": "使用 mmap", - "id": "mmap.md", - "order": 10, - "children": [] - }, - { - "label": "聚类压缩", - "id": "clustering-compaction.md", - "order": 11, - "children": [] - }, - { - "label": "一致性等级", - "id": "consistency.md", - "order": 12, - "children": [] - }, - { - "label": "公制类型", - "id": "metric.md", - "order": 13, - "children": [] - }, { "label": "元数据过滤", "id": "boolean.md", @@ -736,26 +732,6 @@ "children": [] } ] - }, - { - "label": "数据导入", - "id": "data_import", - "order": 6, - "isMenu": true, - "children": [ - { - "label": "准备源数据", - "id": "prepare-source-data.md", - "order": 0, - "children": [] - }, - { - "label": "导入数据", - "id": "import-data.md", - "order": 1, - "children": [] - } - ] } ] }, @@ -897,11 +873,31 @@ } ] }, + { + "label": "数据导入", + "id": "data_import", + "order": 5, + "isMenu": true, + "children": [ + { + "label": "准备源数据", + "id": "prepare-source-data.md", + "order": 0, + "children": [] + }, + { + "label": "导入数据", + "id": "import-data.md", + "order": 1, + "children": [] + } + ] + }, { "label": "Milvus 迁移", "id": "milvus_migration", "isMenu": true, - "order": 5, + "order": 6, "children": [ { "label": "概述", @@ -1321,10 +1317,30 @@ } ] }, + { + "label": "存储优化", + "id": "storage_optimization", + "order": 10, + "isMenu": true, + "children": [ + { + "label": "使用 mmap", + "id": "mmap.md", + "order": 0, + "children": [] + }, + { + "label": "聚类压缩", + "id": "clustering-compaction.md", + "order": 1, + "children": [] + } + ] + }, { "label": "安全", "id": "security", - "order": 10, + "order": 11, "isMenu": true, "children": [ { @@ -1395,7 +1411,7 @@ ] }, { - "label": "Milvus 后备电源", + "label": "Milvus 备份", "id": "milvus_backup", "order": 1, "children": [ diff --git a/localization/v2.5.x/site/zh/release_notes.md b/localization/v2.5.x/site/zh/release_notes.md index a39e49076..8dc0f04cd 100644 --- a/localization/v2.5.x/site/zh/release_notes.md +++ b/localization/v2.5.x/site/zh/release_notes.md @@ -50,19 +50,19 @@ title: 版本说明

              集群管理 WebUI(测试版)

              为了更好地支持海量数据和丰富功能,Milvus 的复杂设计包括各种依赖关系、众多节点角色、复杂数据结构等。这些方面都会给使用和维护带来挑战。

              Milvus 2.5 引入了内置的集群管理 WebUI,通过可视化 Milvus 复杂的运行环境信息,降低了系统维护难度。这包括数据库和 Collections、网段、通道、依赖关系、节点健康状态、任务信息、缓慢查询等详细信息。

              文本匹配

              Milvus 2.5 利用 Tantivy 的分析器和索引进行文本预处理和索引构建,支持根据特定术语对文本数据进行精确的自然语言匹配。该功能主要用于满足特定条件的过滤搜索,并可结合标量过滤来细化查询结果,允许在满足标量标准的向量内进行相似性搜索。

              -

              有关详情,请参阅关键字匹配

              +

              有关详情,请参阅文本匹配
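作为补充,下面给出一个在过滤表达式中使用 TEXT_MATCH 的示意代码(集合名、字段名、查询词与向量均为假设值,client 为上文方式创建的 MilvusClient):

# Sketch: keep only entities whose "text" field matches either term,
# then run a vector similarity search within that subset
query_vector = [0.1, 0.2, 0.3, 0.4]      # placeholder embedding; use your real vector
filter_expr = 'TEXT_MATCH(text, "machine deep")'

res = client.search(
    collection_name="demo_collection",   # placeholder collection name
    data=[query_vector],
    filter=filter_expr,
    limit=5,
    output_fields=["text"],
)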

              位图索引

              Milvus 系列新增了一种标量数据索引。位图索引使用长度与行数相等的位数组来表示值的存在并加快搜索速度。

              -

              位图索引传统上对低Cardinality字段很有效,这些字段的不同值数量不多--例如,包含性别信息的列只有两个可能的值:男性和女性。

              +

              位图索引传统上对低Cardinality字段非常有效,这些字段的不同值数量不多--例如,包含性别信息的列只有两个可能的值:男性和女性。

              有关详细信息,请参阅位图索引
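下面是一个创建位图索引的示意代码(基于 pymilvus,集合名与字段名为假设值):

# Sketch: build a BITMAP index on a low-cardinality scalar field
index_params = client.prepare_index_params()
index_params.add_index(
    field_name="gender",        # placeholder low-cardinality field
    index_type="BITMAP",
    index_name="gender_bitmap",
)
client.create_index(collection_name="demo_collection", index_params=index_params)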

              可归零和默认值

              Milvus 现在支持为主键字段以外的标量字段设置可归零属性和默认值。对于标记为nullable=True 的标量字段,用户可以在插入数据时省略该字段;系统会将其视为空值或默认值(如果已设置),而不会出错。

              -

              默认值和可归零属性为 Milvus 提供了更大的灵活性。用户在创建 Collections 时,可以利用这一功能来处理具有不确定值的字段。它还简化了从其他数据库系统到 Milvus 的数据迁移,允许处理包含空值的数据集,同时保留原始默认值设置。

              +

              默认值和可归零属性为 Milvus 提供了更大的灵活性。用户在创建 Collections 时,可以对具有不确定值的字段利用这一功能。它还简化了从其他数据库系统到 Milvus 的数据迁移,允许处理包含空值的数据集,同时保留原始默认值设置。

              有关详情,请参阅 "可空值和默认值"。
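下面的示意代码展示了在 Schema 中声明可空字段和带默认值字段的一种写法(字段名均为假设值,参数细节请以官方文档为准):

from pymilvus import MilvusClient, DataType

client = MilvusClient(uri="http://localhost:19530")

schema = client.create_schema(auto_id=True)
schema.add_field(field_name="pk", datatype=DataType.INT64, is_primary=True)
schema.add_field(field_name="vector", datatype=DataType.FLOAT_VECTOR, dim=8)
# Nullable: may be omitted at insert time and treated as null
schema.add_field(field_name="age", datatype=DataType.INT64, nullable=True)
# Default value: used when the field is not provided
schema.add_field(field_name="status", datatype=DataType.VARCHAR, max_length=32, default_value="active")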

              基于 Faiss 的 HNSW SQ/PQ/PRQ

              通过与 Faiss 社区的密切合作,Faiss 中的 HNSW 算法在功能和性能方面都有了显著的改进。出于稳定性和可维护性的考虑,Milvus 2.5 正式将对 HNSW 的支持从 hnswlib 迁移到 Faiss。

              -

              基于 Faiss,Milvus 2.5 支持 HNSW 上的多种量化方法,以满足不同场景的需求:SQ(标量量化器)、PQ(乘积量化器)和 PRQ(乘积残差量化器)。SQ 和 PQ 比较常见;SQ 提供了良好的查询性能和构建速度,而 PQ 在相同压缩比的情况下提供了更好的召回率。许多向量数据库通常使用二进制量化,这是 SQ 量化的一种简单形式。

              -

              PRQ 是 PQ 和 AQ(加法量化器)的融合。与 PQ 相比,PRQ 需要更长的构建时间,但却能提供更好的召回率,尤其是在高压缩率的情况下,二进制压缩的召回率更高。

              -

              聚类压缩(测试版)

              Milvus 2.5 引入了聚类压缩(Clustering Compaction)功能,以加快搜索速度并降低大型 Collections 的成本。通过指定一个标量字段作为聚类关键字,数据会按范围重新分配,以优化存储和检索。该功能的作用类似于全局索引,可使 Milvus 在基于聚类元数据的查询过程中有效地剪裁数据,从而在应用标量过滤器时提高搜索性能。

              +

              基于 Faiss,Milvus 2.5 支持 HNSW 上的多种量化方法,以满足不同场景的需求:SQ(标量量化器)、PQ(乘积量化器)和 PRQ(乘积残差量化器)。SQ 和 PQ 比较常见;SQ 具有良好的查询性能和构建速度,而 PQ 在相同压缩比的情况下具有更好的召回率。许多向量数据库通常使用二进制量化,这是 SQ 量化的一种简单形式。

              +

              PRQ 是 PQ 和 AQ(加法量化器)的融合。与 PQ 相比,它需要更长的构建时间,但却能提供更好的召回率,尤其是在高压缩率的情况下,比如二进制压缩。
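下面给出一个创建 HNSW_SQ 索引的示意示例(参数名与取值仅作说明,sq_type 等细节请以官方索引文档为准;client 为已创建的 MilvusClient):

# Sketch: HNSW with scalar quantization; values are illustrative only
index_params = client.prepare_index_params()
index_params.add_index(
    field_name="vector",
    index_type="HNSW_SQ",
    metric_type="L2",
    params={"M": 16, "efConstruction": 200, "sq_type": "SQ8"},
)
client.create_index(collection_name="demo_collection", index_params=index_params)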

              +

              聚类压缩(测试版)

              Milvus 2.5 引入了聚类压缩(Clustering Compaction)功能,以加快搜索速度并降低大型 Collections 的成本。通过指定一个标量字段作为聚类关键字,数据会按范围重新分配,以优化存储和检索。该功能的作用类似于全局索引,可让 Milvus 在基于聚类元数据的查询过程中有效地剪裁数据,从而在应用标量过滤器时提高搜索性能。

              有关详情,请参阅聚类压缩
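作为示意,下面展示了在定义 Schema 时将某个标量字段指定为聚类键的写法(is_clustering_key 为 pymilvus 中的参数名,其余名称为假设值):

from pymilvus import MilvusClient, DataType

client = MilvusClient(uri="http://localhost:19530")

# Sketch: mark a scalar field as the clustering key so clustering compaction
# can redistribute data by its value ranges
schema = client.create_schema(auto_id=True)
schema.add_field(field_name="pk", datatype=DataType.INT64, is_primary=True)
schema.add_field(field_name="vector", datatype=DataType.FLOAT_VECTOR, dim=8)
schema.add_field(field_name="key", datatype=DataType.INT64, is_clustering_key=True)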

              -

              其他功能

              流节点(测试版)

              Milvus 2.5 引入了一个名为流节点的新组件,它提供了先写日志(WAL)服务。这使 Milvus 能够在读写通道前后达成共识,解锁新特性、功能和优化。Milvus 2.5 版默认禁用此功能,3.0 版将正式启用。

              +

              其他功能

              流节点(测试版)

              Milvus 2.5 引入了一个名为流节点的新组件,它可提供先写日志(WAL)服务。这使 Milvus 能够在读写通道前后达成共识,解锁新特性、功能和优化。Milvus 2.5 默认禁用此功能,将在 3.0 版本中正式启用。

              支持 IPv6

              Milvus 现在支持 IPv6,从而扩大了网络连接和兼容性。

              CSV 批量导入

              除 JSON 和 Parquet 格式外,Milvus 现在还支持直接批量导入 CSV 格式的数据。

              加速查询的表达式模板

              Milvus 现在支持表达式模板,提高了表达式解析效率,尤其是在使用复杂表达式的情况下。
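下面是一个使用表达式模板的示意写法(filter_params 的具体形式请以所用 SDK 版本为准,集合名与取值均为假设值):

# Sketch: the placeholder in the filter is bound through filter_params,
# so the parsed expression can be cached and reused
res = client.query(
    collection_name="demo_collection",
    filter="id in {ids}",
    filter_params={"ids": [1, 2, 3]},
    output_fields=["id"],
)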

              @@ -78,6 +78,6 @@ title: 版本说明

              依赖关系升级

              升级至 ETCD 3.5.16 和 Pulsar 3.0.7 LTS,修复了现有的 CVE 并增强了安全性。注意:升级到 Pulsar 3.x 与之前的 2.x 版本不兼容。

              对于已经拥有可正常使用的 Milvus 部署的用户,需要先升级 ETCD 和 Pulsar 组件,然后才能使用新特性和功能。详情请参阅将 Pulsar 从 2.x 升级到 3.x。

              本地存储 V2

              在 Milvus 2.5 中引入了新的本地文件格式,提高了标量数据的加载和查询效率,减少了内存开销,并为未来的优化奠定了基础。

              -

              表达式解析优化

              通过对重复表达式实施缓存、升级 ANTLR 和优化NOT IN 子句的性能,改进了表达式解析。

              +

              表达式解析优化

              通过对重复表达式实施缓存、升级 ANTLR 以及优化NOT IN 子句的性能,改进了表达式解析。

              改进 DDL 并发性能

              优化了数据定义语言 (DDL) 操作的并发性能。

              RESTful API 功能调整

              使 RESTful API 的功能与其他 SDK 保持一致。

              diff --git a/localization/v2.5.x/site/zh/tutorials/hybrid_search_with_milvus.md b/localization/v2.5.x/site/zh/tutorials/hybrid_search_with_milvus.md index abb3de842..2a457e633 100644 --- a/localization/v2.5.x/site/zh/tutorials/hybrid_search_with_milvus.md +++ b/localization/v2.5.x/site/zh/tutorials/hybrid_search_with_milvus.md @@ -25,14 +25,14 @@ title: 使用 Milvus 进行混合搜索

              Milvus 支持密集、稀疏和混合检索方法:

              • 密集检索:利用语义上下文来理解查询背后的含义。
              • -
              • 稀疏检索:强调关键词匹配,根据特定术语查找结果,相当于全文检索。
              • +
              • 稀疏检索:强调文本匹配,根据特定术语查找结果,相当于全文检索。
              • 混合检索:结合了密集和稀疏两种方法,捕捉完整的上下文和特定的关键词,从而获得全面的搜索结果。
              -

              通过整合这些方法,Milvus 混合搜索平衡了语义和词汇的相似性,提高了搜索结果的整体相关性。本笔记本将介绍这些检索策略的设置和使用过程,并重点介绍它们在各种搜索场景中的有效性。

              +

              通过整合这些方法,Milvus 混合搜索平衡了语义和词汇的相似性,提高了搜索结果的整体相关性。本笔记本将介绍设置和使用这些检索策略的过程,并重点介绍它们在各种搜索场景中的有效性。
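在进入具体教程之前,下面先给出一个混合检索的最小示意(集合名、字段名与查询向量均为假设值,重排序使用 RRF;client 为已创建的 MilvusClient):

from pymilvus import AnnSearchRequest, RRFRanker

# Sketch: one dense request plus one sparse request, fused with RRF re-ranking
dense_query_vector = [0.1, 0.2, 0.3, 0.4]        # placeholder dense embedding
sparse_query_vector = {1: 0.5, 100: 0.3}          # placeholder sparse embedding

dense_req = AnnSearchRequest(
    data=[dense_query_vector], anns_field="dense_vector",
    param={"metric_type": "IP"}, limit=10,
)
sparse_req = AnnSearchRequest(
    data=[sparse_query_vector], anns_field="sparse_vector",
    param={"metric_type": "IP"}, limit=10,
)
res = client.hybrid_search(
    collection_name="quora_demo",                 # placeholder collection name
    reqs=[dense_req, sparse_req],
    ranker=RRFRanker(),
    limit=5,
)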

              依赖关系和环境

              $ pip install --upgrade pymilvus "pymilvus[model]"
               

              下载数据集

              要演示搜索,我们需要一个文档语料库。让我们使用 Quora 重复问题数据集,并将其放在本地目录中。

              -

              数据集来源:第一个 Quora 数据集发布:问题对

              +

              数据集来源:首次发布的 Quora 数据集:问题对

              # Run this cell to download the dataset
               $ wget http://qim.fs.quoracdn.net/quora_duplicate_questions.tsv
               
              @@ -291,7 +291,7 @@ formatted_results = doc_text_formatting(ef, query, hybrid_results)

              在海得拉巴做哪一行比较好

              启动机器人技术的最佳方式是什么哪种开发板最适合我 开始工作

              新手需要掌握哪些数学知识 才能理解计算机 编程的算法哪些算法书籍适合完全初学者

              -

              如何让生活适合自己,让生活不再从精神和情感上虐待自己

              +

              如何让生活适合自己,不让生活在精神上和情感上虐待自己

              混合搜索结果:

              开始学习机器人技术的最佳方法是什么哪种开发板最好

              什么是 Java 编程?如何学习 Java 编程语言?

diff --git a/localization/v2.5.x/site/zh/userGuide/collections/manage-collections.md b/localization/v2.5.x/site/zh/userGuide/collections/manage-collections.md index 92097ce23..8b5efe1be 100644 --- a/localization/v2.5.x/site/zh/userGuide/collections/manage-collections.md +++ b/localization/v2.5.x/site/zh/userGuide/collections/manage-collections.md @@ -148,7 +148,7 @@ title: 集合说明
-

              创建索引并加载 Collections 后,就可以通过输入一个或多个查询向量开始相似性搜索。例如,当接收到搜索请求中携带的查询向量表示时,Milvus 会使用指定的度量类型来衡量查询向量与目标 Collections 中的向量之间的相似性,然后再返回那些与查询语义相似的向量。

              +

              创建索引并加载 Collections 后,就可以通过输入一个或多个查询向量开始相似性搜索。例如,当接收到搜索请求中携带的查询向量表示时,Milvus 会使用指定的度量类型来衡量查询向量与目标 Collections 中的向量之间的相似性,然后再返回与查询语义相似的向量。

              你还可以在搜索和查询中加入元数据过滤功能,以提高搜索结果的相关性。请注意,元数据过滤条件在查询中是必须的,但在搜索中是可选的。

              有关适用度量类型的详细信息,请参阅度量类型

              有关搜索和查询的更多信息,请参阅搜索和 Rerankers章节中的文章,其中包括基本功能。
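下面给出一个带元数据过滤的相似性搜索示意(集合名、过滤条件与向量均为假设值,client 为已创建的 MilvusClient):

# Sketch: single-vector search with an optional metadata filter
query_vector = [0.1, 0.2, 0.3, 0.4]   # placeholder embedding; use your real vector
res = client.search(
    collection_name="demo_collection",
    data=[query_vector],              # one or more query vectors
    limit=5,
    filter='color == "red"',          # optional metadata filtering
    output_fields=["color"],
)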

              @@ -161,7 +161,7 @@ title: 集合说明
            • 搜索迭代器

            • 查询

            • 全文搜索

            • -
            • 关键词匹配

            • +
            • 文本匹配

            此外,Milvus 还提供了提高搜索性能和效率的增强功能。这些增强功能默认为禁用,您可以根据自己的服务要求启用和使用它们。它们是

@@ -219,7 +219,7 @@ title: 集合说明
-

              您可以为您的集合创建别名。一个集合可以有多个别名,但集合不能共享一个别名。收到针对某个 Collection 的请求后,Milvus 会根据提供的名称定位该 Collection。如果所提供名称的 Collection 不存在,Milvus 会继续定位所提供名称的别名。你可以使用 Collections 别名来调整代码以适应不同的情况。

              +

              您可以为您的集合创建别名。一个集合可以有多个别名,但集合不能共享一个别名。收到针对某个 Collection 的请求后,Milvus 会根据所提供的名称定位该 Collection。如果所提供名称的 Collection 不存在,Milvus 会继续定位所提供名称的别名。你可以使用 Collections 别名来调整代码以适应不同的情况。

              更多详情,请参阅管理别名
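下面的示意代码演示了为 Collections 创建并查看别名(名称均为假设值):

# Sketch: a collection can own several aliases, but an alias maps to only one collection
client.create_alias(collection_name="demo_collection", alias="demo_prod")
print(client.list_aliases(collection_name="demo_collection"))
# Requests addressed to "demo_prod" are routed to "demo_collection"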

              函数

              分布式数据库系统通常使用一致性级别来定义跨数据节点和副本的数据相同性。在创建 Collections 或在 Collections 中进行相似性搜索时,可以分别设置不同的一致性级别。适用的一致性级别包括有限制的不稳定性会话最终

              +

              分布式数据库系统通常使用一致性级别来定义跨数据节点和副本的数据相同性。在创建 Collections 或在 Collections 中进行相似性搜索时,可以分别设置不同的一致性级别。适用的一致性级别有强有限制的不稳定性会话最终

              有关这些一致性级别的详细信息,请参阅一致性级别。
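作为示意,可以在创建 Collections 时指定一致性级别(取值为上文列出的级别名称,其余参数为假设值);部分读请求也支持按请求覆盖该级别,细节请以官方文档为准:

# Sketch: set the collection-level consistency level at creation time
client.create_collection(
    collection_name="demo_collection",
    dimension=768,
    consistency_level="Bounded",
)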

              限制

              Milvus 使用全局图形内存池分配 GPU 内存。

              -

              它支持Milvus 配置文件中的两个参数initMemSizemaxMemSize 。显存池大小初始设置为initMemSize ,超过此限制后将自动扩展至maxMemSize

              +

              它支持Milvus 配置文件中的两个参数initMemSizemaxMemSize 。内存池大小初始设置为initMemSize ,超过此限制后将自动扩展至maxMemSize

              Milvus 启动时,默认initMemSize 为可用 GPU 内存的 1/2,默认maxMemSize 等于所有可用 GPU 内存。

              在 Milvus 2.4.1(包括 2.4.1 版)之前,Milvus 使用统一的 GPU 内存池。对于 2.4.1 之前的版本(包括 2.4.1 版),建议将这两个值都设为 0。

              gpu:
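  initMemSize: 0 # 示意补全:初始显存池大小(键名与取值沿用官方示例,单位等细节以官方配置说明为准)
  maxMemSize: 0 # 示意补全:显存池上限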
              @@ -91,7 +91,7 @@ title: 使用 GPU 建立索引
               
            • IVF_PQ:提供更高的质量,但构建时间较慢。

            • NN_DESCENT:提供更快的生成速度,但可能会降低召回率。

          7. -
          8. cache_dataset_on_device(字符串"true"|"false"):决定是否在 GPU 内存中缓存原始数据集。将其设置为"true "可通过完善搜索结果提高召回率,而将其设置为"false "则可节省 GPU 内存。

          9. +
          10. cache_dataset_on_device(字符串"true"|"false"):决定是否在 GPU 内存中缓存原始数据集。将其设置为"true "可通过细化搜索结果提高召回率,而将其设置为"false "则可节省 GPU 内存。

          11. GPU_IVF_FLATGPU_IVF_PQ索引

            index_params = {
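    # Sketch continuation of the truncated snippet (values are illustrative only):
    "metric_type": "L2",
    "index_type": "GPU_IVF_FLAT",        # or "GPU_IVF_PQ"
    "params": {
        "nlist": 1024,                   # number of IVF clusters
        "cache_dataset_on_device": "false",
    },
}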
            @@ -163,7 +163,7 @@ collection.create_index(
             
          12. itopk_size:决定搜索过程中保留的中间结果的大小。较大的值可能会提高召回率,但会降低搜索性能。它至少应等于最终的 top-k(极限)值,通常是 2 的幂次(如 16、32、64、128)。

          13. search_width:指定搜索过程中进入 CAGRA 图的入口点数量。增加该值可以提高召回率,但可能会影响搜索性能。

          14. min_iterations/max_ iterations:这些参数控制搜索迭代过程。默认情况下,它们被设置为0,CAGRA 会根据itopk_sizesearch_width 自动确定迭代次数。手动调整这些值有助于平衡性能和准确性。

          15. -
          16. team_size团队规模):指定用于在 GPU 上计算度量距离的 CUDA 线程数。常用值为 2 的幂次,最高为 32(例如 2、4、8、16、32)。它对搜索性能影响不大。默认值为0,Milvus 会根据向量维度自动选择team_size

          17. +
          18. team_size团队规模):指定用于在 GPU 上计算度量距离的 CUDA 线程数。常用值为 2 的幂次,最高为 32(如 2、4、8、16、32)。它对搜索性能影响不大。默认值为0,Milvus 会根据向量维度自动选择team_size

          19. GPU_IVF_FLATGPU_IVF_PQ索引

            search_params = {
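    # Sketch continuation of the truncated snippet (values are illustrative only):
    "metric_type": "L2",
    "params": {
        "nprobe": 10,   # clusters probed at search time for GPU_IVF_FLAT / GPU_IVF_PQ
    },
}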
            @@ -201,7 +201,7 @@ collection.search(
                   
                 

            使用 GPU 索引时,请注意某些限制:

              -
            • 对于GPU_IVF_FLAT限制的最大值为 256。

            • +
            • 对于GPU_IVF_FLAT限制的最大值为 1024。

            • 对于GPU_IVF_PQGPU_CAGRAlimit的最大值为 1024。

            • 虽然GPU_BRUTE_FORCE 没有设定限制,但建议不要超过 4096,以避免潜在的性能问题。

            • 目前,GPU 索引不支持 COSINE 距离。如果需要使用 COSINE 距离,应首先对数据进行归一化处理,然后使用内积(IP)距离作为替代。

            • diff --git a/localization/v2.5.x/site/zh/userGuide/schema/analyzer/analyzer-overview.md b/localization/v2.5.x/site/zh/userGuide/schema/analyzer/analyzer-overview.md index 35f8b0780..42291efe3 100644 --- a/localization/v2.5.x/site/zh/userGuide/schema/analyzer/analyzer-overview.md +++ b/localization/v2.5.x/site/zh/userGuide/schema/analyzer/analyzer-overview.md @@ -20,12 +20,12 @@ summary: >- >

              在文本处理中,分析器是将原始文本转换为结构化可搜索格式的关键组件。每个分析器通常由两个核心部件组成:标记器过滤器。它们共同将输入文本转换为标记,完善这些标记,并为高效索引和检索做好准备。

              -

              在 Milvus 中,创建 Collections 时,将VARCHAR 字段添加到 Collections Schema 时,会对分析器进行配置。分析器生成的标记可用于建立关键字匹配索引,或转换为稀疏嵌入以进行全文检索。有关详细信息,请参阅关键字匹配全文搜索

              +

              在 Milvus 中,创建 Collections 时,将VARCHAR 字段添加到 Collections Schema 时,会对分析器进行配置。分析器生成的标记可用于建立文本匹配索引,或转换为稀疏嵌入以进行全文检索。更多信息,请参阅文本匹配全文搜索

              使用分析器可能会影响性能。

                -
              • 全文搜索:对于全文搜索,数据节点和查询节点通道消耗数据的速度更慢,因为它们必须等待标记化完成。因此,新输入的数据需要更长的时间才能用于搜索。

              • -
              • 关键词匹配:对于关键字匹配,索引创建速度也较慢,因为标记化需要在索引建立之前完成。

              • +
              • 全文搜索:对于全文搜索,数据节点和查询节点通道消耗数据的速度更慢,因为它们必须等待标记化完成。因此,新输入的数据需要更长时间才能用于搜索。

              • +
              • 文本匹配:对于文本匹配,索引创建速度也较慢,因为标记化需要在建立索引之前完成。

              分析器剖析

              Milvus 的分析器由一个标记化器零个或多个过滤器组成。

              • 标记化器:标记器将输入文本分解为称为标记的离散单元。根据标记符类型的不同,这些标记符可以是单词或短语。

              • -
              • 过滤器:可以对标记符进行过滤,进一步细化标记符,例如,将标记符变成小写或删除常用词。

              • +
              • 过滤器:可以对标记符进行过滤,以进一步细化标记符,例如,将标记符变成小写或删除常用词。

              下面的工作流程显示了分析器是如何处理文本的。

              analyzer-overview

              @@ -67,7 +67,7 @@ summary: >-

              Milvus 提供两种类型的分析器,以满足不同的文本处理需求。

                -
              • 内置分析器:这些是预定义的配置,只需最少的设置即可完成常见的文本处理任务。内置分析器不需要复杂的配置,是通用搜索的理想选择。

              • +
              • 内置分析器:这些是预定义配置,只需最少的设置即可完成常见的文本处理任务。内置分析器不需要复杂的配置,是通用搜索的理想选择。

              • 自定义分析器:对于更高级的需求,自定义分析器允许你通过指定标记器和零个或多个过滤器来定义自己的配置。这种自定义级别对于需要精确控制文本处理的特殊用例尤其有用。

              @@ -100,7 +100,7 @@ summary: >-
            • english:针对英语文本进行了优化,支持英语停止词。

            • chinese:专门用于处理中文文本,包括针对中文语言结构的标记化。

            -

            自定义分析器

            对于更高级的文本处理,Milvus 中的自定义分析器允许您通过指定标记符号化器和过滤器来建立一个定制的文本处理管道。这种设置非常适合需要精确控制的特殊用例。

            +

            自定义分析器

            对于更高级的文本处理,Milvus 中的自定义分析器允许您通过指定标记化器和过滤器来构建定制的文本处理管道。这种设置非常适合需要精确控制的特殊用例。

            标记器

            标记化器是自定义分析器的必备组件,它通过将输入文本分解为离散单元或标记来启动分析器管道。标记化遵循特定的规则,例如根据标记化器的类型用空白或标点符号分割。这一过程可以更精确、更独立地处理每个单词或短语。

            例如,标记化器会将文本"Vector Database Built for Scale" 转换为单独的标记。

            ["Vector", "Database", "Built", "for", "Scale"]​
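下面给出一个自定义分析器配置的示意代码(standard 标记器加 lowercase 过滤器;集合名、字段名等均为假设值,参数细节以官方文档为准):

from pymilvus import MilvusClient, DataType

client = MilvusClient(uri="http://localhost:19530")
schema = client.create_schema(auto_id=True)
schema.add_field(field_name="pk", datatype=DataType.INT64, is_primary=True)

# Sketch: a custom analyzer (standard tokenizer + lowercase filter) attached to a VARCHAR field
analyzer_params = {
    "tokenizer": "standard",
    "filter": ["lowercase"],
}
schema.add_field(
    field_name="text",
    datatype=DataType.VARCHAR,
    max_length=1000,
    enable_analyzer=True,
    analyzer_params=analyzer_params,
)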
            diff --git a/localization/v2.5.x/site/zh/userGuide/schema/sparse_vector.json b/localization/v2.5.x/site/zh/userGuide/schema/sparse_vector.json
            index 86a3c9a62..c7ef266d7 100644
            --- a/localization/v2.5.x/site/zh/userGuide/schema/sparse_vector.json
            +++ b/localization/v2.5.x/site/zh/userGuide/schema/sparse_vector.json
            @@ -1 +1 @@
            -{"codeList":["from scipy.sparse import csr_matrix​\n​\n# Create a sparse matrix​\nrow = [0, 0, 1, 2, 2, 2]​\ncol = [0, 2, 2, 0, 1, 2]​\ndata = [1, 2, 3, 4, 5, 6]​\nsparse_matrix = csr_matrix((data, (row, col)), shape=(3, 3))​\n​\n# Represent sparse vector using the sparse matrix​\nsparse_vector = sparse_matrix.getrow(0)​\n\n","# Represent sparse vector using a dictionary​\nsparse_vector = [{1: 0.5, 100: 0.3, 500: 0.8, 1024: 0.2, 5000: 0.6}]​\n\n","SortedMap sparseVector = new TreeMap<>();​\nsparseVector.put(1L, 0.5f);​\nsparseVector.put(100L, 0.3f);​\nsparseVector.put(500L, 0.8f);​\nsparseVector.put(1024L, 0.2f);​\nsparseVector.put(5000L, 0.6f);​\n\n","# Represent sparse vector using a list of tuples​\nsparse_vector = [[(1, 0.5), (100, 0.3), (500, 0.8), (1024, 0.2), (5000, 0.6)]]​\n\n","from pymilvus import MilvusClient, DataType​\n​\nclient = MilvusClient(uri=\"http://localhost:19530\")​\n​\nclient.drop_collection(collection_name=\"my_sparse_collection\")​\n​\nschema = client.create_schema(​\n    auto_id=True,​\n    enable_dynamic_fields=True,​\n)​\n​\nschema.add_field(field_name=\"pk\", datatype=DataType.VARCHAR, is_primary=True, max_length=100)​\nschema.add_field(field_name=\"sparse_vector\", datatype=DataType.SPARSE_FLOAT_VECTOR)​\n\n","import io.milvus.v2.client.ConnectConfig;​\nimport io.milvus.v2.client.MilvusClientV2;​\n​\nimport io.milvus.v2.common.DataType;​\nimport io.milvus.v2.service.collection.request.AddFieldReq;​\nimport io.milvus.v2.service.collection.request.CreateCollectionReq;​\n​\nMilvusClientV2 client = new MilvusClientV2(ConnectConfig.builder()​\n        .uri(\"http://localhost:19530\")​\n        .build());​\n        ​\nCreateCollectionReq.CollectionSchema schema = client.createSchema();​\nschema.setEnableDynamicField(true);​\nschema.addField(AddFieldReq.builder()​\n        .fieldName(\"pk\")​\n        .dataType(DataType.VarChar)​\n        .isPrimaryKey(true)​\n        .autoID(true)​\n        .maxLength(100)​\n        .build());​\n​\nschema.addField(AddFieldReq.builder()​\n        .fieldName(\"sparse_vector\")​\n        .dataType(DataType.SparseFloatVector)​\n        .build());​\n\n","import { DataType } from \"@zilliz/milvus2-sdk-node\";​\n​\nconst schema = [​\n  {​\n    name: \"metadata\",​\n    data_type: DataType.JSON,​\n  },​\n  {​\n    name: \"pk\",​\n    data_type: DataType.Int64,​\n    is_primary_key: true,​\n  },​\n  {​\n    name: \"sparse_vector\",​\n    data_type: DataType.SparseFloatVector,​\n  }​\n];​\n​\n\n","export primaryField='{​\n    \"fieldName\": \"pk\",​\n    \"dataType\": \"VarChar\",​\n    \"isPrimary\": true,​\n    \"elementTypeParams\": {​\n        \"max_length\": 100​\n    }​\n}'​\n​\nexport vectorField='{​\n    \"fieldName\": \"sparse_vector\",​\n    \"dataType\": \"SparseFloatVector\"​\n}'​\n​\nexport schema=\"{​\n    \\\"autoID\\\": true,​\n    \\\"fields\\\": [​\n        $primaryField,​\n        $vectorField​\n    ]​\n}\"​\n\n","index_params = client.prepare_index_params()​\n​\nindex_params.add_index(​\n    field_name=\"sparse_vector\",​\n    index_name=\"sparse_inverted_index\",​\n    index_type=\"SPARSE_INVERTED_INDEX\",​\n    metric_type=\"IP\",​\n    params={\"drop_ratio_build\": 0.2},​\n)​\n\n","import io.milvus.v2.common.IndexParam;​\nimport java.util.*;​\n​\nList indexes = new ArrayList<>();​\nMap extraParams = new HashMap<>();​\nextraParams.put(\"drop_ratio_build\", 0.2);​\nindexes.add(IndexParam.builder()​\n        .fieldName(\"sparse_vector\")​\n        .indexName(\"sparse_inverted_index\")​\n        
.indexType(IndexParam.IndexType.SPARSE_INVERTED_INDEX)​\n        .metricType(IndexParam.MetricType.IP)​\n        .extraParams(extraParams)​\n        .build());​\n\n","const indexParams = await client.createIndex({​\n    index_name: 'sparse_inverted_index',​\n    field_name: 'sparse_vector',​\n    metric_type: MetricType.IP,​\n    index_type: IndexType.SPARSE_WAND,​\n    params: {​\n      drop_ratio_build: 0.2,​\n    },​\n});​\n\n","export indexParams='[​\n        {​\n            \"fieldName\": \"sparse_vector\",​\n            \"metricType\": \"IP\",​\n            \"indexName\": \"sparse_inverted_index\",​\n            \"indexType\": \"SPARSE_INVERTED_INDEX\",​\n            \"params\":{\"drop_ratio_build\": 0.2}​\n        }​\n    ]'​\n\n","client.create_collection(​\n    collection_name=\"my_sparse_collection\",​\n    schema=schema,​\n    index_params=index_params​\n)​\n\n","import io.milvus.v2.client.ConnectConfig;​\nimport io.milvus.v2.client.MilvusClientV2;​\n​\nMilvusClientV2 client = new MilvusClientV2(ConnectConfig.builder()​\n        .uri(\"http://localhost:19530\")​\n        .build());​\n        ​\nCreateCollectionReq requestCreate = CreateCollectionReq.builder()​\n        .collectionName(\"my_sparse_collection\")​\n        .collectionSchema(schema)​\n        .indexParams(indexes)​\n        .build();​\nclient.createCollection(requestCreate);​\n\n","import { MilvusClient } from \"@zilliz/milvus2-sdk-node\";​\n​\nconst client = new MilvusClient({​\n    address: 'http://localhost:19530'​\n});​\n​\nawait client.createCollection({​\n    collection_name: 'my_sparse_collection',​\n    schema: schema,​\n    index_params: indexParams​\n});​\n\n","curl --request POST \\​\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/collections/create\" \\​\n--header \"Authorization: Bearer ${TOKEN}\" \\​\n--header \"Content-Type: application/json\" \\​\n-d \"{​\n    \\\"collectionName\\\": \\\"my_sparse_collection\\\",​\n    \\\"schema\\\": $schema,​\n    \\\"indexParams\\\": $indexParams​\n}\"​\n\n","sparse_vectors = [​\n    {\"sparse_vector\": {1: 0.5, 100: 0.3, 500: 0.8}},​\n    {\"sparse_vector\": {10: 0.1, 200: 0.7, 1000: 0.9}},​\n]​\n​\nclient.insert(​\n    collection_name=\"my_sparse_collection\",​\n    data=sparse_vectors​\n)​\n\n","import com.google.gson.Gson;​\nimport com.google.gson.JsonObject;​\nimport io.milvus.v2.service.vector.request.InsertReq;​\nimport io.milvus.v2.service.vector.response.InsertResp;​\n​\nList rows = new ArrayList<>();​\nGson gson = new Gson();​\n{​\n    JsonObject row = new JsonObject();​\n    SortedMap sparse = new TreeMap<>();​\n    sparse.put(1L, 0.5f);​\n    sparse.put(100L, 0.3f);​\n    sparse.put(500L, 0.8f);​\n    row.add(\"sparse_vector\", gson.toJsonTree(sparse));​\n    rows.add(row);​\n}​\n{​\n    JsonObject row = new JsonObject();​\n    SortedMap sparse = new TreeMap<>();​\n    sparse.put(10L, 0.1f);​\n    sparse.put(200L, 0.7f);​\n    sparse.put(1000L, 0.9f);​\n    row.add(\"sparse_vector\", gson.toJsonTree(sparse));​\n    rows.add(row);​\n}​\n​\nInsertResp insertR = client.insert(InsertReq.builder()​\n        .collectionName(\"my_sparse_collection\")​\n        .data(rows)​\n        .build());​\n\n","const data = [​\n  { sparse_vector: { \"1\": 0.5, \"100\": 0.3, \"500\": 0.8 } },​\n  { sparse_vector: { \"10\": 0.1, \"200\": 0.7, \"1000\": 0.9 } },​\n];​\nclient.insert({​\n  collection_name: \"my_sparse_collection\",​\n  data: data,​\n});​\n​\n\n","curl --request POST \\​\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/entities/insert\" \\​\n--header \"Authorization: Bearer 
${TOKEN}\" \\​\n--header \"Content-Type: application/json\" \\​\n-d '{​\n    \"data\": [​\n        {\"sparse_vector\": {\"1\": 0.5, \"100\": 0.3, \"500\": 0.8}},​\n        {\"sparse_vector\": {\"10\": 0.1, \"200\": 0.7, \"1000\": 0.9}}        ​\n    ],​\n    \"collectionName\": \"my_sparse_collection\"​\n}'​\n​\n## {\"code\":0,\"cost\":0,\"data\":{\"insertCount\":2,\"insertIds\":[\"453577185629572534\",\"453577185629572535\"]}}​\n\n","# Prepare search parameters​\nsearch_params = {​\n    \"params\": {\"drop_ratio_search\": 0.2},  # Additional optional search parameters​\n}​\n​\n# Prepare the query vector​\nquery_vector = [{1: 0.2, 50: 0.4, 1000: 0.7}]​\n\n","res = client.search(​\n    collection_name=\"my_sparse_collection\",​\n    data=query_vector,​\n    limit=3,​\n    output_fields=[\"pk\"],​\n    search_params=search_params,​\n)​\n​\nprint(res)​\n​\n# Output​\n# data: [\"[{'id': '453718927992172266', 'distance': 0.6299999952316284, 'entity': {'pk': '453718927992172266'}}, {'id': '453718927992172265', 'distance': 0.10000000149011612, 'entity': {'pk': '453718927992172265'}}]\"]​\n\n","import io.milvus.v2.service.vector.request.SearchReq;​\nimport io.milvus.v2.service.vector.request.data.SparseFloatVec;​\nimport io.milvus.v2.service.vector.response.SearchResp;​\n​\nMap searchParams = new HashMap<>();​\nsearchParams.put(\"drop_ratio_search\", 0.2);​\n​\nSortedMap sparse = new TreeMap<>();​\nsparse.put(10L, 0.1f);​\nsparse.put(200L, 0.7f);​\nsparse.put(1000L, 0.9f);​\n​\nSparseFloatVec queryVector = new SparseFloatVec(sparse);​\n​\nSearchResp searchR = client.search(SearchReq.builder()​\n        .collectionName(\"my_sparse_collection\")​\n        .data(Collections.singletonList(queryVector))​\n        .annsField(\"sparse_vector\")​\n        .searchParams(searchParams)​\n        .topK(3)​\n        .outputFields(Collections.singletonList(\"pk\"))​\n        .build());​\n        ​\nSystem.out.println(searchR.getSearchResults());​\n​\n// Output​\n//​\n// [[SearchResp.SearchResult(entity={pk=453444327741536759}, score=1.31, id=453444327741536759), SearchResp.SearchResult(entity={pk=453444327741536756}, score=1.31, id=453444327741536756), SearchResp.SearchResult(entity={pk=453444327741536753}, score=1.31, id=453444327741536753)]]​\n\n","client.search({​\n    collection_name: 'my_sparse_collection',​\n    data: {1: 0.2, 50: 0.4, 1000: 0.7},​\n    limit: 3,​\n    output_fields: ['pk'],​\n    params: {​\n        drop_ratio_search: 0.2​\n    }​\n});​\n\n","curl --request POST \\​\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/entities/search\" \\​\n--header \"Authorization: Bearer ${TOKEN}\" \\​\n--header \"Content-Type: application/json\" \\​\n-d '{​\n    \"collectionName\": \"my_sparse_collection\",​\n    \"data\": [​\n        {\"1\": 0.2, \"50\": 0.4, \"1000\": 0.7}​\n    ],​\n    \"annsField\": \"sparse_vector\",​\n    \"limit\": 3,​\n    \"searchParams\":{​\n        \"params\":{\"drop_ratio_search\": 0.2}​\n    },​\n    \"outputFields\": [\"pk\"]​\n}'​\n​\n## {\"code\":0,\"cost\":0,\"data\":[{\"distance\":0.63,\"id\":\"453577185629572535\",\"pk\":\"453577185629572535\"},{\"distance\":0.1,\"id\":\"453577185629572534\",\"pk\":\"453577185629572534\"}]}​\n\n"],"headingContent":"Sparse Vector​","anchorList":[{"label":"稀疏向量","href":"Sparse-Vector​","type":1,"isActive":false},{"label":"稀疏向量概述","href":"Overview​","type":2,"isActive":false},{"label":"在 Milvus 中使用稀疏向量","href":"Use-sparse-vectors-in-Milvus​","type":2,"isActive":false}]}
            \ No newline at end of file
            +{"codeList":["from scipy.sparse import csr_matrix​\n​\n# Create a sparse matrix​\nrow = [0, 0, 1, 2, 2, 2]​\ncol = [0, 2, 2, 0, 1, 2]​\ndata = [1, 2, 3, 4, 5, 6]​\nsparse_matrix = csr_matrix((data, (row, col)), shape=(3, 3))​\n​\n# Represent sparse vector using the sparse matrix​\nsparse_vector = sparse_matrix.getrow(0)​\n\n","# Represent sparse vector using a dictionary​\nsparse_vector = [{1: 0.5, 100: 0.3, 500: 0.8, 1024: 0.2, 5000: 0.6}]​\n\n","SortedMap sparseVector = new TreeMap<>();​\nsparseVector.put(1L, 0.5f);​\nsparseVector.put(100L, 0.3f);​\nsparseVector.put(500L, 0.8f);​\nsparseVector.put(1024L, 0.2f);​\nsparseVector.put(5000L, 0.6f);​\n\n","# Represent sparse vector using a list of tuples​\nsparse_vector = [[(1, 0.5), (100, 0.3), (500, 0.8), (1024, 0.2), (5000, 0.6)]]​\n\n","from pymilvus import MilvusClient, DataType​\n​\nclient = MilvusClient(uri=\"http://localhost:19530\")​\n​\nclient.drop_collection(collection_name=\"my_sparse_collection\")​\n​\nschema = client.create_schema(​\n    auto_id=True,​\n    enable_dynamic_fields=True,​\n)​\n​\nschema.add_field(field_name=\"pk\", datatype=DataType.VARCHAR, is_primary=True, max_length=100)​\nschema.add_field(field_name=\"sparse_vector\", datatype=DataType.SPARSE_FLOAT_VECTOR)​\n\n","import io.milvus.v2.client.ConnectConfig;​\nimport io.milvus.v2.client.MilvusClientV2;​\n​\nimport io.milvus.v2.common.DataType;​\nimport io.milvus.v2.service.collection.request.AddFieldReq;​\nimport io.milvus.v2.service.collection.request.CreateCollectionReq;​\n​\nMilvusClientV2 client = new MilvusClientV2(ConnectConfig.builder()​\n        .uri(\"http://localhost:19530\")​\n        .build());​\n        ​\nCreateCollectionReq.CollectionSchema schema = client.createSchema();​\nschema.setEnableDynamicField(true);​\nschema.addField(AddFieldReq.builder()​\n        .fieldName(\"pk\")​\n        .dataType(DataType.VarChar)​\n        .isPrimaryKey(true)​\n        .autoID(true)​\n        .maxLength(100)​\n        .build());​\n​\nschema.addField(AddFieldReq.builder()​\n        .fieldName(\"sparse_vector\")​\n        .dataType(DataType.SparseFloatVector)​\n        .build());​\n\n","import { DataType } from \"@zilliz/milvus2-sdk-node\";​\n​\nconst schema = [​\n  {​\n    name: \"metadata\",​\n    data_type: DataType.JSON,​\n  },​\n  {​\n    name: \"pk\",​\n    data_type: DataType.Int64,​\n    is_primary_key: true,​\n  },​\n  {​\n    name: \"sparse_vector\",​\n    data_type: DataType.SparseFloatVector,​\n  }​\n];​\n​\n\n","export primaryField='{​\n    \"fieldName\": \"pk\",​\n    \"dataType\": \"VarChar\",​\n    \"isPrimary\": true,​\n    \"elementTypeParams\": {​\n        \"max_length\": 100​\n    }​\n}'​\n​\nexport vectorField='{​\n    \"fieldName\": \"sparse_vector\",​\n    \"dataType\": \"SparseFloatVector\"​\n}'​\n​\nexport schema=\"{​\n    \\\"autoID\\\": true,​\n    \\\"fields\\\": [​\n        $primaryField,​\n        $vectorField​\n    ]​\n}\"​\n\n","index_params = client.prepare_index_params()​\n​\nindex_params.add_index(​\n    field_name=\"sparse_vector\",​\n    index_name=\"sparse_inverted_index\",​\n    index_type=\"SPARSE_INVERTED_INDEX\",​\n    metric_type=\"IP\",​\n    params={\"drop_ratio_build\": 0.2},​\n)​\n\n","import io.milvus.v2.common.IndexParam;​\nimport java.util.*;​\n​\nList indexes = new ArrayList<>();​\nMap extraParams = new HashMap<>();​\nextraParams.put(\"drop_ratio_build\", 0.2);​\nindexes.add(IndexParam.builder()​\n        .fieldName(\"sparse_vector\")​\n        .indexName(\"sparse_inverted_index\")​\n        
.indexType(IndexParam.IndexType.SPARSE_INVERTED_INDEX)​\n        .metricType(IndexParam.MetricType.IP)​\n        .extraParams(extraParams)​\n        .build());​\n\n","const indexParams = await client.createIndex({​\n    index_name: 'sparse_inverted_index',​\n    field_name: 'sparse_vector',​\n    metric_type: MetricType.IP,​\n    index_type: IndexType.SPARSE_WAND,​\n    params: {​\n      drop_ratio_build: 0.2,​\n    },​\n});​\n\n","export indexParams='[​\n        {​\n            \"fieldName\": \"sparse_vector\",​\n            \"metricType\": \"IP\",​\n            \"indexName\": \"sparse_inverted_index\",​\n            \"indexType\": \"SPARSE_INVERTED_INDEX\",​\n            \"params\":{\"drop_ratio_build\": 0.2}​\n        }​\n    ]'​\n\n","client.create_collection(​\n    collection_name=\"my_sparse_collection\",​\n    schema=schema,​\n    index_params=index_params​\n)​\n\n","import io.milvus.v2.client.ConnectConfig;​\nimport io.milvus.v2.client.MilvusClientV2;​\n​\nMilvusClientV2 client = new MilvusClientV2(ConnectConfig.builder()​\n        .uri(\"http://localhost:19530\")​\n        .build());​\n        ​\nCreateCollectionReq requestCreate = CreateCollectionReq.builder()​\n        .collectionName(\"my_sparse_collection\")​\n        .collectionSchema(schema)​\n        .indexParams(indexes)​\n        .build();​\nclient.createCollection(requestCreate);​\n\n","import { MilvusClient } from \"@zilliz/milvus2-sdk-node\";​\n​\nconst client = new MilvusClient({​\n    address: 'http://localhost:19530'​\n});​\n​\nawait client.createCollection({​\n    collection_name: 'my_sparse_collection',​\n    schema: schema,​\n    index_params: indexParams​\n});​\n\n","curl --request POST \\​\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/collections/create\" \\​\n--header \"Authorization: Bearer ${TOKEN}\" \\​\n--header \"Content-Type: application/json\" \\​\n-d \"{​\n    \\\"collectionName\\\": \\\"my_sparse_collection\\\",​\n    \\\"schema\\\": $schema,​\n    \\\"indexParams\\\": $indexParams​\n}\"​\n\n","sparse_vectors = [​\n    {\"sparse_vector\": {1: 0.5, 100: 0.3, 500: 0.8}},​\n    {\"sparse_vector\": {10: 0.1, 200: 0.7, 1000: 0.9}},​\n]​\n​\nclient.insert(​\n    collection_name=\"my_sparse_collection\",​\n    data=sparse_vectors​\n)​\n\n","import com.google.gson.Gson;​\nimport com.google.gson.JsonObject;​\nimport io.milvus.v2.service.vector.request.InsertReq;​\nimport io.milvus.v2.service.vector.response.InsertResp;​\n​\nList rows = new ArrayList<>();​\nGson gson = new Gson();​\n{​\n    JsonObject row = new JsonObject();​\n    SortedMap sparse = new TreeMap<>();​\n    sparse.put(1L, 0.5f);​\n    sparse.put(100L, 0.3f);​\n    sparse.put(500L, 0.8f);​\n    row.add(\"sparse_vector\", gson.toJsonTree(sparse));​\n    rows.add(row);​\n}​\n{​\n    JsonObject row = new JsonObject();​\n    SortedMap sparse = new TreeMap<>();​\n    sparse.put(10L, 0.1f);​\n    sparse.put(200L, 0.7f);​\n    sparse.put(1000L, 0.9f);​\n    row.add(\"sparse_vector\", gson.toJsonTree(sparse));​\n    rows.add(row);​\n}​\n​\nInsertResp insertR = client.insert(InsertReq.builder()​\n        .collectionName(\"my_sparse_collection\")​\n        .data(rows)​\n        .build());​\n\n","const data = [​\n  { sparse_vector: { \"1\": 0.5, \"100\": 0.3, \"500\": 0.8 } },​\n  { sparse_vector: { \"10\": 0.1, \"200\": 0.7, \"1000\": 0.9 } },​\n];​\nclient.insert({​\n  collection_name: \"my_sparse_collection\",​\n  data: data,​\n});​\n​\n\n","curl --request POST \\​\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/entities/insert\" \\​\n--header \"Authorization: Bearer 
${TOKEN}\" \\​\n--header \"Content-Type: application/json\" \\​\n-d '{​\n    \"data\": [​\n        {\"sparse_vector\": {\"1\": 0.5, \"100\": 0.3, \"500\": 0.8}},​\n        {\"sparse_vector\": {\"10\": 0.1, \"200\": 0.7, \"1000\": 0.9}}        ​\n    ],​\n    \"collectionName\": \"my_sparse_collection\"​\n}'​\n​\n## {\"code\":0,\"cost\":0,\"data\":{\"insertCount\":2,\"insertIds\":[\"453577185629572534\",\"453577185629572535\"]}}​\n\n","# Prepare search parameters​\nsearch_params = {​\n    \"params\": {\"drop_ratio_search\": 0.2},  # Additional optional search parameters​\n}​\n​\n# Prepare the query vector​\nquery_vector = [{1: 0.2, 50: 0.4, 1000: 0.7}]​\n\n","res = client.search(​\n    collection_name=\"my_sparse_collection\",​\n    data=query_vector,​\n    limit=3,​\n    output_fields=[\"pk\"],​\n    search_params=search_params,​\n)​\n​\nprint(res)​\n​\n# Output​\n# data: [\"[{'id': '453718927992172266', 'distance': 0.6299999952316284, 'entity': {'pk': '453718927992172266'}}, {'id': '453718927992172265', 'distance': 0.10000000149011612, 'entity': {'pk': '453718927992172265'}}]\"]​\n\n","import io.milvus.v2.service.vector.request.SearchReq;​\nimport io.milvus.v2.service.vector.request.data.SparseFloatVec;​\nimport io.milvus.v2.service.vector.response.SearchResp;​\n​\nMap searchParams = new HashMap<>();​\nsearchParams.put(\"drop_ratio_search\", 0.2);​\n​\nSortedMap sparse = new TreeMap<>();​\nsparse.put(10L, 0.1f);​\nsparse.put(200L, 0.7f);​\nsparse.put(1000L, 0.9f);​\n​\nSparseFloatVec queryVector = new SparseFloatVec(sparse);​\n​\nSearchResp searchR = client.search(SearchReq.builder()​\n        .collectionName(\"my_sparse_collection\")​\n        .data(Collections.singletonList(queryVector))​\n        .annsField(\"sparse_vector\")​\n        .searchParams(searchParams)​\n        .topK(3)​\n        .outputFields(Collections.singletonList(\"pk\"))​\n        .build());​\n        ​\nSystem.out.println(searchR.getSearchResults());​\n​\n// Output​\n//​\n// [[SearchResp.SearchResult(entity={pk=453444327741536759}, score=1.31, id=453444327741536759), SearchResp.SearchResult(entity={pk=453444327741536756}, score=1.31, id=453444327741536756), SearchResp.SearchResult(entity={pk=453444327741536753}, score=1.31, id=453444327741536753)]]​\n\n","client.search({​\n    collection_name: 'my_sparse_collection',​\n    data: {1: 0.2, 50: 0.4, 1000: 0.7},​\n    limit: 3,​\n    output_fields: ['pk'],​\n    params: {​\n        drop_ratio_search: 0.2​\n    }​\n});​\n\n","curl --request POST \\​\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/entities/search\" \\​\n--header \"Authorization: Bearer ${TOKEN}\" \\​\n--header \"Content-Type: application/json\" \\​\n-d '{​\n    \"collectionName\": \"my_sparse_collection\",​\n    \"data\": [​\n        {\"1\": 0.2, \"50\": 0.4, \"1000\": 0.7}​\n    ],​\n    \"annsField\": \"sparse_vector\",​\n    \"limit\": 3,​\n    \"searchParams\":{​\n        \"params\":{\"drop_ratio_search\": 0.2}​\n    },​\n    \"outputFields\": [\"pk\"]​\n}'​\n​\n## {\"code\":0,\"cost\":0,\"data\":[{\"distance\":0.63,\"id\":\"453577185629572535\",\"pk\":\"453577185629572535\"},{\"distance\":0.1,\"id\":\"453577185629572534\",\"pk\":\"453577185629572534\"}]}​\n\n"],"headingContent":"Sparse Vector​","anchorList":[{"label":"稀疏向量","href":"Sparse-Vector​","type":1,"isActive":false},{"label":"稀疏向量概述","href":"Overview​","type":2,"isActive":false},{"label":"在 Milvus 
中使用稀疏向量","href":"Use-sparse-vectors-in-Milvus​","type":2,"isActive":false},{"label":"限制","href":"Limits","type":2,"isActive":false},{"label":"常见问题","href":"FAQ","type":2,"isActive":false}]}
            \ No newline at end of file
            diff --git a/localization/v2.5.x/site/zh/userGuide/schema/sparse_vector.md b/localization/v2.5.x/site/zh/userGuide/schema/sparse_vector.md
            index 717995523..6ea64098a 100644
            --- a/localization/v2.5.x/site/zh/userGuide/schema/sparse_vector.md
            +++ b/localization/v2.5.x/site/zh/userGuide/schema/sparse_vector.md
            @@ -35,7 +35,7 @@ summary: >-
                       d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"
                     >
                   
            -    

            稀疏向量是高维向量的一种特殊表示形式,其中大部分元素为零,只有少数维度具有非零值。这一特性使得稀疏向量在处理大规模、高维但稀疏的数据时特别有效。常见的应用包括

            +

            稀疏向量是高维向量的一种特殊表示形式,其中大部分元素为零,只有少数维度的值不为零。这一特性使得稀疏向量在处理大规模、高维但稀疏的数据时特别有效。常见的应用包括

            • 文本分析:将文档表示为词袋向量,其中每个维度对应一个单词,只有在文档中出现的单词才有非零值。

            • 推荐系统:用户-物品交互矩阵,其中每个维度代表用户对特定物品的评分,大多数用户只与少数物品交互。

@@ -48,7 +48,7 @@ summary: >-
稀疏向量表示法

              稀疏向量可以使用多种方法生成,例如文本处理中的TF-IDF(词频-反向文档频率)和BM25。此外,Milvus 还提供了帮助生成和处理稀疏向量的便捷方法。详情请参阅Embeddings

              对于文本数据,Milvus 还提供全文搜索功能,让您可以直接在原始文本数据上执行向量搜索,而无需使用外部嵌入模型来生成稀疏向量。更多信息,请参阅全文搜索
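下面给出一个简化的生成示例(仅作示意:这里假设使用 scikit-learn 的 TfidfVectorizer 计算 TF-IDF,Milvus 本身并不要求特定的生成工具),演示如何把文本转换成 Milvus 接受的 {维度: 权重} 字典格式:

# 假设性示例:使用 scikit-learn 生成 TF-IDF 稀疏向量(需先安装 scikit-learn)
from sklearn.feature_extraction.text import TfidfVectorizer

corpus = [
    "Milvus supports sparse vectors",
    "Sparse vectors are useful for text retrieval",
]

vectorizer = TfidfVectorizer()
tfidf = vectorizer.fit_transform(corpus)  # 返回 scipy.sparse.csr_matrix

# 取第一行,转换为 Milvus 接受的 {维度: 权重} 字典表示
row = tfidf.getrow(0)
sparse_vector = {int(idx): float(val) for idx, val in zip(row.indices, row.data)}
print(sparse_vector)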

              -

              矢量化后,数据可存储在 Milvus 中,以便进行管理和矢量检索。下图说明了基本流程。

              +

              矢量化后,数据可存储在 Milvus 中进行管理和矢量检索。下图说明了基本流程。

Use sparse vector in Milvus

@@ -259,7 +259,7 @@ indexes.add(
SPARSE_INVERTED_INDEX 类型的索引。对于稀疏向量,可以指定 SPARSE_INVERTED_INDEX 或 SPARSE_WAND(下文附有一个使用 SPARSE_WAND 的索引参数示例)。有关详情,请参阅稀疏向量索引。

            • 对于稀疏向量,metric_type 只支持IP (内积),用于衡量两个稀疏向量之间的相似性。有关相似性的更多信息,请参阅 "度量类型"。

            • -
• drop_ratio_build 是一个可选的索引参数,专门用于稀疏向量。它可以控制在建立索引时排除小向量值的比例。例如,使用 {"drop_ratio_build": 0.2} 时,最小的 20% 向量值将在索引创建过程中被排除,从而减少搜索过程中的计算量。

            • +
• drop_ratio_build 是一个可选的索引参数,专门用于稀疏向量。它可以控制索引建立过程中排除小向量值的比例。例如,使用 {"drop_ratio_build": 0.2} 时,最小的 20% 向量值将在索引创建过程中被排除,从而减少搜索过程中的计算量。
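下面是一个最小示例(与上文的 Python 写法保持一致,仅把索引类型换成 SPARSE_WAND;是否选用请以实际数据和基准测试为准):

index_params = client.prepare_index_params()

index_params.add_index(
    field_name="sparse_vector",
    index_name="sparse_wand_index",
    index_type="SPARSE_WAND",          # 使用 Weak-AND 算法减少搜索时的 IP 距离计算
    metric_type="IP",                  # 稀疏向量仅支持 IP
    params={"drop_ratio_build": 0.2},  # 建索引时排除最小的 20% 向量值
)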

            创建 Collections

            完成稀疏向量和索引设置后,就可以创建包含稀疏向量的 Collections。下面的示例使用 create_collection方法创建一个名为my_sparse_collection 的 Collection。

            @@ -474,3 +474,60 @@ sparse.put(1000L,

            有关相似性搜索参数的更多信息,请参阅基本 ANN 搜索

            +

            限制

            在 Milvus 中使用稀疏向量时,请考虑以下限制:

            +
              +
            • 目前,稀疏向量只支持IP距离度量。稀疏向量的高维度使得 L2 和余弦距离不切实际。

            • +
            • 对于稀疏向量场,只支持SPARSE_INVERTED_INDEXSPARSE_WAND索引类型。

            • +
            • 稀疏向量支持的数据类型:

              +
                +
              • 维数部分必须是无符号 32 位整数;
              • +
              • 值部分可以是非负 32 位浮点数。
              • +
            • +
• 稀疏向量在插入和搜索时必须满足以下要求(一个简单的校验示例见本列表之后):

              +
                +
              • 向量中至少有一个值为非零;
              • +
              • 向量索引为非负。
              • +
            • +
            +
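下面是一个简单的校验示例(仅作示意),可在插入前按上述限制检查字典格式的稀疏向量:

def validate_sparse_vector(vec: dict) -> None:
    """按上述限制检查 {维度: 值} 形式的稀疏向量(示意用途)。"""
    if not any(v != 0 for v in vec.values()):
        raise ValueError("稀疏向量中至少要有一个非零值")
    for dim, val in vec.items():
        # 维度必须是 [0, uint32 最大值) 范围内的整数
        if not isinstance(dim, int) or dim < 0 or dim >= 2**32 - 1:
            raise ValueError(f"非法维度:{dim}")
        # 值必须是非负数(32 位浮点范围内)
        if val < 0:
            raise ValueError(f"维度 {dim} 的值为负:{val}")

validate_sparse_vector({1: 0.5, 100: 0.3, 500: 0.8})  # 校验通过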

            常见问题

              +
            • 能否解释 SPARSE_INVERTED_INDEX 和 SPARSE_WAND 之间的区别,以及如何在两者之间进行选择?

              +

              SPARSE_INVERTED_INDEX是一种传统的倒排索引,而SPARSE_WAND则使用弱-AND算法来减少搜索过程中全 IP 距离评估的次数。SPARSE_WAND通常速度更快,但其性能会随着向量密度的增加而下降。要在它们之间做出选择,请根据您的特定数据集和使用案例进行实验和基准测试。

            • +
            • 如何选择 drop_ratio_build 和 drop_ratio_search 参数?

              +

              drop_ratio_builddrop_ratio_search的选择取决于数据的特性以及对搜索延迟/吞吐量和准确性的要求。

            • +
            • 稀疏嵌入的维度可以是 uint32 空间内的任何离散值吗?

              +

              可以,但有一个例外。稀疏嵌入的维度可以是[0, maximum of uint32) 范围内的任何值。这意味着不能使用 uint32 的最大值。

            • +
            • 是通过索引还是蛮力来搜索不断增长的数据段?

              +

              对增长的数据段的搜索是通过与密封数据段索引相同类型的索引进行的。对于索引建立前的新增长区段,则使用蛮力搜索。

            • +
            • 是否可以在一个 Collections 中同时包含稀疏向量和密集向量?

              +

可以,通过多向量类型支持,您可以创建既有稀疏向量列又有密集向量列的 Collections,并对它们执行混合搜索(示例见下文)。

            • +
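下面是一个假设性的混合搜索草图(Collection 名称 my_hybrid_collection、稀疏向量字段 sparse_vector、密集向量字段 dense_vector 以及 5 维的查询向量均为示意用的假设;具体 API 以所用 pymilvus 版本为准):

from pymilvus import MilvusClient, AnnSearchRequest, RRFRanker

client = MilvusClient(uri="http://localhost:19530")

# 稀疏向量检索请求
sparse_req = AnnSearchRequest(
    data=[{1: 0.2, 50: 0.4, 1000: 0.7}],
    anns_field="sparse_vector",
    param={"params": {"drop_ratio_search": 0.2}},
    limit=3,
)

# 密集向量检索请求(查询向量维度需与字段定义一致,这里仅为占位)
dense_req = AnnSearchRequest(
    data=[[0.1, 0.2, 0.3, 0.4, 0.5]],
    anns_field="dense_vector",
    param={"params": {"nprobe": 10}},
    limit=3,
)

# 使用 RRF 对两路结果重新排序后返回
res = client.hybrid_search(
    collection_name="my_hybrid_collection",
    reqs=[sparse_req, dense_req],
    ranker=RRFRanker(),
    limit=3,
)
print(res)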
diff --git a/localization/v2.5.x/site/zh/userGuide/search-query-get/boolean.md b/localization/v2.5.x/site/zh/userGuide/search-query-get/boolean.md
index d871d3621..9e3695d22 100644
--- a/localization/v2.5.x/site/zh/userGuide/search-query-get/boolean.md
+++ b/localization/v2.5.x/site/zh/userGuide/search-query-get/boolean.md
@@ -697,9 +697,9 @@ curl --request POST \

            Match operators​

            Match operators include:​

            • like: Match constants or prefixes (prefix%), infixes (%infix%), and suffixes (%suffix) within constants. It relies on a brute-force search mechanism using wildcards and does not involve text tokenization. While it can achieve exact matches, its query efficiency is relatively low, making it suitable for simple matching tasks or queries on smaller datasets.​

            • -
• TEXT_MATCH: Match specific terms or keywords on VARCHAR fields, using tokenization and inverted index to enable efficient text search. Compared to like, TEXT_MATCH offers more advanced text tokenization and filtering capabilities. It is suited for large-scale datasets where higher query performance is required for complex text search scenarios.
-

              -

              To use the TEXT_MATCH filter expression, you must enable text matching for the target VARCHAR field when creating the collection. For details, refer to ​Keyword Match.​

              +
            • TEXT_MATCH: Match specific terms or keywords on VARCHAR fields, using tokenization and inverted index to enable efficient text search. Compared to like, TEXT_MATCH offers more advanced text tokenization and filtering capabilities. It is suited for large-scale datasets where higher query performance is required for complex text search scenarios.​

              +

              +

              To use the TEXT_MATCH filter expression, you must enable text matching for the target VARCHAR field when creating the collection. For details, refer to Text Match.​

            Example 1: Apply filter on scalar field​

The following example demonstrates how to filter products whose color is red. In this case, you can quickly filter all red products by matching the prefix 'red%'. Similarly, you can use the expression color in ['red_7025', 'red_4794', 'red_9392'] to filter all red products. However, when the data is more complex, we recommend using the like operator for more efficient filtering.
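A minimal sketch of such a filtered query is shown below (the collection name my_collection and the fields id and color are placeholders used only for illustration):

from pymilvus import MilvusClient

client = MilvusClient(uri="http://localhost:19530")

# Prefix match: all values that start with "red"
filter = "color like 'red%'"

result = client.query(
    collection_name="my_collection",  # placeholder collection name
    filter=filter,
    output_fields=["id", "color"],
)
print(result)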

@@ -854,8 +854,8 @@ curl --request POST \
 ]
            -

            Example 3: Keyword match on VARCHAR fields​

            The TEXT_MATCH expression is used for keyword match on VARCHAR fields. By default, it applies an OR logic, but you can combine it with other logical operators to create more complex query conditions. For details, refer to ​Keyword Match.​

            -

            The following example demonstrates how to use the TEXT_MATCH expression to filter products where the description field contains either the keyword "Apple" or "iPhone":​

            +

            Example 3: Text match on VARCHAR fields​

            The TEXT_MATCH expression is used for text match on VARCHAR fields. By default, it applies an OR logic, but you can combine it with other logical operators to create more complex query conditions. For details, refer to Text Match.​

            +

            The following example demonstrates how to use the TEXT_MATCH expression to filter products where the description field contains either the term "Apple" or "iPhone":​
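A minimal sketch is shown below (the collection name product_catalog, the vector field embeddings, and the 5-dimensional placeholder query vector are assumptions for illustration; the description field must have been created with enable_match=True):

from pymilvus import MilvusClient

client = MilvusClient(uri="http://localhost:19530")

# Matches entities whose description contains "Apple" OR "iPhone"
filter = "TEXT_MATCH(description, 'Apple iPhone')"

query_vector = [0.1, 0.2, 0.3, 0.4, 0.5]  # placeholder; must match the field's dimension

result = client.search(
    collection_name="product_catalog",  # placeholder collection name
    anns_field="embeddings",
    data=[query_vector],
    filter=filter,
    limit=10,
    output_fields=["id", "description"],
)
print(result)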

            Python Java diff --git a/localization/v2.5.x/site/zh/userGuide/search-query-get/full-text-search.json b/localization/v2.5.x/site/zh/userGuide/search-query-get/full-text-search.json index 11c810d55..5064957ed 100644 --- a/localization/v2.5.x/site/zh/userGuide/search-query-get/full-text-search.json +++ b/localization/v2.5.x/site/zh/userGuide/search-query-get/full-text-search.json @@ -1 +1 @@ -{"codeList":["from pymilvus import MilvusClient, DataType, Function, FunctionType​\n​\nschema = MilvusClient.create_schema()​\n​\nschema.add_field(field_name=\"id\", datatype=DataType.INT64, is_primary=True, auto_id=True)​\nschema.add_field(field_name=\"text\", datatype=DataType.VARCHAR, max_length=1000, enable_analyzer=True)​\nschema.add_field(field_name=\"sparse\", datatype=DataType.SPARSE_FLOAT_VECTOR)​\n\n","bm25_function = Function(​\n name=\"text_bm25_emb\", # Function name​\n input_field_names=[\"text\"], # Name of the VARCHAR field containing raw text data​\n output_field_names=[\"sparse\"], # Name of the SPARSE_FLOAT_VECTOR field reserved to store generated embeddings​\n function_type=FunctionType.BM25,​\n)​\n​\nschema.add_function(bm25_function)​\n\n","index_params = MilvusClient.prepare_index_params()​\n​\nindex_params.add_index(​\n field_name=\"sparse\",​\n index_type=\"AUTOINDEX\", ​\n metric_type=\"BM25\"​\n)​\n\n","MilvusClient.create_collection(​\n collection_name='demo', ​\n schema=schema, ​\n index_params=index_params​\n)​\n\n","MilvusClient.insert('demo', [​\n {'text': 'Artificial intelligence was founded as an academic discipline in 1956.'},​\n {'text': 'Alan Turing was the first person to conduct substantial research in AI.'},​\n {'text': 'Born in Maida Vale, London, Turing was raised in southern England.'},​\n])​\n\n","search_params = {​\n 'params': {'drop_ratio_search': 0.6},​\n}​\n​\nMilvusClient.search(​\n collection_name='demo', ​\n data=['Who started AI research?'],​\n anns_field='sparse',​\n limit=3,​\n search_params=search_params​\n)​\n\n"],"headingContent":"Full Text Search​","anchorList":[{"label":"全文搜索","href":"Full-Text-Search​","type":1,"isActive":false},{"label":"概述","href":"Overview​","type":2,"isActive":false},{"label":"创建用于全文搜索的 Collections","href":"Create-a-collection-for-full-text-search​","type":2,"isActive":false},{"label":"插入文本数据","href":"Insert-text-data","type":2,"isActive":false},{"label":"执行全文搜索","href":"Perform-full-text-search","type":2,"isActive":false}]} \ No newline at end of file +{"codeList":["from pymilvus import MilvusClient, DataType, Function, FunctionType​\n​\nschema = MilvusClient.create_schema()​\n​\nschema.add_field(field_name=\"id\", datatype=DataType.INT64, is_primary=True, auto_id=True)​\nschema.add_field(field_name=\"text\", datatype=DataType.VARCHAR, max_length=1000, enable_analyzer=True)​\nschema.add_field(field_name=\"sparse\", datatype=DataType.SPARSE_FLOAT_VECTOR)​\n\n","import io.milvus.v2.common.DataType;\nimport io.milvus.v2.service.collection.request.AddFieldReq;\nimport io.milvus.v2.service.collection.request.CreateCollectionReq;\n\nCreateCollectionReq.CollectionSchema schema = CreateCollectionReq.CollectionSchema.builder()\n .build();\nschema.addField(AddFieldReq.builder()\n .fieldName(\"id\")\n .dataType(DataType.Int64)\n .isPrimaryKey(true)\n .autoID(true)\n .build());\nschema.addField(AddFieldReq.builder()\n .fieldName(\"text\")\n .dataType(DataType.VarChar)\n .maxLength(1000)\n .enableAnalyzer(true)\n .build());\nschema.addField(AddFieldReq.builder()\n .fieldName(\"sparse\")\n 
.dataType(DataType.SparseFloatVector)\n .build());\n","import { MilvusClient, DataType } from \"@zilliz/milvus2-sdk-node\";\n\nconst address = \"http://localhost:19530\";\nconst token = \"root:Milvus\";\nconst client = new MilvusClient({address, token});\nconst schema = [\n {\n name: \"id\",\n data_type: DataType.Int64,\n is_primary_key: true,\n },\n {\n name: \"text\",\n data_type: \"VarChar\",\n enable_analyzer: true,\n enable_match: true,\n max_length: 1000,\n },\n {\n name: \"sparse\",\n data_type: DataType.SparseFloatVector,\n },\n];\n\n\nconsole.log(res.results)\n","export schema='{\n \"autoId\": true,\n \"enabledDynamicField\": false,\n \"fields\": [\n {\n \"fieldName\": \"id\",\n \"dataType\": \"Int64\",\n \"isPrimary\": true\n },\n {\n \"fieldName\": \"text\",\n \"dataType\": \"VarChar\",\n \"elementTypeParams\": {\n \"max_length\": 1000,\n \"enable_analyzer\": true\n }\n },\n {\n \"fieldName\": \"sparse\",\n \"dataType\": \"SparseFloatVector\"\n }\n ]\n }'\n","bm25_function = Function(​\n name=\"text_bm25_emb\", # Function name​\n input_field_names=[\"text\"], # Name of the VARCHAR field containing raw text data​\n output_field_names=[\"sparse\"], # Name of the SPARSE_FLOAT_VECTOR field reserved to store generated embeddings​\n function_type=FunctionType.BM25,​\n)​\n​\nschema.add_function(bm25_function)​\n\n","import io.milvus.common.clientenum.FunctionType;\nimport io.milvus.v2.service.collection.request.CreateCollectionReq.Function;\n\nimport java.util.*;\n\nschema.addFunction(Function.builder()\n .functionType(FunctionType.BM25)\n .name(\"text_bm25_emb\")\n .inputFieldNames(Collections.singletonList(\"text\"))\n .outputFieldNames(Collections.singletonList(\"vector\"))\n .build());\n","const functions = [\n {\n name: 'text_bm25_emb',\n description: 'bm25 function',\n type: FunctionType.BM25,\n input_field_names: ['text'],\n output_field_names: ['vector'],\n params: {},\n },\n];\n","export schema='{\n \"autoId\": true,\n \"enabledDynamicField\": false,\n \"fields\": [\n {\n \"fieldName\": \"id\",\n \"dataType\": \"Int64\",\n \"isPrimary\": true\n },\n {\n \"fieldName\": \"text\",\n \"dataType\": \"VarChar\",\n \"elementTypeParams\": {\n \"max_length\": 1000,\n \"enable_analyzer\": true\n }\n },\n {\n \"fieldName\": \"sparse\",\n \"dataType\": \"SparseFloatVector\"\n }\n ],\n \"functions\": [\n {\n \"name\": \"text_bm25_emb\",\n \"type\": \"BM25\",\n \"inputFieldNames\": [\"text\"],\n \"outputFieldNames\": [\"sparse\"],\n \"params\": {}\n }\n ]\n }'\n","index_params = MilvusClient.prepare_index_params()​\n​\nindex_params.add_index(​\n field_name=\"sparse\",​\n index_type=\"AUTOINDEX\", ​\n metric_type=\"BM25\"​\n)​\n\n","import io.milvus.v2.common.IndexParam;\n\nList indexes = new ArrayList<>();\nindexes.add(IndexParam.builder()\n .fieldName(\"sparse\")\n .indexType(IndexParam.IndexType.SPARSE_INVERTED_INDEX)\n .metricType(IndexParam.MetricType.BM25)\n .build());\n","const index_params = [\n {\n fieldName: \"sparse\",\n metricType: \"BM25\",\n indexType: \"AUTOINDEX\",\n },\n];\n","export indexParams='[\n {\n \"fieldName\": \"sparse\",\n \"metricType\": \"BM25\",\n \"indexType\": \"AUTOINDEX\"\n }\n ]'\n","MilvusClient.create_collection(​\n collection_name='demo', ​\n schema=schema, ​\n index_params=index_params​\n)​\n\n","import io.milvus.v2.service.collection.request.CreateCollectionReq;\n\nCreateCollectionReq requestCreate = CreateCollectionReq.builder()\n .collectionName(\"demo\")\n .collectionSchema(schema)\n .indexParams(indexes)\n 
.build();\nclient.createCollection(requestCreate);\n","await client.create_collection(\n collection_name: 'demo', \n schema: schema, \n index_params: index_params\n);\n","export CLUSTER_ENDPOINT=\"http://localhost:19530\"\nexport TOKEN=\"root:Milvus\"\n\ncurl --request POST \\\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/collections/create\" \\\n--header \"Authorization: Bearer ${TOKEN}\" \\\n--header \"Content-Type: application/json\" \\\n-d \"{\n \\\"collectionName\\\": \\\"demo\\\",\n \\\"schema\\\": $schema,\n \\\"indexParams\\\": $indexParams\n}\"\n","client.insert('demo', [\n {'text': 'information retrieval is a field of study.'},\n {'text': 'information retrieval focuses on finding relevant information in large datasets.'},\n {'text': 'data mining and information retrieval overlap in research.'},\n])\n\n","import com.google.gson.Gson;\nimport com.google.gson.JsonObject;\n\nimport io.milvus.v2.service.vector.request.InsertReq;\n\nGson gson = new Gson();\nList rows = Arrays.asList(\n gson.fromJson(\"{\\\"text\\\": \\\"information retrieval is a field of study.\\\"}\", JsonObject.class),\n gson.fromJson(\"{\\\"text\\\": \\\"information retrieval focuses on finding relevant information in large datasets.\\\"}\", JsonObject.class),\n gson.fromJson(\"{\\\"text\\\": \\\"data mining and information retrieval overlap in research.\\\"}\", JsonObject.class)\n);\n\nclient.insert(InsertReq.builder()\n .collectionName(\"demo\")\n .data(rows)\n .build());\n","await client.insert({\ncollection_name: 'demo', \ndata: [\n {'text': 'information retrieval is a field of study.'},\n {'text': 'information retrieval focuses on finding relevant information in large datasets.'},\n {'text': 'data mining and information retrieval overlap in research.'},\n]);\n","curl --request POST \\\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/entities/insert\" \\\n--header \"Authorization: Bearer ${TOKEN}\" \\\n--header \"Content-Type: application/json\" \\\n-d '{\n \"data\": [\n {\"text\": \"information retrieval is a field of study.\"},\n {\"text\": \"information retrieval focuses on finding relevant information in large datasets.\"},\n {\"text\": \"data mining and information retrieval overlap in research.\"} \n ],\n \"collectionName\": \"demo\"\n}'\n","search_params = {​\n 'params': {'drop_ratio_search': 0.6},​\n}​\n​\nMilvusClient.search(​\n collection_name='demo', ​\n data=['whats the focus of information retrieval?'],​\n anns_field='sparse',​\n limit=3,​\n search_params=search_params​\n)​\n\n","import io.milvus.v2.service.vector.request.SearchReq;\nimport io.milvus.v2.service.vector.request.data.EmbeddedText;\nimport io.milvus.v2.service.vector.response.SearchResp;\n\nMap searchParams = new HashMap<>();\nsearchParams.put(\"drop_ratio_search\", 0.6);\nSearchResp searchResp = client.search(SearchReq.builder()\n .collectionName(\"demo\")\n .data(Collections.singletonList(new EmbeddedText(\"whats the focus of information retrieval?\")))\n .annsField(\"sparse\")\n .topK(3)\n .searchParams(searchParams)\n .outputFields(Collections.singletonList(\"text\"))\n .build());\n","await client.search(\n collection_name: 'demo', \n data: ['whats the focus of information retrieval?'],\n anns_field: 'sparse',\n limit: 3,\n params: {'drop_ratio_search': 0.6},\n)\n","curl --request POST \\\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/entities/search\" \\\n--header \"Authorization: Bearer ${TOKEN}\" \\\n--header \"Content-Type: application/json\" \\\n--data-raw '{\n \"collectionName\": \"demo\",\n \"data\": [\n \"whats the focus of information 
retrieval?\"\n ],\n \"annsField\": \"sparse\",\n \"limit\": 3,\n \"outputFields\": [\n \"text\"\n ],\n \"searchParams\":{\n \"params\":{\n \"drop_ratio_search\":0.6\n }\n }\n}'\n"],"headingContent":"Full Text Search​","anchorList":[{"label":"全文搜索","href":"Full-Text-Search​","type":1,"isActive":false},{"label":"概述","href":"Overview​","type":2,"isActive":false},{"label":"创建用于全文搜索的 Collections","href":"Create-a-collection-for-full-text-search​","type":2,"isActive":false},{"label":"插入文本数据","href":"Insert-text-data","type":2,"isActive":false},{"label":"执行全文搜索","href":"Perform-full-text-search","type":2,"isActive":false}]} \ No newline at end of file diff --git a/localization/v2.5.x/site/zh/userGuide/search-query-get/full-text-search.md b/localization/v2.5.x/site/zh/userGuide/search-query-get/full-text-search.md index 4e182495b..8e01b78c0 100644 --- a/localization/v2.5.x/site/zh/userGuide/search-query-get/full-text-search.md +++ b/localization/v2.5.x/site/zh/userGuide/search-query-get/full-text-search.md @@ -53,7 +53,7 @@ summary: 全文搜索是一种在文本数据集中检索包含特定术语或 全文搜索

            要使用全文搜索,请遵循以下主要步骤。

-
1. 创建 Collections:设置带有必要字段的 Collections,并定义一个将原始文本转换为稀疏嵌入的函数。

+
1. 创建 Collections:用必要的字段设置一个 Collections,并定义一个函数将原始文本转换为稀疏嵌入。

2. 插入数据:将原始文本文档插入 Collections。

3. 执行搜索:使用查询文本搜索你的 Collections 并检索相关结果。

            @@ -79,6 +79,8 @@ summary: 全文搜索是一种在文本数据集中检索包含特定术语或
• 一个SPARSE_FLOAT_VECTOR 字段,预留用于存储稀疏嵌入,Milvus 将为VARCHAR 字段自动生成稀疏嵌入。

定义 Collections 模式

            首先,创建 Schema 并添加必要的字段。

            +
            from pymilvus import MilvusClient, DataType, Function, FunctionType​
             ​
             schema = MilvusClient.create_schema()​
            @@ -87,6 +89,80 @@ schema.add_field(field_name="id", dat
             schema.add_field(field_name="text", datatype=DataType.VARCHAR, max_length=1000, enable_analyzer=True)​
             schema.add_field(field_name="sparse", datatype=DataType.SPARSE_FLOAT_VECTOR)​
             
            +
            +
            import io.milvus.v2.common.DataType;
            +import io.milvus.v2.service.collection.request.AddFieldReq;
            +import io.milvus.v2.service.collection.request.CreateCollectionReq;
            +
            +CreateCollectionReq.CollectionSchema schema = CreateCollectionReq.CollectionSchema.builder()
            +        .build();
            +schema.addField(AddFieldReq.builder()
            +        .fieldName("id")
            +        .dataType(DataType.Int64)
            +        .isPrimaryKey(true)
            +        .autoID(true)
            +        .build());
            +schema.addField(AddFieldReq.builder()
            +        .fieldName("text")
            +        .dataType(DataType.VarChar)
            +        .maxLength(1000)
            +        .enableAnalyzer(true)
            +        .build());
            +schema.addField(AddFieldReq.builder()
            +        .fieldName("sparse")
            +        .dataType(DataType.SparseFloatVector)
            +        .build());
            +
            +
            import { MilvusClient, DataType } from "@zilliz/milvus2-sdk-node";
            +
            +const address = "http://localhost:19530";
            +const token = "root:Milvus";
            +const client = new MilvusClient({address, token});
            +const schema = [
            +  {
            +    name: "id",
            +    data_type: DataType.Int64,
            +    is_primary_key: true,
            +  },
            +  {
            +    name: "text",
            +    data_type: "VarChar",
            +    enable_analyzer: true,
            +    enable_match: true,
            +    max_length: 1000,
            +  },
            +  {
            +    name: "sparse",
            +    data_type: DataType.SparseFloatVector,
            +  },
            +];
            +
            +
            +console.log(res.results)
            +
            +
            export schema='{
            +        "autoId": true,
            +        "enabledDynamicField": false,
            +        "fields": [
            +            {
            +                "fieldName": "id",
            +                "dataType": "Int64",
            +                "isPrimary": true
            +            },
            +            {
            +                "fieldName": "text",
            +                "dataType": "VarChar",
            +                "elementTypeParams": {
            +                    "max_length": 1000,
            +                    "enable_analyzer": true
            +                }
            +            },
            +            {
            +                "fieldName": "sparse",
            +                "dataType": "SparseFloatVector"
            +            }
            +        ]
            +    }'
             

            在此配置中

              @@ -95,6 +171,8 @@ schema.add_field(field_name="sparse",
            • sparse矢量字段:保留一个矢量字段,用于存储内部生成的稀疏嵌入,以进行全文搜索操作。数据类型必须是SPARSE_FLOAT_VECTOR

            现在,定义一个将文本转换为稀疏向量表示的函数,然后将其添加到 Schema 中。

            +
            bm25_function = Function(​
                 name="text_bm25_emb", # Function name​
                 input_field_names=["text"], # Name of the VARCHAR field containing raw text data​
            @@ -104,6 +182,62 @@ schema.add_field(field_name="sparse",
             ​
             schema.add_function(bm25_function)​
             
            +
            +
            import io.milvus.common.clientenum.FunctionType;
            +import io.milvus.v2.service.collection.request.CreateCollectionReq.Function;
            +
            +import java.util.*;
            +
            +schema.addFunction(Function.builder()
            +        .functionType(FunctionType.BM25)
            +        .name("text_bm25_emb")
            +        .inputFieldNames(Collections.singletonList("text"))
+        .outputFieldNames(Collections.singletonList("sparse"))
            +        .build());
            +
            +
            const functions = [
            +    {
            +      name: 'text_bm25_emb',
            +      description: 'bm25 function',
            +      type: FunctionType.BM25,
            +      input_field_names: ['text'],
+      output_field_names: ['sparse'],
            +      params: {},
            +    },
            +];
            +
            +
            export schema='{
            +        "autoId": true,
            +        "enabledDynamicField": false,
            +        "fields": [
            +            {
            +                "fieldName": "id",
            +                "dataType": "Int64",
            +                "isPrimary": true
            +            },
            +            {
            +                "fieldName": "text",
            +                "dataType": "VarChar",
            +                "elementTypeParams": {
            +                    "max_length": 1000,
            +                    "enable_analyzer": true
            +                }
            +            },
            +            {
            +                "fieldName": "sparse",
            +                "dataType": "SparseFloatVector"
            +            }
            +        ],
            +        "functions": [
            +            {
            +                "name": "text_bm25_emb",
            +                "type": "BM25",
            +                "inputFieldNames": ["text"],
            +                "outputFieldNames": ["sparse"],
            +                "params": {}
            +            }
            +        ]
            +    }'
             

            参数

            说明

            @@ -121,6 +255,8 @@ schema.add_function(bm25_function)​

            对于有多个VARCHAR 字段需要进行文本到稀疏向量转换的 Collections,请在 Collections Schema 中添加单独的函数,确保每个函数都有唯一的名称和output_field_names 值。
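下面是一个示意性的多字段配置草图(字段名 title、content 以及对应的稀疏向量字段名均为假设,仅用于说明每个函数需要唯一的名称和 output_field_names 值):

from pymilvus import MilvusClient, DataType, Function, FunctionType

schema = MilvusClient.create_schema()
schema.add_field(field_name="id", datatype=DataType.INT64, is_primary=True, auto_id=True)
schema.add_field(field_name="title", datatype=DataType.VARCHAR, max_length=1000, enable_analyzer=True)
schema.add_field(field_name="content", datatype=DataType.VARCHAR, max_length=1000, enable_analyzer=True)
schema.add_field(field_name="title_sparse", datatype=DataType.SPARSE_FLOAT_VECTOR)
schema.add_field(field_name="content_sparse", datatype=DataType.SPARSE_FLOAT_VECTOR)

# 为每个 VARCHAR 字段分别添加一个 BM25 函数,函数名与 output_field_names 必须唯一
schema.add_function(Function(
    name="title_bm25_emb",
    input_field_names=["title"],
    output_field_names=["title_sparse"],
    function_type=FunctionType.BM25,
))
schema.add_function(Function(
    name="content_bm25_emb",
    input_field_names=["content"],
    output_field_names=["content_sparse"],
    function_type=FunctionType.BM25,
))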

            配置索引

            在定义了包含必要字段和内置函数的 Schema 后,请为 Collections 设置索引。为简化这一过程,请使用AUTOINDEX 作为index_type ,该选项允许 Milvus 根据数据结构选择和配置最合适的索引类型。

            +
            index_params = MilvusClient.prepare_index_params()​
             ​
             index_params.add_index(​
            @@ -129,23 +265,78 @@ index_params.add_index(​
                 metric_type="BM25"​
             )​
             
            +
            +
            import io.milvus.v2.common.IndexParam;
            +
            +List<IndexParam> indexes = new ArrayList<>();
            +indexes.add(IndexParam.builder()
            +        .fieldName("sparse")
            +        .indexType(IndexParam.IndexType.SPARSE_INVERTED_INDEX)
            +        .metricType(IndexParam.MetricType.BM25)
            +        .build());
            +
            +
            const index_params = [
            +  {
            +    fieldName: "sparse",
            +    metricType: "BM25",
            +    indexType: "AUTOINDEX",
            +  },
            +];
            +
            +
            export indexParams='[
            +        {
            +            "fieldName": "sparse",
            +            "metricType": "BM25",
            +            "indexType": "AUTOINDEX"
            +        }
            +    ]'
             

            参数

            说明

            field_name

            -

            要索引的向量字段的名称。对于全文检索,这应该是存储生成的稀疏向量的字段。在本例中,将值设为sparse

            +

            要索引的向量字段的名称。对于全文搜索,这应该是存储生成的稀疏向量的字段。在本例中,将值设为sparse

            index_type

            要创建的索引类型。AUTOINDEX 允许 Milvus 自动优化索引设置。如果需要对索引设置进行更多控制,可以从 Milvus 中稀疏向量可用的各种索引类型中进行选择。更多信息,请参阅Milvus 支持的索引

            metric_type

            该参数的值必须设置为BM25 ,专门用于全文搜索功能。

            创建 Collections

            现在使用定义的 Schema 和索引参数创建 Collections。

            +
            MilvusClient.create_collection(​
                 collection_name='demo', ​
                 schema=schema, ​
                 index_params=index_params​
             )​
             
            +
            +
            import io.milvus.v2.service.collection.request.CreateCollectionReq;
            +
            +CreateCollectionReq requestCreate = CreateCollectionReq.builder()
            +        .collectionName("demo")
            +        .collectionSchema(schema)
            +        .indexParams(indexes)
            +        .build();
            +client.createCollection(requestCreate);
            +
            +
            await client.create_collection(
            +    collection_name: 'demo', 
            +    schema: schema, 
            +    index_params: index_params
            +);
            +
            +
            export CLUSTER_ENDPOINT="http://localhost:19530"
            +export TOKEN="root:Milvus"
            +
            +curl --request POST \
            +--url "${CLUSTER_ENDPOINT}/v2/vectordb/collections/create" \
            +--header "Authorization: Bearer ${TOKEN}" \
            +--header "Content-Type: application/json" \
            +-d "{
            +    \"collectionName\": \"demo\",
            +    \"schema\": $schema,
            +    \"indexParams\": $indexParams
            +}"
             

            插入文本数据

            设置好集合和索引后,就可以插入文本数据了。在此过程中,您只需提供原始文本。我们之前定义的内置函数会自动为每个文本条目生成相应的稀疏向量。

            -
            MilvusClient.insert('demo', [​
            -    {'text': 'Artificial intelligence was founded as an academic discipline in 1956.'},​
            -    {'text': 'Alan Turing was the first person to conduct substantial research in AI.'},​
            -    {'text': 'Born in Maida Vale, London, Turing was raised in southern England.'},​
            -])​
            +    

            设置好集合和索引后,就可以插入文本数据了。在此过程中,您只需提供原始文本。我们之前定义的内置函数会为每个文本条目自动生成相应的稀疏向量。

            + +
            client.insert('demo', [
            +    {'text': 'information retrieval is a field of study.'},
            +    {'text': 'information retrieval focuses on finding relevant information in large datasets.'},
            +    {'text': 'data mining and information retrieval overlap in research.'},
            +])
             
            +
            +
            import com.google.gson.Gson;
            +import com.google.gson.JsonObject;
            +
            +import io.milvus.v2.service.vector.request.InsertReq;
            +
            +Gson gson = new Gson();
            +List<JsonObject> rows = Arrays.asList(
            +        gson.fromJson("{\"text\": \"information retrieval is a field of study.\"}", JsonObject.class),
            +        gson.fromJson("{\"text\": \"information retrieval focuses on finding relevant information in large datasets.\"}", JsonObject.class),
            +        gson.fromJson("{\"text\": \"data mining and information retrieval overlap in research.\"}", JsonObject.class)
            +);
            +
            +client.insert(InsertReq.builder()
            +        .collectionName("demo")
            +        .data(rows)
            +        .build());
            +
            +
            await client.insert({
            +collection_name: 'demo', 
            +data: [
            +    {'text': 'information retrieval is a field of study.'},
            +    {'text': 'information retrieval focuses on finding relevant information in large datasets.'},
            +    {'text': 'data mining and information retrieval overlap in research.'},
            +]);
            +
            +
            curl --request POST \
            +--url "${CLUSTER_ENDPOINT}/v2/vectordb/entities/insert" \
            +--header "Authorization: Bearer ${TOKEN}" \
            +--header "Content-Type: application/json" \
            +-d '{
            +    "data": [
            +        {"text": "information retrieval is a field of study."},
            +        {"text": "information retrieval focuses on finding relevant information in large datasets."},
            +        {"text": "data mining and information retrieval overlap in research."}       
            +    ],
            +    "collectionName": "demo"
            +}'
             

            将数据插入 Collections 后,就可以使用原始文本查询执行全文搜索了。Milvus 会自动将你的查询转换成稀疏向量,并使用 BM25 算法对匹配的搜索结果进行排序,然后返回 topK (limit) 结果。

            +

            将数据插入 Collections 后,就可以使用原始文本查询执行全文检索了。Milvus 会自动将你的查询转换成稀疏向量,并使用 BM25 算法对匹配的搜索结果进行排序,然后返回 topK (limit) 结果。

            +
            search_params = {​
                 'params': {'drop_ratio_search': 0.6},​
             }​
             ​
             MilvusClient.search(​
                 collection_name='demo', ​
            -    data=['Who started AI research?'],​
            +    data=['whats the focus of information retrieval?'],​
                 anns_field='sparse',​
                 limit=3,​
                 search_params=search_params​
             )​
             
            +
            +
            import io.milvus.v2.service.vector.request.SearchReq;
            +import io.milvus.v2.service.vector.request.data.EmbeddedText;
            +import io.milvus.v2.service.vector.response.SearchResp;
            +
            +Map<String,Object> searchParams = new HashMap<>();
            +searchParams.put("drop_ratio_search", 0.6);
            +SearchResp searchResp = client.search(SearchReq.builder()
            +        .collectionName("demo")
            +        .data(Collections.singletonList(new EmbeddedText("whats the focus of information retrieval?")))
            +        .annsField("sparse")
            +        .topK(3)
            +        .searchParams(searchParams)
            +        .outputFields(Collections.singletonList("text"))
            +        .build());
            +
            +
            await client.search(
            +    collection_name: 'demo', 
            +    data: ['whats the focus of information retrieval?'],
            +    anns_field: 'sparse',
            +    limit: 3,
            +    params: {'drop_ratio_search': 0.6},
            +)
            +
            +
            curl --request POST \
            +--url "${CLUSTER_ENDPOINT}/v2/vectordb/entities/search" \
            +--header "Authorization: Bearer ${TOKEN}" \
            +--header "Content-Type: application/json" \
            +--data-raw '{
            +    "collectionName": "demo",
            +    "data": [
            +        "whats the focus of information retrieval?"
            +    ],
            +    "annsField": "sparse",
            +    "limit": 3,
            +    "outputFields": [
            +        "text"
            +    ],
            +    "searchParams":{
            +        "params":{
            +            "drop_ratio_search":0.6
            +        }
            +    }
            +}'
             

            参数

            说明

            diff --git a/localization/v2.5.x/site/zh/userGuide/search-query-get/keyword-match.json b/localization/v2.5.x/site/zh/userGuide/search-query-get/keyword-match.json index 7b27c50ae..1a6660b6d 100644 --- a/localization/v2.5.x/site/zh/userGuide/search-query-get/keyword-match.json +++ b/localization/v2.5.x/site/zh/userGuide/search-query-get/keyword-match.json @@ -1 +1 @@ -{"codeList":["from pymilvus import MilvusClient, DataType​\n​\nschema = MilvusClient.create_schema(auto_id=True, enable_dynamic_field=False)​\n​\nschema.add_field(​\n field_name='text', ​\n datatype=DataType.VARCHAR, ​\n max_length=1000, ​\n enable_analyzer=True, # Whether to enable text analysis for this field​\n enable_match=True # Whether to enable text match​\n)​\n\n","analyzer_params={​\n \"type\": \"english\"​\n}​\n​\nschema.add_field(​\n field_name='text', ​\n datatype=DataType.VARCHAR, ​\n max_length=200, ​\n enable_analyzer=True,​\n analyzer_params=analyzer_params,​\n enable_match=True, ​\n)​\n\n","TEXT_MATCH(field_name, text)​\n\n","filter = \"TEXT_MATCH(text, 'machine deep')\"​\n\n","filter = \"TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')\"​\n\n","# Match entities with `keyword1` or `keyword2`​\nfilter = \"TEXT_MATCH(text, 'keyword1 keyword2')\"​\n​\n# Assuming 'embeddings' is the vector field and 'text' is the VARCHAR field​\nresult = MilvusClient.search(​\n collection_name=\"YOUR_COLLECTION_NAME\", # Your collection name​\n anns_field=\"embeddings\", # Vector field name​\n data=[query_vector], # Query vector​\n filter=filter,​\n search_params={\"params\": {\"nprobe\": 10}},​\n limit=10, # Max. number of results to return​\n output_fields=[\"id\", \"text\"] # Fields to return​\n)​\n\n","# Match entities with both `keyword1` and `keyword2`​\nfilter = \"TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')\"​\n​\nresult = MilvusClient.query(​\n collection_name=\"YOUR_COLLECTION_NAME\",​\n filter=filter, ​\n output_fields=[\"id\", \"text\"]​\n)​\n\n"],"headingContent":"Keyword Match​","anchorList":[{"label":"关键词匹配","href":"Keyword-Match​","type":1,"isActive":false},{"label":"概述","href":"Overview","type":2,"isActive":false},{"label":"启用关键字匹配","href":"Enable-keyword-match","type":2,"isActive":false},{"label":"使用关键词匹配","href":"Use-keyword-match","type":2,"isActive":false},{"label":"注意事项","href":"Considerations","type":2,"isActive":false}]} \ No newline at end of file +{"codeList":["from pymilvus import MilvusClient, DataType​\n​\nschema = MilvusClient.create_schema(auto_id=True, enable_dynamic_field=False)​\n​\nschema.add_field(​\n field_name='text', ​\n datatype=DataType.VARCHAR, ​\n max_length=1000, ​\n enable_analyzer=True, # Whether to enable text analysis for this field​\n enable_match=True # Whether to enable text match​\n)​\n\n","import io.milvus.v2.common.DataType;\nimport io.milvus.v2.service.collection.request.AddFieldReq;\nimport io.milvus.v2.service.collection.request.CreateCollectionReq;\n\nCreateCollectionReq.CollectionSchema schema = CreateCollectionReq.CollectionSchema.builder()\n .enableDynamicField(false)\n .build();\n\nschema.addField(AddFieldReq.builder()\n .fieldName(\"text\")\n .dataType(DataType.VarChar)\n .maxLength(1000)\n .enableAnalyzer(true)\n .enableMatch(true)\n .build());\n\n","const schema = [\n {\n name: \"id\",\n data_type: DataType.Int64,\n is_primary_key: true,\n },\n {\n name: \"text\",\n data_type: \"VarChar\",\n enable_analyzer: true,\n enable_match: true,\n max_length: 1000,\n },\n {\n name: \"sparse\",\n data_type: DataType.SparseFloatVector,\n 
},\n];\n\n","export schema='{\n \"autoId\": true,\n \"enabledDynamicField\": false,\n \"fields\": [\n {\n \"fieldName\": \"id\",\n \"dataType\": \"Int64\",\n \"isPrimary\": true\n },\n {\n \"fieldName\": \"text\",\n \"dataType\": \"VarChar\",\n \"elementTypeParams\": {\n \"max_length\": 1000,\n \"enable_analyzer\": true,\n \"enable_match\": true\n }\n },\n {\n \"fieldName\": \"sparse\",\n \"dataType\": \"SparseFloatVector\"\n }\n ]\n }'\n\n","analyzer_params={​\n \"type\": \"english\"​\n}​\n​\nschema.add_field(​\n field_name='text', ​\n datatype=DataType.VARCHAR, ​\n max_length=200, ​\n enable_analyzer=True,​\n analyzer_params=analyzer_params,​\n enable_match=True, ​\n)​\n\n","Map analyzerParams = new HashMap<>();\nanalyzerParams.put(\"type\", \"english\");\nschema.addField(AddFieldReq.builder()\n .fieldName(\"text\")\n .dataType(DataType.VarChar)\n .maxLength(200)\n .enableAnalyzer(true)\n .analyzerParams(analyzerParams)\n .enableMatch(true)\n .build());\n\n","const schema = [\n {\n name: \"id\",\n data_type: DataType.Int64,\n is_primary_key: true,\n },\n {\n name: \"text\",\n data_type: \"VarChar\",\n enable_analyzer: true,\n enable_match: true,\n max_length: 1000,\n analyzer_params: { type: 'english' },\n },\n {\n name: \"sparse\",\n data_type: DataType.SparseFloatVector,\n },\n];\n\n","export schema='{\n \"autoId\": true,\n \"enabledDynamicField\": false,\n \"fields\": [\n {\n \"fieldName\": \"id\",\n \"dataType\": \"Int64\",\n \"isPrimary\": true\n },\n {\n \"fieldName\": \"text\",\n \"dataType\": \"VarChar\",\n \"elementTypeParams\": {\n \"max_length\": 200,\n \"enable_analyzer\": true,\n \"enable_match\": true,\n \"analyzer_params\": {\"type\": \"english\"}\n }\n },\n {\n \"fieldName\": \"my_vector\",\n \"dataType\": \"FloatVector\",\n \"elementTypeParams\": {\n \"dim\": \"5\"\n }\n }\n ]\n }'\n\n","TEXT_MATCH(field_name, text)​\n\n","filter = \"TEXT_MATCH(text, 'machine deep')\"​\n","String filter = \"TEXT_MATCH(text, 'machine deep')\";\n","const filter = \"TEXT_MATCH(text, 'machine deep')\";\n","export filter=\"\\\"TEXT_MATCH(text, 'machine deep')\\\"\"\n","filter = \"TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')\"​\n","String filter = \"TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')\";\n","const filter = \"TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')\"\n","export filter=\"\\\"TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')\\\"\"\n","# Match entities with `keyword1` or `keyword2`​\nfilter = \"TEXT_MATCH(text, 'keyword1 keyword2')\"​\n​\n# Assuming 'embeddings' is the vector field and 'text' is the VARCHAR field​\nresult = MilvusClient.search(​\n collection_name=\"YOUR_COLLECTION_NAME\", # Your collection name​\n anns_field=\"embeddings\", # Vector field name​\n data=[query_vector], # Query vector​\n filter=filter,​\n search_params={\"params\": {\"nprobe\": 10}},​\n limit=10, # Max. 
number of results to return​\n output_fields=[\"id\", \"text\"] # Fields to return​\n)​\n\n","String filter = \"TEXT_MATCH(text, 'keyword1 keyword2')\";\n\nSearchResp searchResp = client.search(SearchReq.builder()\n .collectionName(\"YOUR_COLLECTION_NAME\")\n .annsField(\"embeddings\")\n .data(Collections.singletonList(queryVector)))\n .filter(filter)\n .topK(10)\n .outputFields(Arrays.asList(\"id\", \"text\"))\n .build());\n","// Match entities with `keyword1` or `keyword2`\nconst filter = \"TEXT_MATCH(text, 'keyword1 keyword2')\";\n\n// Assuming 'embeddings' is the vector field and 'text' is the VARCHAR field\nconst result = await client.search(\n collection_name: \"YOUR_COLLECTION_NAME\", // Your collection name\n anns_field: \"embeddings\", // Vector field name\n data: [query_vector], // Query vector\n filter: filter,\n params: {\"nprobe\": 10},\n limit: 10, // Max. number of results to return\n output_fields: [\"id\", \"text\"] //Fields to return\n);\n","export filter=\"\\\"TEXT_MATCH(text, 'keyword1 keyword2')\\\"\"\n\nexport CLUSTER_ENDPOINT=\"http://localhost:19530\"\nexport TOKEN=\"root:Milvus\"\n\ncurl --request POST \\\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/entities/search\" \\\n--header \"Authorization: Bearer ${TOKEN}\" \\\n--header \"Content-Type: application/json\" \\\n-d '{\n \"collectionName\": \"demo2\",\n \"annsField\": \"my_vector\",\n \"data\": [[0.19886812562848388, 0.06023560599112088, 0.6976963061752597, 0.2614474506242501, 0.838729485096104]],\n \"filter\": '\"$filter\"',\n \"searchParams\": {\n \"params\": {\n \"nprobe\": 10\n }\n },\n \"limit\": 3,\n \"outputFields\": [\"text\",\"id\"]\n}'\n","# Match entities with both `keyword1` and `keyword2`​\nfilter = \"TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')\"​\n​\nresult = MilvusClient.query(​\n collection_name=\"YOUR_COLLECTION_NAME\",​\n filter=filter, ​\n output_fields=[\"id\", \"text\"]​\n)​\n\n","String filter = \"TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')\";\n\nQueryResp queryResp = client.query(QueryReq.builder()\n .collectionName(\"YOUR_COLLECTION_NAME\")\n .filter(filter)\n .outputFields(Arrays.asList(\"id\", \"text\"))\n .build()\n);\n","// Match entities with both `keyword1` and `keyword2`\nconst filter = \"TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')\";\n\nconst result = await client.query(\n collection_name: \"YOUR_COLLECTION_NAME\",\n filter: filter, \n output_fields: [\"id\", \"text\"]\n)\n","export filter=\"\\\"TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')\\\"\"\n\nexport CLUSTER_ENDPOINT=\"http://localhost:19530\"\nexport TOKEN=\"root:Milvus\"\n\ncurl --request POST \\\n--url \"${CLUSTER_ENDPOINT}/v2/vectordb/entities/query\" \\\n--header \"Authorization: Bearer ${TOKEN}\" \\\n--header \"Content-Type: application/json\" \\\n-d '{\n \"collectionName\": \"demo2\",\n \"filter\": '\"$filter\"',\n \"outputFields\": [\"id\", \"text\"]\n}'\n"],"headingContent":"Text Match​","anchorList":[{"label":"文本匹配","href":"Text-Match​","type":1,"isActive":false},{"label":"概述","href":"Overview","type":2,"isActive":false},{"label":"启用文本匹配","href":"Enable-text-match","type":2,"isActive":false},{"label":"使用文本匹配","href":"Use-text-match","type":2,"isActive":false},{"label":"注意事项","href":"Considerations","type":2,"isActive":false}]} \ No newline at end of file diff --git a/localization/v2.5.x/site/zh/userGuide/search-query-get/keyword-match.md b/localization/v2.5.x/site/zh/userGuide/search-query-get/keyword-match.md index 7f7adc756..c4ca80bf4 100644 --- 
a/localization/v2.5.x/site/zh/userGuide/search-query-get/keyword-match.md
+++ b/localization/v2.5.x/site/zh/userGuide/search-query-get/keyword-match.md
@@ -2,10 +2,10 @@
 id: keyword-match.md
 summary: >-
   Milvus
-  中的关键词匹配功能可根据特定术语进行精确的文档检索。该功能主要用于满足特定条件的过滤搜索,并可结合标量过滤来细化查询结果,允许在符合标量标准的向量内进行相似性搜索。
-title: 关键词匹配
+  中的文本匹配功能可根据特定术语进行精确的文档检索。该功能主要用于满足特定条件的过滤搜索,并可结合标量过滤来细化查询结果,允许在符合标量标准的向量内进行相似性搜索。
+title: 文本匹配
 ---
-

            关键词匹配

            Milvus 的关键词匹配功能可根据特定术语精确检索文档。该功能主要用于满足特定条件的过滤搜索,并可结合标量过滤来细化查询结果,允许在符合标量标准的向量内进行相似性搜索。

            +

            Milvus 的文本匹配功能可根据特定术语精确检索文档。该功能主要用于满足特定条件的过滤搜索,并可结合标量过滤功能来细化查询结果,允许在符合标量标准的向量内进行相似性搜索。

            -

            关键词匹配侧重于查找查询词的精确出现,而不对匹配文档的相关性进行评分。如果您想根据查询词的语义和重要性检索最相关的文档,我们建议您使用全文检索

            +

            文本匹配侧重于查找查询术语的精确出现,而不对匹配文档的相关性进行评分。如果您想根据查询词的语义和重要性检索最相关的文档,我们建议您使用全文搜索

            概述

            Milvus 整合了Tantivy来支持其底层的倒排索引和关键词搜索。对于每个文本条目,Milvus 都会按照以下程序建立索引。

            +

            Milvus 整合了Tantivy来支持其底层的倒排索引和基于术语的文本搜索。对于每个文本条目,Milvus 都会按照以下程序建立索引。

            1. 分析器:分析器将输入文本标记化为单个词或标记,然后根据需要应用过滤器。这样,Milvus 就能根据这些标记建立索引。

            2. 编制索引:文本分析完成后,Milvus 会创建一个倒排索引,将每个独特的标记映射到包含该标记的文档。

            -

            当用户执行关键字匹配时,倒排索引可用于快速检索包含关键字的所有文档。这比逐一扫描每个文档要快得多。

            +

            当用户进行文本匹配时,倒排索引可用于快速检索包含该术语的所有文档。这比逐个扫描每个文档要快得多。
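A minimal, self-contained sketch (plain Python, not Milvus/Tantivy internals) can make the inverted-index idea above concrete: each token produced by the analyzer points to the set of documents containing it, so an OR-style text match only needs to union a few posting lists instead of scanning every document.

```python
# Toy sketch of an inverted index; illustrative only, not how Milvus/Tantivy store data.
docs = {
    1: "machine learning with milvus",
    2: "deep learning and vector search",
    3: "approximate nearest neighbor search",
}

inverted_index = {}
for doc_id, text in docs.items():
    for token in text.lower().split():              # crude whitespace "analyzer"
        inverted_index.setdefault(token, set()).add(doc_id)

# TEXT_MATCH(text, 'machine deep') uses OR semantics: union the posting lists.
hits = inverted_index.get("machine", set()) | inverted_index.get("deep", set())
print(sorted(hits))                                 # -> [1, 2]
```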

            - Keyword Match - 关键词匹配

            -

            启用关键字匹配

            关键词匹配适用于VARCHAR 字段类型,它在 milvus 中本质上是字符串数据类型。要启用关键字匹配,请将enable_analyzerenable_match 都设置为True ,然后在定义 Collections Schema 时选择性地配置文本分析的分析器。

            -

            设置enable_analyzerenable_match

            要启用特定VARCHAR 字段的关键字匹配,可在定义字段 Schema 时将enable_analyzerenable_match 参数都设为True 。这将指示 Milvus 对文本进行标记化处理,并为指定字段创建反向索引,从而实现快速、高效的关键字匹配。

            +

            文本匹配适用于VARCHAR 字段类型,这在 milvus 中本质上是字符串数据类型。要启用文本匹配,请将enable_analyzerenable_match 设置为True ,然后在定义 Collections Schema 时选择性地配置文本分析分析器。

            +

            设置enable_analyzerenable_match

            要启用特定VARCHAR 字段的文本匹配,可在定义字段 Schema 时将enable_analyzerenable_match 参数都设为True 。这将指示 Milvus 对文本进行标记化处理,并为指定字段创建反向索引,从而实现快速高效的文本匹配。

            +
            from pymilvus import MilvusClient, DataType​
             ​
             schema = MilvusClient.create_schema(auto_id=True, enable_dynamic_field=False)​
            @@ -79,9 +81,74 @@ schema.add_field(​
             )​
             
             
            -

            可选:配置分析器

            关键词匹配的性能和准确性取决于所选的分析器。不同的分析器适用于不同的语言和文本结构,因此选择正确的分析器会对特定用例的搜索结果产生重大影响。

            +
            import io.milvus.v2.common.DataType;
            +import io.milvus.v2.service.collection.request.AddFieldReq;
            +import io.milvus.v2.service.collection.request.CreateCollectionReq;
            +
            +CreateCollectionReq.CollectionSchema schema = CreateCollectionReq.CollectionSchema.builder()
            +        .enableDynamicField(false)
            +        .build();
            +
            +schema.addField(AddFieldReq.builder()
            +        .fieldName("text")
            +        .dataType(DataType.VarChar)
            +        .maxLength(1000)
            +        .enableAnalyzer(true)
            +        .enableMatch(true)
            +        .build());
            +
            +
            +
            const schema = [
            +  {
            +    name: "id",
            +    data_type: DataType.Int64,
            +    is_primary_key: true,
            +  },
            +  {
            +    name: "text",
            +    data_type: "VarChar",
            +    enable_analyzer: true,
            +    enable_match: true,
            +    max_length: 1000,
            +  },
            +  {
            +    name: "sparse",
            +    data_type: DataType.SparseFloatVector,
            +  },
            +];
            +
            +
            +
            export schema='{
            +        "autoId": true,
            +        "enabledDynamicField": false,
            +        "fields": [
            +            {
            +                "fieldName": "id",
            +                "dataType": "Int64",
            +                "isPrimary": true
            +            },
            +            {
            +                "fieldName": "text",
            +                "dataType": "VarChar",
            +                "elementTypeParams": {
            +                    "max_length": 1000,
            +                    "enable_analyzer": true,
            +                    "enable_match": true
            +                }
            +            },
            +            {
            +                "fieldName": "sparse",
            +                "dataType": "SparseFloatVector"
            +            }
            +        ]
            +    }'
            +
            +
            +

            可选:配置分析器

            文本匹配的性能和准确性取决于所选的分析器。不同的分析器适用于不同的语言和文本结构,因此选择正确的分析器会对特定用例的搜索结果产生重大影响。

            默认情况下,Milvus 使用standard 分析器,该分析器根据空白和标点符号对文本进行标记,删除长度超过 40 个字符的标记,并将文本转换为小写。应用此默认设置无需额外参数。更多信息,请参阅标准

            如果需要不同的分析器,可以使用analyzer_params 参数进行配置。例如,应用english 分析器处理英文文本。

            +
            analyzer_params={​
                 "type": "english"​
             }​
            @@ -95,9 +162,72 @@ schema.add_field(​
                 enable_match=True, ​
             )​
             
            +
            +
            Map<String, Object> analyzerParams = new HashMap<>();
            +analyzerParams.put("type", "english");
            +schema.addField(AddFieldReq.builder()
            +        .fieldName("text")
            +        .dataType(DataType.VarChar)
            +        .maxLength(200)
            +        .enableAnalyzer(true)
            +        .analyzerParams(analyzerParams)
            +        .enableMatch(true)
            +        .build());
            +
            +
            +
            const schema = [
            +  {
            +    name: "id",
            +    data_type: DataType.Int64,
            +    is_primary_key: true,
            +  },
            +  {
            +    name: "text",
            +    data_type: "VarChar",
            +    enable_analyzer: true,
            +    enable_match: true,
            +    max_length: 1000,
            +    analyzer_params: { type: 'english' },
            +  },
            +  {
            +    name: "sparse",
            +    data_type: DataType.SparseFloatVector,
            +  },
            +];
            +
            +
            +
            export schema='{
            +        "autoId": true,
            +        "enabledDynamicField": false,
            +        "fields": [
            +            {
            +                "fieldName": "id",
            +                "dataType": "Int64",
            +                "isPrimary": true
            +            },
            +            {
            +                "fieldName": "text",
            +                "dataType": "VarChar",
            +                "elementTypeParams": {
            +                    "max_length": 200,
            +                    "enable_analyzer": true,
            +                    "enable_match": true,
            +                    "analyzer_params": {"type": "english"}
            +                }
            +            },
            +            {
            +                "fieldName": "my_vector",
            +                "dataType": "FloatVector",
            +                "elementTypeParams": {
            +                    "dim": "5"
            +                }
            +            }
            +        ]
            +    }'
            +
             

            Milvus 还提供适合不同语言和场景的其他各种分析器。更多详情,请参阅概述

            -

            使用关键词匹配

            为 Collections Schema 中的 VARCHAR 字段启用关键字匹配后,就可以使用TEXT_MATCH 表达式执行关键字匹配。

            -

            TEXT_MATCH 表达式语法

            TEXT_MATCH 表达式用于指定要搜索的字段和关键字。其语法如下。

            -
            TEXT_MATCH(field_name, text)​
            +    

            为 Collections Schema 中的 VARCHAR 字段启用文本匹配后,就可以使用TEXT_MATCH 表达式执行文本匹配。

            +

            文本匹配表达式语法

            TEXT_MATCH 表达式用于指定要搜索的字段和术语。其语法如下。

            +
            TEXT_MATCH(field_name, text)​
             
             
            • field_name:要搜索的 VARCHAR 字段的名称。

            • -
            • text:要搜索的关键字。根据语言和配置的分析器,多个关键词可以用空格或其他适当的分隔符分隔。

            • +
            • text:要搜索的术语。根据语言和配置的分析器,多个术语可以用空格或其他适当的分隔符分隔。

            -

            默认情况下,TEXT_MATCH 使用OR匹配逻辑,即返回包含任何指定关键词的文档。例如,要在text 字段中搜索包含关键字machinedeep 的文档,请使用以下表达式。

            +

            默认情况下,TEXT_MATCH 使用OR匹配逻辑,这意味着它会返回包含任何指定术语的文档。例如,要搜索text 字段中包含machinedeep 的文档,请使用以下表达式。

            +
            filter = "TEXT_MATCH(text, 'machine deep')"​
            -
             
            -

            您还可以使用逻辑操作符组合多个TEXT_MATCH 表达式来执行AND匹配。例如,要在text 字段中搜索同时包含machinedeep 的文档,请使用以下表达式。

            +
            String filter = "TEXT_MATCH(text, 'machine deep')";
            +
            +
            const filter = "TEXT_MATCH(text, 'machine deep')";
            +
            +
            export filter="\"TEXT_MATCH(text, 'machine deep')\""
            +
            +

            您还可以使用逻辑操作符组合多个TEXT_MATCH 表达式来执行AND匹配。例如,要搜索text 字段中同时包含machinedeep 的文档,请使用以下表达式。

            +
            filter = "TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')"​
            -
             
            -

            使用关键字匹配搜索

            关键词匹配可与向量相似性搜索结合使用,以缩小搜索范围并提高搜索性能。通过在向量相似性搜索前使用关键字匹配过滤 Collections,可以减少需要搜索的文档数量,从而加快查询速度。

            -

            在本例中,filter 表达式将搜索结果过滤为只包含与指定关键字keyword1keyword2 匹配的文档。然后在此过滤后的文档子集中执行向量相似性搜索。

            +
            String filter = "TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')";
            +
            +
            const filter = "TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')"
            +
            +
            export filter="\"TEXT_MATCH(text, 'machine') and TEXT_MATCH(text, 'deep')\""
            +
            +

            使用文本匹配搜索

            文本匹配可与向量相似性搜索结合使用,以缩小搜索范围并提高搜索性能。通过在向量相似性搜索前使用文本匹配过滤 Collections,可以减少需要搜索的文档数量,从而加快查询速度。

            +

            在这个示例中,filter 表达式过滤了搜索结果,使其只包含与指定术语keyword1keyword2 匹配的文档。然后在这个过滤后的文档子集中执行向量相似性搜索。

            +
            # Match entities with `keyword1` or `keyword2`​
             filter = "TEXT_MATCH(text, 'keyword1 keyword2')"​
             ​
            @@ -146,8 +292,58 @@ result = MilvusClient.search(​
             )​
             
             
            -

            使用关键字匹配进行查询

            关键词匹配也可用于查询操作中的标量过滤。通过在query() 方法的expr 参数中指定TEXT_MATCH 表达式,可以检索与给定关键词匹配的文档。

            -

            下面的示例检索了text 字段同时包含关键字keyword1keyword2 的文档。

            +
            String filter = "TEXT_MATCH(text, 'keyword1 keyword2')";
            +
            +SearchResp searchResp = client.search(SearchReq.builder()
            +        .collectionName("YOUR_COLLECTION_NAME")
            +        .annsField("embeddings")
+        .data(Collections.singletonList(queryVector))
            +        .filter(filter)
            +        .topK(10)
            +        .outputFields(Arrays.asList("id", "text"))
            +        .build());
            +
            +
            // Match entities with `keyword1` or `keyword2`
            +const filter = "TEXT_MATCH(text, 'keyword1 keyword2')";
            +
            +// Assuming 'embeddings' is the vector field and 'text' is the VARCHAR field
+const result = await client.search({
            +    collection_name: "YOUR_COLLECTION_NAME", // Your collection name
            +    anns_field: "embeddings", // Vector field name
            +    data: [query_vector], // Query vector
            +    filter: filter,
            +    params: {"nprobe": 10},
            +    limit: 10, // Max. number of results to return
            +    output_fields: ["id", "text"] //Fields to return
+});
            +
            +
            export filter="\"TEXT_MATCH(text, 'keyword1 keyword2')\""
            +
            +export CLUSTER_ENDPOINT="http://localhost:19530"
            +export TOKEN="root:Milvus"
            +
            +curl --request POST \
            +--url "${CLUSTER_ENDPOINT}/v2/vectordb/entities/search" \
            +--header "Authorization: Bearer ${TOKEN}" \
            +--header "Content-Type: application/json" \
            +-d '{
            +    "collectionName": "demo2",
            +    "annsField": "my_vector",
            +    "data": [[0.19886812562848388, 0.06023560599112088, 0.6976963061752597, 0.2614474506242501, 0.838729485096104]],
            +    "filter": '"$filter"',
            +    "searchParams": {
            +        "params": {
            +            "nprobe": 10
            +        }
            +    },
            +    "limit": 3,
            +    "outputFields": ["text","id"]
            +}'
            +
            +

            文本匹配查询

            文本匹配也可用于查询操作中的标量过滤。通过在query() 方法的expr 参数中指定TEXT_MATCH 表达式,可以检索与给定术语匹配的文档。

            +

            下面的示例检索了text 字段包含keyword1keyword2 两个术语的文档。

            +
            # Match entities with both `keyword1` and `keyword2`​
             filter = "TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')"​
             ​
            @@ -157,6 +353,39 @@ result = MilvusClient.query(​
                 output_fields=["id", "text"]​
             )​
             
            +
            +
            String filter = "TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')";
            +
            +QueryResp queryResp = client.query(QueryReq.builder()
            +        .collectionName("YOUR_COLLECTION_NAME")
            +        .filter(filter)
            +        .outputFields(Arrays.asList("id", "text"))
            +        .build()
            +);
            +
            +
            // Match entities with both `keyword1` and `keyword2`
            +const filter = "TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')";
            +
+const result = await client.query({
            +    collection_name: "YOUR_COLLECTION_NAME",
            +    filter: filter, 
            +    output_fields: ["id", "text"]
+})
            +
            +
            export filter="\"TEXT_MATCH(text, 'keyword1') and TEXT_MATCH(text, 'keyword2')\""
            +
            +export CLUSTER_ENDPOINT="http://localhost:19530"
            +export TOKEN="root:Milvus"
            +
            +curl --request POST \
            +--url "${CLUSTER_ENDPOINT}/v2/vectordb/entities/query" \
            +--header "Authorization: Bearer ${TOKEN}" \
            +--header "Content-Type: application/json" \
            +-d '{
            +    "collectionName": "demo2",
            +    "filter": '"$filter"',
            +    "outputFields": ["id", "text"]
            +}'
             

            注意事项

              -
            • 为字段启用关键字匹配会触发反向索引的创建,从而消耗存储资源。在决定是否启用此功能时,请考虑对存储的影响,因为它根据文本大小、唯一标记和所使用的分析器而有所不同。

            • +
            • 为字段启用文本匹配会触发倒排索引的创建,从而消耗存储资源。在决定是否启用此功能时,请考虑对存储的影响,因为它会根据文本大小、唯一标记和所使用的分析器而有所不同。

            • 在 Schema 中定义分析器后,其设置将永久适用于该 Collections。如果您认为不同的分析器更适合您的需要,您可以考虑放弃现有的 Collections,然后使用所需的分析器配置创建一个新的 Collections。
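Because analyzer settings are fixed for the lifetime of a collection, switching analyzers means dropping the collection and recreating it. The sketch below (pymilvus, with an assumed endpoint, collection name, and field layout) illustrates that workflow; treat it as a starting point rather than a drop-in script, since dropping the collection also discards its data.

```python
from pymilvus import MilvusClient, DataType

client = MilvusClient(uri="http://localhost:19530")        # assumed local endpoint

# Dropping the collection removes its data together with the old analyzer settings.
client.drop_collection(collection_name="YOUR_COLLECTION_NAME")

schema = MilvusClient.create_schema(auto_id=True, enable_dynamic_field=False)
schema.add_field(field_name="id", datatype=DataType.INT64, is_primary=True)
schema.add_field(
    field_name="text",
    datatype=DataType.VARCHAR,
    max_length=1000,
    enable_analyzer=True,
    enable_match=True,
    analyzer_params={"type": "english"},                   # the analyzer you now prefer
)
schema.add_field(field_name="sparse", datatype=DataType.SPARSE_FLOAT_VECTOR)

client.create_collection(collection_name="YOUR_COLLECTION_NAME", schema=schema)
```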

            diff --git a/localization/v2.5.x/site/zh/userGuide/search-query-get/multi-vector-search.md b/localization/v2.5.x/site/zh/userGuide/search-query-get/multi-vector-search.md index 7dd32c56b..01d4d8624 100644 --- a/localization/v2.5.x/site/zh/userGuide/search-query-get/multi-vector-search.md +++ b/localization/v2.5.x/site/zh/userGuide/search-query-get/multi-vector-search.md @@ -39,8 +39,8 @@ title: 混合搜索

            混合搜索适用于以下两种情况。

            稀疏-密集向量搜索

            不同类型的向量可以表示不同的信息,使用各种嵌入模型可以更全面地表示数据的不同特征和方面。例如,对同一个句子使用不同的 Embeddings 模型,可以生成表示语义的密集向量和表示句子中词频的稀疏向量。

              -
            • 稀疏向量:稀疏向量的特点是其向量维度高,存在很少的非零值。这种结构使其特别适合传统的信息检索应用。在大多数情况下,稀疏向量中使用的维数对应于一种或多种语言中的不同词块。每个维度都有一个值,表示该标记在文档中的相对重要性。这种布局对于涉及关键词匹配的任务非常有利。

            • -
            • 密集向量:密集向量是从神经网络中衍生出来的嵌入。当排列成有序数组时,这些向量可以捕捉到输入文本的语义本质。需要注意的是,稠密向量并不局限于文本处理;它们还广泛应用于计算机视觉领域,以表示视觉数据的语义。这些稠密向量通常由文本 Embeddings 模型生成,其特点是大部分或所有元素都非零。因此,密集向量对于语义搜索应用特别有效,因为即使在没有精确关键词匹配的情况下,它们也能根据向量距离返回最相似的结果。这种功能可以获得更细致入微、更能感知上下文的搜索结果,通常可以捕捉到基于关键词的方法可能忽略的概念之间的关系。

            • +
            • 稀疏向量:稀疏向量的特点是其向量维度高,存在很少的非零值。这种结构使其特别适合传统的信息检索应用。在大多数情况下,稀疏向量中使用的维数对应于一种或多种语言中的不同词块。每个维度都有一个值,表示该标记在文档中的相对重要性。这种布局对于涉及文本匹配的任务非常有利。

            • +
            • 密集向量:密集向量是从神经网络中衍生出来的嵌入。当排列成有序数组时,这些向量能捕捉到输入文本的语义本质。需要注意的是,稠密向量并不局限于文本处理;它们还广泛应用于计算机视觉,以表示视觉数据的语义。这些稠密向量通常由文本 Embeddings 模型生成,其特点是大部分或所有元素都非零。因此,密集向量对于语义搜索应用特别有效,因为即使在没有精确文本匹配的情况下,它们也能根据向量距离返回最相似的结果。这种功能可以获得更细致入微、更能感知上下文的搜索结果,通常可以捕捉到基于关键词的方法可能忽略的概念之间的关系。

            更多详情,请参阅稀疏向量密集向量
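To make the sparse/dense distinction above concrete, here is a minimal illustration of how the two representations typically look when handed to pymilvus: a dense vector is a fixed-length list of floats, while a sparse vector can be passed as a dictionary mapping dimension indices to their non-zero weights. The field names and values below are invented for illustration.

```python
# Dense embedding: fixed dimensionality, most entries non-zero.
dense_vector = [0.12, -0.33, 0.58, 0.04, 0.91]

# Sparse embedding: only non-zero dimensions are stored, e.g. one entry per token.
sparse_vector = {102: 0.71, 5643: 0.32, 88812: 0.15}

row = {
    "dense": dense_vector,      # would go into a FLOAT_VECTOR field
    "sparse": sparse_vector,    # would go into a SPARSE_FLOAT_VECTOR field
}
```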

            多模式搜索

            多模态搜索是指跨多种模态(如图像、视频、音频、文本等)对非结构化数据进行相似性搜索。例如,一个人可以用指纹、声纹和面部特征等多种数据模式来表示。混合搜索支持同时进行多种搜索。例如,用相似的指纹和声纹搜索一个人。

            diff --git a/localization/v2.5.x/site/zh/userGuide/search-query-get/single-vector-search.md b/localization/v2.5.x/site/zh/userGuide/search-query-get/single-vector-search.md index 641ca1ba3..ef62c2a46 100644 --- a/localization/v2.5.x/site/zh/userGuide/search-query-get/single-vector-search.md +++ b/localization/v2.5.x/site/zh/userGuide/search-query-get/single-vector-search.md @@ -38,10 +38,10 @@ title: 基本 ANN 搜索

            ANN 和 k-Nearest Neighbors (kNN) 搜索是向量相似性搜索的常用方法。在 kNN 搜索中,必须将向量空间中的所有向量与搜索请求中携带的查询向量进行比较,然后找出最相似的向量,这既耗时又耗费资源。

            与 kNN 搜索不同,ANN 搜索算法要求提供一个索引文件,记录向量 Embeddings 的排序顺序。当收到搜索请求时,可以使用索引文件作为参考,快速找到可能包含与查询向量最相似的向量嵌入的子组。然后,你可以使用指定的度量类型来测量查询向量与子组中的向量之间的相似度,根据与查询向量的相似度对组成员进行排序,并找出前 K 个组成员。

            ANN 搜索依赖于预建索引,搜索吞吐量、内存使用量和搜索正确性可能会因选择的索引类型而不同。您需要在搜索性能和正确性之间取得平衡。

            -

            为了降低学习曲线,Milvus 提供了AUTOINDEX。通过AUTOINDEX,Milvus 可以在建立索引的同时分析 Collections 内的数据分布,并根据分析结果设置最优化的索引参数,从而在搜索性能和正确性之间取得平衡。

            +

            为了减少学习曲线,Milvus 提供了AUTOINDEX。通过AUTOINDEX,Milvus 可以在建立索引的同时分析 Collections 内的数据分布,并根据分析结果设置最优化的索引参数,从而在搜索性能和正确性之间取得平衡。
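As a concrete illustration, AUTOINDEX is requested like any other index type when building index parameters with pymilvus. The endpoint, collection name, and vector field name below are assumptions; this is a sketch rather than a complete setup script.

```python
from pymilvus import MilvusClient

client = MilvusClient(uri="http://localhost:19530")   # assumed endpoint

index_params = client.prepare_index_params()
index_params.add_index(
    field_name="vector",        # assumed vector field name
    index_type="AUTOINDEX",     # let Milvus analyze the data and pick index parameters
    metric_type="IP",
)

client.create_index(collection_name="quick_setup", index_params=index_params)
```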

            有关自动索引和适用度量类型的详细信息,请参阅自动索引度量类型。在本节中,您将找到有关以下主题的详细信息。

              -
            • 单矢量搜索

            • +
            • 单向量搜索

            • 批量向量搜索

            • 分区中的 ANN 搜索

            • 使用输出字段

@@ -63,7 +63,7 @@ title: 基本 ANN 搜索
-

              在 ANN 搜索中,单向量搜索指的是只涉及一个查询向量的搜索。根据预建索引和搜索请求中携带的度量类型,Milvus 将找到与查询向量最相似的前 K 个向量。

              +

              在 ANN 搜索中,单向量搜索指的是只涉及一个查询向量的搜索。根据预先建立的索引和搜索请求中携带的度量类型,Milvus 将找到与查询向量最相似的前 K 个向量。

              本节将介绍如何进行单向量搜索。代码片段假定您已经以快速设置的方式创建了一个 Collections。搜索请求携带单个查询向量,并要求 Milvus 使用内积(IP)计算查询向量与 Collections 中向量的相似度,并返回三个最相似的向量。
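A minimal pymilvus sketch of that request might look as follows; the endpoint, collection name, and toy query vector are assumptions, and the collection is expected to have been created with the quick-setup defaults.

```python
from pymilvus import MilvusClient

client = MilvusClient(uri="http://localhost:19530")      # assumed endpoint

query_vector = [0.36, -0.12, 0.18, 0.47, 0.55]           # toy 5-dim query vector

res = client.search(
    collection_name="quick_setup",                       # assumed collection name
    data=[query_vector],                                  # a single query vector
    limit=3,                                              # return the 3 most similar entities
    search_params={"metric_type": "IP", "params": {}},   # inner-product similarity
)

for hits in res:
    for hit in hits:
        print(hit["id"], hit["distance"])
```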

@@ -882,7 +882,7 @@ curl --request POST \
-

              AUTOINDEX 可大大拉平 ANN 搜索的学习曲线。但是,随着 Top-K 的增加,搜索结果不一定总是正确的。通过缩小搜索范围、提高搜索结果相关性和搜索结果多样化,Milvus 实现了以下搜索增强功能。

              +

              AUTOINDEX 可大大拉平 ANN 搜索的学习曲线。然而,随着 Top-K 的增加,搜索结果不一定总是正确的。通过缩小搜索范围、提高搜索结果相关性和搜索结果多样化,Milvus 实现了以下搜索增强功能。

              • 过滤搜索

                您可以在搜索请求中包含过滤条件,这样 Milvus 就会在进行 ANN 搜索前进行元数据过滤,将搜索范围从整个 Collections 缩小到只搜索符合指定过滤条件的实体。

                @@ -901,12 +901,12 @@ curl --request POST \​

                有关搜索迭代器的详细信息,请参阅搜索迭代器

              • 全文搜索

                全文搜索是一种在文本数据集中检索包含特定术语或短语的文档,然后根据相关性对结果进行排序的功能。该功能克服了语义搜索的局限性(语义搜索可能会忽略精确的术语),确保您获得最准确且与上下文最相关的结果。此外,它还能接受原始文本输入,自动将文本数据转换为稀疏嵌入,无需手动生成向量嵌入,从而简化了向量搜索。

                -

                有关全文搜索的详细信息,请参阅全文搜索

              • -
              • 关键词匹配

                -

                Milvus 的关键词匹配功能可根据特定术语精确检索文档。该功能主要用于满足特定条件的过滤搜索,并可结合标量过滤来完善查询结果,允许在符合标量标准的向量内进行相似性搜索。

                -

                有关关键字匹配的详细信息,请参阅关键字匹配

              • +

                有关全文搜索的详细信息,请参阅全文搜索

                +
              • 文本匹配

                +

                Milvus 中的文本匹配功能可根据特定术语精确检索文档。该功能主要用于满足特定条件的过滤搜索,并可结合标量过滤来完善查询结果,允许在符合标量标准的向量内进行相似性搜索。

                +

                有关文本匹配的详细信息,请参阅文本匹配

              • 使用 Partition Key

                -

                在元数据过滤中涉及多个标量字段并使用相当复杂的过滤条件可能会影响搜索效率。一旦将标量字段设置为分区关键字,并在搜索请求中使用涉及分区关键字的过滤条件,就能帮助将搜索范围限制在与指定分区关键字值相对应的分区内。

                +

                在元数据过滤中涉及多个标量字段并使用相当复杂的过滤条件可能会影响搜索效率。一旦将标量字段设置为分区键,并在搜索请求中使用涉及分区键的过滤条件,就能帮助将搜索范围限制在与指定分区键值相对应的分区内。

                有关分区键的详细信息,请参阅使用分区键

              • 使用 mmap

                在 Milvus 中,内存映射文件允许将文件内容直接映射到内存中。这一功能提高了内存效率,尤其是在可用内存稀缺但完全加载数据不可行的情况下。这种优化机制可以增加数据容量,同时在一定限度内确保性能;但当数据量超出内存太多时,搜索和查询性能可能会严重下降,因此请根据情况选择打开或关闭该功能。

                diff --git a/tools/cache.json b/tools/cache.json index b6873879f..564dc6b06 100644 --- a/tools/cache.json +++ b/tools/cache.json @@ -77,7 +77,7 @@ "v2.4.x/site/en/getstarted/run-milvus-gpu/install_cluster-helm-gpu.md": "2024-08-30T02:00:15.783Z", "v2.4.x/site/en/getstarted/run-milvus-gpu/install_standalone-docker-compose-gpu.md": "2024-11-25T08:32:28.861Z", "v2.4.x/site/en/getstarted/run-milvus-gpu/prerequisite-gpu.md": "2024-08-30T02:00:15.845Z", - "v2.4.x/site/en/getstarted/run-milvus-k8s/install_cluster-helm.md": "2024-11-18T03:32:00.142Z", + "v2.4.x/site/en/getstarted/run-milvus-k8s/install_cluster-helm.md": "2024-11-28T09:50:52.275Z", "v2.4.x/site/en/getstarted/run-milvus-k8s/install_cluster-milvusoperator.md": "2024-08-30T02:00:16.012Z", "v2.4.x/site/en/getstarted/run-milvus-k8s/prerequisite-helm.md": "2024-08-30T02:00:16.040Z", "v2.4.x/site/en/integrations/integrate_with_airbyte.md": "2024-08-30T02:00:16.102Z", @@ -185,7 +185,6 @@ "v2.4.x/site/en/userGuide/insert-update-delete.md": "2024-10-22T09:21:19.825Z", "v2.4.x/site/en/userGuide/manage-indexes/index-scalar-fields.md": "2024-11-18T03:34:25.465Z", "v2.4.x/site/en/userGuide/manage-indexes/index-vector-fields.md": "2024-11-25T08:33:33.904Z", - "v2.4.x/site/en/userGuide/manage-indexes/index-with-gpu.md": "2024-08-30T02:20:58.498Z", "v2.4.x/site/en/userGuide/manage-partitions.md": "2024-11-18T03:34:38.230Z", "v2.4.x/site/en/userGuide/manage_databases.md": "2024-08-30T02:20:58.747Z", "v2.4.x/site/en/userGuide/search-query-get/get-and-scalar-query.md": "2024-08-30T02:20:58.999Z", @@ -247,7 +246,8 @@ "v2.4.x/site/en/integrations/use_milvus_in_docsgpt.md": "2024-11-18T03:32:38.149Z", "v2.4.x/site/en/integrations/use_milvus_with_sambanova.md": "2024-11-18T03:32:57.671Z", "v2.4.x/site/en/tutorials/funnel_search_with_matryoshka.md": "2024-11-18T03:33:50.166Z", - "v2.4.x/site/en/tutorials/use_ColPali_with_milvus.md": "2024-11-26T09:55:44.915Z" + "v2.4.x/site/en/tutorials/use_ColPali_with_milvus.md": "2024-11-26T09:55:44.915Z", + "v2.4.x/site/en/userGuide/manage-indexes/index-with-gpu.md": "2024-11-28T09:51:07.679Z" }, "v2.3.x": { "v2.3.x/site/en/about/limitations.md": "2024-08-28T10:40:29.333Z", @@ -991,7 +991,7 @@ "v2.5.x/site/en/adminGuide/scale-dependencies.md": "2024-11-27T07:20:24.930Z", "v2.5.x/site/en/adminGuide/scaleout.md": "2024-11-27T07:20:26.060Z", "v2.5.x/site/en/adminGuide/tls.md": "2024-11-27T07:20:27.612Z", - "v2.5.x/site/en/adminGuide/upgrade-pulsar-v3.md": "2024-11-27T07:20:29.558Z", + "v2.5.x/site/en/adminGuide/upgrade-pulsar-v3.md": "2024-11-28T09:46:21.572Z", "v2.5.x/site/en/adminGuide/upgrade_milvus_cluster-docker.md": "2024-11-27T07:20:30.496Z", "v2.5.x/site/en/adminGuide/upgrade_milvus_cluster-helm.md": "2024-11-27T07:20:31.959Z", "v2.5.x/site/en/adminGuide/upgrade_milvus_cluster-operator.md": "2024-11-27T07:20:33.178Z", @@ -1028,10 +1028,10 @@ "v2.5.x/site/en/getstarted/run-milvus-gpu/install_cluster-helm-gpu.md": "2024-11-27T07:21:09.373Z", "v2.5.x/site/en/getstarted/run-milvus-gpu/install_standalone-docker-compose-gpu.md": "2024-11-27T07:21:10.554Z", "v2.5.x/site/en/getstarted/run-milvus-gpu/prerequisite-gpu.md": "2024-11-27T07:21:11.779Z", - "v2.5.x/site/en/getstarted/run-milvus-k8s/install_cluster-helm.md": "2024-11-27T07:21:13.217Z", + "v2.5.x/site/en/getstarted/run-milvus-k8s/install_cluster-helm.md": "2024-11-28T09:46:34.193Z", "v2.5.x/site/en/getstarted/run-milvus-k8s/install_cluster-milvusoperator.md": "2024-11-27T07:21:14.619Z", 
"v2.5.x/site/en/getstarted/run-milvus-k8s/prerequisite-helm.md": "2024-11-27T07:21:15.932Z", - "v2.5.x/site/en/home/home.md": "2024-11-27T07:21:16.415Z", + "v2.5.x/site/en/home/home.md": "2024-11-28T09:46:39.061Z", "v2.5.x/site/en/integrations/RAG_with_pii_and_milvus.md": "2024-11-27T07:21:17.720Z", "v2.5.x/site/en/integrations/apify_milvus_rag.md": "2024-11-27T07:21:19.761Z", "v2.5.x/site/en/integrations/build_RAG_with_milvus_and_fireworks.md": "2024-11-27T07:21:21.144Z", @@ -1130,7 +1130,7 @@ "v2.5.x/site/en/reference/time_sync.md": "2024-11-27T07:23:30.616Z", "v2.5.x/site/en/reference/timestamp.md": "2024-11-27T07:23:31.970Z", "v2.5.x/site/en/reference/users_and_roles.md": "2024-11-27T07:23:33.399Z", - "v2.5.x/site/en/release_notes.md": "2024-11-27T07:23:34.674Z", + "v2.5.x/site/en/release_notes.md": "2024-11-28T09:46:51.328Z", "v2.5.x/site/en/rerankers/rerankers-bge.md": "2024-11-27T07:23:35.658Z", "v2.5.x/site/en/rerankers/rerankers-cohere.md": "2024-11-27T07:23:36.671Z", "v2.5.x/site/en/rerankers/rerankers-cross-encoder.md": "2024-11-27T07:23:37.640Z", @@ -1144,7 +1144,7 @@ "v2.5.x/site/en/tutorials/funnel_search_with_matryoshka.md": "2024-11-27T07:23:48.041Z", "v2.5.x/site/en/tutorials/graph_rag_with_milvus.md": "2024-11-27T07:23:49.687Z", "v2.5.x/site/en/tutorials/hdbscan_clustering_with_milvus.md": "2024-11-27T07:23:51.422Z", - "v2.5.x/site/en/tutorials/hybrid_search_with_milvus.md": "2024-11-27T07:23:52.594Z", + "v2.5.x/site/en/tutorials/hybrid_search_with_milvus.md": "2024-11-28T09:47:02.587Z", "v2.5.x/site/en/tutorials/image_deduplication_system.md": "2024-11-27T07:23:53.533Z", "v2.5.x/site/en/tutorials/image_similarity_search.md": "2024-11-27T07:23:54.589Z", "v2.5.x/site/en/tutorials/movie_recommendation_with_milvus.md": "2024-11-27T07:23:55.933Z", @@ -1162,7 +1162,7 @@ "v2.5.x/site/en/userGuide/collections/drop-collection.md": "2024-11-27T07:24:10.355Z", "v2.5.x/site/en/userGuide/collections/load-and-release.md": "2024-11-27T07:24:11.497Z", "v2.5.x/site/en/userGuide/collections/manage-aliases.md": "2024-11-27T07:24:12.727Z", - "v2.5.x/site/en/userGuide/collections/manage-collections.md": "2024-11-27T07:24:14.163Z", + "v2.5.x/site/en/userGuide/collections/manage-collections.md": "2024-11-28T09:47:14.183Z", "v2.5.x/site/en/userGuide/collections/manage-partitions.md": "2024-11-27T07:24:15.489Z", "v2.5.x/site/en/userGuide/collections/modify-collection.md": "2024-11-27T07:24:16.607Z", "v2.5.x/site/en/userGuide/collections/view-collections.md": "2024-11-27T07:24:17.532Z", @@ -1174,12 +1174,12 @@ "v2.5.x/site/en/userGuide/manage-indexes/bitmap.md": "2024-11-27T07:24:25.526Z", "v2.5.x/site/en/userGuide/manage-indexes/index-scalar-fields.md": "2024-11-27T07:24:26.954Z", "v2.5.x/site/en/userGuide/manage-indexes/index-vector-fields.md": "2024-11-27T07:24:28.595Z", - "v2.5.x/site/en/userGuide/manage-indexes/index-with-gpu.md": "2024-11-27T07:24:29.974Z", + "v2.5.x/site/en/userGuide/manage-indexes/index-with-gpu.md": "2024-11-28T09:47:26.276Z", "v2.5.x/site/en/userGuide/manage_databases.md": "2024-11-27T07:24:31.263Z", "v2.5.x/site/en/userGuide/schema/analyzer/analyzer/chinese-analyzer.md": "2024-11-27T07:24:32.248Z", "v2.5.x/site/en/userGuide/schema/analyzer/analyzer/english-analyzer.md": "2024-11-27T07:24:33.402Z", "v2.5.x/site/en/userGuide/schema/analyzer/analyzer/standard-analyzer.md": "2024-11-27T07:24:34.493Z", - "v2.5.x/site/en/userGuide/schema/analyzer/analyzer-overview.md": "2024-11-27T07:24:35.967Z", + "v2.5.x/site/en/userGuide/schema/analyzer/analyzer-overview.md": 
"2024-11-28T09:47:38.707Z", "v2.5.x/site/en/userGuide/schema/analyzer/filter/alphanumonly-filter.md": "2024-11-27T07:24:36.996Z", "v2.5.x/site/en/userGuide/schema/analyzer/filter/ascii-folding-filter.md": "2024-11-27T07:24:38.011Z", "v2.5.x/site/en/userGuide/schema/analyzer/filter/cnalphanumonly-filter.md": "2024-11-27T07:24:39.038Z", @@ -1201,23 +1201,23 @@ "v2.5.x/site/en/userGuide/schema/primary-field.md": "2024-11-27T07:24:59.639Z", "v2.5.x/site/en/userGuide/schema/schema-hands-on.md": "2024-11-27T07:25:02.048Z", "v2.5.x/site/en/userGuide/schema/schema.md": "2024-11-27T07:25:03.727Z", - "v2.5.x/site/en/userGuide/schema/sparse_vector.md": "2024-11-27T07:25:05.452Z", + "v2.5.x/site/en/userGuide/schema/sparse_vector.md": "2024-11-28T09:47:52.969Z", "v2.5.x/site/en/userGuide/schema/string.md": "2024-11-27T07:25:06.902Z", "v2.5.x/site/en/userGuide/schema/use-json-fields.md": "2024-11-27T07:25:08.435Z", - "v2.5.x/site/en/userGuide/search-query-get/boolean.md": "2024-11-27T07:25:09.809Z", + "v2.5.x/site/en/userGuide/search-query-get/boolean.md": "2024-11-28T09:48:02.808Z", "v2.5.x/site/en/userGuide/search-query-get/clustering-compaction.md": "2024-11-27T07:25:11.309Z", "v2.5.x/site/en/userGuide/search-query-get/consistency.md": "2024-11-27T07:25:12.617Z", "v2.5.x/site/en/userGuide/search-query-get/filtered-search.md": "2024-11-27T07:25:13.813Z", - "v2.5.x/site/en/userGuide/search-query-get/full-text-search.md": "2024-11-27T07:25:15.257Z", + "v2.5.x/site/en/userGuide/search-query-get/full-text-search.md": "2024-11-28T09:48:15.179Z", "v2.5.x/site/en/userGuide/search-query-get/get-and-scalar-query.md": "2024-11-27T07:25:16.835Z", "v2.5.x/site/en/userGuide/search-query-get/grouping-search.md": "2024-11-27T07:25:18.446Z", - "v2.5.x/site/en/userGuide/search-query-get/keyword-match.md": "2024-11-27T07:25:19.724Z", + "v2.5.x/site/en/userGuide/search-query-get/keyword-match.md": "2024-11-28T09:48:27.368Z", "v2.5.x/site/en/userGuide/search-query-get/metric.md": "2024-11-27T07:25:21.161Z", "v2.5.x/site/en/userGuide/search-query-get/mmap.md": "2024-11-27T07:25:22.770Z", - "v2.5.x/site/en/userGuide/search-query-get/multi-vector-search.md": "2024-11-27T07:25:24.662Z", + "v2.5.x/site/en/userGuide/search-query-get/multi-vector-search.md": "2024-11-28T09:48:45.103Z", "v2.5.x/site/en/userGuide/search-query-get/range-search.md": "2024-11-27T07:25:25.809Z", "v2.5.x/site/en/userGuide/search-query-get/reranking.md": "2024-11-27T07:25:27.331Z", - "v2.5.x/site/en/userGuide/search-query-get/single-vector-search.md": "2024-11-27T07:25:29.015Z", + "v2.5.x/site/en/userGuide/search-query-get/single-vector-search.md": "2024-11-28T09:48:59.636Z", "v2.5.x/site/en/userGuide/search-query-get/use-partition-key.md": "2024-11-27T07:25:30.235Z", "v2.5.x/site/en/userGuide/search-query-get/with-iterators.md": "2024-11-27T07:25:31.386Z", "v2.5.x/site/en/userGuide/tools/birdwatcher_install_guides.md": "2024-11-27T07:25:32.349Z", @@ -1233,6 +1233,6 @@ "v2.5.x/site/en/userGuide/tools/milvus_backup_api.md": "2024-11-27T07:25:46.624Z", "v2.5.x/site/en/userGuide/tools/milvus_backup_cli.md": "2024-11-27T07:25:47.846Z", "v2.5.x/site/en/userGuide/tools/milvus_backup_overview.md": "2024-11-27T07:25:49.181Z", - "v2.5.x/site/en/menuStructure/en.json": "2024-11-27T07:27:48.374Z" + "v2.5.x/site/en/menuStructure/en.json": "2024-11-28T10:08:24.394Z" } } \ No newline at end of file diff --git a/tools/generate-en.js b/tools/generate-en.js index dde8f2205..f21d387ea 100644 --- a/tools/generate-en.js +++ b/tools/generate-en.js @@ -11,114 +11,114 @@ 
import fs from "fs"; import matter from "gray-matter"; import "dotenv/config"; import { - traverseDirectory, - mkdir, - remarkableToHtml, - generateMenuStructureLocales, - CACHE_FILE, + traverseDirectory, + mkdir, + remarkableToHtml, + generateMenuStructureLocales, + CACHE_FILE, } from "./utils.js"; import { remarkToHtml } from "./remark.js"; -const VERSIONS = ["v2.5.x"]; +const VERSIONS = ["v2.5.x", "v2.4.x"]; const sourceFilePath = "site/en"; const sourceLang = "en"; const targetLang = "en"; async function bootstrap() { - console.log("Starting generate en docs..."); + console.log("Starting generate en docs..."); - let newFilesFound = false; - const cache = fs.existsSync(CACHE_FILE) - ? JSON.parse(fs.readFileSync(CACHE_FILE, "utf8") || "{}") - : {}; + let newFilesFound = false; + const cache = fs.existsSync(CACHE_FILE) + ? JSON.parse(fs.readFileSync(CACHE_FILE, "utf8") || "{}") + : {}; - for (let version of VERSIONS) { - /** - * step 1: get all md files by version - */ - const sourceDirectory = `${version}/${sourceFilePath}`; - const mdFiles = traverseDirectory(sourceDirectory); + for (let version of VERSIONS) { + /** + * step 1: get all md files by version + */ + const sourceDirectory = `${version}/${sourceFilePath}`; + const mdFiles = traverseDirectory(sourceDirectory); - console.log(`--> Found ${mdFiles.length} files...`); + console.log(`--> Found ${mdFiles.length} files...`); - /** - * step 2: filter out not deprecated files - */ - const updatedFiles = mdFiles.filter((path) => { - const markdown = fs.readFileSync(path, "utf8"); - const { data = {} } = matter(markdown); - const deprecated = data.deprecate; - return !deprecated; - }); + /** + * step 2: filter out not deprecated files + */ + const updatedFiles = mdFiles.filter((path) => { + const markdown = fs.readFileSync(path, "utf8"); + const { data = {} } = matter(markdown); + const deprecated = data.deprecate; + return !deprecated; + }); - if (!newFilesFound) { - newFilesFound = updatedFiles.some((path) => { - const isUnExist = !cache[version] || !cache[version][path]; - if (isUnExist) { - console.info(`-> New file found:`, path); - } - return isUnExist; - }); - } + if (!newFilesFound) { + newFilesFound = updatedFiles.some((path) => { + const isUnExist = !cache[version] || !cache[version][path]; + if (isUnExist) { + console.info(`-> New file found:`, path); + } + return isUnExist; + }); + } - for (let path of updatedFiles) { - /** - * step 3: read & handle file content - */ - const markdown = fs.readFileSync(path, "utf8"); - const { data = {}, content } = matter(markdown); - const isMdx = path.endsWith(".mdx"); + for (let path of updatedFiles) { + /** + * step 3: read & handle file content + */ + const markdown = fs.readFileSync(path, "utf8"); + const { data = {}, content } = matter(markdown); + const isMdx = path.endsWith(".mdx"); - /** - * step 4: convert md or mdx to html - */ - const params = { content, lang: targetLang, version }; - const { - html: htmlContent, - codeList, - headingContent, - anchorList, - } = isMdx ? await remarkToHtml(params) : await remarkableToHtml(params); + /** + * step 4: convert md or mdx to html + */ + const params = { content, lang: targetLang, version }; + const { + html: htmlContent, + codeList, + headingContent, + anchorList, + } = isMdx ? 
await remarkToHtml(params) : await remarkableToHtml(params); - const wholeContent = matter.stringify(htmlContent, data); + const wholeContent = matter.stringify(htmlContent, data); - /** - * step 5: write to md file and json file - */ - const targetFilePath = - "localization/" + path.replace(sourceLang, `${targetLang}`); - mkdir(targetFilePath); - fs.writeFileSync(targetFilePath, wholeContent, "utf8"); - fs.writeFileSync( - targetFilePath.replace(".md", ".json"), - JSON.stringify({ codeList, headingContent, anchorList }), - "utf8" - ); - console.info( - `-> ${targetLang.toUpperCase()}: file translated successfully:`, - targetFilePath - ); - } - console.log("--> Total files:", updatedFiles.length); - } + /** + * step 5: write to md file and json file + */ + const targetFilePath = + "localization/" + path.replace(sourceLang, `${targetLang}`); + mkdir(targetFilePath); + fs.writeFileSync(targetFilePath, wholeContent, "utf8"); + fs.writeFileSync( + targetFilePath.replace(".md", ".json"), + JSON.stringify({ codeList, headingContent, anchorList }), + "utf8" + ); + console.info( + `-> ${targetLang.toUpperCase()}: file translated successfully:`, + targetFilePath + ); + } + console.log("--> Total files:", updatedFiles.length); + } - /** - * step 6: generate menu structure locales - */ - await generateMenuStructureLocales({ - versions: VERSIONS, - useCache: false, - targetLangs: [targetLang], - }); + /** + * step 6: generate menu structure locales + */ + await generateMenuStructureLocales({ + versions: VERSIONS, + useCache: false, + targetLangs: [targetLang], + }); - /** - * step 7: translate en docs to other languages if new files added - */ - if (newFilesFound) { - import("./translate.js"); - } else { - console.log("No new files found, skip translation."); - } + /** + * step 7: translate en docs to other languages if new files added + */ + if (newFilesFound) { + import("./translate.js"); + } else { + console.log("No new files found, skip translation."); + } } bootstrap(); diff --git a/tools/translate.js b/tools/translate.js index 5c7997fc5..6e683c4ab 100644 --- a/tools/translate.js +++ b/tools/translate.js @@ -23,7 +23,7 @@ import { import { remarkToHtml } from "./remark.js"; const MOCK_TRANSLATE = false; -const VERSIONS = ["v2.5.x"]; +const VERSIONS = ["v2.5.x", "v2.4.x"]; const sourceFilePath = "site/en"; const sourceLang = "en"; const targetLangs = ["zh", "ja", "ko", "fr", "de", "it", "pt", "es"];