Quantcast
Channel: Active questions tagged rest - Stack Overflow
Viewing all articles
Browse latest Browse all 3672

Cannot change the default indexType from AutoIndex to HNSW when creating an index in the Milvus collection via the RESTful API

$
0
0

I have successfully created a Milvus collection and inserted data directly without any noticeable challenges. Now, I want to create a Milvus collection using the RESTful API as detailed in the documentation at Create Collection.

I utilized the following code, which I adapted from the provided link, to create the collection, configure it, insert data, and then create an index.

import json
import time

import numpy as np
import requests

# Headers sent with every request; "YOUR_API_KEY" is a placeholder.
_HEADERS = {
    "x-api-key": "YOUR_API_KEY",
    "Content-Type": "application/json",
}


def _post(server_address: str, path: str, body: dict) -> dict:
    """POST ``body`` as JSON to ``server_address + path`` and return the decoded JSON reply."""
    response = requests.post(
        f"{server_address}{path}", headers=_HEADERS, data=json.dumps(body)
    )
    return response.json()


def generate_random_vector():
    """Return a random 512-element vector scaled to unit L2 norm."""
    float_array = np.random.uniform(-1, 1, 512)
    return float_array / np.linalg.norm(float_array, ord=2)


def create_collection(collection_name: str, server_address: str):
    """Create ``collection_name`` through the REST API and return the JSON reply.

    NOTE(review): the describe-index reply shown later in this post reports
    ``indexType: AUTOINDEX`` for a collection created with this payload, i.e.
    the ``"indexType": "HNSW"`` / ``"auto_index": False`` keys did not take
    effect. The payload is kept exactly as posted.
    """
    return _post(
        server_address,
        "/v2/vectordb/collections/create",
        {
            "collectionName": collection_name,
            "dimension": 512,
            "metricType": "COSINE",
            "vectorFieldName": "Embedding_Features",
            "primaryFieldName": "IDs_Features",
            "idType": "Int64",
            "indexType": "HNSW",
            "primaryKey": {"name": "IDs_Features", "type": "INT64", "primaryKey": True},
            "vectorField": {
                "name": "Embedding_Features",
                "type": "FLOAT_VECTOR",
                "primaryKey": False,
            },
            "indexes": [
                {
                    "fieldName": "Embedding_Features",
                    "indexName": "Embedding_Features",
                    "metricType": "COSINE",
                }
            ],
            "auto_index": False,
        },
    )


def validate_collection(collection_name: str, server_address: str):
    """Return the describe-collection reply for ``collection_name``."""
    return _post(
        server_address,
        "/v2/vectordb/collections/describe",
        {"collectionName": collection_name},
    )


def drop_collection(collection_name: str, server_address: str):
    """Drop ``collection_name`` and return the JSON reply."""
    return _post(
        server_address,
        "/v2/vectordb/collections/drop",
        {"collectionName": collection_name},
    )


def insert_data_into_collection(
    collection_name: str,
    start_id: int,
    n: int,
    ids_name: str,
    features_name: str,
    server_address: str,
):
    """Insert ``n`` random vectors with ids ``start_id + 1`` .. ``start_id + n``.

    ``ids_name`` and ``features_name`` are the primary-key and vector field
    names used as keys of each inserted row.
    """
    data = [
        {
            ids_name: start_id + i + 1,
            # tolist() yields plain Python floats, which json.dumps can encode.
            features_name: generate_random_vector().tolist(),
        }
        for i in range(n)
    ]
    return _post(
        server_address,
        "/v2/vectordb/entities/insert",
        {"data": data, "collectionName": collection_name},
    )


def describe_collection(collection_name: str, server_address: str):
    """Return the describe-collection reply for ``collection_name``."""
    return _post(
        server_address,
        "/v2/vectordb/collections/describe",
        {"collectionName": collection_name},
    )


def describe_index(collection_name: str, server_address: str):
    """Return the describe-index reply for the ``Embedding_Features`` index."""
    return _post(
        server_address,
        "/v2/vectordb/indexes/describe",
        {"indexName": "Embedding_Features", "collectionName": collection_name},
    )


def create_index(collection_name: str, server_address: str):
    """Request an HNSW index on ``Embedding_Features`` and return the JSON reply."""
    return _post(
        server_address,
        "/v2/vectordb/indexes/create",
        {
            "collectionName": collection_name,
            "indexParams": [
                {
                    "metricType": "COSINE",
                    "index_type": "HNSW",
                    "fieldName": "Embedding_Features",
                    "params": {"M": 128, "efConstruction": 256},
                }
            ],
        },
    )

When I attempted to create the collection using:

# Create the collection; collection_name and server_address must already be defined.
create_collection(collection_name=collection_name, server_address=server_address)

I received the following message:

{'code': 0, 'data': {}}

Subsequently, when I described the collection:

# Fetch the schema/metadata of the collection that was just created.
describe_collection(collection_name=collection_name, server_address=server_address)

I got this response:

{'code': 0,'data': {'aliases': [],'autoId': False,'collectionID': 454176377651168636,'collectionName': 'collection1','consistencyLevel': 'Bounded','description': '','enableDynamicField': True,'fields': [{'autoId': False,'clusteringKey': False,'description': '','id': 100,'name': 'IDs_Features','nullable': False,'partitionKey': False,'primaryKey': True,'type': 'Int64'},   {'autoId': False,'clusteringKey': False,'description': '','id': 101,'name': 'Embedding_Features','nullable': False,'params': [{'key': 'dim', 'value': '512'}],'partitionKey': False,'primaryKey': False,'type': 'FloatVector'}],'functions': None,'indexes': [{'fieldName': 'Embedding_Features','indexName': 'Embedding_Features','metricType': 'COSINE'}],'load': 'LoadStateLoading','partitionsNum': 1,'properties': [],'shardsNum': 1},'message': ''}

When I described the collection index:

# Fetch the description of the "Embedding_Features" index.
describe_index(collection_name=collection_name, server_address=server_address)

I received this message:

{'code': 0,'data': [{'failReason': '','fieldName': 'Embedding_Features','indexName': 'Embedding_Features','indexState': 'Finished','indexType': 'AUTOINDEX','indexedRows': 0,'metricType': 'COSINE','pendingRows': 0,'totalRows': 0}]}

This indicated that the index type was set to AUTOINDEX, despite my configuration specifying HNSW, which caused issues with indexing.

Afterwards, I inserted 10,000 rows into the collection:

number_vectors = 10000
batch_size = 500

# Insert the vectors in fixed-size batches, pausing one second between requests.
for i in range(0, number_vectors, batch_size):
    response = insert_data_into_collection(
        collection_name=collection_name,
        start_id=i,
        n=batch_size,
        ids_name="IDs_Features",
        features_name="Embedding_Features",
        server_address=server_address,
    )
    # Error replies carry "code"/"message" but no "data" key, so look the
    # count up defensively instead of indexing and raising KeyError.
    inserted = (response.get("data") or {}).get("insertCount")
    if inserted == batch_size:
        # The insert helper assigns ids start_id + 1 .. start_id + n.
        print(f"Great! inserted ids {i + 1} to {i + batch_size} successfully")
    else:
        print(f"There are some errors for {i}")
    time.sleep(1)

Finally, when I tried to create the index:

# Request the HNSW index now that the rows have been inserted.
create_index(collection_name=collection_name, server_address=server_address)

I encountered the following error:

{'code': 65535, 'message': 'only metric type can be passed when use AutoIndex'}

Is it possible for me to set the indexType to HNSW and either ignore or remove the default AutoIndex value?

I then adjusted the create_index function, moving "index_type" inside "params":

def create_index(collection_name: str, server_address: str):
    """Request an HNSW index on ``Embedding_Features`` with ``index_type`` nested in ``params``.

    Posts to ``/v2/vectordb/indexes/create`` and returns the decoded JSON reply.
    """
    endpoint = f"{server_address}/v2/vectordb/indexes/create"
    index_spec = {
        "metricType": "COSINE",
        "fieldName": "Embedding_Features",
        "params": {"M": 128, "efConstruction": 256, "index_type": "HNSW"},
    }
    body = json.dumps({"collectionName": collection_name, "indexParams": [index_spec]})
    # Example: {'metric_type': 'IP', 'index_type': 'HNSW', 'params': {'nlist': 1024, 'efConstruction': 40, 'M': 1024}}
    request_headers = {"x-api-key": "YOUR_API_KEY", "Content-Type": "application/json"}
    reply = requests.post(endpoint, headers=request_headers, data=body)
    return json.loads(reply.text)

I encountered the following error:

{'code': 65535,'message': 'CreateIndex failed: at most one distinct index is allowed per field'}

Viewing all articles
Browse latest Browse all 3672

Trending Articles



<script src="https://jsc.adskeeper.com/r/s/rssing.com.1596347.js" async> </script>