insert api的数据结构
一个完整的insert例子:
import numpy as np
from pymilvus import (
connections,
FieldSchema, CollectionSchema, DataType,
Collection,
)
# Demo parameters: number of entities to insert and the vector dimension.
num_entities = 10
dim = 3

print("start connecting to Milvus")
connections.connect("default", host="192.168.230.71", port="19530")

# Schema: an INT64 primary key declared with auto_id=True, an INT64 scalar
# column, and a float-vector column of `dim` dimensions.
fields = [
    FieldSchema(name="pk", dtype=DataType.INT64, is_primary=True, auto_id=True),
    FieldSchema(name="book_id", dtype=DataType.INT64),
    FieldSchema(name="embeddings", dtype=DataType.FLOAT_VECTOR, dim=dim),
]
schema = CollectionSchema(fields, "hello_milvus is the simplest demo to introduce the APIs")

print("Create collection `hello_milvus`")
hello_milvus = Collection(
    "hello_milvus",
    schema,
    consistency_level="Eventually",
    shards_num=1,
)

print("Start inserting entities")
# Fixed seed so the random vectors are reproducible between runs.
rng = np.random.default_rng(seed=19530)
# Column-based payload: only book_id and embeddings are supplied here;
# the pk field is declared with auto_id=True in the schema above.
entities = [
    list(range(num_entities)),  # field book_id
    rng.random((num_entities, dim)),  # field embeddings
]
insert_result = hello_milvus.insert(entities)
hello_milvus.flush()
InsertRequest数据结构:
// InsertRequest is the request structure used by the insert API.
type InsertRequest struct {
Base *commonpb.MsgBase
// NOTE(review): DbName/CollectionName/PartitionName presumably identify
// the insert target — confirm against the proto definition.
DbName string
CollectionName string
PartitionName string
// FieldsData holds one element per inserted column, in insertion order
// (e.g. an insert with 3 columns yields a slice of length 3).
FieldsData []*schemapb.FieldData
HashKeys []uint32
// NOTE(review): NumRows presumably is the number of rows being inserted — confirm.
NumRows uint32
// NOTE(review): the XXX_* fields look like protobuf-generated bookkeeping — confirm.
XXX_NoUnkeyedLiteral struct{}
XXX_unrecognized []byte
XXX_sizecache int32
}
FieldsData是一个数组,每个元素对应insert的一列,元素顺序与插入时列的顺序一致;例如insert有3列,则数组长度为3。
FieldData数据结构:
// FieldData carries the data of a single column of an insert.
// The payload lives in Field and is either scalar or vector data.
type FieldData struct {
Type DataType
// NOTE(review): FieldName presumably is the column name from the schema — confirm.
FieldName string
// Types that are valid to be assigned to Field:
//
// *FieldData_Scalars
// *FieldData_Vectors
Field isFieldData_Field
FieldId int64
IsDynamic bool
// NOTE(review): the XXX_* fields look like protobuf-generated bookkeeping — confirm.
XXX_NoUnkeyedLiteral struct{}
XXX_unrecognized []byte
XXX_sizecache int32
}
isFieldData_Field是一个接口:
// isFieldData_Field is the interface type of FieldData.Field. Its only
// method is unexported; the two implementations are FieldData_Scalars
// and FieldData_Vectors.
type isFieldData_Field interface {
isFieldData_Field()
}
它有2个实现:FieldData_Scalars和FieldData_Vectors。
// FieldData_Scalars is the FieldData.Field variant that stores scalar data.
type FieldData_Scalars struct {
Scalars *ScalarField
}
// FieldData_Vectors is the FieldData.Field variant that stores vector data.
type FieldData_Vectors struct {
Vectors *VectorField
}
FieldData_Scalars存储标量数据,FieldData_Vectors存储向量数据。
ScalarField数据结构:
// ScalarField holds the values of a scalar column. The concrete element
// type is selected by which variant is assigned to Data.
type ScalarField struct {
// Types that are valid to be assigned to Data:
//
// *ScalarField_BoolData
// *ScalarField_IntData
// *ScalarField_LongData
// *ScalarField_FloatData
// *ScalarField_DoubleData
// *ScalarField_StringData
// *ScalarField_BytesData
// *ScalarField_ArrayData
// *ScalarField_JsonData
Data isScalarField_Data
// NOTE(review): the XXX_* fields look like protobuf-generated bookkeeping — confirm.
XXX_NoUnkeyedLiteral struct{}
XXX_unrecognized []byte
XXX_sizecache int32
}
isScalarField_Data是一个接口。
// isScalarField_Data is the interface type of ScalarField.Data. Its only
// method is unexported; it is implemented by the nine ScalarField_*Data
// variant types.
type isScalarField_Data interface {
isScalarField_Data()
}
isScalarField_Data的实现有9个:
- ScalarField_BoolData
- ScalarField_IntData
- ScalarField_LongData
- ScalarField_FloatData
- ScalarField_DoubleData
- ScalarField_StringData
- ScalarField_BytesData
- ScalarField_ArrayData
- ScalarField_JsonData
以ScalarField_LongData为例:
// ScalarField_LongData is the ScalarField.Data variant that wraps a LongArray.
type ScalarField_LongData struct {
LongData *LongArray
}
// LongArray stores int64 values in its Data slice.
type LongArray struct {
Data []int64
// NOTE(review): the XXX_* fields look like protobuf-generated bookkeeping — confirm.
XXX_NoUnkeyedLiteral struct{}
XXX_unrecognized []byte
XXX_sizecache int32
}
VectorField数据结构:
// VectorField holds the values of a vector column. The concrete vector
// encoding is selected by which variant is assigned to Data.
type VectorField struct {
// NOTE(review): Dim presumably is the dimension of each vector — confirm.
Dim int64
// Types that are valid to be assigned to Data:
//
// *VectorField_FloatVector
// *VectorField_BinaryVector
// *VectorField_Float16Vector
Data isVectorField_Data
// NOTE(review): the XXX_* fields look like protobuf-generated bookkeeping — confirm.
XXX_NoUnkeyedLiteral struct{}
XXX_unrecognized []byte
XXX_sizecache int32
}
isVectorField_Data是一个接口。
// isVectorField_Data is the interface type of VectorField.Data. Its only
// method is unexported; the three implementations are
// VectorField_FloatVector, VectorField_BinaryVector and
// VectorField_Float16Vector.
type isVectorField_Data interface {
isVectorField_Data()
}
isVectorField_Data有3种实现:
- VectorField_FloatVector
- VectorField_BinaryVector
- VectorField_Float16Vector
以VectorField_FloatVector为例:
// VectorField_FloatVector is the VectorField.Data variant that wraps a FloatArray.
type VectorField_FloatVector struct {
FloatVector *FloatArray
}
// FloatArray stores float32 values in its Data slice. When used for a
// float-vector column the vectors are flattened into this one slice,
// e.g. 10 vectors of dimension 3 give a Data slice of length 30.
type FloatArray struct {
Data []float32
// NOTE(review): the XXX_* fields look like protobuf-generated bookkeeping — confirm.
XXX_NoUnkeyedLiteral struct{}
XXX_unrecognized []byte
XXX_sizecache int32
}
案例
向hello_milvus插入10个3维向量。
# Build a column-based payload of 10 entities with 3-dimensional vectors
# and insert it into the hello_milvus collection.
num_entities = 10
dim = 3
rng = np.random.default_rng(seed=19530)
entities = [
    list(range(num_entities)),
    rng.random((num_entities, dim)),
]
insert_result = hello_milvus.insert(entities)
FloatVector.Data是一个长度为30的float32数组:插入的是10个3维向量,每个向量由3个float32组成,这10个向量在这里被依次展开(拼接)成了一个一维数组。