Commit 654f62a4 authored by Wade's avatar Wade

add user field

parent 8864a9d6
......@@ -23,13 +23,13 @@ import (
"errors"
"fmt"
"os"
"strings"
"sync"
"github.com/firebase/genkit/go/ai"
"github.com/firebase/genkit/go/genkit"
"github.com/milvus-io/milvus-sdk-go/v2/client"
"github.com/milvus-io/milvus-sdk-go/v2/entity"
"github.com/wade-liwei/agentchat/util"
)
// The provider used in the registry.
......@@ -266,198 +266,331 @@ type docStore struct {
// }
// newDocStore creates a docStore for the collection described by cfg.
//
// If the collection does not exist yet, it is created with three fields: the
// float vector (vectorField, dimension cfg.Dimension), the text (textField, a
// VARCHAR primary key) and the metadata (metadataField, JSON). An HNSW index
// using the L2 metric is then built on the vector field. In every case the
// collection is loaded afterwards and cfg.EmbedderOptions is normalized to a
// map[string]interface{} (an empty map when no options were supplied).
//
// An error is returned when m.client is nil (milvus.Init was not called), when
// cfg is nil, when any Milvus call fails, or when cfg.EmbedderOptions is
// non-nil but is not a map[string]interface{}. Errors from the Milvus client
// are wrapped with %w so callers can inspect them with errors.Is / errors.As.
func (m *Milvus) newDocStore(ctx context.Context, cfg *CollectionConfig) (*docStore, error) {
	if m.client == nil {
		return nil, errors.New("milvus.Init not called")
	}
	if cfg == nil {
		// Fail with an error instead of panicking on the first cfg dereference.
		return nil, errors.New("milvus: collection config is nil")
	}

	// Check/create collection.
	exists, err := m.client.HasCollection(ctx, cfg.Collection)
	if err != nil {
		return nil, fmt.Errorf("failed to check collection %q: %w", cfg.Collection, err)
	}
	if !exists {
		// Define schema with textField as primary key for unique constraint.
		schema := &entity.Schema{
			CollectionName: cfg.Collection,
			Fields: []*entity.Field{
				{
					Name:     vectorField,
					DataType: entity.FieldTypeFloatVector,
					TypeParams: map[string]string{
						"dim": fmt.Sprintf("%d", cfg.Dimension),
					},
				},
				{
					Name:       textField,
					DataType:   entity.FieldTypeVarChar,
					PrimaryKey: true, // Enforce unique constraint on text field
					TypeParams: map[string]string{
						"max_length": "65535", // Maximum length for VARCHAR, adjust if needed
					},
				},
				{
					Name:     metadataField,
					DataType: entity.FieldTypeJSON,
				},
			},
		}

		if err := m.client.CreateCollection(ctx, schema, entity.DefaultShardNumber); err != nil {
			return nil, fmt.Errorf("failed to create collection %q: %w", cfg.Collection, err)
		}

		// Create HNSW index.
		index, err := entity.NewIndexHNSW(
			entity.L2,
			8,  // M
			96, // efConstruction
		)
		if err != nil {
			return nil, fmt.Errorf("entity.NewIndexHNSW: %w", err)
		}

		if err := m.client.CreateIndex(ctx, cfg.Collection, vectorField, index, false); err != nil {
			return nil, fmt.Errorf("failed to create index: %w", err)
		}
	}

	// Load collection.
	if err := m.client.LoadCollection(ctx, cfg.Collection, false); err != nil {
		return nil, fmt.Errorf("failed to load collection %q: %w", cfg.Collection, err)
	}

	// Convert EmbedderOptions to map[string]interface{}.
	embedderOptions := make(map[string]interface{})
	if cfg.EmbedderOptions != nil {
		opts, ok := cfg.EmbedderOptions.(map[string]interface{})
		if !ok {
			return nil, fmt.Errorf("EmbedderOptions must be a map[string]interface{}, got %T", cfg.EmbedderOptions)
		}
		embedderOptions = opts
	}

	return &docStore{
		client:          m.client,
		collection:      cfg.Collection,
		dimension:       cfg.Dimension,
		embedder:        cfg.Embedder,
		embedderOptions: embedderOptions,
	}, nil
}
// // Load collection.
// err = m.client.LoadCollection(ctx, cfg.Collection, false)
// if err != nil {
// return nil, fmt.Errorf("failed to load collection %q: %v", cfg.Collection, err)
// }
// Indexer looks up, in the given Genkit instance, the indexer registered for
// collection under this package's provider name and returns it.
func Indexer(g *genkit.Genkit, collection string) ai.Indexer {
	indexer := genkit.LookupIndexer(g, provider, collection)
	return indexer
}
// // Convert EmbedderOptions to map[string]interface{}.
// var embedderOptions map[string]interface{}
// if cfg.EmbedderOptions != nil {
// opts, ok := cfg.EmbedderOptions.(map[string]interface{})
// if !ok {
// return nil, fmt.Errorf("EmbedderOptions must be a map[string]interface{}, got %T", cfg.EmbedderOptions)
// }
// embedderOptions = opts
// } else {
// embedderOptions = make(map[string]interface{})
// }
// Retriever looks up, in the given Genkit instance, the retriever registered
// for collection under this package's provider name and returns it.
func Retriever(g *genkit.Genkit, collection string) ai.Retriever {
	retriever := genkit.LookupRetriever(g, provider, collection)
	return retriever
}
// return &docStore{
// client: m.client,
// collection: cfg.Collection,
// dimension: cfg.Dimension,
// embedder: cfg.Embedder,
// embedderOptions: embedderOptions,
// }, nil
// }
/*
Updates and deletes are rarely needed.
*/
// Index implements the Indexer.Index method.
//
// It embeds all documents of req with the store's embedder (passing
// ds.embedderOptions) and inserts one row per document into the Milvus
// collection. Each row carries the embedding, the concatenation of the
// document's text parts, and the document metadata (an empty map when the
// document has none). A request without documents is a no-op.
//
// An error is returned when embedding fails, when the embedder returns a
// different number of embeddings than there are documents, or when the insert
// into Milvus fails.
func (ds *docStore) Index(ctx context.Context, req *ai.IndexerRequest) error {
	if len(req.Documents) == 0 {
		return nil
	}

	// Embed documents.
	ereq := &ai.EmbedRequest{
		Input:   req.Documents,
		Options: ds.embedderOptions,
	}
	eres, err := ds.embedder.Embed(ctx, ereq)
	if err != nil {
		return fmt.Errorf("milvus index embedding failed: %w", err)
	}

	// Validate embedding count matches document count; rows are paired by index below.
	if len(eres.Embeddings) != len(req.Documents) {
		return fmt.Errorf("mismatch: got %d embeddings for %d documents", len(eres.Embeddings), len(req.Documents))
	}

	// Prepare row-based data.
	rows := make([]interface{}, 0, len(eres.Embeddings))
	for i, emb := range eres.Embeddings {
		doc := req.Documents[i]

		// Only text parts contribute to the stored text; other parts are skipped.
		var sb strings.Builder
		for _, p := range doc.Content {
			if p.IsText() {
				sb.WriteString(p.Text)
			}
		}

		metadata := doc.Metadata
		if metadata == nil {
			metadata = make(map[string]interface{})
		}

		// Key the row by the same field-name constants the collection schema
		// is created with, so the insert cannot drift from the schema.
		rows = append(rows, map[string]interface{}{
			vectorField:   emb.Embedding, // []float32
			textField:     sb.String(),
			metadataField: metadata, // JSON-compatible map
		})
	}

	// Insert rows into Milvus (default partition).
	if _, err := ds.client.InsertRows(ctx, ds.collection, "", rows); err != nil {
		return fmt.Errorf("milvus insert rows failed: %w", err)
	}

	return nil
}
// // Load collection.
// err = m.client.LoadCollection(ctx, cfg.Collection, false)
// if err != nil {
// return nil, fmt.Errorf("failed to load collection %q: %v", cfg.Collection, err)
// }
// // Convert EmbedderOptions to map[string]interface{}.
// var embedderOptions map[string]interface{}
// if cfg.EmbedderOptions != nil {
// opts, ok := cfg.EmbedderOptions.(map[string]interface{})
// if !ok {
// return nil, fmt.Errorf("EmbedderOptions must be a map[string]interface{}, got %T", cfg.EmbedderOptions)
// }
// embedderOptions = opts
// } else {
// embedderOptions = make(map[string]interface{})
// }
// return &docStore{
// client: m.client,
// collection: cfg.Collection,
// dimension: cfg.Dimension,
// embedder: cfg.Embedder,
// embedderOptions: embedderOptions,
// }, nil
// }
// // Indexer returns the indexer for a collection.
// func Indexer(g *genkit.Genkit, collection string) ai.Indexer {
// return genkit.LookupIndexer(g, provider, collection)
// }
// // Retriever returns the retriever for a collection.
// func Retriever(g *genkit.Genkit, collection string) ai.Retriever {
// return genkit.LookupRetriever(g, provider, collection)
// }
// /*
// 更新 删除 很少用到;
// */
// // Index implements the Indexer.Index method.
// func (ds *docStore) Index(ctx context.Context, req *ai.IndexerRequest) error {
// if len(req.Documents) == 0 {
// return nil
// }
// // Embed documents.
// ereq := &ai.EmbedRequest{
// Input: req.Documents,
// Options: ds.embedderOptions,
// }
// eres, err := ds.embedder.Embed(ctx, ereq)
// if err != nil {
// return fmt.Errorf("milvus index embedding failed: %w", err)
// }
// // Validate embedding count matches document count.
// if len(eres.Embeddings) != len(req.Documents) {
// return fmt.Errorf("mismatch: got %d embeddings for %d documents", len(eres.Embeddings), len(req.Documents))
// }
// // Prepare row-based data.
// var rows []interface{}
// for i, emb := range eres.Embeddings {
// doc := req.Documents[i]
// if doc.Metadata == nil {
// // If ok, we don't use the User struct since the requirement is to error on non-nil
// return nil, fmt.Errorf("req.Query.Metadata must be not nil, got type %T", req.Options)
// }
// // Extract username and user_id from req.Query.Metadata
// userName, ok := doc.Metadata[util.UserNameKey].(string)
// if !ok {
// return nil, fmt.Errorf("req.Query.Metadata must provide username key")
// }
// userId, ok := doc.Metadata[util.UserIdKey].(string)
// if !ok {
// return nil, fmt.Errorf("req.Query.Metadata must provide user_id key")
// }
// var sb strings.Builder
// for _, p := range doc.Content {
// if p.IsText() {
// sb.WriteString(p.Text)
// }
// }
// text := sb.String()
// metadata := doc.Metadata
// if metadata == nil {
// metadata = make(map[string]interface{})
// }
// // Create row with explicit metadata field.
// row := make(map[string]interface{})
// row["vector"] = emb.Embedding // []float32
// row["text"] = text
// row["user_id"] = userId
// row["username"] = userName
// row["metadata"] = metadata // Explicitly set metadata as JSON-compatible map
// rows = append(rows, row)
// // Debug: Log row contents.
// fmt.Printf("Row %d: vector_len=%d, text=%q,userId=%s,username=%s,metadata=%v\n", i, len(emb.Embedding), text,userId,userName metadata)
// }
// // Debug: Log total rows.
// fmt.Printf("Inserting %d rows into collection %q\n", len(rows), ds.collection)
// // Insert rows into Milvus.
// _, err = ds.client.InsertRows(ctx, ds.collection, "", rows)
// if err != nil {
// return fmt.Errorf("milvus insert rows failed: %w", err)
// }
// return nil
// }
// // RetrieverOptions for Milvus retrieval.
// type RetrieverOptions struct {
......@@ -467,6 +600,25 @@ func (ds *docStore) Index(ctx context.Context, req *ai.IndexerRequest) error {
// // Retrieve implements the Retriever.Retrieve method.
// func (ds *docStore) Retrieve(ctx context.Context, req *ai.RetrieverRequest) (*ai.RetrieverResponse, error) {
// if req.Query.Metadata == nil {
// // If ok, we don't use the User struct since the requirement is to error on non-nil
// return nil, fmt.Errorf("req.Query.Metadata must be not nil, got type %T", req.Options)
// }
// // Extract username and user_id from req.Query.Metadata
// userName, ok := req.Query.Metadata[util.UserNameKey].(string)
// if !ok {
// return nil, fmt.Errorf("req.Query.Metadata must provide username key")
// }
// userId, ok := req.Query.Metadata[util.UserIdKey].(string)
// if !ok {
// return nil, fmt.Errorf("req.Query.Metadata must provide user_id key")
// }
// count := 3 // Default.
// metricTypeStr := "L2"
// if req.Options != nil {
......@@ -513,13 +665,13 @@ func (ds *docStore) Index(ctx context.Context, req *ai.IndexerRequest) error {
// return nil, fmt.Errorf("NewIndexHNSWSearchParam failed: %v", err)
// }
// // Perform vector search to get IDs.
// // Perform vector search to get IDs, text, and metadata.
// results, err := ds.client.Search(
// ctx,
// ds.collection,
// []string{}, // partitions
// "", // expr (TODO: add metadata filter if needed)
// []string{}, // Only need IDs for now, no output fields
// []string{textField, metadataField}, // Output fields: text and metadata
// []entity.Vector{queryVector},
// vectorField,
// metricType,
......@@ -530,51 +682,12 @@ func (ds *docStore) Index(ctx context.Context, req *ai.IndexerRequest) error {
// return nil, fmt.Errorf("milvus search failed: %v", err)
// }
// // Extract IDs from search results.
// var ids []int64
// for _, result := range results {
// for i := 0; i < result.ResultCount; i++ {
// id, err := result.IDs.GetAsInt64(i)
// if err != nil {
// continue
// }
// ids = append(ids, id)
// }
// }
// if len(ids) == 0 {
// return &ai.RetrieverResponse{
// Documents: []*ai.Document{},
// }, nil
// }
// // Construct filter expression for Query (e.g., "id IN [id1, id2, ...]").
// filterExpr := fmt.Sprintf("id IN [%s]", joinInt64s(ids, ","))
// // Perform query to get text and metadata.
// queryOptions := []client.SearchQueryOptionFunc{
// client.WithLimit(int64(count)),
// }
// // Note: Consistency level omitted due to undefined WithQueryConsistencyLevel.
// // If WithConsistencyLevel is supported for Query in your SDK, uncomment below:
// // queryOptions = append(queryOptions, client.WithConsistencyLevel(entity.ConsistencyBounded))
// queryResults, err := ds.client.Query(
// ctx,
// ds.collection,
// []string{}, // partitions
// filterExpr, // filter by IDs
// []string{textField, metadataField}, // output fields
// queryOptions...,
// )
// if err != nil {
// return nil, fmt.Errorf("milvus query failed: %v", err)
// }
// // Process query results.
// // Process search results.
// var docs []*ai.Document
// // Find text and metadata columns in query results.
// for _, result := range results {
// // Find text and metadata columns in search results.
// var textCol, metaCol entity.Column
// for _, col := range queryResults {
// for _, col := range result.Fields {
// if col.Name() == textField {
// textCol = col
// }
......@@ -585,11 +698,11 @@ func (ds *docStore) Index(ctx context.Context, req *ai.IndexerRequest) error {
// // Ensure text column exists.
// if textCol == nil {
// return nil, fmt.Errorf("text column %s not found in query results", textField)
// return nil, fmt.Errorf("text column %s not found in search results", textField)
// }
// // Iterate over rows (assuming columns have same length).
// for i := 0; i < textCol.Len(); i++ {
// for i := 0; i < result.ResultCount; i++ {
// // Get text value.
// text, err := textCol.GetAsString(i)
// if err != nil {
......@@ -612,29 +725,20 @@ func (ds *docStore) Index(ctx context.Context, req *ai.IndexerRequest) error {
// }
// // Print text and metadata in a format similar to insertion debug log.
// fmt.Printf("Row %d: text=%q, metadata=%v\n", i, text, metadata)
// // fmt.Printf("Row %d: text=%q, metadata=%v\n", i, text, metadata)
// // Create document.
// doc := ai.DocumentFromText(text, metadata)
// docs = append(docs, doc)
// }
// }
// return &ai.RetrieverResponse{
// Documents: docs,
// }, nil
// }
// // joinInt64s converts a slice of int64 to a comma-separated string.
// func joinInt64s(ids []int64, sep string) string {
// if len(ids) == 0 {
// return ""
// }
// strs := make([]string, len(ids))
// for i, id := range ids {
// strs[i] = fmt.Sprintf("%d", id)
// }
// return strings.Join(strs, sep)
// }
// RetrieverOptions for Milvus retrieval.
type RetrieverOptions struct {
......@@ -642,8 +746,24 @@ type RetrieverOptions struct {
MetricType string `json:"metric_type,omitempty"` // Similarity metric (e.g., "L2", "IP").
}
// Retrieve implements the Retriever.Retrieve method.
func (ds *docStore) Retrieve(ctx context.Context, req *ai.RetrieverRequest) (*ai.RetrieverResponse, error) {
if req.Query.Metadata == nil {
return nil, fmt.Errorf("req.Query.Metadata must be not nil, got type %T", req.Query.Metadata)
}
// Extract username and user_id from req.Query.Metadata
userName, ok := req.Query.Metadata[util.UserNameKey].(string)
if !ok {
return nil, fmt.Errorf("req.Query.Metadata must provide username key")
}
userId, ok := req.Query.Metadata[util.UserIdKey].(string)
if !ok {
return nil, fmt.Errorf("req.Query.Metadata must provide user_id key")
}
count := 3 // Default.
metricTypeStr := "L2"
if req.Options != nil {
......@@ -690,12 +810,15 @@ func (ds *docStore) Retrieve(ctx context.Context, req *ai.RetrieverRequest) (*ai
return nil, fmt.Errorf("NewIndexHNSWSearchParam failed: %v", err)
}
// Define filter expression for user_id
expr := fmt.Sprintf("user_id == %q", userId)
// Perform vector search to get IDs, text, and metadata.
results, err := ds.client.Search(
ctx,
ds.collection,
[]string{}, // partitions
"", // expr (TODO: add metadata filter if needed)
expr, // Filter by user_id
[]string{textField, metadataField}, // Output fields: text and metadata
[]entity.Vector{queryVector},
vectorField,
......@@ -749,8 +872,12 @@ func (ds *docStore) Retrieve(ctx context.Context, req *ai.RetrieverRequest) (*ai
}
}
// Print text and metadata in a format similar to insertion debug log.
// fmt.Printf("Row %d: text=%q, metadata=%v\n", i, text, metadata)
// Ensure metadata includes user_id and username from query
if metadata == nil {
metadata = make(map[string]interface{})
}
metadata["user_id"] = userId
metadata["username"] = userName
// Create document.
doc := ai.DocumentFromText(text, metadata)
......@@ -762,3 +889,4 @@ func (ds *docStore) Retrieve(ctx context.Context, req *ai.RetrieverRequest) (*ai
Documents: docs,
}, nil
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment