Commit e410aba4 authored by Wade

add user field

parent 654f62a4
......@@ -23,6 +23,7 @@ import (
"errors"
"fmt"
"os"
"strings"
"sync"
"github.com/firebase/genkit/go/ai"
......@@ -387,216 +388,219 @@ type docStore struct {
// package graphrag
package graphrag
// import (
// "context"
// "fmt"
import (
"context"
"fmt"
// "github.com/milvus-io/milvus-sdk-go/v2/entity"
// "github.com/pkg/errors"
// )
"github.com/milvus-io/milvus-sdk-go/v2/entity"
"github.com/pkg/errors"
)
// // newDocStore creates a docStore.
// func (m *Milvus) newDocStore(ctx context.Context, cfg *CollectionConfig) (*docStore, error) {
// if m.client == nil {
// return nil, errors.New("milvus.Init not called")
// }
// newDocStore creates a docStore for the Milvus collection named by cfg.
//
// If the collection does not exist yet, it is created with a float-vector
// field (dimension cfg.Dimension), a VARCHAR text field that serves as the
// primary key, a JSON metadata field, and VARCHAR user_id/username fields.
// An HNSW index (L2 metric, M=8, efConstruction=96) is then built on the
// vector field. In every case the collection is loaded before returning.
//
// It returns an error when m.client is nil, when cfg is nil, when any Milvus
// call fails, or when cfg.EmbedderOptions is non-nil but is not a
// map[string]interface{}. Underlying errors are wrapped with %w so callers
// can inspect them with errors.Is / errors.As.
func (m *Milvus) newDocStore(ctx context.Context, cfg *CollectionConfig) (*docStore, error) {
	if m.client == nil {
		return nil, errors.New("milvus.Init not called")
	}
	if cfg == nil {
		return nil, errors.New("milvus: collection config must not be nil")
	}

	// Check/create collection.
	exists, err := m.client.HasCollection(ctx, cfg.Collection)
	if err != nil {
		return nil, fmt.Errorf("failed to check collection %q: %w", cfg.Collection, err)
	}
	if !exists {
		// Define schema with textField as primary key, plus user_id and username fields.
		schema := &entity.Schema{
			CollectionName: cfg.Collection,
			Fields: []*entity.Field{
				{
					Name:     vectorField,
					DataType: entity.FieldTypeFloatVector,
					TypeParams: map[string]string{
						"dim": fmt.Sprintf("%d", cfg.Dimension),
					},
				},
				{
					Name:       textField,
					DataType:   entity.FieldTypeVarChar,
					PrimaryKey: true, // Enforce unique constraint on text field
					TypeParams: map[string]string{
						"max_length": "65535", // Maximum length for VARCHAR
					},
				},
				{
					Name:     metadataField,
					DataType: entity.FieldTypeJSON,
				},
				{
					Name:     "user_id",
					DataType: entity.FieldTypeVarChar,
					TypeParams: map[string]string{
						"max_length": "128", // Reasonable length for user_id
					},
				},
				{
					Name:     "username",
					DataType: entity.FieldTypeVarChar,
					TypeParams: map[string]string{
						"max_length": "128", // Reasonable length for username
					},
				},
			},
		}

		if err := m.client.CreateCollection(ctx, schema, entity.DefaultShardNumber); err != nil {
			return nil, fmt.Errorf("failed to create collection %q: %w", cfg.Collection, err)
		}

		// Create HNSW index for vectorField.
		index, err := entity.NewIndexHNSW(
			entity.L2,
			8,  // M
			96, // efConstruction
		)
		if err != nil {
			return nil, fmt.Errorf("entity.NewIndexHNSW: %w", err)
		}

		// NOTE(review): if this step fails the collection already exists, so a
		// later call skips index creation entirely — confirm whether that
		// partial state needs explicit recovery.
		if err := m.client.CreateIndex(ctx, cfg.Collection, vectorField, index, false); err != nil {
			return nil, fmt.Errorf("failed to create index: %w", err)
		}
	}

	// Load collection.
	if err := m.client.LoadCollection(ctx, cfg.Collection, false); err != nil {
		return nil, fmt.Errorf("failed to load collection %q: %w", cfg.Collection, err)
	}

	// Convert EmbedderOptions to map[string]interface{}; default to an empty
	// map when no options were supplied.
	embedderOptions := make(map[string]interface{})
	if cfg.EmbedderOptions != nil {
		opts, ok := cfg.EmbedderOptions.(map[string]interface{})
		if !ok {
			return nil, fmt.Errorf("EmbedderOptions must be a map[string]interface{}, got %T", cfg.EmbedderOptions)
		}
		embedderOptions = opts
	}

	return &docStore{
		client:          m.client,
		collection:      cfg.Collection,
		dimension:       cfg.Dimension,
		embedder:        cfg.Embedder,
		embedderOptions: embedderOptions,
	}, nil
}
// return &docStore{
// client: m.client,
// collection: cfg.Collection,
// dimension: cfg.Dimension,
// embedder: cfg.Embedder,
// embedderOptions: embedderOptions,
// }, nil
// }
// Indexer returns whatever genkit.LookupIndexer yields for this package's
// provider and the given collection name.
func Indexer(g *genkit.Genkit, collection string) ai.Indexer {
	indexer := genkit.LookupIndexer(g, provider, collection)
	return indexer
}
// // Indexer returns the indexer for a collection.
// func Indexer(g *genkit.Genkit, collection string) ai.Indexer {
// return genkit.LookupIndexer(g, provider, collection)
// }
// Retriever returns whatever genkit.LookupRetriever yields for this package's
// provider and the given collection name.
func Retriever(g *genkit.Genkit, collection string) ai.Retriever {
	retriever := genkit.LookupRetriever(g, provider, collection)
	return retriever
}
// // Retriever returns the retriever for a collection.
// func Retriever(g *genkit.Genkit, collection string) ai.Retriever {
// return genkit.LookupRetriever(g, provider, collection)
// }
/*
Update and delete operations are rarely used.
*/
// /*
// 更新 删除 很少用到;
// */
// Index implements the Indexer.Index method.
func (ds *docStore) Index(ctx context.Context, req *ai.IndexerRequest) error {
if len(req.Documents) == 0 {
return nil
}
// // Index implements the Indexer.Index method.
// func (ds *docStore) Index(ctx context.Context, req *ai.IndexerRequest) error {
// if len(req.Documents) == 0 {
// return nil
// }
// // Embed documents.
// ereq := &ai.EmbedRequest{
// Input: req.Documents,
// Options: ds.embedderOptions,
// }
// eres, err := ds.embedder.Embed(ctx, ereq)
// if err != nil {
// return fmt.Errorf("milvus index embedding failed: %w", err)
// }
// // Validate embedding count matches document count.
// if len(eres.Embeddings) != len(req.Documents) {
// return fmt.Errorf("mismatch: got %d embeddings for %d documents", len(eres.Embeddings), len(req.Documents))
// }
// Embed documents.
ereq := &ai.EmbedRequest{
Input: req.Documents,
Options: ds.embedderOptions,
}
eres, err := ds.embedder.Embed(ctx, ereq)
if err != nil {
return fmt.Errorf("milvus index embedding failed: %w", err)
}
// // Prepare row-based data.
// var rows []interface{}
// for i, emb := range eres.Embeddings {
// doc := req.Documents[i]
// Validate embedding count matches document count.
if len(eres.Embeddings) != len(req.Documents) {
return fmt.Errorf("mismatch: got %d embeddings for %d documents", len(eres.Embeddings), len(req.Documents))
}
// if doc.Metadata == nil {
// // If ok, we don't use the User struct since the requirement is to error on non-nil
// return nil, fmt.Errorf("req.Query.Metadata must be not nil, got type %T", req.Options)
// }
// Prepare row-based data.
var rows []interface{}
for i, emb := range eres.Embeddings {
doc := req.Documents[i]
// // Extract username and user_id from req.Query.Metadata
// userName, ok := doc.Metadata[util.UserNameKey].(string)
// if !ok {
// return nil, fmt.Errorf("req.Query.Metadata must provide username key")
// }
// userId, ok := doc.Metadata[util.UserIdKey].(string)
// if !ok {
// return nil, fmt.Errorf("req.Query.Metadata must provide user_id key")
// }
if doc.Metadata == nil {
// If ok, we don't use the User struct since the requirement is to error on non-nil
return nil, fmt.Errorf("req.Query.Metadata must be not nil, got type %T", req.Options)
}
// Extract username and user_id from req.Query.Metadata
userName, ok := doc.Metadata[util.UserNameKey].(string)
if !ok {
return nil, fmt.Errorf("req.Query.Metadata must provide username key")
}
userId, ok := doc.Metadata[util.UserIdKey].(string)
if !ok {
return nil, fmt.Errorf("req.Query.Metadata must provide user_id key")
}
// var sb strings.Builder
// for _, p := range doc.Content {
// if p.IsText() {
// sb.WriteString(p.Text)
// }
// }
// text := sb.String()
// metadata := doc.Metadata
// if metadata == nil {
// metadata = make(map[string]interface{})
// }
// // Create row with explicit metadata field.
// row := make(map[string]interface{})
// row["vector"] = emb.Embedding // []float32
// row["text"] = text
// row["user_id"] = userId
// row["username"] = userName
// row["metadata"] = metadata // Explicitly set metadata as JSON-compatible map
// rows = append(rows, row)
var sb strings.Builder
for _, p := range doc.Content {
if p.IsText() {
sb.WriteString(p.Text)
}
}
text := sb.String()
metadata := doc.Metadata
if metadata == nil {
metadata = make(map[string]interface{})
}
// // Debug: Log row contents.
// fmt.Printf("Row %d: vector_len=%d, text=%q,userId=%s,username=%s,metadata=%v\n", i, len(emb.Embedding), text,userId,userName metadata)
// }
// Create row with explicit metadata field.
row := make(map[string]interface{})
row["vector"] = emb.Embedding // []float32
row["text"] = text
row["user_id"] = userId
row["username"] = userName
row["metadata"] = metadata // Explicitly set metadata as JSON-compatible map
rows = append(rows, row)
// // Debug: Log total rows.
// fmt.Printf("Inserting %d rows into collection %q\n", len(rows), ds.collection)
// Debug: Log row contents.
fmt.Printf("Row %d: vector_len=%d, text=%q,userId=%s,username=%s,metadata=%v\n", i, len(emb.Embedding), text,userId,userName metadata)
}
// // Insert rows into Milvus.
// _, err = ds.client.InsertRows(ctx, ds.collection, "", rows)
// if err != nil {
// return fmt.Errorf("milvus insert rows failed: %w", err)
// }
// Debug: Log total rows.
fmt.Printf("Inserting %d rows into collection %q\n", len(rows), ds.collection)
// return nil
// }
// Insert rows into Milvus.
_, err = ds.client.InsertRows(ctx, ds.collection, "", rows)
if err != nil {
return fmt.Errorf("milvus insert rows failed: %w", err)
}
// // RetrieverOptions for Milvus retrieval.
// type RetrieverOptions struct {
// Count int `json:"count,omitempty"` // Max documents to retrieve.
// MetricType string `json:"metric_type,omitempty"` // Similarity metric (e.g., "L2", "IP").
// }
return nil
}
// RetrieverOptions holds the per-request options for Milvus retrieval.
// Both fields are tagged omitempty, so a zero value is left out of the JSON
// encoding.
type RetrieverOptions struct {
Count int `json:"count,omitempty"` // Max documents to retrieve.
MetricType string `json:"metric_type,omitempty"` // Similarity metric (e.g., "L2", "IP").
}
// // Retrieve implements the Retriever.Retrieve method.
// func (ds *docStore) Retrieve(ctx context.Context, req *ai.RetrieverRequest) (*ai.RetrieverResponse, error) {
......@@ -740,13 +744,6 @@ type docStore struct {
// RetrieverOptions holds the per-request options for Milvus retrieval.
// Both fields are tagged omitempty, so a zero value is left out of the JSON
// encoding.
type RetrieverOptions struct {
Count int `json:"count,omitempty"` // Max documents to retrieve.
MetricType string `json:"metric_type,omitempty"` // Similarity metric (e.g., "L2", "IP").
}
// Retrieve implements the Retriever.Retrieve method.
func (ds *docStore) Retrieve(ctx context.Context, req *ai.RetrieverRequest) (*ai.RetrieverResponse, error) {
......@@ -876,8 +873,8 @@ func (ds *docStore) Retrieve(ctx context.Context, req *ai.RetrieverRequest) (*ai
if metadata == nil {
metadata = make(map[string]interface{})
}
metadata["user_id"] = userId
metadata["username"] = userName
metadata[util.UserIdKey] = userId
metadata[util.UserNameKey] = userName
// Create document.
doc := ai.DocumentFromText(text, metadata)
......@@ -889,4 +886,3 @@ func (ds *docStore) Retrieve(ctx context.Context, req *ai.RetrieverRequest) (*ai
Documents: docs,
}, nil
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment