Commit 470a1620 authored by vicotor

fix bug for kafka

parent 5af25f2f
@@ -31,6 +31,8 @@ type Node struct {
 	wg     sync.WaitGroup
 	status string
 	reg    *registry.Registry
+
+	receiptCh chan *odysseus.TaskReceipt
 }
 
 func (n *Node) ServiceType() common.ServiceType {
@@ -66,13 +68,12 @@ func NewNode() *Node {
 		DbName: dbconf.DbName,
 	})
 	node := &Node{
 		cache: pay,
 		rdb:   rdb,
 		quit:  make(chan struct{}),
 		conf:  config.GetConfig(),
+		receiptCh: make(chan *odysseus.TaskReceipt, 100000),
 	}
-	brokers := strings.Split(node.conf.Kafka.Brokers, ";")
-	node.kafkaProducer, _ = utils.NewKafkaProducer(brokers)
 	node.status = "before running"
 	node.register()
@@ -81,12 +82,44 @@ func NewNode() *Node {
 func (n *Node) Start() error {
 	n.status = "running"
+	go n.startProducer()
 	if n.reg != nil {
 		go n.reg.Start()
 	}
 	return n.startAllTask()
 }
 
+func (n *Node) startProducer() {
+	brokers := strings.Split(n.conf.Kafka.Brokers, ";")
+	running := true
+	for {
+		producer, err := utils.NewKafkaProducer(brokers)
+		if err != nil {
+			log.WithError(err).Error("create kafka producer failed")
+			time.Sleep(time.Second)
+			continue
+		}
+		n.kafkaProducer = producer
+		for running {
+			select {
+			case <-n.quit:
+				return
+			case receipt, ok := <-n.receiptCh:
+				if !ok {
+					return
+				}
+				if err := utils.FireTaskReceipt(n.kafkaProducer, receipt, config.GetConfig().Kafka.ReceiptTopic); err != nil {
+					log.WithError(err).WithField("task-id", receipt.TaskId).Error("fire task receipt failed")
+				} else {
+					if err == sarama.ErrClosedClient {
+						running = false
+					}
+				}
+			}
+		}
+	}
+}
+
 func (n *Node) Stop() {
 	close(n.quit)
 	n.wg.Wait()
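
Note on the producer hunk above: the sarama.ErrClosedClient check sits in the branch where FireTaskReceipt returned nil, and running is initialised once outside the reconnect loop, so a closed client never actually triggers a producer rebuild. Below is a hedged sketch of the retry shape with both points handled; it uses a plain []byte channel and sarama's SyncProducer instead of the repository's odysseus types and utils helpers, and assumes the IBM sarama import path.

package dispatcher

import (
	"errors"
	"log"
	"time"

	"github.com/IBM/sarama"
)

// sendLoop sketches the startProducer retry pattern: (re)create the producer,
// drain a payload channel, and rebuild the producer once the client reports
// it was closed. Names and types here are illustrative, not the repo's code.
func sendLoop(brokers []string, topic string, payloads <-chan []byte, quit <-chan struct{}) {
	for {
		producer, err := sarama.NewSyncProducer(brokers, nil)
		if err != nil {
			log.Printf("create kafka producer failed: %v", err)
			time.Sleep(time.Second)
			continue
		}
		running := true // reset on every reconnect
		for running {
			select {
			case <-quit:
				producer.Close()
				return
			case p, ok := <-payloads:
				if !ok {
					producer.Close()
					return
				}
				msg := &sarama.ProducerMessage{Topic: topic, Value: sarama.ByteEncoder(p)}
				if _, _, err := producer.SendMessage(msg); err != nil {
					log.Printf("send payload failed: %v", err)
					if errors.Is(err, sarama.ErrClosedClient) {
						running = false // fall back to the outer loop and rebuild
					}
				}
			}
		}
	}
}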
@@ -113,118 +146,128 @@ func (n *Node) register() {
 	n.reg = reg
 }
 
+func (n *Node) postResult(task *odysseus.TaskContent, result *odysseus.TaskResponse) error {
+	d, _ := proto.Marshal(result)
+	err := utils.Post(task.TaskCallback, d)
+	if err != nil {
+		log.WithError(err).Error("post task result failed")
+	} else {
+		log.WithField("taskid", task.TaskId).Debug("post task result")
+	}
+	uid, _ := strconv.ParseInt(task.TaskUid, 10, 64)
+	fee, _ := strconv.ParseInt(task.TaskFee, 10, 64)
+	n.cache.RollbackForFee(uid, fee)
+	return err
+}
+
+func (n *Node) postReceipt(task *odysseus.TaskContent, result *odysseus.TaskResponse, err error) error {
+	receipt := new(odysseus.TaskReceipt)
+	receipt.TaskId = task.TaskId
+	receipt.TaskTimestamp = task.TaskTimestamp
+	receipt.TaskType = task.TaskType
+	receipt.TaskUid = task.TaskUid
+	receipt.TaskWorkload = task.TaskWorkload
+	receipt.TaskDuration = (time.Now().UnixNano() - int64(task.TaskTimestamp)) / 1000
+	receipt.TaskFee = 0
+	receipt.TaskOutLen = 0
+	receipt.TaskExecuteDuration = 0
+	receipt.TaskProfitAccount = ""
+	receipt.TaskWorkerAccount = ""
+	switch err {
+	case ErrNoWorker, ErrTimeout:
+		receipt.TaskResult = err.Error()
+	case ErrDispatchFailed:
+		receipt.TaskResult = err.Error()
+	default:
+		receipt.TaskResult = "internal error"
+	}
+	n.receiptCh <- receipt
+	return nil
+}
+
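postReceipt now only enqueues the receipt on n.receiptCh (buffered at 100000) and returns nil; if the producer goroutine has exited, that send blocks the dispatching goroutine. A small sketch of a non-blocking enqueue follows — the helper name trySend is hypothetical, not part of the repository.

package dispatcher

import "log"

// trySend enqueues r on ch without blocking and reports whether it was
// accepted; a full channel drops the receipt instead of stalling dispatch.
func trySend[R any](ch chan<- R, r R) bool {
	select {
	case ch <- r:
		return true
	default:
		log.Println("receipt channel full, dropping receipt")
		return false
	}
}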
 func (n *Node) Loop(idx int) {
 	defer n.wg.Done()
 	defer log.WithField("routine", idx).Info("node loop routine exit")
+	var (
+		err     error
+		running bool = true
+		client  sarama.ConsumerGroup
+	)
 	// monitor kafka
 	taskCh := make(chan *odysseus.TaskContent, 1000)
 	ctx, cancel := context.WithCancel(context.Background())
-	var client sarama.ConsumerGroup
-	var err error
 	for {
-		client, err = n.attachKafkaConsumer(ctx, taskCh)
+		client, err = attachKafkaConsumer(ctx, taskCh, n)
 		if err != nil {
 			log.WithError(err).Error("attach kafka consumer failed try again later")
 			time.Sleep(time.Second)
+			n.status = "attach kafka failed"
 			continue
 		}
-		break
-	}
-	log.WithField("routine", idx).Info("attach kafka consumer success")
-
-	postResult := func(task *odysseus.TaskContent, result *odysseus.TaskResponse) error {
-		d, _ := proto.Marshal(result)
-		err := utils.Post(task.TaskCallback, d)
-		if err != nil {
-			log.WithError(err).Error("post task result failed")
-		} else {
-			log.WithField("taskid", task.TaskId).Debug("post task result")
-		}
-		uid, _ := strconv.ParseInt(task.TaskUid, 10, 64)
-		fee, _ := strconv.ParseInt(task.TaskFee, 10, 64)
-		n.cache.RollbackForFee(uid, fee)
-		return err
-	}
-	postReceipt := func(task *odysseus.TaskContent, result *odysseus.TaskResponse, err error) error {
-		receipt := new(odysseus.TaskReceipt)
-		receipt.TaskId = task.TaskId
-		receipt.TaskTimestamp = task.TaskTimestamp
-		receipt.TaskType = task.TaskType
-		receipt.TaskUid = task.TaskUid
-		receipt.TaskWorkload = task.TaskWorkload
-		receipt.TaskDuration = (time.Now().UnixNano() - int64(task.TaskTimestamp)) / 1000
-		receipt.TaskFee = 0
-		receipt.TaskOutLen = 0
-		receipt.TaskExecuteDuration = 0
-		receipt.TaskProfitAccount = ""
-		receipt.TaskWorkerAccount = ""
-		switch err {
-		case ErrNoWorker, ErrTimeout:
-			receipt.TaskResult = err.Error()
-		case ErrDispatchFailed:
-			receipt.TaskResult = err.Error()
-		default:
-			receipt.TaskResult = "internal error"
-		}
-		return utils.FireTaskReceipt(n.kafkaProducer, receipt, config.GetConfig().Kafka.ReceiptTopic)
-	}
-	for {
-		select {
-		case <-n.quit:
-			cancel()
-			client.Close()
-			return
-
-		case t := <-taskCh:
-			fctx, _ := context.WithTimeout(context.Background(), time.Second*time.Duration(config.GetConfig().DispatchTimeout))
-			go func(ctx context.Context, task *odysseus.TaskContent) {
-				l := log.WithField("task-id", task.TaskId)
-				l.WithField("task", task).Info("get task")
-				// todo: add parameter for re-dispatch count.
-				for {
-					worker, err := PopWorker(ctx, n.rdb)
-					if err == ErrNoWorker || err == ErrTimeout {
-						result := &odysseus.TaskResponse{
-							TaskId:        task.TaskId,
-							TaskUid:       task.TaskUid,
-							TaskFee:       task.TaskFee,
-							TaskIsSucceed: false,
-							TaskError:     err.Error(),
-						}
-						l.WithError(err).Error("pop worker failed")
-						if e := postReceipt(task, result, err); e != nil {
-							l.WithError(e).Error("post task receipt failed")
-						}
-						if e := postResult(task, result); e != nil {
-							l.WithError(e).Error("post task result failed")
-						}
-						break
-					}
-					if err != nil {
-						l.WithError(err).Error("pop worker failed")
-						continue
-					}
-					err = n.DispatchTask(ctx, worker, task)
-					if err != nil {
-						l.WithError(err).Error("dispatch task failed")
-						continue
-					} else {
-						l.Info("dispatch task success")
-						break
-					}
-				}
-			}(fctx, t)
-		}
-	}
+		log.WithField("routine", idx).Info("attach kafka consumer success")
+		n.status = "running"
+		for running {
+			select {
+			case clientErr := <-client.Errors():
+				log.WithError(clientErr).Error("kafka consumer error")
+				if clientErr == sarama.ErrClosedClient {
+					running = false
+				}
+			case <-n.quit:
+				cancel()
+				client.Close()
+				return
+
+			case t := <-taskCh:
+				fctx, _ := context.WithTimeout(context.Background(), time.Second*time.Duration(config.GetConfig().DispatchTimeout))
+				go func(ctx context.Context, task *odysseus.TaskContent) {
+					l := log.WithField("task-id", task.TaskId)
+					l.WithField("task", task).Info("get task")
+					for {
+						worker, err := PopWorker(ctx, n.rdb)
+						if err == ErrNoWorker || err == ErrTimeout {
+							result := &odysseus.TaskResponse{
+								TaskId:        task.TaskId,
+								TaskUid:       task.TaskUid,
+								TaskFee:       task.TaskFee,
+								TaskIsSucceed: false,
+								TaskError:     err.Error(),
+							}
+							l.WithError(err).Error("pop worker failed")
+							if e := n.postReceipt(task, result, err); e != nil {
+								l.WithError(e).Error("post task receipt failed")
+							}
+							if e := n.postResult(task, result); e != nil {
+								l.WithError(e).Error("post task result failed")
+							}
+							break
+						}
+						if err != nil {
+							l.WithError(err).Error("pop worker failed")
+							continue
+						}
+						err = n.DispatchTask(ctx, worker, task)
+						if err != nil {
+							l.WithError(err).Error("dispatch task failed")
+							continue
+						} else {
+							l.Info("dispatch task success")
+							break
+						}
+					}
+				}(fctx, t)
+			}
+		}
 	}
 }
 
-func (n *Node) attachKafkaConsumer(ctx context.Context, taskCh chan *odysseus.TaskContent) (sarama.ConsumerGroup, error) {
+func attachKafkaConsumer(ctx context.Context, taskCh chan *odysseus.TaskContent, n *Node) (sarama.ConsumerGroup, error) {
 	config := sarama.NewConfig()
 	config.Consumer.Return.Errors = true
 	config.Consumer.Group.Rebalance.GroupStrategies = []sarama.BalanceStrategy{sarama.NewBalanceStrategyRoundRobin()}
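In Loop above, the per-task context is created with fctx, _ := context.WithTimeout(...), discarding the cancel function, so every dispatch goroutine keeps its timer and context alive until DispatchTimeout expires even when the task finishes early. A sketch of the same pattern with cancel retained; runWithTimeout and work are illustrative names, not repository code.

package dispatcher

import (
	"context"
	"time"
)

// runWithTimeout starts work under a per-task deadline and releases the
// context's resources as soon as the work returns; "work" stands in for the
// PopWorker/DispatchTask sequence.
func runWithTimeout(timeout time.Duration, work func(context.Context)) {
	fctx, cancel := context.WithTimeout(context.Background(), timeout)
	go func(ctx context.Context) {
		defer cancel() // release the timer as soon as dispatch finishes
		work(ctx)
	}(fctx)
}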
@@ -275,7 +318,6 @@ type Consumer struct {
 
 // Setup is run at the beginning of a new session, before ConsumeClaim
 func (c *Consumer) Setup(sarama.ConsumerGroupSession) error {
 	// Mark the consumer as ready
-	close(c.ready)
 	return nil
 }
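Setup runs at the start of every consumer-group session, and with Loop now re-attaching the consumer in a retry loop, a second session would hit close(c.ready) on an already-closed channel and panic — presumably why the close is dropped here. If a readiness signal is still wanted, guarding it with sync.Once is one option; the handler below is an illustrative sketch, not the repository's Consumer type, and assumes the IBM sarama import path.

package dispatcher

import (
	"sync"

	"github.com/IBM/sarama"
)

// readyConsumer signals readiness at most once, even when Setup is invoked
// again after a rebalance or a consumer re-attach.
type readyConsumer struct {
	ready chan struct{}
	once  sync.Once
}

func (c *readyConsumer) Setup(sarama.ConsumerGroupSession) error {
	c.once.Do(func() { close(c.ready) }) // safe across repeated sessions
	return nil
}

func (c *readyConsumer) Cleanup(sarama.ConsumerGroupSession) error { return nil }

func (c *readyConsumer) ConsumeClaim(sess sarama.ConsumerGroupSession, claim sarama.ConsumerGroupClaim) error {
	for msg := range claim.Messages() {
		sess.MarkMessage(msg, "") // commit the offset; message handling omitted
	}
	return nil
}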