Commit 470a1620 authored by vicotor's avatar vicotor

fix bug for kafka

parent 5af25f2f
......@@ -31,6 +31,8 @@ type Node struct {
wg sync.WaitGroup
status string
reg *registry.Registry
receiptCh chan *odysseus.TaskReceipt
}
func (n *Node) ServiceType() common.ServiceType {
......@@ -66,13 +68,12 @@ func NewNode() *Node {
DbName: dbconf.DbName,
})
node := &Node{
cache: pay,
rdb: rdb,
quit: make(chan struct{}),
conf: config.GetConfig(),
cache: pay,
rdb: rdb,
quit: make(chan struct{}),
conf: config.GetConfig(),
receiptCh: make(chan *odysseus.TaskReceipt, 100000),
}
brokers := strings.Split(node.conf.Kafka.Brokers, ";")
node.kafkaProducer, _ = utils.NewKafkaProducer(brokers)
node.status = "before running"
node.register()
......@@ -81,12 +82,44 @@ func NewNode() *Node {
func (n *Node) Start() error {
n.status = "running"
go n.startProducer()
if n.reg != nil {
go n.reg.Start()
}
return n.startAllTask()
}
func (n *Node) startProducer() {
brokers := strings.Split(n.conf.Kafka.Brokers, ";")
running := true
for {
producer, err := utils.NewKafkaProducer(brokers)
if err != nil {
log.WithError(err).Error("create kafka producer failed")
time.Sleep(time.Second)
continue
}
n.kafkaProducer = producer
for running {
select {
case <-n.quit:
return
case receipt, ok := <-n.receiptCh:
if !ok {
return
}
if err := utils.FireTaskReceipt(n.kafkaProducer, receipt, config.GetConfig().Kafka.ReceiptTopic); err != nil {
log.WithError(err).WithField("task-id", receipt.TaskId).Error("fire task receipt failed")
} else {
if err == sarama.ErrClosedClient {
running = false
}
}
}
}
}
}
func (n *Node) Stop() {
close(n.quit)
n.wg.Wait()
......@@ -113,118 +146,128 @@ func (n *Node) register() {
n.reg = reg
}
func (n *Node) postResult(task *odysseus.TaskContent, result *odysseus.TaskResponse) error {
d, _ := proto.Marshal(result)
err := utils.Post(task.TaskCallback, d)
if err != nil {
log.WithError(err).Error("post task result failed")
} else {
log.WithField("taskid", task.TaskId).Debug("post task result")
}
uid, _ := strconv.ParseInt(task.TaskUid, 10, 64)
fee, _ := strconv.ParseInt(task.TaskFee, 10, 64)
n.cache.RollbackForFee(uid, fee)
return err
}
func (n *Node) postReceipt(task *odysseus.TaskContent, result *odysseus.TaskResponse, err error) error {
receipt := new(odysseus.TaskReceipt)
receipt.TaskId = task.TaskId
receipt.TaskTimestamp = task.TaskTimestamp
receipt.TaskType = task.TaskType
receipt.TaskUid = task.TaskUid
receipt.TaskWorkload = task.TaskWorkload
receipt.TaskDuration = (time.Now().UnixNano() - int64(task.TaskTimestamp)) / 1000
receipt.TaskFee = 0
receipt.TaskOutLen = 0
receipt.TaskExecuteDuration = 0
receipt.TaskProfitAccount = ""
receipt.TaskWorkerAccount = ""
switch err {
case ErrNoWorker, ErrTimeout:
receipt.TaskResult = err.Error()
case ErrDispatchFailed:
receipt.TaskResult = err.Error()
default:
receipt.TaskResult = "internal error"
}
n.receiptCh <- receipt
return nil
}
func (n *Node) Loop(idx int) {
defer n.wg.Done()
defer log.WithField("routine", idx).Info("node loop routine exit")
var (
err error
running bool = true
client sarama.ConsumerGroup
)
// monitor kafka
taskCh := make(chan *odysseus.TaskContent, 1000)
ctx, cancel := context.WithCancel(context.Background())
var client sarama.ConsumerGroup
var err error
for {
client, err = n.attachKafkaConsumer(ctx, taskCh)
client, err = attachKafkaConsumer(ctx, taskCh, n)
if err != nil {
log.WithError(err).Error("attach kafka consumer failed try again later")
time.Sleep(time.Second)
n.status = "attach kafka failed"
continue
}
break
}
log.WithField("routine", idx).Info("attach kafka consumer success")
log.WithField("routine", idx).Info("attach kafka consumer success")
n.status = "running"
for running {
postResult := func(task *odysseus.TaskContent, result *odysseus.TaskResponse) error {
d, _ := proto.Marshal(result)
err := utils.Post(task.TaskCallback, d)
if err != nil {
log.WithError(err).Error("post task result failed")
} else {
log.WithField("taskid", task.TaskId).Debug("post task result")
}
uid, _ := strconv.ParseInt(task.TaskUid, 10, 64)
fee, _ := strconv.ParseInt(task.TaskFee, 10, 64)
n.cache.RollbackForFee(uid, fee)
return err
}
postReceipt := func(task *odysseus.TaskContent, result *odysseus.TaskResponse, err error) error {
receipt := new(odysseus.TaskReceipt)
receipt.TaskId = task.TaskId
receipt.TaskTimestamp = task.TaskTimestamp
receipt.TaskType = task.TaskType
receipt.TaskUid = task.TaskUid
receipt.TaskWorkload = task.TaskWorkload
receipt.TaskDuration = (time.Now().UnixNano() - int64(task.TaskTimestamp)) / 1000
receipt.TaskFee = 0
receipt.TaskOutLen = 0
receipt.TaskExecuteDuration = 0
receipt.TaskProfitAccount = ""
receipt.TaskWorkerAccount = ""
switch err {
case ErrNoWorker, ErrTimeout:
receipt.TaskResult = err.Error()
case ErrDispatchFailed:
receipt.TaskResult = err.Error()
default:
receipt.TaskResult = "internal error"
}
return utils.FireTaskReceipt(n.kafkaProducer, receipt, config.GetConfig().Kafka.ReceiptTopic)
}
select {
case clientErr := <-client.Errors():
log.WithError(clientErr).Error("kafka consumer error")
if clientErr == sarama.ErrClosedClient {
running = false
}
case <-n.quit:
cancel()
client.Close()
return
for {
select {
case <-n.quit:
cancel()
client.Close()
return
case t := <-taskCh:
fctx, _ := context.WithTimeout(context.Background(), time.Second*time.Duration(config.GetConfig().DispatchTimeout))
go func(ctx context.Context, task *odysseus.TaskContent) {
l := log.WithField("task-id", task.TaskId)
l.WithField("task", task).Info("get task")
// todo: add parameter for re-dispatch count.
for {
worker, err := PopWorker(ctx, n.rdb)
if err == ErrNoWorker || err == ErrTimeout {
result := &odysseus.TaskResponse{
TaskId: task.TaskId,
TaskUid: task.TaskUid,
TaskFee: task.TaskFee,
TaskIsSucceed: false,
TaskError: err.Error(),
case t := <-taskCh:
fctx, _ := context.WithTimeout(context.Background(), time.Second*time.Duration(config.GetConfig().DispatchTimeout))
go func(ctx context.Context, task *odysseus.TaskContent) {
l := log.WithField("task-id", task.TaskId)
l.WithField("task", task).Info("get task")
for {
worker, err := PopWorker(ctx, n.rdb)
if err == ErrNoWorker || err == ErrTimeout {
result := &odysseus.TaskResponse{
TaskId: task.TaskId,
TaskUid: task.TaskUid,
TaskFee: task.TaskFee,
TaskIsSucceed: false,
TaskError: err.Error(),
}
l.WithError(err).Error("pop worker failed")
if e := n.postReceipt(task, result, err); e != nil {
l.WithError(e).Error("post task receipt failed")
}
if e := n.postResult(task, result); e != nil {
l.WithError(e).Error("post task result failed")
}
break
}
l.WithError(err).Error("pop worker failed")
if e := postReceipt(task, result, err); e != nil {
l.WithError(e).Error("post task receipt failed")
if err != nil {
l.WithError(err).Error("pop worker failed")
continue
}
if e := postResult(task, result); e != nil {
l.WithError(e).Error("post task result failed")
err = n.DispatchTask(ctx, worker, task)
if err != nil {
l.WithError(err).Error("dispatch task failed")
continue
} else {
l.Info("dispatch task success")
break
}
break
}
if err != nil {
l.WithError(err).Error("pop worker failed")
continue
}
err = n.DispatchTask(ctx, worker, task)
if err != nil {
l.WithError(err).Error("dispatch task failed")
continue
} else {
l.Info("dispatch task success")
break
}
}
}(fctx, t)
}(fctx, t)
}
}
}
}
func (n *Node) attachKafkaConsumer(ctx context.Context, taskCh chan *odysseus.TaskContent) (sarama.ConsumerGroup, error) {
func attachKafkaConsumer(ctx context.Context, taskCh chan *odysseus.TaskContent, n *Node) (sarama.ConsumerGroup, error) {
config := sarama.NewConfig()
config.Consumer.Return.Errors = true
config.Consumer.Group.Rebalance.GroupStrategies = []sarama.BalanceStrategy{sarama.NewBalanceStrategyRoundRobin()}
......@@ -275,7 +318,6 @@ type Consumer struct {
// Setup is run at the beginning of a new session, before ConsumeClaim
func (c *Consumer) Setup(sarama.ConsumerGroupSession) error {
// Mark the consumer as ready
close(c.ready)
return nil
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment