Commit b1a9a5c9 authored by duanjinfei

monitor nm heart status

parent e753810a
......@@ -2,7 +2,7 @@
"nm_seed": "43.198.252.255:10001",
"api_url": "https://console.aigic.ai/api/task/taskheat",
"node_manager_num": 1,
"heart_response": 30,
"heart_response": 60,
"task_validator_time": 1,
"container_num": 1,
"chain_id": 100,
......
package nm
import (
"example.com/m/log"
"example.com/m/models"
"example.com/m/operate"
nodeManagerV1 "github.com/odysseus/odysseus-protocol/gen/proto/go/nodemanager/v1"
......@@ -75,6 +76,7 @@ func inputNodeManagerChan(manager *NodeManager, nodeManagerClient *models.NodeMa
}
serviceClient := operate.ConnNmGrpc(manager.Info.Endpoint)
if serviceClient == nil {
log.WithField("endPoint", manager.Info.Endpoint).Error("Connect node manager failed")
return false
}
nodeManagerClient.Status = true
......
......@@ -195,11 +195,13 @@ func monitorWorker(op *operate.DockerOp) {
log.Info("------------------------Start rev msg worker thread------------------------")
for {
//if (time.Now().UnixMilli()-nodeManager.GetLastHeartTime())/conf.GetConfig().HeartRespTimeMillis > conf.GetConfig().HeartRespTimeSecond {
// nodeManager.UpdateStatus(false)
// log.Error("Node manager heartbeat is over")
// return
//}
if (time.Now().UnixMilli()-nodeManager.GetLastHeartTime())/conf.GetConfig().HeartRespTimeMillis > conf.GetConfig().HeartRespTimeSecond {
if nodeManager.Status {
nodeManager.UpdateStatus(false)
}
log.Error("Node manager heartbeat is over")
return
}
rev, err := worker.Recv()
if err == io.EOF {
log.Errorf("Node manage not work endpoint:%s", nodeManager.Endpoint)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment