Commit b6f1a32a authored by vicotor

update worker manager

parent 4827c9fc
...@@ -3,8 +3,10 @@ package config ...@@ -3,8 +3,10 @@ package config
const ( const (
NODE_MANAGER_SET = "node_manager_set" NODE_MANAGER_SET = "node_manager_set"
WORKER_STATUS_PREFIX = "worker_status_" WORKER_STATUS_PREFIX = "worker_status_"
WORKER_NONCE_KEY_PREFIX = "worker_nonce_"
WORKER_QUEUE_PREFIX = "worker_queue_" WORKER_QUEUE_PREFIX = "worker_queue_"
WORKER_DEVICE_INFO_PREFIX = "worker_device_info_" WORKER_DEVICE_INFO_PREFIX = "worker_device_info_"
WORKER_DEVICE_STATUS_PREFIX = "worker_device_status_"
WORKER_USAGE_INFO_PREFIX = "worker_usage_info_" WORKER_USAGE_INFO_PREFIX = "worker_usage_info_"
WORKER_RESOURCE_INFO_PREFIX = "worker_resource_info_" WORKER_RESOURCE_INFO_PREFIX = "worker_resource_info_"
) )
...@@ -33,6 +33,8 @@ func (n *NodeManagerService) ManagerList(ctx context.Context, request *omanager. ...@@ -33,6 +33,8 @@ func (n *NodeManagerService) ManagerList(ctx context.Context, request *omanager.
} }
func (n *NodeManagerService) RegisterWorker(client omanager.NodeManagerService_RegisterWorkerServer) error { func (n *NodeManagerService) RegisterWorker(client omanager.NodeManagerService_RegisterWorkerServer) error {
//return n.node.wm.handleNewDial(client)
uuid := utils.GetSnowflakeId() uuid := utils.GetSnowflakeId()
worker, err := n.node.wm.AddNewWorker(uuid, client) worker, err := n.node.wm.AddNewWorker(uuid, client)
......
...@@ -9,6 +9,7 @@ import ( ...@@ -9,6 +9,7 @@ import (
omanager "github.com/odysseus/odysseus-protocol/gen/proto/go/nodemanager/v1" omanager "github.com/odysseus/odysseus-protocol/gen/proto/go/nodemanager/v1"
log "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus"
"math/big" "math/big"
"time"
) )
func (wm *WorkerManager) taskResult(worker *Worker, task *odysseus.TaskContent, result *omanager.SubmitTaskResult) (*omanager.ManagerMessage_ProofTaskResult, error) { func (wm *WorkerManager) taskResult(worker *Worker, task *odysseus.TaskContent, result *omanager.SubmitTaskResult) (*omanager.ManagerMessage_ProofTaskResult, error) {
...@@ -17,13 +18,15 @@ func (wm *WorkerManager) taskResult(worker *Worker, task *odysseus.TaskContent, ...@@ -17,13 +18,15 @@ func (wm *WorkerManager) taskResult(worker *Worker, task *odysseus.TaskContent,
return wm.computeTaskResult(worker, task, result) return wm.computeTaskResult(worker, task, result)
case odysseus.TaskKind_StandardTask: case odysseus.TaskKind_StandardTask:
return wm.standardTaskResult(worker, task, result) return wm.standardTaskResult(worker, task, result)
} }
return nil, errors.New("unsupport task kind") return nil, errors.New("unsupport task kind")
} }
func (wm *WorkerManager) computeTaskResult(worker *Worker, task *odysseus.TaskContent, result *omanager.SubmitTaskResult) (*omanager.ManagerMessage_ProofTaskResult, error) { func (wm *WorkerManager) computeTaskResult(worker *Worker, task *odysseus.TaskContent, result *omanager.SubmitTaskResult) (*omanager.ManagerMessage_ProofTaskResult, error) {
if worker.info.nodeInfo == nil {
return nil, errors.New("unknown worker node info")
}
log.WithFields(log.Fields{ log.WithFields(log.Fields{
"task-id": task.TaskId, "task-id": task.TaskId,
"task-type": task.TaskType, "task-type": task.TaskType,
...@@ -77,7 +80,7 @@ func (wm *WorkerManager) computeTaskResult(worker *Worker, task *odysseus.TaskCo ...@@ -77,7 +80,7 @@ func (wm *WorkerManager) computeTaskResult(worker *Worker, task *odysseus.TaskCo
paramHash := crypto.Keccak256Hash(task.TaskParam) paramHash := crypto.Keccak256Hash(task.TaskParam)
resultHash := crypto.Keccak256Hash(result.TaskResultBody) resultHash := crypto.Keccak256Hash(result.TaskResultBody)
dataHash := crypto.Keccak256Hash(utils.CombineBytes([]byte(result.TaskId), paramHash[:], resultHash[:])) dataHash := crypto.Keccak256Hash(utils.CombineBytes([]byte(result.TaskId), paramHash[:], resultHash[:]))
minerPubkey, _ := utils.HexToPubkey(worker.publicKey) // todo: get miner pubkey minerPubkey, _ := utils.HexToPubkey(worker.info.nodeInfo.MinerPubkey)
verified := ecdsa.VerifyASN1(minerPubkey, dataHash[:], result.MinerSignature) verified := ecdsa.VerifyASN1(minerPubkey, dataHash[:], result.MinerSignature)
log.WithField("minerSignatureVerify", verified).Debug("miner signature verify") log.WithField("minerSignatureVerify", verified).Debug("miner signature verify")
if !verified { if !verified {
...@@ -88,11 +91,13 @@ func (wm *WorkerManager) computeTaskResult(worker *Worker, task *odysseus.TaskCo ...@@ -88,11 +91,13 @@ func (wm *WorkerManager) computeTaskResult(worker *Worker, task *odysseus.TaskCo
receipt := wm.makeReceipt(worker, task, result, Succeed) receipt := wm.makeReceipt(worker, task, result, Succeed)
wm.node.PostResult(receipt) wm.node.PostResult(receipt)
//manager_signature = sign(hash((task_id+hash(task_param)+hash(task_result)+container_signature+miner_signature+workload)) //manager_signature = sign(hash((task_id+hash(task_param)+hash(task_result)+container_signature+miner_signature+workload+time))
now := time.Now().Unix()
paramHash := crypto.Keccak256Hash(task.TaskParam) paramHash := crypto.Keccak256Hash(task.TaskParam)
resultHash := crypto.Keccak256Hash(result.TaskResultBody) resultHash := crypto.Keccak256Hash(result.TaskResultBody)
dataHash := crypto.Keccak256Hash(utils.CombineBytes([]byte(result.TaskId), paramHash[:], resultHash[:], dataHash := crypto.Keccak256Hash(utils.CombineBytes([]byte(result.TaskId), paramHash[:], resultHash[:],
worker.ProfitAccount().Bytes(), worker.WorkerAccount().Bytes(), result.ContainerSignature, result.MinerSignature, big.NewInt(int64(task.TaskWorkload)).Bytes())) worker.ProfitAccount().Bytes(), worker.WorkerAccount().Bytes(), result.ContainerSignature, result.MinerSignature, big.NewInt(int64(task.TaskWorkload)).Bytes()),
big.NewInt(now).Bytes())
signature, err := wm.node.Sign(dataHash[:]) signature, err := wm.node.Sign(dataHash[:])
if err != nil { if err != nil {
...@@ -104,6 +109,7 @@ func (wm *WorkerManager) computeTaskResult(worker *Worker, task *odysseus.TaskCo ...@@ -104,6 +109,7 @@ func (wm *WorkerManager) computeTaskResult(worker *Worker, task *odysseus.TaskCo
proof.ProofTaskResult = &omanager.ProofTaskResult{ proof.ProofTaskResult = &omanager.ProofTaskResult{
TaskId: result.TaskId, TaskId: result.TaskId,
ManagerSignature: signature, ManagerSignature: signature,
Timestamp: uint64(now),
Workload: uint64(task.TaskWorkload), Workload: uint64(task.TaskWorkload),
ContainerPubkey: utils.CombineBytes(task.ContainerPubkey), ContainerPubkey: utils.CombineBytes(task.ContainerPubkey),
} }
...@@ -115,6 +121,9 @@ func (wm *WorkerManager) computeTaskResult(worker *Worker, task *odysseus.TaskCo ...@@ -115,6 +121,9 @@ func (wm *WorkerManager) computeTaskResult(worker *Worker, task *odysseus.TaskCo
} }
func (wm *WorkerManager) standardTaskResult(worker *Worker, task *odysseus.TaskContent, result *omanager.SubmitTaskResult) (*omanager.ManagerMessage_ProofTaskResult, error) { func (wm *WorkerManager) standardTaskResult(worker *Worker, task *odysseus.TaskContent, result *omanager.SubmitTaskResult) (*omanager.ManagerMessage_ProofTaskResult, error) {
if worker.info.nodeInfo == nil {
return nil, errors.New("unknown worker node info")
}
log.WithFields(log.Fields{ log.WithFields(log.Fields{
"task-id": task.TaskId, "task-id": task.TaskId,
"task-type": task.TaskType, "task-type": task.TaskType,
...@@ -160,7 +169,7 @@ func (wm *WorkerManager) standardTaskResult(worker *Worker, task *odysseus.TaskC ...@@ -160,7 +169,7 @@ func (wm *WorkerManager) standardTaskResult(worker *Worker, task *odysseus.TaskC
paramHash := crypto.Keccak256Hash(task.TaskParam) paramHash := crypto.Keccak256Hash(task.TaskParam)
resultHash := crypto.Keccak256Hash(result.TaskResultBody) resultHash := crypto.Keccak256Hash(result.TaskResultBody)
dataHash := crypto.Keccak256Hash(utils.CombineBytes([]byte(result.TaskId), paramHash[:], resultHash[:])) dataHash := crypto.Keccak256Hash(utils.CombineBytes([]byte(result.TaskId), paramHash[:], resultHash[:]))
minerPubkey, _ := utils.HexToPubkey(worker.publicKey) // todo: get miner pubkey minerPubkey, _ := utils.HexToPubkey(worker.info.nodeInfo.MinerPubkey)
verified := ecdsa.VerifyASN1(minerPubkey, dataHash[:], result.MinerSignature) verified := ecdsa.VerifyASN1(minerPubkey, dataHash[:], result.MinerSignature)
log.WithField("minerSignatureVerify", verified).Debug("miner signature verify") log.WithField("minerSignatureVerify", verified).Debug("miner signature verify")
if !verified { if !verified {
...@@ -171,11 +180,13 @@ func (wm *WorkerManager) standardTaskResult(worker *Worker, task *odysseus.TaskC ...@@ -171,11 +180,13 @@ func (wm *WorkerManager) standardTaskResult(worker *Worker, task *odysseus.TaskC
receipt := wm.makeReceipt(worker, task, result, Succeed) receipt := wm.makeReceipt(worker, task, result, Succeed)
wm.node.PostResult(receipt) wm.node.PostResult(receipt)
//manager_signature = sign(hash((task_id+hash(task_param)+hash(task_result)+container_signature+miner_signature+workload)) now := time.Now().Unix()
//manager_signature = sign(hash((task_id+hash(task_param)+hash(task_result)+container_signature+miner_signature+workload+time))
paramHash := crypto.Keccak256Hash(task.TaskParam) paramHash := crypto.Keccak256Hash(task.TaskParam)
resultHash := crypto.Keccak256Hash(result.TaskResultBody) resultHash := crypto.Keccak256Hash(result.TaskResultBody)
dataHash := crypto.Keccak256Hash(utils.CombineBytes([]byte(result.TaskId), paramHash[:], resultHash[:], dataHash := crypto.Keccak256Hash(utils.CombineBytes([]byte(result.TaskId), paramHash[:], resultHash[:],
worker.ProfitAccount().Bytes(), worker.WorkerAccount().Bytes(), result.ContainerSignature, result.MinerSignature, big.NewInt(int64(task.TaskWorkload)).Bytes())) worker.ProfitAccount().Bytes(), worker.WorkerAccount().Bytes(), result.ContainerSignature, result.MinerSignature, big.NewInt(int64(task.TaskWorkload)).Bytes()),
big.NewInt(now).Bytes())
signature, err := wm.node.Sign(dataHash[:]) signature, err := wm.node.Sign(dataHash[:])
if err != nil { if err != nil {
...@@ -187,6 +198,7 @@ func (wm *WorkerManager) standardTaskResult(worker *Worker, task *odysseus.TaskC ...@@ -187,6 +198,7 @@ func (wm *WorkerManager) standardTaskResult(worker *Worker, task *odysseus.TaskC
proof.ProofTaskResult = &omanager.ProofTaskResult{ proof.ProofTaskResult = &omanager.ProofTaskResult{
TaskId: result.TaskId, TaskId: result.TaskId,
ManagerSignature: signature, ManagerSignature: signature,
Timestamp: uint64(now),
Workload: uint64(task.TaskWorkload), Workload: uint64(task.TaskWorkload),
ContainerPubkey: utils.CombineBytes(task.ContainerPubkey), ContainerPubkey: utils.CombineBytes(task.ContainerPubkey),
} }
......
This diff is collapsed.
...@@ -22,7 +22,7 @@ func (w workerRegistry) ServiceType() common.ServiceType { ...@@ -22,7 +22,7 @@ func (w workerRegistry) ServiceType() common.ServiceType {
} }
func (w workerRegistry) Instance() string { func (w workerRegistry) Instance() string {
return fmt.Sprintf("%s", w.worker.addr) return fmt.Sprintf("%s", w.worker.workerAddr)
} }
func (w workerRegistry) Status() string { func (w workerRegistry) Status() string {
...@@ -33,17 +33,18 @@ func (w workerRegistry) DetailInfo() (json.RawMessage, error) { ...@@ -33,17 +33,18 @@ func (w workerRegistry) DetailInfo() (json.RawMessage, error) {
if w.worker == nil { if w.worker == nil {
return nil, fmt.Errorf("worker is nil") return nil, fmt.Errorf("worker is nil")
} }
if w.worker.addr == "" { if w.worker.workerAddr == "" {
return nil, fmt.Errorf("worker address is empty") return nil, fmt.Errorf("worker address is empty")
} }
info := query.WorkerInfo{} info := query.WorkerInfo{}
info.BenefitAddress = w.worker.benefitAddr if w.worker.info.nodeInfo != nil {
if w.worker.deviceInfo != nil { info.BenefitAddress = w.worker.info.nodeInfo.BenefitAddress
info.IP = w.worker.deviceInfo.DeviceIps[0] info.IP = w.worker.info.nodeInfo.DeviceIp
} }
info.ActiveNM, _ = w.wm.WorkerNmList(w.worker) info.ActiveNM, _ = w.wm.WorkerNmList(w.worker)
info.HearBeat = w.wm.GetHeartBeat(w.worker.uuid) info.HearBeat = w.wm.GetHeartBeat(w.worker.uuid)
info.MinerAddress = w.worker.addr info.MinerAddress = w.worker.workerAddr
return json.Marshal(info) return json.Marshal(info)
} }
...@@ -3,12 +3,20 @@ package server ...@@ -3,12 +3,20 @@ package server
import ( import (
"context" "context"
"encoding/hex" "encoding/hex"
"errors"
"fmt" "fmt"
"github.com/gomodule/redigo/redis"
"github.com/odysseus/nodemanager/config" "github.com/odysseus/nodemanager/config"
log "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus"
"strconv" "strconv"
"strings"
"time"
) )
// UpdateWorkerDeviceStatusInfo writes the worker's raw device-status payload to
// redis under the key derived from the worker's address, passing 0 as the
// expiration argument. The outcome of the write is not inspected.
func (wm *WorkerManager) UpdateWorkerDeviceStatusInfo(worker *Worker, status []byte) {
	key := workerDeviceStatusInfoKey(worker)
	wm.rdb.Set(context.Background(), key, status, 0)
}
func (wm *WorkerManager) UpdateWorkerUsageInfo(worker *Worker, usageInfo string) { func (wm *WorkerManager) UpdateWorkerUsageInfo(worker *Worker, usageInfo string) {
wm.rdb.Set(context.Background(), workerUsageInfoKey(worker), usageInfo, 0) wm.rdb.Set(context.Background(), workerUsageInfoKey(worker), usageInfo, 0)
} }
...@@ -22,24 +30,84 @@ func (wm *WorkerManager) UpdateWorkerResourceInfo(worker *Worker, resourceInfo [ ...@@ -22,24 +30,84 @@ func (wm *WorkerManager) UpdateWorkerResourceInfo(worker *Worker, resourceInfo [
wm.rdb.Set(context.Background(), workerResourceInfoKey(worker), rstr, 0) wm.rdb.Set(context.Background(), workerResourceInfoKey(worker), rstr, 0)
} }
// UpdateWorkerNonce overwrites the nonce stored in redis for the worker and
// returns the error reported by the write, if any. The expiration argument is 0.
func (wm *WorkerManager) UpdateWorkerNonce(worker *Worker, nonce int) error {
	cmd := wm.rdb.Set(context.Background(), workerNonceKey(worker), nonce, 0)
	return cmd.Err()
}
// GetWorkerNonce returns the nonce stored in redis for the worker.
//
// The worker must have a non-empty workerAddr; otherwise an error is returned.
// When the lookup reports redis.ErrNil the nonce is initialised to 1 and
// written back to redis; a failure of that write is returned to the caller.
//
// NOTE(review): any other lookup error is dropped and the value produced by
// Int() is returned with a nil error. This mirrors the existing behaviour;
// confirm whether such errors should be propagated instead.
// NOTE(review): redis.ErrNil comes from the redigo package imported by this
// file — confirm that wm.rdb actually returns that sentinel for a missing key.
func (wm *WorkerManager) GetWorkerNonce(worker *Worker) (int, error) {
	if worker.workerAddr == "" {
		return 0, errors.New("unknown worker node info")
	}

	nonceK := workerNonceKey(worker)
	nonce, err := wm.rdb.Get(context.Background(), nonceK).Int()
	if err == redis.ErrNil {
		// First time this worker is seen: start counting at 1.
		nonce = 1
		if err = wm.rdb.Set(context.Background(), nonceK, nonce, 0).Err(); err != nil {
			return 0, err
		}
	}
	return nonce, nil
}
// IncrWorkerNonce increments the worker's nonce key in redis and returns the
// resulting value converted to int, together with any error from the command.
func (wm *WorkerManager) IncrWorkerNonce(worker *Worker) (int, error) {
	key := workerNonceKey(worker)
	next, err := wm.rdb.Incr(context.Background(), key).Uint64()
	return int(next), err
}
func (wm *WorkerManager) AddWorkerFirst(worker *Worker) error { func (wm *WorkerManager) AddWorkerFirst(worker *Worker) error {
log.WithField("worker", worker.addr).Info("add worker first time.") log.WithField("worker", worker.workerAddr).Info("add worker first time.")
for _, device := range worker.deviceInfo.Devices { wm.UpdateWorkerActive(worker)
for _, device := range worker.info.deviceInfo.Devices {
if !strings.HasPrefix(device.DeviceType, "gpu") {
continue
}
// add device to redis // add device to redis
priority := 0 priority := 0
_ = device // todo: set priority with device info. _ = device // todo: set priority with device info.
// add worker to redis queue
if err := wm.rdb.RPush(context.Background(), config.WORKER_QUEUE_PREFIX+strconv.Itoa(priority), workerId(worker)).Err(); err != nil { if err := wm.rdb.RPush(context.Background(), config.WORKER_QUEUE_PREFIX+strconv.Itoa(priority), workerId(worker)).Err(); err != nil {
continue continue
} }
} }
// add worker to redis queue
wm.ActiveWorker(worker)
return nil return nil
} }
// AddWorkerToQueue resolves the worker's nonce, reconciles the worker's
// node-manager status set in redis, and then registers the worker through
// AddWorkerFirst, marking worker.addFirstSucceed on completion.
//
// Nothing is registered when the nonce cannot be read or incremented; both
// failures are logged. An error from WorkerNmList does not stop registration.
func (wm *WorkerManager) AddWorkerToQueue(worker *Worker) {
	nonce, err := wm.GetWorkerNonce(worker)
	if err != nil {
		log.WithField("worker-addr", worker.workerAddr).Error("get worker nonce failed when get device info")
		return
	}

	// The list error lives in its own variable so it cannot hide a later
	// failure of IncrWorkerNonce from the final registration step.
	nmlist, listErr := wm.WorkerNmList(worker)
	switch {
	case listErr != nil:
		// If the status key does not exist, the nonce does not change.
		// NOTE(review): redis.ErrNil is the redigo sentinel imported by this
		// file — confirm WorkerNmList can actually return it.
		if listErr == redis.ErrNil {
			wm.UpdateWorkerActive(worker)
		}
	case len(nmlist) == 0:
		// An empty node-manager list means the nonce must be incremented.
		nonce, err = wm.IncrWorkerNonce(worker)
		if err != nil {
			log.WithField("worker-addr", worker.workerAddr).Error("incr worker nonce failed when get device info")
			// Do not enqueue the worker with the value of a failed increment.
			return
		}
	default:
		// A non-empty list is cleared and this node manager re-added to it.
		wm.rdb.Del(context.Background(), workerStatusKey(worker))
		wm.UpdateWorkerActive(worker)
	}

	worker.nonce = nonce
	wm.AddWorkerFirst(worker)
	worker.addFirstSucceed = true
}
func (wm *WorkerManager) AddWorkerSingle(worker *Worker) error { func (wm *WorkerManager) AddWorkerSingle(worker *Worker) error {
log.WithField("worker", worker.addr).Info("add worker on back.") log.WithField("worker", worker.workerAddr).Info("add worker on back.")
wm.UpdateWorkerActive(worker)
{ {
// add worker to redis queue // add worker to redis queue
priority := 0 priority := 0
...@@ -48,12 +116,45 @@ func (wm *WorkerManager) AddWorkerSingle(worker *Worker) error { ...@@ -48,12 +116,45 @@ func (wm *WorkerManager) AddWorkerSingle(worker *Worker) error {
} }
} }
// add worker to redis queue // add worker to redis queue
wm.ActiveWorker(worker)
return nil return nil
} }
func (wm *WorkerManager) ActiveWorker(worker *Worker) { func (wm *WorkerManager) UpdateWorkerActive(worker *Worker) {
wm.rdb.SAdd(context.Background(), workerStatusKey(worker), config.GetConfig().PublicEndpoint()) if !worker.online {
return
}
nonce, err := wm.GetWorkerNonce(worker)
if err != nil {
return
}
if nonce != worker.nonce {
wm.InActiveWorker(worker)
worker.nonce = nonce
}
old := worker.latestNmValue
if err := wm.activeWorker(worker); err != nil {
return
}
wm.rdb.SRem(context.Background(), workerStatusKey(worker), old)
}
// activeWorker builds this node manager's status value as
// "<public endpoint>#<unix seconds>", remembers it on worker.latestNmValue,
// and adds it to the worker's status set in redis. It returns the error
// reported by the set-add command, if any.
func (wm *WorkerManager) activeWorker(worker *Worker) error {
	const sep = "#"
	endpoint := config.GetConfig().PublicEndpoint()
	worker.latestNmValue = fmt.Sprintf("%s%s%d", endpoint, sep, time.Now().Unix())
	return wm.rdb.SAdd(context.Background(), workerStatusKey(worker), worker.latestNmValue).Err()
}
func (wm *WorkerManager) parseWorkerNmValue(nmValue string) (string, int64) {
split := "#"
strs := strings.Split(nmValue, split)
if len(strs) == 2 {
endpoint := strs[0]
timestamp, _ := strconv.ParseInt(strs[1], 10, 64)
return endpoint, timestamp
}
return "", 0
} }
func (wm *WorkerManager) WorkerNmList(worker *Worker) ([]string, error) { func (wm *WorkerManager) WorkerNmList(worker *Worker) ([]string, error) {
...@@ -61,7 +162,8 @@ func (wm *WorkerManager) WorkerNmList(worker *Worker) ([]string, error) { ...@@ -61,7 +162,8 @@ func (wm *WorkerManager) WorkerNmList(worker *Worker) ([]string, error) {
} }
func (wm *WorkerManager) InActiveWorker(worker *Worker) { func (wm *WorkerManager) InActiveWorker(worker *Worker) {
wm.rdb.SRem(context.Background(), workerStatusKey(worker), config.GetConfig().PublicEndpoint()) wm.rdb.SRem(context.Background(), workerStatusKey(worker), worker.latestNmValue)
if list, err := wm.rdb.SMembers(context.Background(), workerStatusKey(worker)).Result(); err == nil && len(list) == 0 { if list, err := wm.rdb.SMembers(context.Background(), workerStatusKey(worker)).Result(); err == nil && len(list) == 0 {
wm.rdb.Del(context.Background(), workerStatusKey(worker)) wm.rdb.Del(context.Background(), workerStatusKey(worker))
wm.rdb.Del(context.Background(), workerUsageInfoKey(worker)) wm.rdb.Del(context.Background(), workerUsageInfoKey(worker))
...@@ -71,15 +173,23 @@ func (wm *WorkerManager) InActiveWorker(worker *Worker) { ...@@ -71,15 +173,23 @@ func (wm *WorkerManager) InActiveWorker(worker *Worker) {
} }
func workerResourceInfoKey(w *Worker) string { func workerResourceInfoKey(w *Worker) string {
return config.WORKER_RESOURCE_INFO_PREFIX + w.addr return config.WORKER_RESOURCE_INFO_PREFIX + w.workerAddr
} }
func workerDeviceInfoKey(w *Worker) string { func workerDeviceInfoKey(w *Worker) string {
return config.WORKER_DEVICE_INFO_PREFIX + w.addr return config.WORKER_DEVICE_INFO_PREFIX + w.workerAddr
} }
func workerUsageInfoKey(w *Worker) string { func workerUsageInfoKey(w *Worker) string {
return config.WORKER_USAGE_INFO_PREFIX + w.addr return config.WORKER_USAGE_INFO_PREFIX + w.workerAddr
}
// workerDeviceStatusInfoKey returns the redis key for the worker's device
// status: the device-status prefix followed by the worker's address.
func workerDeviceStatusInfoKey(w *Worker) string {
	addr := w.workerAddr
	return config.WORKER_DEVICE_STATUS_PREFIX + addr
}
func workerNonceKey(w *Worker) string {
return config.WORKER_NONCE_KEY_PREFIX + w.workerAddr
} }
func workerStatusKey(w *Worker) string { func workerStatusKey(w *Worker) string {
...@@ -88,5 +198,5 @@ func workerStatusKey(w *Worker) string { ...@@ -88,5 +198,5 @@ func workerStatusKey(w *Worker) string {
} }
func workerId(w *Worker) string { func workerId(w *Worker) string {
return fmt.Sprintf("%s_%d", w.addr, w.uuid) return fmt.Sprintf("%s_%d", w.workerAddr, w.nonce)
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment