Commit b6f1a32a authored by vicotor's avatar vicotor

update worker manager

parent 4827c9fc
......@@ -3,8 +3,10 @@ package config
// Key names and key prefixes shared by the node manager. The worker prefixes
// are concatenated with a per-worker suffix (or, for the queue, a priority
// number) by the helpers that build the actual storage keys.
const (
	// NODE_MANAGER_SET is a fixed key name (no suffix is appended to it here).
	NODE_MANAGER_SET = "node_manager_set"

	// Per-worker bookkeeping.
	WORKER_STATUS_PREFIX    = "worker_status_"
	WORKER_NONCE_KEY_PREFIX = "worker_nonce_"
	WORKER_QUEUE_PREFIX     = "worker_queue_"

	// Per-worker reported information.
	WORKER_DEVICE_INFO_PREFIX   = "worker_device_info_"
	WORKER_DEVICE_STATUS_PREFIX = "worker_device_status_"
	WORKER_USAGE_INFO_PREFIX    = "worker_usage_info_"
	WORKER_RESOURCE_INFO_PREFIX = "worker_resource_info_"
)
......@@ -33,6 +33,8 @@ func (n *NodeManagerService) ManagerList(ctx context.Context, request *omanager.
}
func (n *NodeManagerService) RegisterWorker(client omanager.NodeManagerService_RegisterWorkerServer) error {
//return n.node.wm.handleNewDial(client)
uuid := utils.GetSnowflakeId()
worker, err := n.node.wm.AddNewWorker(uuid, client)
......
......@@ -9,6 +9,7 @@ import (
omanager "github.com/odysseus/odysseus-protocol/gen/proto/go/nodemanager/v1"
log "github.com/sirupsen/logrus"
"math/big"
"time"
)
func (wm *WorkerManager) taskResult(worker *Worker, task *odysseus.TaskContent, result *omanager.SubmitTaskResult) (*omanager.ManagerMessage_ProofTaskResult, error) {
......@@ -17,13 +18,15 @@ func (wm *WorkerManager) taskResult(worker *Worker, task *odysseus.TaskContent,
return wm.computeTaskResult(worker, task, result)
case odysseus.TaskKind_StandardTask:
return wm.standardTaskResult(worker, task, result)
}
return nil, errors.New("unsupport task kind")
}
func (wm *WorkerManager) computeTaskResult(worker *Worker, task *odysseus.TaskContent, result *omanager.SubmitTaskResult) (*omanager.ManagerMessage_ProofTaskResult, error) {
if worker.info.nodeInfo == nil {
return nil, errors.New("unknown worker node info")
}
log.WithFields(log.Fields{
"task-id": task.TaskId,
"task-type": task.TaskType,
......@@ -77,7 +80,7 @@ func (wm *WorkerManager) computeTaskResult(worker *Worker, task *odysseus.TaskCo
paramHash := crypto.Keccak256Hash(task.TaskParam)
resultHash := crypto.Keccak256Hash(result.TaskResultBody)
dataHash := crypto.Keccak256Hash(utils.CombineBytes([]byte(result.TaskId), paramHash[:], resultHash[:]))
minerPubkey, _ := utils.HexToPubkey(worker.publicKey) // todo: get miner pubkey
minerPubkey, _ := utils.HexToPubkey(worker.info.nodeInfo.MinerPubkey)
verified := ecdsa.VerifyASN1(minerPubkey, dataHash[:], result.MinerSignature)
log.WithField("minerSignatureVerify", verified).Debug("miner signature verify")
if !verified {
......@@ -88,11 +91,13 @@ func (wm *WorkerManager) computeTaskResult(worker *Worker, task *odysseus.TaskCo
receipt := wm.makeReceipt(worker, task, result, Succeed)
wm.node.PostResult(receipt)
//manager_signature = sign(hash((task_id+hash(task_param)+hash(task_result)+container_signature+miner_signature+workload))
//manager_signature = sign(hash((task_id+hash(task_param)+hash(task_result)+container_signature+miner_signature+workload+time))
now := time.Now().Unix()
paramHash := crypto.Keccak256Hash(task.TaskParam)
resultHash := crypto.Keccak256Hash(result.TaskResultBody)
dataHash := crypto.Keccak256Hash(utils.CombineBytes([]byte(result.TaskId), paramHash[:], resultHash[:],
worker.ProfitAccount().Bytes(), worker.WorkerAccount().Bytes(), result.ContainerSignature, result.MinerSignature, big.NewInt(int64(task.TaskWorkload)).Bytes()))
worker.ProfitAccount().Bytes(), worker.WorkerAccount().Bytes(), result.ContainerSignature, result.MinerSignature, big.NewInt(int64(task.TaskWorkload)).Bytes()),
big.NewInt(now).Bytes())
signature, err := wm.node.Sign(dataHash[:])
if err != nil {
......@@ -104,6 +109,7 @@ func (wm *WorkerManager) computeTaskResult(worker *Worker, task *odysseus.TaskCo
proof.ProofTaskResult = &omanager.ProofTaskResult{
TaskId: result.TaskId,
ManagerSignature: signature,
Timestamp: uint64(now),
Workload: uint64(task.TaskWorkload),
ContainerPubkey: utils.CombineBytes(task.ContainerPubkey),
}
......@@ -115,6 +121,9 @@ func (wm *WorkerManager) computeTaskResult(worker *Worker, task *odysseus.TaskCo
}
func (wm *WorkerManager) standardTaskResult(worker *Worker, task *odysseus.TaskContent, result *omanager.SubmitTaskResult) (*omanager.ManagerMessage_ProofTaskResult, error) {
if worker.info.nodeInfo == nil {
return nil, errors.New("unknown worker node info")
}
log.WithFields(log.Fields{
"task-id": task.TaskId,
"task-type": task.TaskType,
......@@ -160,7 +169,7 @@ func (wm *WorkerManager) standardTaskResult(worker *Worker, task *odysseus.TaskC
paramHash := crypto.Keccak256Hash(task.TaskParam)
resultHash := crypto.Keccak256Hash(result.TaskResultBody)
dataHash := crypto.Keccak256Hash(utils.CombineBytes([]byte(result.TaskId), paramHash[:], resultHash[:]))
minerPubkey, _ := utils.HexToPubkey(worker.publicKey) // todo: get miner pubkey
minerPubkey, _ := utils.HexToPubkey(worker.info.nodeInfo.MinerPubkey)
verified := ecdsa.VerifyASN1(minerPubkey, dataHash[:], result.MinerSignature)
log.WithField("minerSignatureVerify", verified).Debug("miner signature verify")
if !verified {
......@@ -171,11 +180,13 @@ func (wm *WorkerManager) standardTaskResult(worker *Worker, task *odysseus.TaskC
receipt := wm.makeReceipt(worker, task, result, Succeed)
wm.node.PostResult(receipt)
//manager_signature = sign(hash((task_id+hash(task_param)+hash(task_result)+container_signature+miner_signature+workload))
now := time.Now().Unix()
//manager_signature = sign(hash((task_id+hash(task_param)+hash(task_result)+container_signature+miner_signature+workload+time))
paramHash := crypto.Keccak256Hash(task.TaskParam)
resultHash := crypto.Keccak256Hash(result.TaskResultBody)
dataHash := crypto.Keccak256Hash(utils.CombineBytes([]byte(result.TaskId), paramHash[:], resultHash[:],
worker.ProfitAccount().Bytes(), worker.WorkerAccount().Bytes(), result.ContainerSignature, result.MinerSignature, big.NewInt(int64(task.TaskWorkload)).Bytes()))
worker.ProfitAccount().Bytes(), worker.WorkerAccount().Bytes(), result.ContainerSignature, result.MinerSignature, big.NewInt(int64(task.TaskWorkload)).Bytes()),
big.NewInt(now).Bytes())
signature, err := wm.node.Sign(dataHash[:])
if err != nil {
......@@ -187,6 +198,7 @@ func (wm *WorkerManager) standardTaskResult(worker *Worker, task *odysseus.TaskC
proof.ProofTaskResult = &omanager.ProofTaskResult{
TaskId: result.TaskId,
ManagerSignature: signature,
Timestamp: uint64(now),
Workload: uint64(task.TaskWorkload),
ContainerPubkey: utils.CombineBytes(task.ContainerPubkey),
}
......
This diff is collapsed.
......@@ -22,7 +22,7 @@ func (w workerRegistry) ServiceType() common.ServiceType {
}
// Instance returns the address of the wrapped worker.
//
// The pre-change return of w.worker.addr is dropped: it preceded the updated
// return and made it unreachable. workerAddr is already a string, so it is
// returned directly instead of going through fmt.Sprintf("%s", ...).
func (w workerRegistry) Instance() string {
	return w.worker.workerAddr
}
func (w workerRegistry) Status() string {
......@@ -33,17 +33,18 @@ func (w workerRegistry) DetailInfo() (json.RawMessage, error) {
if w.worker == nil {
return nil, fmt.Errorf("worker is nil")
}
if w.worker.addr == "" {
if w.worker.workerAddr == "" {
return nil, fmt.Errorf("worker address is empty")
}
info := query.WorkerInfo{}
info.BenefitAddress = w.worker.benefitAddr
if w.worker.deviceInfo != nil {
info.IP = w.worker.deviceInfo.DeviceIps[0]
if w.worker.info.nodeInfo != nil {
info.BenefitAddress = w.worker.info.nodeInfo.BenefitAddress
info.IP = w.worker.info.nodeInfo.DeviceIp
}
info.ActiveNM, _ = w.wm.WorkerNmList(w.worker)
info.HearBeat = w.wm.GetHeartBeat(w.worker.uuid)
info.MinerAddress = w.worker.addr
info.MinerAddress = w.worker.workerAddr
return json.Marshal(info)
}
......@@ -3,12 +3,20 @@ package server
import (
"context"
"encoding/hex"
"errors"
"fmt"
"github.com/gomodule/redigo/redis"
"github.com/odysseus/nodemanager/config"
log "github.com/sirupsen/logrus"
"strconv"
"strings"
"time"
)
// UpdateWorkerDeviceStatusInfo stores the raw device status payload under the
// worker's device-status key, passing 0 as the expiration argument.
//
// The method has no error return, so a failed write is logged rather than
// silently dropped.
func (wm *WorkerManager) UpdateWorkerDeviceStatusInfo(worker *Worker, status []byte) {
	key := workerDeviceStatusInfoKey(worker)
	if err := wm.rdb.Set(context.Background(), key, status, 0).Err(); err != nil {
		log.WithError(err).WithField("worker-addr", worker.workerAddr).Error("update worker device status info failed")
	}
}
// UpdateWorkerUsageInfo stores the usage info string under the worker's
// usage-info key, passing 0 as the expiration argument.
//
// The method has no error return, so a failed write is logged rather than
// silently dropped.
func (wm *WorkerManager) UpdateWorkerUsageInfo(worker *Worker, usageInfo string) {
	key := workerUsageInfoKey(worker)
	if err := wm.rdb.Set(context.Background(), key, usageInfo, 0).Err(); err != nil {
		log.WithError(err).WithField("worker-addr", worker.workerAddr).Error("update worker usage info failed")
	}
}
......@@ -22,24 +30,84 @@ func (wm *WorkerManager) UpdateWorkerResourceInfo(worker *Worker, resourceInfo [
wm.rdb.Set(context.Background(), workerResourceInfoKey(worker), rstr, 0)
}
// UpdateWorkerNonce overwrites the stored nonce of the worker with the given
// value (expiration argument 0) and returns the error of that write, if any.
func (wm *WorkerManager) UpdateWorkerNonce(worker *Worker, nonce int) error {
	key := workerNonceKey(worker)
	cmd := wm.rdb.Set(context.Background(), key, nonce, 0)
	return cmd.Err()
}
// GetWorkerNonce reads the stored nonce of the worker.
//
// A worker with an empty workerAddr is rejected with an error. When the read
// reports redis.ErrNil the nonce is initialised to 1, written back, and
// returned; a failure of that write is returned to the caller.
//
// NOTE(review): any read error other than redis.ErrNil is not propagated; the
// value produced by the failed read is returned with a nil error.
// NOTE(review): redis.ErrNil is the redigo sentinel. If wm.rdb is a go-redis
// client, a missing key is reported with a different sentinel and the
// initialise-to-1 branch would never run — confirm against the rdb type.
func (wm *WorkerManager) GetWorkerNonce(worker *Worker) (int, error) {
	if worker.workerAddr == "" {
		return 0, errors.New("unkown worker node info")
	}

	ctx := context.Background()
	key := workerNonceKey(worker)

	value, readErr := wm.rdb.Get(ctx, key).Int()
	if readErr != redis.ErrNil {
		return value, nil
	}

	// No nonce stored yet: start at 1 and persist it.
	value = 1
	if writeErr := wm.rdb.Set(ctx, key, value, 0).Err(); writeErr != nil {
		return 0, writeErr
	}
	return value, nil
}
// IncrWorkerNonce increments the stored nonce of the worker and returns the
// resulting value converted to int, together with the error of the increment.
func (wm *WorkerManager) IncrWorkerNonce(worker *Worker) (int, error) {
	key := workerNonceKey(worker)
	value, err := wm.rdb.Incr(context.Background(), key).Uint64()
	return int(value), err
}
func (wm *WorkerManager) AddWorkerFirst(worker *Worker) error {
log.WithField("worker", worker.addr).Info("add worker first time.")
for _, device := range worker.deviceInfo.Devices {
log.WithField("worker", worker.workerAddr).Info("add worker first time.")
wm.UpdateWorkerActive(worker)
for _, device := range worker.info.deviceInfo.Devices {
if !strings.HasPrefix(device.DeviceType, "gpu") {
continue
}
// add device to redis
priority := 0
_ = device // todo: set priority with device info.
// add worker to redis queue
if err := wm.rdb.RPush(context.Background(), config.WORKER_QUEUE_PREFIX+strconv.Itoa(priority), workerId(worker)).Err(); err != nil {
continue
}
}
// add worker to redis queue
wm.ActiveWorker(worker)
return nil
}
// AddWorkerToQueue settles the worker's nonce and then queues the worker via
// AddWorkerFirst, setting worker.addFirstSucceed afterwards.
//
// Nonce handling depends on the worker's node-manager list:
//   - the list lookup fails: the nonce is left unchanged (on redis.ErrNil the
//     worker is additionally marked active here);
//   - the list is empty: the nonce is incremented;
//   - the list is non-empty: the status key is cleared and this node manager
//     is re-added to it.
//
// Nothing is queued when the nonce cannot be read or incremented. Previously a
// shadowed err let a failed increment fall through, so the worker was queued
// with the value returned by the failed call.
func (wm *WorkerManager) AddWorkerToQueue(worker *Worker) {
	nonce, err := wm.GetWorkerNonce(worker)
	if err != nil {
		log.WithError(err).WithField("worker-addr", worker.workerAddr).Error("get worker nonce failed when get device info")
		return
	}

	// if statekeys not exist, nonce don't change.
	nmlist, listErr := wm.WorkerNmList(worker)
	switch {
	case listErr != nil:
		// A lookup failure does not block queueing (unchanged behaviour);
		// only the ErrNil case refreshes the active marker here.
		if listErr == redis.ErrNil {
			wm.UpdateWorkerActive(worker)
		} else {
			log.WithError(listErr).WithField("worker-addr", worker.workerAddr).Warn("get worker nm list failed when get device info")
		}
	case len(nmlist) == 0:
		// if nmlist is empty, nonce incr.
		nonce, err = wm.IncrWorkerNonce(worker)
		if err != nil {
			log.WithError(err).WithField("worker-addr", worker.workerAddr).Error("incr worker nonce failed when get device info")
			return
		}
	default:
		// else if nmlist is not empty, clear and add self to it.
		wm.rdb.Del(context.Background(), workerStatusKey(worker))
		wm.UpdateWorkerActive(worker)
	}

	worker.nonce = nonce
	wm.AddWorkerFirst(worker)
	worker.addFirstSucceed = true
}
func (wm *WorkerManager) AddWorkerSingle(worker *Worker) error {
log.WithField("worker", worker.addr).Info("add worker on back.")
log.WithField("worker", worker.workerAddr).Info("add worker on back.")
wm.UpdateWorkerActive(worker)
{
// add worker to redis queue
priority := 0
......@@ -48,12 +116,45 @@ func (wm *WorkerManager) AddWorkerSingle(worker *Worker) error {
}
}
// add worker to redis queue
wm.ActiveWorker(worker)
return nil
}
func (wm *WorkerManager) ActiveWorker(worker *Worker) {
wm.rdb.SAdd(context.Background(), workerStatusKey(worker), config.GetConfig().PublicEndpoint())
// UpdateWorkerActive refreshes this node manager's entry in the worker's
// status set. It is a no-op for workers that are not online or whose nonce
// cannot be read.
//
// When the stored nonce differs from worker.nonce, the worker is first
// deactivated and worker.nonce is updated. A fresh entry is then added via
// activeWorker and the previously recorded entry is removed.
func (wm *WorkerManager) UpdateWorkerActive(worker *Worker) {
	if !worker.online {
		return
	}
	nonce, err := wm.GetWorkerNonce(worker)
	if err != nil {
		return
	}
	if nonce != worker.nonce {
		wm.InActiveWorker(worker)
		worker.nonce = nonce
	}

	old := worker.latestNmValue
	if err := wm.activeWorker(worker); err != nil {
		return
	}
	// The entry embeds a Unix-second timestamp, so two refreshes within the
	// same second produce the same value. Removing "old" in that case would
	// delete the entry that was just added; there is also nothing to remove
	// when no previous entry was recorded.
	if old != "" && old != worker.latestNmValue {
		wm.rdb.SRem(context.Background(), workerStatusKey(worker), old)
	}
}
// activeWorker builds the set member "<public endpoint>#<unix seconds>",
// records it in worker.latestNmValue, and adds it to the worker's status set.
// The member is recorded on the worker before the add is attempted; the add's
// error is returned.
func (wm *WorkerManager) activeWorker(worker *Worker) error {
	const sep = "#"
	endpoint := config.GetConfig().PublicEndpoint()
	member := fmt.Sprintf("%s%s%d", endpoint, sep, time.Now().Unix())
	worker.latestNmValue = member

	key := workerStatusKey(worker)
	return wm.rdb.SAdd(context.Background(), key, member).Err()
}
// parseWorkerNmValue splits a value of the form "<endpoint>#<timestamp>" into
// its endpoint and its timestamp parsed as a base-10 int64.
//
// A value that does not contain exactly one "#" yields ("", 0). The parse
// error is ignored, so the timestamp is whatever strconv.ParseInt returned
// (0 for a non-numeric timestamp).
func (wm *WorkerManager) parseWorkerNmValue(nmValue string) (string, int64) {
	const sep = "#"
	if strings.Count(nmValue, sep) != 1 {
		return "", 0
	}
	at := strings.Index(nmValue, sep)
	ts, _ := strconv.ParseInt(nmValue[at+len(sep):], 10, 64)
	return nmValue[:at], ts
}
func (wm *WorkerManager) WorkerNmList(worker *Worker) ([]string, error) {
......@@ -61,7 +162,8 @@ func (wm *WorkerManager) WorkerNmList(worker *Worker) ([]string, error) {
}
func (wm *WorkerManager) InActiveWorker(worker *Worker) {
wm.rdb.SRem(context.Background(), workerStatusKey(worker), config.GetConfig().PublicEndpoint())
wm.rdb.SRem(context.Background(), workerStatusKey(worker), worker.latestNmValue)
if list, err := wm.rdb.SMembers(context.Background(), workerStatusKey(worker)).Result(); err == nil && len(list) == 0 {
wm.rdb.Del(context.Background(), workerStatusKey(worker))
wm.rdb.Del(context.Background(), workerUsageInfoKey(worker))
......@@ -71,15 +173,23 @@ func (wm *WorkerManager) InActiveWorker(worker *Worker) {
}
// workerResourceInfoKey returns the resource-info key of the worker: the
// resource-info prefix followed by the worker address.
//
// The superseded return based on w.addr is dropped; it preceded the
// workerAddr return and made it unreachable.
func workerResourceInfoKey(w *Worker) string {
	return config.WORKER_RESOURCE_INFO_PREFIX + w.workerAddr
}
// workerDeviceInfoKey returns the device-info key of the worker: the
// device-info prefix followed by the worker address.
//
// The superseded return based on w.addr is dropped; it preceded the
// workerAddr return and made it unreachable.
func workerDeviceInfoKey(w *Worker) string {
	return config.WORKER_DEVICE_INFO_PREFIX + w.workerAddr
}
// workerUsageInfoKey returns the usage-info key of the worker: the usage-info
// prefix followed by the worker address.
//
// The superseded return based on w.addr is dropped; it preceded the
// workerAddr return and made it unreachable.
func workerUsageInfoKey(w *Worker) string {
	return config.WORKER_USAGE_INFO_PREFIX + w.workerAddr
}
// workerDeviceStatusInfoKey returns the device-status key of the worker: the
// device-status prefix followed by the worker address.
func workerDeviceStatusInfoKey(w *Worker) string {
	addr := w.workerAddr
	return config.WORKER_DEVICE_STATUS_PREFIX + addr
}
// workerNonceKey returns the nonce key of the worker: the nonce prefix
// followed by the worker address.
func workerNonceKey(w *Worker) string {
	addr := w.workerAddr
	return config.WORKER_NONCE_KEY_PREFIX + addr
}
func workerStatusKey(w *Worker) string {
......@@ -88,5 +198,5 @@ func workerStatusKey(w *Worker) string {
}
// workerId returns the queue identifier of the worker, formatted as
// "<workerAddr>_<nonce>".
//
// The superseded return built from w.addr and w.uuid is dropped; it preceded
// the workerAddr/nonce return and made it unreachable.
func workerId(w *Worker) string {
	return fmt.Sprintf("%s_%d", w.workerAddr, w.nonce)
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment