Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
N
nodemanager
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Odysseus
nodemanager
Commits
f3f5c792
Commit
f3f5c792
authored
May 29, 2024
by
vicotor
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
update protocol
parent
14bc5d34
Changes
5
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
153 additions
and
174 deletions
+153
-174
dispatchTask.go
server/dispatchTask.go
+1
-1
service.go
server/service.go
+0
-8
workerManager.go
server/workerManager.go
+44
-34
worker_registry.go
server/worker_registry.go
+1
-1
workerstatu.go
server/workerstatu.go
+107
-130
No files found.
server/dispatchTask.go
View file @
f3f5c792
...
@@ -115,7 +115,7 @@ func (d *dispatchTask) finalize(wm *WorkerManager) {
...
@@ -115,7 +115,7 @@ func (d *dispatchTask) finalize(wm *WorkerManager) {
task
:=
d
.
task
task
:=
d
.
task
if
task
.
TaskKind
!=
odysseus
.
TaskKind_StandardTask
&&
d
.
worker
.
online
==
true
{
if
task
.
TaskKind
!=
odysseus
.
TaskKind_StandardTask
&&
d
.
worker
.
online
==
true
{
_
=
wm
.
AddWorkerSingle
(
d
.
worker
)
}
}
_
,
err
:=
wm
.
taskResult
(
d
.
worker
,
task
,
result
)
_
,
err
:=
wm
.
taskResult
(
d
.
worker
,
task
,
result
)
...
...
server/service.go
View file @
f3f5c792
...
@@ -6,7 +6,6 @@ import (
...
@@ -6,7 +6,6 @@ import (
"github.com/odysseus/nodemanager/utils"
"github.com/odysseus/nodemanager/utils"
omanager
"github.com/odysseus/odysseus-protocol/gen/proto/go/nodemanager/v2"
omanager
"github.com/odysseus/odysseus-protocol/gen/proto/go/nodemanager/v2"
log
"github.com/sirupsen/logrus"
log
"github.com/sirupsen/logrus"
"strconv"
"strings"
"strings"
)
)
...
@@ -64,13 +63,6 @@ func (n *NodeManagerService) DispatchTask(ctx context.Context, request *omanager
...
@@ -64,13 +63,6 @@ func (n *NodeManagerService) DispatchTask(ctx context.Context, request *omanager
if
worker
.
online
==
false
{
if
worker
.
online
==
false
{
return
nil
,
errors
.
New
(
"worker offline"
)
return
nil
,
errors
.
New
(
"worker offline"
)
}
}
{
nonceds
:=
strings
.
Split
(
mids
[
1
],
":"
)
nonce
,
_
:=
strconv
.
ParseInt
(
nonceds
[
0
],
10
,
64
)
if
nonce
<
int64
(
worker
.
nonce
)
{
return
nil
,
errors
.
New
(
"expired worker nonce"
)
}
}
dtask
:=
newDispatchTask
(
worker
,
request
.
TaskData
)
dtask
:=
newDispatchTask
(
worker
,
request
.
TaskData
)
...
...
server/workerManager.go
View file @
f3f5c792
...
@@ -3,7 +3,6 @@ package server
...
@@ -3,7 +3,6 @@ package server
import
(
import
(
"bytes"
"bytes"
"encoding/hex"
"encoding/hex"
"encoding/json"
"errors"
"errors"
"fmt"
"fmt"
"github.com/golang/protobuf/proto"
"github.com/golang/protobuf/proto"
...
@@ -285,15 +284,17 @@ func (wm *WorkerManager) manageWorker(worker *Worker) error {
...
@@ -285,15 +284,17 @@ func (wm *WorkerManager) manageWorker(worker *Worker) error {
case
<-
workerCheckTicker
.
C
:
case
<-
workerCheckTicker
.
C
:
if
worker
.
info
.
nodeInfo
!=
nil
{
if
worker
.
info
.
nodeInfo
!=
nil
{
//nodeinfoTicker.Reset(time.Hour * 24
)
nodeinfoTicker
.
Reset
(
time
.
Minute
*
30
)
}
}
if
worker
.
usage
.
hwUsage
!=
nil
{
if
worker
.
usage
.
hwUsage
!=
nil
{
deviceUsageTicker
.
Reset
(
time
.
Second
*
time
.
Duration
(
tickerConf
.
DeviceUsageTicker
))
deviceUsageTicker
.
Reset
(
time
.
Second
*
time
.
Duration
(
tickerConf
.
DeviceUsageTicker
))
}
}
if
worker
.
registed
&&
worker
.
addFirstSucceed
==
false
&&
len
(
worker
.
deviceInfoHash
)
>
0
{
if
worker
.
registed
&&
worker
.
addFirstSucceed
==
false
{
wm
.
AddWorkerToQueue
(
worker
)
if
err
:=
wm
.
AddWorker
(
worker
);
err
==
nil
{
worker
.
addFirstSucceed
=
true
}
}
}
wm
.
UpdateWorkerActive
(
worker
)
wm
.
UpdateWorkerActive
(
worker
)
...
@@ -477,6 +478,7 @@ func (wm *WorkerManager) handleWorkerMsg(worker *Worker) {
...
@@ -477,6 +478,7 @@ func (wm *WorkerManager) handleWorkerMsg(worker *Worker) {
"heartBeat"
:
time
.
Now
()
.
Unix
()
-
int64
(
msg
.
HeartbeatResponse
.
Timestamp
),
"heartBeat"
:
time
.
Now
()
.
Unix
()
-
int64
(
msg
.
HeartbeatResponse
.
Timestamp
),
})
.
Debug
(
"receive worker heartbeat"
)
})
.
Debug
(
"receive worker heartbeat"
)
case
*
omanager
.
WorkerMessage_NodeInfo
:
case
*
omanager
.
WorkerMessage_NodeInfo
:
// todo: remove this message.
nodeinfo
:=
msg
.
NodeInfo
nodeinfo
:=
msg
.
NodeInfo
log
.
WithField
(
"worker-addr"
,
worker
.
workerAddr
)
.
Debugf
(
"receive worker node info:%v"
,
nodeinfo
)
log
.
WithField
(
"worker-addr"
,
worker
.
workerAddr
)
.
Debugf
(
"receive worker node info:%v"
,
nodeinfo
)
if
nodeinfo
.
Hardware
!=
nil
&&
nodeinfo
.
Hardware
.
NET
!=
nil
{
if
nodeinfo
.
Hardware
!=
nil
&&
nodeinfo
.
Hardware
.
NET
!=
nil
{
...
@@ -566,29 +568,15 @@ func (wm *WorkerManager) handleWorkerMsg(worker *Worker) {
...
@@ -566,29 +568,15 @@ func (wm *WorkerManager) handleWorkerMsg(worker *Worker) {
// ignore the info.
// ignore the info.
continue
continue
}
}
// todo: verify signature
{
{
var
infoHash
[
32
]
byte
var
infoHash
[
32
]
byte
infoData
,
err
:=
json
.
Marshal
(
msg
.
DeviceInfo
)
infoHash
=
sha3
.
Sum256
([]
byte
(
msg
.
DeviceInfo
.
String
()))
if
err
!=
nil
{
// update local cache.
l
.
WithFields
(
log
.
Fields
{
"worker-addr"
:
worker
.
workerAddr
,
"error"
:
err
,
})
.
Error
(
"marshal device info failed"
)
}
if
len
(
infoData
)
==
0
{
continue
}
infoHash
=
sha3
.
Sum256
(
infoData
)
worker
.
info
.
nodeInfo
.
Hardware
=
msg
.
DeviceInfo
.
Hardware
worker
.
info
.
nodeInfo
.
Hardware
=
msg
.
DeviceInfo
.
Hardware
if
worker
.
registed
&&
worker
.
addFirstSucceed
==
false
{
wm
.
AddWorkerToQueue
(
worker
)
}
// check device info changed, and update to cache.
// check device info changed, and update to cache.
if
bytes
.
Compare
(
infoHash
[
:
],
worker
.
deviceInfoHash
)
!=
0
{
if
bytes
.
Compare
(
infoHash
[
:
],
worker
.
deviceInfoHash
)
!=
0
{
wm
.
UpdateWorkerDeviceInfo
(
worker
,
string
(
infoData
)
)
wm
.
UpdateWorkerDeviceInfo
(
worker
,
msg
.
DeviceInfo
)
}
}
worker
.
deviceInfoHash
=
infoHash
[
:
]
worker
.
deviceInfoHash
=
infoHash
[
:
]
}
}
...
@@ -597,8 +585,7 @@ func (wm *WorkerManager) handleWorkerMsg(worker *Worker) {
...
@@ -597,8 +585,7 @@ func (wm *WorkerManager) handleWorkerMsg(worker *Worker) {
if
!
worker
.
registed
{
if
!
worker
.
registed
{
continue
continue
}
}
usageData
,
_
:=
json
.
Marshal
(
msg
.
DeviceUsage
)
wm
.
UpdateWorkerUsageInfo
(
worker
,
msg
.
DeviceUsage
)
wm
.
UpdateWorkerUsageInfo
(
worker
,
string
(
usageData
))
worker
.
usage
.
hwUsage
=
msg
.
DeviceUsage
.
Usage
worker
.
usage
.
hwUsage
=
msg
.
DeviceUsage
.
Usage
l
.
WithFields
(
log
.
Fields
{
l
.
WithFields
(
log
.
Fields
{
...
@@ -613,6 +600,7 @@ func (wm *WorkerManager) handleWorkerMsg(worker *Worker) {
...
@@ -613,6 +600,7 @@ func (wm *WorkerManager) handleWorkerMsg(worker *Worker) {
"worker-addr"
:
worker
.
workerAddr
,
"worker-addr"
:
worker
.
workerAddr
,
"model count"
:
len
(
msg
.
AddModelRunning
.
Models
),
"model count"
:
len
(
msg
.
AddModelRunning
.
Models
),
})
.
Debugf
(
"receive worker add model running:%v"
,
msg
.
AddModelRunning
.
Models
)
})
.
Debugf
(
"receive worker add model running:%v"
,
msg
.
AddModelRunning
.
Models
)
// todo: add worker running model.
case
*
omanager
.
WorkerMessage_DelModeRunning
:
case
*
omanager
.
WorkerMessage_DelModeRunning
:
if
!
worker
.
registed
{
if
!
worker
.
registed
{
...
@@ -622,6 +610,7 @@ func (wm *WorkerManager) handleWorkerMsg(worker *Worker) {
...
@@ -622,6 +610,7 @@ func (wm *WorkerManager) handleWorkerMsg(worker *Worker) {
"worker-addr"
:
worker
.
workerAddr
,
"worker-addr"
:
worker
.
workerAddr
,
"model count"
:
len
(
msg
.
DelModeRunning
.
ModelIds
),
"model count"
:
len
(
msg
.
DelModeRunning
.
ModelIds
),
})
.
Debugf
(
"receive worker del model running:%v"
,
msg
.
DelModeRunning
.
ModelIds
)
})
.
Debugf
(
"receive worker del model running:%v"
,
msg
.
DelModeRunning
.
ModelIds
)
// todo: del worker running model with model_id.
case
*
omanager
.
WorkerMessage_AddModelInstalled
:
case
*
omanager
.
WorkerMessage_AddModelInstalled
:
if
!
worker
.
registed
{
if
!
worker
.
registed
{
...
@@ -631,6 +620,7 @@ func (wm *WorkerManager) handleWorkerMsg(worker *Worker) {
...
@@ -631,6 +620,7 @@ func (wm *WorkerManager) handleWorkerMsg(worker *Worker) {
"worker-addr"
:
worker
.
workerAddr
,
"worker-addr"
:
worker
.
workerAddr
,
"model count"
:
len
(
msg
.
AddModelInstalled
.
Models
),
"model count"
:
len
(
msg
.
AddModelInstalled
.
Models
),
})
.
Debugf
(
"receive worker add model installed:%v"
,
msg
.
AddModelInstalled
.
Models
)
})
.
Debugf
(
"receive worker add model installed:%v"
,
msg
.
AddModelInstalled
.
Models
)
// todo: add worker installed model with model_id.
case
*
omanager
.
WorkerMessage_DelModelInstalled
:
case
*
omanager
.
WorkerMessage_DelModelInstalled
:
if
!
worker
.
registed
{
if
!
worker
.
registed
{
...
@@ -640,6 +630,7 @@ func (wm *WorkerManager) handleWorkerMsg(worker *Worker) {
...
@@ -640,6 +630,7 @@ func (wm *WorkerManager) handleWorkerMsg(worker *Worker) {
"worker-addr"
:
worker
.
workerAddr
,
"worker-addr"
:
worker
.
workerAddr
,
"model count"
:
len
(
msg
.
DelModelInstalled
.
ModelIds
),
"model count"
:
len
(
msg
.
DelModelInstalled
.
ModelIds
),
})
.
Debugf
(
"receive worker del model installed:%v"
,
msg
.
DelModelInstalled
.
ModelIds
)
})
.
Debugf
(
"receive worker del model installed:%v"
,
msg
.
DelModelInstalled
.
ModelIds
)
// todo: del worker installed model with model_id.
case
*
omanager
.
WorkerMessage_InstalledModelStatus
:
case
*
omanager
.
WorkerMessage_InstalledModelStatus
:
if
!
worker
.
registed
{
if
!
worker
.
registed
{
...
@@ -650,6 +641,7 @@ func (wm *WorkerManager) handleWorkerMsg(worker *Worker) {
...
@@ -650,6 +641,7 @@ func (wm *WorkerManager) handleWorkerMsg(worker *Worker) {
"model"
:
len
(
msg
.
InstalledModelStatus
.
ModelId
),
"model"
:
len
(
msg
.
InstalledModelStatus
.
ModelId
),
"type"
:
"status"
,
"type"
:
"status"
,
})
.
Debugf
(
"receive worker installed model status:%v"
,
msg
.
InstalledModelStatus
)
})
.
Debugf
(
"receive worker installed model status:%v"
,
msg
.
InstalledModelStatus
)
// todo: update worker installed model status.
case
*
omanager
.
WorkerMessage_RunningModelStatus
:
case
*
omanager
.
WorkerMessage_RunningModelStatus
:
if
!
worker
.
registed
{
if
!
worker
.
registed
{
...
@@ -660,6 +652,7 @@ func (wm *WorkerManager) handleWorkerMsg(worker *Worker) {
...
@@ -660,6 +652,7 @@ func (wm *WorkerManager) handleWorkerMsg(worker *Worker) {
"model"
:
len
(
msg
.
RunningModelStatus
.
ModelId
),
"model"
:
len
(
msg
.
RunningModelStatus
.
ModelId
),
"type"
:
"status"
,
"type"
:
"status"
,
})
.
Debugf
(
"receive worker running model status:%v"
,
msg
.
RunningModelStatus
)
})
.
Debugf
(
"receive worker running model status:%v"
,
msg
.
RunningModelStatus
)
// todo: update worker running model status.
case
*
omanager
.
WorkerMessage_GpuUsage
:
case
*
omanager
.
WorkerMessage_GpuUsage
:
if
!
worker
.
registed
{
if
!
worker
.
registed
{
...
@@ -669,15 +662,17 @@ func (wm *WorkerManager) handleWorkerMsg(worker *Worker) {
...
@@ -669,15 +662,17 @@ func (wm *WorkerManager) handleWorkerMsg(worker *Worker) {
"worker-addr"
:
worker
.
workerAddr
,
"worker-addr"
:
worker
.
workerAddr
,
"usage count"
:
len
(
msg
.
GpuUsage
.
Usages
),
"usage count"
:
len
(
msg
.
GpuUsage
.
Usages
),
})
.
Debugf
(
"receive worker gpu usage:%v"
,
msg
.
GpuUsage
.
Usages
)
})
.
Debugf
(
"receive worker gpu usage:%v"
,
msg
.
GpuUsage
.
Usages
)
// todo: update worker gpu usage info.
case
*
omanager
.
WorkerMessage_RegisteMessage
:
case
*
omanager
.
WorkerMessage_RegisteMessage
:
// 1. do some verify.
if
worker
.
registed
{
if
worker
.
registed
{
continue
continue
}
}
l
.
WithFields
(
log
.
Fields
{
l
.
WithFields
(
log
.
Fields
{
"worker-addr"
:
worker
.
workerAddr
,
"worker-addr"
:
worker
.
workerAddr
,
})
.
Debug
(
"receive registed message"
)
})
.
Debug
(
"receive registed message"
)
//
todo: verify signature
//
2. check signature.
info
:=
msg
.
RegisteMessage
.
Info
info
:=
msg
.
RegisteMessage
.
Info
{
{
...
@@ -695,6 +690,9 @@ func (wm *WorkerManager) handleWorkerMsg(worker *Worker) {
...
@@ -695,6 +690,9 @@ func (wm *WorkerManager) handleWorkerMsg(worker *Worker) {
worker
.
quit
<-
ErrInvalidMessageValue
worker
.
quit
<-
ErrInvalidMessageValue
return
return
}
}
}
// 3. check timestamp not expired.
if
time
.
Now
()
.
Unix
()
-
int64
(
msg
.
RegisteMessage
.
Timestamp
)
>
config
.
GetConfig
()
.
GetWorkerSignatureExpiredTime
()
{
if
time
.
Now
()
.
Unix
()
-
int64
(
msg
.
RegisteMessage
.
Timestamp
)
>
config
.
GetConfig
()
.
GetWorkerSignatureExpiredTime
()
{
l
.
WithFields
(
log
.
Fields
{
l
.
WithFields
(
log
.
Fields
{
"worker-addr"
:
worker
.
workerAddr
,
"worker-addr"
:
worker
.
workerAddr
,
...
@@ -702,13 +700,17 @@ func (wm *WorkerManager) handleWorkerMsg(worker *Worker) {
...
@@ -702,13 +700,17 @@ func (wm *WorkerManager) handleWorkerMsg(worker *Worker) {
worker
.
quit
<-
ErrExpiredMsgSignature
worker
.
quit
<-
ErrExpiredMsgSignature
return
return
}
}
}
// 4. replace old connection.
if
pubkey
,
err
:=
utils
.
HexToPubkey
(
info
.
MinerPubkey
);
err
!=
nil
{
if
pubkey
,
err
:=
utils
.
HexToPubkey
(
info
.
MinerPubkey
);
err
!=
nil
{
l
.
WithFields
(
log
.
Fields
{
l
.
WithFields
(
log
.
Fields
{
"worker-addr"
:
worker
.
workerAddr
,
"worker-addr"
:
worker
.
workerAddr
,
"error"
:
err
,
"error"
:
err
,
})
.
Error
(
"parse pubkey failed"
)
})
.
Error
(
"parse pubkey failed"
)
worker
.
quit
<-
ErrInvalidMsgSignature
return
}
else
{
}
else
{
addr
:=
utils
.
PubkeyToAddress
(
pubkey
)
addr
:=
utils
.
PubkeyToAddress
(
pubkey
)
if
old
:=
wm
.
GetWorkerByAddr
(
addr
);
old
!=
nil
{
if
old
:=
wm
.
GetWorkerByAddr
(
addr
);
old
!=
nil
{
...
@@ -721,6 +723,7 @@ func (wm *WorkerManager) handleWorkerMsg(worker *Worker) {
...
@@ -721,6 +723,7 @@ func (wm *WorkerManager) handleWorkerMsg(worker *Worker) {
worker
.
workerAddr
=
addr
worker
.
workerAddr
=
addr
}
}
worker
.
registed
=
true
worker
.
registed
=
true
// 5. check ip address.
matched
,
err
:=
regexp
.
MatchString
(
"((2(5[0-5]|[0-4]
\\
d))|[0-1]?
\\
d{1,2})(
\\
.((2(5[0-5]|[0-4]
\\
d))|[0-1]?
\\
d{1,2})){3}"
,
matched
,
err
:=
regexp
.
MatchString
(
"((2(5[0-5]|[0-4]
\\
d))|[0-1]?
\\
d{1,2})(
\\
.((2(5[0-5]|[0-4]
\\
d))|[0-1]?
\\
d{1,2})){3}"
,
msg
.
RegisteMessage
.
Hardware
.
NET
.
Ip
)
msg
.
RegisteMessage
.
Hardware
.
NET
.
Ip
)
if
err
!=
nil
{
if
err
!=
nil
{
...
@@ -735,14 +738,21 @@ func (wm *WorkerManager) handleWorkerMsg(worker *Worker) {
...
@@ -735,14 +738,21 @@ func (wm *WorkerManager) handleWorkerMsg(worker *Worker) {
Models
:
msg
.
RegisteMessage
.
Models
,
Models
:
msg
.
RegisteMessage
.
Models
,
}
}
wm
.
SetWorkerAddr
(
worker
,
worker
.
workerAddr
)
wm
.
SetWorkerAddr
(
worker
,
worker
.
workerAddr
)
// check white list.
if
err
:=
wm
.
checkWhiteList
(
worker
,
info
.
BenefitAddress
);
err
!=
nil
{
if
err
:=
wm
.
checkWhiteList
(
worker
,
info
.
BenefitAddress
);
err
!=
nil
{
worker
.
quit
<-
err
worker
.
quit
<-
err
return
return
}
else
{
}
else
{
wm
.
addWorkerToSets
(
worker
,
info
.
BenefitAddress
)
wm
.
addWorkerToWhiteListSet
(
worker
,
info
.
BenefitAddress
)
}
// add worker to mogo.
if
err
:=
wm
.
AddWorker
(
worker
);
err
==
nil
{
worker
.
addFirstSucceed
=
true
wm
.
UpdateWorkerActive
(
worker
)
}
}
// start manage worker.
wreg
:=
workerRegistry
{
wreg
:=
workerRegistry
{
worker
:
worker
,
worker
:
worker
,
wm
:
wm
,
wm
:
wm
,
...
...
server/worker_registry.go
View file @
f3f5c792
...
@@ -87,7 +87,7 @@ func (w workerRegistry) DetailInfo() (json.RawMessage, error) {
...
@@ -87,7 +87,7 @@ func (w workerRegistry) DetailInfo() (json.RawMessage, error) {
}
}
info
.
HearBeat
=
w
.
wm
.
GetHeartBeat
(
w
.
worker
.
uuid
)
*
1000
// to ms
info
.
HearBeat
=
w
.
wm
.
GetHeartBeat
(
w
.
worker
.
uuid
)
*
1000
// to ms
info
.
MinerAddress
=
w
.
worker
.
workerAddr
info
.
MinerAddress
=
w
.
worker
.
workerAddr
info
.
Nonce
=
int64
(
w
.
worker
.
nonce
)
info
.
Nonce
=
0
if
w
.
worker
.
info
.
nodeInfo
!=
nil
{
if
w
.
worker
.
info
.
nodeInfo
!=
nil
{
info
.
CpuModel
=
w
.
worker
.
info
.
nodeInfo
.
Hardware
.
CPU
.
Model
info
.
CpuModel
=
w
.
worker
.
info
.
nodeInfo
.
Hardware
.
CPU
.
Model
info
.
CpuCore
=
int
(
w
.
worker
.
info
.
nodeInfo
.
Hardware
.
CPU
.
Cores
)
info
.
CpuCore
=
int
(
w
.
worker
.
info
.
nodeInfo
.
Hardware
.
CPU
.
Cores
)
...
...
server/workerstatu.go
View file @
f3f5c792
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment