Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
P
power-node
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Odysseus
power-node
Commits
14cdac1e
Commit
14cdac1e
authored
May 13, 2024
by
duanjinfei
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
update msg resp
parent
59f14f68
Changes
11
Show whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
106 additions
and
78 deletions
+106
-78
rootcmd.go
cmd/rootcmd.go
+1
-1
NodeController.go
controllers/NodeController.go
+5
-0
StateController.go
controllers/StateController.go
+11
-0
db.go
db/db.go
+25
-2
model_handler.go
largeModel/model_handler.go
+44
-37
monitor.go
nm/monitor.go
+0
-4
msg_resp.go
nm/msg_resp.go
+1
-0
task_handler.go
nm/task_handler.go
+10
-3
docker.go
operate/docker.go
+0
-16
router.go
routers/router.go
+2
-1
util.go
utils/util.go
+7
-14
No files found.
cmd/rootcmd.go
View file @
14cdac1e
...
...
@@ -21,7 +21,7 @@ var (
func
init
()
{
RootCmd
.
PersistentFlags
()
.
StringVarP
(
&
rewardAddr
,
"reward"
,
"r"
,
"0x0Fb196385c8826e3806ebA2cA2cb78B26E08fEEe"
,
"please enter a reward address"
)
RootCmd
.
PersistentFlags
()
.
StringVarP
(
&
externalIp
,
"externalIp"
,
"e"
,
"192.168.1.120"
,
"please enter server external ip address"
)
RootCmd
.
PersistentFlags
()
.
StringVarP
(
&
opSys
,
"opSys"
,
"s"
,
""
,
"please enter you op sys name : win、linux
、mac
"
)
RootCmd
.
PersistentFlags
()
.
StringVarP
(
&
opSys
,
"opSys"
,
"s"
,
""
,
"please enter you op sys name : win、linux"
)
RootCmd
.
PersistentFlags
()
.
BoolVarP
(
&
debug
,
"debug"
,
"d"
,
false
,
"set log level debug"
)
cobra
.
OnInitialize
(
initConfig
)
}
...
...
controllers/NodeController.go
View file @
14cdac1e
...
...
@@ -9,6 +9,7 @@ import (
"example.com/m/utils"
nodemanagerV2
"github.com/odysseus/odysseus-protocol/gen/proto/go/nodemanager/v2"
"io"
"time"
)
type
NodeController
struct
{
...
...
@@ -119,6 +120,10 @@ func (c *NodeController) UpdateRecvStatus() {
c
.
ResponseInfo
(
500
,
"param error"
,
""
)
return
}
if
!
nm
.
IsRecvTask
&&
req
.
IsRecv
{
nm
.
RunningState
.
RunningTime
=
time
.
Now
()
.
Unix
()
nm
.
RunningState
.
CompletedTaskCount
=
0
}
nm
.
IsRecvTask
=
req
.
IsRecv
c
.
ResponseInfo
(
200
,
"update recv status successful"
,
""
)
}
...
...
controllers/StateController.go
View file @
14cdac1e
...
...
@@ -18,6 +18,17 @@ func (c *StateController) GetRunningState() {
c
.
ResponseInfo
(
200
,
"get running state successful"
,
res
)
}
func
(
c
*
StateController
)
GetRunningTp
()
{
info
:=
utils
.
GetHardwareInfo
()
var
totalTemp
int64
for
_
,
gpu
:=
range
info
.
Data
.
Gpus
{
totalTemp
+=
gpu
.
Temp
}
avgTemp
:=
totalTemp
/
int64
(
len
(
info
.
Data
.
Gpus
))
c
.
ResponseInfo
(
200
,
"get running state successful"
,
avgTemp
)
}
func
(
c
*
StateController
)
GetWorkerInfo
()
{
res
:=
models
.
WorkerAccount
{
WorkerAcc
:
conf
.
GetConfig
()
.
SignPublicAddress
.
Hex
(),
...
...
db/db.go
View file @
14cdac1e
package
db
import
(
"encoding/json"
"example.com/m/log"
"example.com/m/models"
"github.com/syndtr/goleveldb/leveldb"
"github.com/syndtr/goleveldb/leveldb/iterator"
)
...
...
@@ -18,9 +20,14 @@ func init() {
}
}
func
Put
(
key
string
,
value
[]
byte
)
error
{
func
Put
(
key
string
,
value
any
)
error
{
valueByte
,
err
:=
json
.
Marshal
(
value
)
if
err
!=
nil
{
log
.
Error
(
"Leveldb put data failed:"
,
err
)
return
err
}
// 存储数据
err
:=
dbInstance
.
Put
([]
byte
(
key
),
valu
e
,
nil
)
err
=
dbInstance
.
Put
([]
byte
(
key
),
valueByt
e
,
nil
)
if
err
!=
nil
{
log
.
Error
(
"Leveldb put data failed:"
,
err
)
return
err
...
...
@@ -48,6 +55,22 @@ func Get(key string) ([]byte, error) {
return
data
,
nil
}
func
GetModel
(
key
string
)
(
*
models
.
ModelInfo
,
error
)
{
data
,
err
:=
dbInstance
.
Get
([]
byte
(
key
),
nil
)
if
err
!=
nil
{
log
.
Error
(
"Leveldb get data failed:"
,
err
)
return
nil
,
err
}
log
.
WithField
(
"key"
,
key
)
.
WithField
(
"value"
,
data
)
.
Info
(
"leveldb data"
)
imageInfo
:=
&
models
.
ModelInfo
{}
err
=
json
.
Unmarshal
(
data
,
imageInfo
)
if
err
!=
nil
{
log
.
Error
(
"Json decode image data failed:"
,
err
)
return
nil
,
err
}
return
imageInfo
,
nil
}
func
Delete
(
key
[]
byte
)
error
{
err
:=
dbInstance
.
Delete
(
key
,
nil
)
if
err
!=
nil
{
...
...
largeModel/model_handler.go
View file @
14cdac1e
...
...
@@ -3,6 +3,7 @@ package largeModel
import
(
"encoding/json"
"example.com/m/conf"
"example.com/m/db"
"example.com/m/log"
"example.com/m/models"
"example.com/m/operate"
...
...
@@ -62,12 +63,6 @@ func (m *ModelHandler) MonitorModelInfo() {
continue
}
modelInfosResp
:=
resp
.
Data
//imageNameMap, err := m.dockerOp.PsImageNameMap()
//if err != nil {
// log.Error("Docker op ps images failed:", err)
// continue
//}
reportTaskIds
:=
make
([]
uint64
,
0
)
for
_
,
modelInfo
:=
range
modelInfosResp
{
if
modelInfo
.
ImageName
==
""
{
continue
...
...
@@ -77,38 +72,19 @@ func (m *ModelHandler) MonitorModelInfo() {
if
len
(
split
)
!=
2
{
continue
}
{
//if !imageNameMap[modelInfo.ImageName] {
// todo: 判断机器资源是否够用
//isPull := m.isResourceEnough(modelInfo)
// todo: 如果够用
//if isPull && modelInfo.PublishStatus == models.ModelPublishStatusYes {
// log.WithField("model image name", modelInfo.ImageName).Info("pulling image")
// go m.dockerOp.PullImage(modelInfo)
//}
//} else {
//
//}
}
log
.
WithField
(
"name"
,
modelInfo
.
ImageName
)
.
Info
(
"The image name is already"
)
m
.
dockerOp
.
BootUpModelId
[
modelInfo
.
ImageName
]
=
modelInfo
.
TaskId
reportTaskIds
=
append
(
reportTaskIds
,
modelInfo
.
TaskId
)
m
.
dockerOp
.
SignApi
[
modelInfo
.
ImageName
]
=
modelInfo
.
SignUrl
}
m
.
dockerOp
.
ModelsInfo
=
modelInfosResp
m
.
dockerOp
.
ReportModelIds
=
reportTaskIds
err
=
os
.
WriteFile
(
m
.
modelsFileName
,
bodyBytes
,
0644
)
err
:=
db
.
Put
(
modelInfo
.
ImageName
,
modelInfo
)
if
err
!=
nil
{
log
.
WithError
(
err
)
.
Error
(
"Error writing models.json"
)
log
.
WithError
(
err
)
.
Error
(
"Put db error"
)
continue
}
}
ticker
=
time
.
NewTicker
(
time
.
Minute
*
10
)
}
}
}
// heatDataHandler is an empty placeholder: it accepts the model list and does
// nothing with it.
func
(
m
*
ModelHandler
)
heatDataHandler
(
modelInfosResp
[]
*
models
.
ModelInfo
)
{
}
func
(
m
*
ModelHandler
)
ReadModels
()
([]
*
models
.
ModelInfo
,
error
)
{
bodyBytes
,
err
:=
os
.
ReadFile
(
m
.
modelsFileName
)
if
err
!=
nil
{
...
...
@@ -131,15 +107,51 @@ func (m *ModelHandler) ReadModels() ([]*models.ModelInfo, error) {
}
return
resp
.
Data
,
nil
}
func
(
m
*
ModelHandler
)
GetRpcModelsResp
()
(
*
nodemanagerV2
.
ModelsInfo
,
error
)
{
return
nil
,
nil
func
(
m
*
ModelHandler
)
GetRpcModelsResp
()
(
*
nodemanagerV2
.
ModelsInfo
,
error
)
{
installedModels
:=
make
([]
*
nodemanagerV2
.
InstalledModel
,
0
)
runningModels
:=
make
([]
*
nodemanagerV2
.
RunningModel
,
0
)
readModels
,
err
:=
m
.
ReadModels
()
if
err
!=
nil
{
log
.
WithError
(
err
)
.
Error
(
"Error reading models"
)
return
nil
,
err
}
for
_
,
model
:=
range
readModels
{
if
model
.
IsInstalled
{
diskSize
,
err
:=
strconv
.
ParseInt
(
model
.
HardwareRequire
.
DiskSize
,
10
,
64
)
if
err
!=
nil
{
return
nil
,
err
}
model
:=
&
nodemanagerV2
.
InstalledModel
{
ModelId
:
strconv
.
FormatUint
(
model
.
TaskId
,
10
),
DiskSize
:
diskSize
,
InstalledTime
:
model
.
SetupTime
,
LastRunTime
:
model
.
LastRunTime
,
}
installedModels
=
append
(
installedModels
,
model
)
}
if
model
.
IsRunning
{
model
:=
&
nodemanagerV2
.
RunningModel
{
ModelId
:
strconv
.
FormatUint
(
model
.
TaskId
,
10
),
}
runningModels
=
append
(
runningModels
,
model
)
}
}
res
:=
&
nodemanagerV2
.
ModelsInfo
{
InstalledModels
:
installedModels
,
RunningModels
:
runningModels
,
}
return
res
,
nil
}
// isResourceEnough currently always returns true; modelInfo is not inspected.
func
(
m
*
ModelHandler
)
isResourceEnough
(
modelInfo
*
models
.
ModelInfo
)
bool
{
return
true
}
// checkGpuUsage currently always returns false; modelInfo is not inspected.
func
(
m
*
ModelHandler
)
checkGpuUsage
(
modelInfo
*
models
.
ModelInfo
)
bool
{
return
false
}
func
(
m
*
ModelHandler
)
checkDiskUsage
(
modelInfo
*
models
.
ModelInfo
)
bool
{
totalSize
,
usedSize
,
availSize
,
usageSize
,
err
:=
m
.
dockerOp
.
GetDockerInfo
()
if
err
!=
nil
{
...
...
@@ -162,8 +174,3 @@ func (m *ModelHandler) checkDiskUsage(modelInfo *models.ModelInfo) bool {
}
return
true
}
// checkHeat is a stub that always returns false.
func
(
m
*
ModelHandler
)
checkHeat
()
bool
{
return
false
}
nm/monitor.go
View file @
14cdac1e
...
...
@@ -60,10 +60,6 @@ func (m *MonitorNm) monitorNmClient() {
msgRespWorker
.
RegisterMsgResp
(
nodeManager
,
worker
,
DeviceInfoResp
,
nil
)
log
.
Info
(
"------------------------Send deviceInfo message ended------------------------"
)
if
len
(
m
.
DockerOp
.
ReportModelIds
)
==
0
{
//params := utils.BuildParams(m.DockerOp.ReportModelIds, []uint64{0})
//msgRespWorker.RegisterMsgResp(nodeManager, worker, SubmitResourceMapRes, params)
}
log
.
Info
(
"------------------------Send once-off message ended------------------------"
)
nodeManagerHandler
:=
NewNodeManagerHandler
(
nodeManager
,
worker
,
msgRespWorker
,
taskMsgWorker
)
...
...
nm/msg_resp.go
View file @
14cdac1e
...
...
@@ -119,6 +119,7 @@ func NodeInfoResp(params ...interface{}) *nodemanagerV2.WorkerMessage {
modelsInfo
:=
params
[
0
]
.
(
*
largeModel
.
ModelHandler
)
readModels
,
err
:=
modelsInfo
.
GetRpcModelsResp
()
if
err
!=
nil
{
log
.
WithError
(
err
)
.
Error
(
"Error getting rpc models response"
)
return
nil
}
nodeInfoRes
:=
&
nodemanagerV2
.
WorkerMessage
{
...
...
nm/task_handler.go
View file @
14cdac1e
...
...
@@ -4,6 +4,7 @@ import (
"bytes"
"encoding/json"
"example.com/m/conf"
"example.com/m/db"
"example.com/m/log"
"example.com/m/models"
"example.com/m/operate"
...
...
@@ -220,7 +221,10 @@ func (t *TaskWorker) GetAckResp(taskMsg *nodemanagerV2.PushTaskMessage) (isCanEx
isSuccess
:=
value
.
(
bool
)
log
.
WithField
(
"isSuccess"
,
isSuccess
)
.
Info
(
"Task exec info"
)
if
!
isSuccess
&&
!
t
.
lastExecTaskStartTime
.
IsZero
()
{
lastTaskImageInfo
:=
t
.
DockerOp
.
GetImageInfo
(
t
.
lastExecTaskImageName
)
lastTaskImageInfo
,
err
:=
db
.
GetModel
(
t
.
lastExecTaskImageName
)
if
err
!=
nil
{
return
false
,
0
,
0
,
0
}
since
:=
time
.
Since
(
t
.
lastExecTaskStartTime
)
queueWaitTime
=
lastTaskImageInfo
.
EstimatExeTime
-
int64
(
since
.
Seconds
())
if
queueWaitTime
<
0
{
...
...
@@ -238,7 +242,10 @@ func (t *TaskWorker) GetAckResp(taskMsg *nodemanagerV2.PushTaskMessage) (isCanEx
}
log
.
Info
(
"found task image finished"
)
isCanExecute
=
true
modelInfo
:=
t
.
DockerOp
.
GetImageInfo
(
taskCmd
.
ImageName
)
modelInfo
,
err
:=
db
.
GetModel
(
t
.
lastExecTaskImageName
)
if
err
!=
nil
{
return
false
,
0
,
0
,
0
}
if
modelInfo
!=
nil
{
bootUpTime
=
modelInfo
.
StartUpTime
executeTime
=
modelInfo
.
EstimatExeTime
...
...
@@ -432,7 +439,7 @@ func (op *TaskOp) getFileCache(respStr string, dockerOp *operate.DockerOp) (stri
log
.
WithField
(
"isBase64"
,
isBase64
)
.
Info
(
"resp str info"
)
if
isBase64
{
log
.
WithField
(
"taskId"
,
op
.
taskMsg
.
TaskId
)
.
WithField
(
"format"
,
respFormat
)
.
WithField
(
"suffix"
,
suffix
)
.
Info
(
"Parse container resp"
)
queryString
:=
utils
.
MatchFileCacheQueryString
(
op
.
taskParam
.
Headers
,
op
.
taskCmd
.
ImageName
,
dockerOp
.
ModelsInfo
,
respFormat
)
queryString
:=
utils
.
MatchFileCacheQueryString
(
op
.
taskParam
.
Headers
,
op
.
taskCmd
.
ImageName
,
respFormat
)
ossUri
,
err
:=
op
.
uploadOSS
(
op
.
taskMsg
.
TaskId
,
queryString
,
decodeByte
,
suffix
)
if
err
!=
nil
||
ossUri
==
""
{
log
.
WithError
(
err
)
.
Error
(
"upload image into file cache failed"
)
...
...
operate/docker.go
View file @
14cdac1e
...
...
@@ -30,10 +30,6 @@ type DockerOp struct {
dockerClient
*
client
.
Client
UsedExternalPort
map
[
int64
]
bool
SignApi
map
[
string
]
string
ModelsInfo
[]
*
models
.
ModelInfo
ReportModelIds
[]
uint64
BootUpModelId
map
[
string
]
uint64
//RunningImages map[string]bool
}
func
init
()
{
...
...
@@ -53,23 +49,11 @@ func NewDockerOp() *DockerOp {
Reason
:
""
,
dockerClient
:
dockerClient
,
SignApi
:
make
(
map
[
string
]
string
,
0
),
ModelsInfo
:
make
([]
*
models
.
ModelInfo
,
100000
),
UsedExternalPort
:
make
(
map
[
int64
]
bool
,
0
),
ReportModelIds
:
make
([]
uint64
,
0
),
BootUpModelId
:
make
(
map
[
string
]
uint64
,
0
),
//RunningImages: make(map[string]bool, 0),
}
}
func
(
d
*
DockerOp
)
GetImageInfo
(
imageName
string
)
*
models
.
ModelInfo
{
for
_
,
info
:=
range
d
.
ModelsInfo
{
if
info
.
ImageName
==
imageName
{
return
info
}
}
return
nil
}
func
(
d
*
DockerOp
)
GetContainerSign
(
taskMsg
*
nodemanagerV2
.
PushTaskMessage
,
taskRes
[]
byte
)
[]
byte
{
reqBody
:=
&
models
.
TaskReq
{
TaskId
:
taskMsg
.
TaskId
,
...
...
routers/router.go
View file @
14cdac1e
...
...
@@ -14,9 +14,10 @@ func init() {
beego
.
Router
(
"/api/v1/power/get/recv/status"
,
&
controllers
.
NodeController
{},
"get:GetRecvStatus"
)
beego
.
Router
(
"/api/v1/power/get/conf"
,
&
controllers
.
NodeController
{},
"get:GetConfigInfo"
)
beego
.
Router
(
"/api/v1/power/get/current/benefit"
,
&
controllers
.
NodeController
{},
"get:GetBenefit"
)
beego
.
Router
(
"/api/v1/power/get/running/tp"
,
&
controllers
.
StateController
{},
"get:GetRunningTp"
)
beego
.
Router
(
"/api/v1/power/get/running/state"
,
&
controllers
.
StateController
{},
"get:GetRunningState"
)
beego
.
Router
(
"/api/v1/power/get/worker/info"
,
&
controllers
.
StateController
{},
"get:GetWorkerInfo"
)
beego
.
Router
(
"/api/v1/power/list/gpu/info"
,
&
controllers
.
StateController
{},
"get:GetListGpuInfo"
)
beego
.
Router
(
"/api/v1/power/get/gpu/info"
,
&
controllers
.
StateController
{},
"post:GetGpuUsageInfo"
)
beego
.
Router
(
"/api/v1/power/get/hardware/info"
,
&
controllers
.
StateController
{},
"
pos
t:GetOtherHardwareInfo"
)
beego
.
Router
(
"/api/v1/power/get/hardware/info"
,
&
controllers
.
StateController
{},
"
ge
t:GetOtherHardwareInfo"
)
}
utils/util.go
View file @
14cdac1e
...
...
@@ -9,6 +9,7 @@ import (
"crypto/rand"
"encoding/base64"
"encoding/json"
"example.com/m/db"
"example.com/m/log"
"example.com/m/models"
//"example.com/m/nm"
...
...
@@ -135,7 +136,7 @@ func IsBase64ImageStr(imageStr string) (bool, []byte, string, string) {
return
true
,
decodeBytes
,
formatStr
,
suffix
}
func
MatchFileCacheQueryString
(
params
map
[
string
][]
string
,
taskImageName
string
,
modelsInfo
[]
*
models
.
ModelInfo
,
contentType
string
)
string
{
func
MatchFileCacheQueryString
(
params
map
[
string
][]
string
,
taskImageName
string
,
contentType
string
)
string
{
values
:=
url
.
Values
{}
isExistFileExpires
:=
false
for
key
,
value
:=
range
params
{
...
...
@@ -145,22 +146,14 @@ func MatchFileCacheQueryString(params map[string][]string, taskImageName string,
break
}
}
isModelExistFileExpires
:=
false
if
!
isExistFileExpires
{
for
_
,
info
:=
range
modelsInfo
{
if
info
==
nil
{
continue
}
if
info
.
ImageName
==
taskImageName
&&
info
.
FileExpiresTime
!=
""
{
values
.
Add
(
models
.
ResultFileExpiresDB
,
info
.
FileExpiresTime
)
isModelExistFileExpires
=
true
break
}
}
}
if
!
isModelExistFileExpires
{
modelInfo
,
_
:=
db
.
GetModel
(
taskImageName
)
if
modelInfo
!=
nil
&&
modelInfo
.
FileExpiresTime
!=
""
{
values
.
Add
(
models
.
ResultFileExpiresDB
,
modelInfo
.
FileExpiresTime
)
}
else
{
values
.
Add
(
models
.
ResultFileExpiresDB
,
"600"
)
}
}
values
.
Add
(
models
.
ContentType
,
contentType
)
return
values
.
Encode
()
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment