Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
P
power-node
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Odysseus
power-node
Commits
9f9c2f73
Commit
9f9c2f73
authored
May 24, 2024
by
duanjinfei
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add report model info
parent
3c7a1676
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
65 additions
and
12 deletions
+65
-12
monitor.go
nm/monitor.go
+61
-2
msg_handler.go
nm/msg_handler.go
+1
-1
msg_resp.go
nm/msg_resp.go
+3
-9
No files found.
nm/monitor.go
View file @
9f9c2f73
...
...
@@ -67,7 +67,9 @@ func (m *MonitorNm) monitorNmClient() {
nodeManagerHandler
:=
NewNodeManagerHandler
(
nodeManager
,
worker
,
msgRespWorker
,
taskMsgWorker
)
log
.
Info
(
"Report model info started"
)
go
m
.
monitorModel
(
msgRespWorker
,
nodeManager
,
worker
)
go
m
.
monitorInstallModel
(
msgRespWorker
,
nodeManager
,
worker
)
go
m
.
monitorRunningModel
(
msgRespWorker
,
nodeManager
,
worker
)
go
nodeManagerHandler
.
MonitorStandardTaskWorker
()
log
.
Info
(
"Monitor standard task worker started"
)
...
...
@@ -140,7 +142,7 @@ func (m *MonitorNm) monitorNodeManagerSeed() {
}
}
func
(
m
*
MonitorNm
)
monitorModel
(
msgRespWorker
*
RespMsgWorker
,
nodeManager
*
models
.
NodeManagerClient
,
worker
nodemanagerV2
.
NodeManagerService_RegisterWorkerClient
)
{
func
(
m
*
MonitorNm
)
monitor
Install
Model
(
msgRespWorker
*
RespMsgWorker
,
nodeManager
*
models
.
NodeManagerClient
,
worker
nodemanagerV2
.
NodeManagerService_RegisterWorkerClient
)
{
reportModel
:=
make
(
map
[
string
]
bool
,
0
)
images
,
err
:=
m
.
DockerOp
.
PsImageNameMap
()
if
err
!=
nil
{
...
...
@@ -187,3 +189,60 @@ func (m *MonitorNm) monitorModel(msgRespWorker *RespMsgWorker, nodeManager *mode
}
}
}
func
(
m
*
MonitorNm
)
monitorRunningModel
(
msgRespWorker
*
RespMsgWorker
,
nodeManager
*
models
.
NodeManagerClient
,
worker
nodemanagerV2
.
NodeManagerService_RegisterWorkerClient
)
{
reportModel
:=
make
(
map
[
string
]
bool
,
0
)
containerList
:=
m
.
DockerOp
.
ListContainer
()
if
containerList
==
nil
||
len
(
containerList
)
==
0
{
log
.
Error
(
"Get container failed"
)
return
}
allModels
,
err
:=
db
.
GetAllModels
()
if
err
!=
nil
{
log
.
WithError
(
err
)
.
Error
(
"Get all models failed"
)
return
}
addRunningModels
:=
make
([]
interface
{},
0
)
for
_
,
model
:=
range
allModels
{
isExist
:=
false
for
_
,
container
:=
range
containerList
{
if
model
.
ImageName
==
container
.
Image
{
isExist
=
true
}
}
if
reportModel
[
model
.
ImageName
]
||
!
isExist
{
continue
}
reportModel
[
model
.
ImageName
]
=
true
addRunningModels
=
append
(
addRunningModels
,
&
nodemanagerV2
.
RunningModel
{
ModelId
:
strconv
.
FormatUint
(
model
.
TaskId
,
10
),
GpuSeq
:
model
.
GpuSeq
,
GpuRam
:
model
.
RunningMem
,
StartedTime
:
model
.
LastRunTime
,
LastWorkTime
:
model
.
LastWorkTime
,
TotalRunCount
:
model
.
TotalRunCount
,
ExecTime
:
model
.
EstimatExeTime
})
}
params
:=
utils
.
BuildParams
(
addRunningModels
...
)
msgRespWorker
.
RegisterMsgResp
(
nodeManager
,
worker
,
AddModelRunningResp
,
params
)
ticker
:=
time
.
NewTicker
(
time
.
Second
*
1
)
defer
ticker
.
Stop
()
for
{
select
{
case
<-
ticker
.
C
:
{
addRunningModels
:=
make
([]
interface
{},
0
)
for
_
,
model
:=
range
allModels
{
isExist
:=
false
for
_
,
container
:=
range
containerList
{
if
model
.
ImageName
==
container
.
Image
{
isExist
=
true
}
}
if
reportModel
[
model
.
ImageName
]
||
!
isExist
{
continue
}
reportModel
[
model
.
ImageName
]
=
true
addRunningModels
=
append
(
addRunningModels
,
&
nodemanagerV2
.
RunningModel
{
ModelId
:
strconv
.
FormatUint
(
model
.
TaskId
,
10
),
GpuSeq
:
model
.
GpuSeq
,
GpuRam
:
model
.
RunningMem
,
StartedTime
:
model
.
LastRunTime
,
LastWorkTime
:
model
.
LastWorkTime
,
TotalRunCount
:
model
.
TotalRunCount
,
ExecTime
:
model
.
EstimatExeTime
})
}
params
:=
utils
.
BuildParams
(
addRunningModels
...
)
msgRespWorker
.
RegisterMsgResp
(
nodeManager
,
worker
,
AddModelRunningResp
,
params
)
ticker
=
time
.
NewTicker
(
time
.
Minute
*
10
)
}
}
}
}
nm/msg_handler.go
View file @
9f9c2f73
...
...
@@ -342,7 +342,7 @@ func (n *NodeManagerHandler) MonitorImageOp(op *nodemanagerV2.ModelOperate) {
model
.
ContainerId
=
container
.
ID
model
.
IsRunning
=
true
model
.
LastRunTime
=
time
.
Now
()
.
Unix
()
params
:=
utils
.
BuildParams
(
strconv
.
FormatUint
(
model
.
TaskId
,
10
),
model
.
GpuSeq
,
model
.
RunningMem
,
model
.
LastRunTime
,
model
.
LastWorkTime
,
model
.
TotalRunCount
,
model
.
EstimatExeTime
)
params
:=
utils
.
BuildParams
(
&
nodemanagerV2
.
RunningModel
{
ModelId
:
strconv
.
FormatUint
(
model
.
TaskId
,
10
),
GpuSeq
:
model
.
GpuSeq
,
GpuRam
:
model
.
RunningMem
,
StartedTime
:
model
.
LastRunTime
,
LastWorkTime
:
model
.
LastWorkTime
,
TotalRunCount
:
model
.
TotalRunCount
,
ExecTime
:
model
.
EstimatExeTime
}
)
n
.
msgRespWorker
.
RegisterMsgResp
(
n
.
nodeManager
,
n
.
worker
,
AddModelRunningResp
,
params
)
break
}
...
...
nm/msg_resp.go
View file @
9f9c2f73
...
...
@@ -327,16 +327,10 @@ func DelModelInstalledResp(params ...interface{}) *nodemanagerV2.WorkerMessage {
func
AddModelRunningResp
(
params
...
interface
{})
*
nodemanagerV2
.
WorkerMessage
{
log
.
Info
(
"Add model running response received params:"
,
params
)
runningModels
:=
make
([]
*
nodemanagerV2
.
RunningModel
,
0
)
model
:=
&
nodemanagerV2
.
RunningModel
{
ModelId
:
params
[
0
]
.
(
string
),
GpuSeq
:
params
[
1
]
.
(
int32
),
GpuRam
:
params
[
2
]
.
(
int64
),
StartedTime
:
params
[
3
]
.
(
int64
),
LastWorkTime
:
params
[
4
]
.
(
int64
),
TotalRunCount
:
params
[
5
]
.
(
int32
),
ExecTime
:
params
[
6
]
.
(
int32
),
for
_
,
param
:=
range
params
{
runningModel
:=
param
.
(
*
nodemanagerV2
.
RunningModel
)
runningModels
=
append
(
runningModels
,
runningModel
)
}
runningModels
=
append
(
runningModels
,
model
)
addModelRunningRes
:=
&
nodemanagerV2
.
WorkerMessage
{
Message
:
&
nodemanagerV2
.
WorkerMessage_AddModelRunning
{
AddModelRunning
:
&
nodemanagerV2
.
AddModelRunning
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment