Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
P
power-node
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Odysseus
power-node
Commits
11fb03c0
Commit
11fb03c0
authored
May 28, 2024
by
duanjinfei
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
update msg resp
parent
26212fdb
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
104 additions
and
32 deletions
+104
-32
NodeController.go
controllers/NodeController.go
+1
-0
api.go
nm/api.go
+5
-3
monitor.go
nm/monitor.go
+6
-1
msg_handler.go
nm/msg_handler.go
+14
-3
msg_resp.go
nm/msg_resp.go
+63
-14
task_handler.go
nm/task_handler.go
+12
-4
docker.go
operate/docker.go
+3
-7
No files found.
controllers/NodeController.go
View file @
11fb03c0
...
@@ -65,6 +65,7 @@ func (c *NodeController) SetBenefitAddress() {
...
@@ -65,6 +65,7 @@ func (c *NodeController) SetBenefitAddress() {
if
err
!=
nil
{
if
err
!=
nil
{
c
.
ResponseInfo
(
500
,
"Write benefit file failed"
,
""
)
c
.
ResponseInfo
(
500
,
"Write benefit file failed"
,
""
)
}
}
nm
.
IsUpdateBenefitAddr
=
true
}
}
c
.
ResponseInfo
(
200
,
"set benefit address successful"
,
""
)
c
.
ResponseInfo
(
200
,
"set benefit address successful"
,
""
)
}
}
...
...
nm/api.go
View file @
11fb03c0
...
@@ -13,13 +13,15 @@ type NodeManager struct {
...
@@ -13,13 +13,15 @@ type NodeManager struct {
}
}
var
(
var
(
HistoryBenefitAcc
[]
*
models
.
BenefitAddressStruct
HistoryBenefitAcc
[]
*
models
.
BenefitAddressStruct
RunningState
*
models
.
RunningState
RunningState
*
models
.
RunningState
IsRecvTask
bool
IsRecvTask
bool
IsUpdateBenefitAddr
bool
)
)
func
init
()
{
func
init
()
{
IsRecvTask
=
false
IsRecvTask
=
false
IsUpdateBenefitAddr
=
false
HistoryBenefitAcc
=
make
([]
*
models
.
BenefitAddressStruct
,
0
)
HistoryBenefitAcc
=
make
([]
*
models
.
BenefitAddressStruct
,
0
)
RunningState
=
&
models
.
RunningState
{
RunningState
=
&
models
.
RunningState
{
RunningTime
:
time
.
Now
()
.
Unix
(),
RunningTime
:
time
.
Now
()
.
Unix
(),
...
...
nm/monitor.go
View file @
11fb03c0
...
@@ -81,10 +81,15 @@ func (m *MonitorNm) monitorNmClient() {
...
@@ -81,10 +81,15 @@ func (m *MonitorNm) monitorNmClient() {
for
{
for
{
if
!
IsRecvTask
{
if
!
IsRecvTask
{
log
.
Warn
(
"User set recv task status is false"
)
log
.
Warn
(
"User set recv task status is false"
)
msgRespWorker
.
RegisterMsgResp
(
nodeManager
,
worker
,
RegisterInfo
Resp
,
nil
)
msgRespWorker
.
RegisterMsgResp
(
nodeManager
,
worker
,
Goodbye
Resp
,
nil
)
nodeManager
.
UpdateStatus
(
false
)
nodeManager
.
UpdateStatus
(
false
)
return
return
}
}
if
IsUpdateBenefitAddr
{
benefitAddrUpdateParam
:=
utils
.
BuildParams
(
conf
.
GetConfig
()
.
BenefitAddress
)
msgRespWorker
.
RegisterMsgResp
(
nodeManager
,
worker
,
BenefitAddrUpdateResp
,
benefitAddrUpdateParam
)
IsUpdateBenefitAddr
=
false
}
sub
:=
time
.
Now
()
.
Sub
(
nodeManager
.
GetLastHeartTime
())
.
Seconds
()
sub
:=
time
.
Now
()
.
Sub
(
nodeManager
.
GetLastHeartTime
())
.
Seconds
()
log
.
WithField
(
"time(uint seconds)"
,
sub
)
.
Info
(
"Handler nm msg thread monitor heartbeat time"
)
log
.
WithField
(
"time(uint seconds)"
,
sub
)
.
Info
(
"Handler nm msg thread monitor heartbeat time"
)
rev
,
err
:=
worker
.
Recv
()
rev
,
err
:=
worker
.
Recv
()
...
...
nm/msg_handler.go
View file @
11fb03c0
...
@@ -61,7 +61,7 @@ func (n *NodeManagerHandler) DistributionMsgWorker(nodeManagerMsgChan chan *node
...
@@ -61,7 +61,7 @@ func (n *NodeManagerHandler) DistributionMsgWorker(nodeManagerMsgChan chan *node
taskMsg
:=
rev
.
GetPushTask
()
taskMsg
:=
rev
.
GetPushTask
()
if
taskMsg
!=
nil
{
if
taskMsg
!=
nil
{
go
func
(
msgRespWorker
*
RespMsgWorker
,
taskMsgWorker
*
TaskWorker
,
taskMsg
*
nodemanagerV2
.
PushTaskMessage
)
{
go
func
(
msgRespWorker
*
RespMsgWorker
,
taskMsgWorker
*
TaskWorker
,
taskMsg
*
nodemanagerV2
.
PushTaskMessage
)
{
isCanExecute
,
bootUpTime
,
queueWaitTime
,
executeTime
:=
taskMsgWorker
.
GetAckResp
(
taskMsg
)
isCanExecute
,
bootUpTime
,
queueWaitTime
,
executeTime
,
imageName
:=
taskMsgWorker
.
GetAckResp
(
taskMsg
)
ackParams
:=
utils
.
BuildParams
(
taskMsg
.
TaskId
,
isCanExecute
,
bootUpTime
,
queueWaitTime
,
executeTime
)
ackParams
:=
utils
.
BuildParams
(
taskMsg
.
TaskId
,
isCanExecute
,
bootUpTime
,
queueWaitTime
,
executeTime
)
msgRespWorker
.
RegisterMsgResp
(
n
.
nodeManager
,
n
.
worker
,
RespTaskAck
,
ackParams
)
msgRespWorker
.
RegisterMsgResp
(
n
.
nodeManager
,
n
.
worker
,
RespTaskAck
,
ackParams
)
if
!
isCanExecute
{
if
!
isCanExecute
{
...
@@ -108,7 +108,12 @@ func (n *NodeManagerHandler) DistributionMsgWorker(nodeManagerMsgChan chan *node
...
@@ -108,7 +108,12 @@ func (n *NodeManagerHandler) DistributionMsgWorker(nodeManagerMsgChan chan *node
RunningState
.
CompletedTaskCount
++
RunningState
.
CompletedTaskCount
++
log
.
Info
(
"Completed task count: "
,
RunningState
.
CompletedTaskCount
)
log
.
Info
(
"Completed task count: "
,
RunningState
.
CompletedTaskCount
)
log
.
Info
(
"--------------taskMsg--------------:"
,
taskMsg
)
log
.
Info
(
"--------------taskMsg--------------:"
,
taskMsg
)
msgRespWorker
.
RegisterMsgResp
(
n
.
nodeManager
,
n
.
worker
,
GpuUsageResp
,
ackParams
)
msgRespWorker
.
RegisterMsgResp
(
n
.
nodeManager
,
n
.
worker
,
GpuUsageResp
,
nil
)
model
,
_
:=
db
.
GetModel
(
imageName
)
if
model
!=
nil
{
runningModelStatusParam
:=
utils
.
BuildParams
(
model
)
msgRespWorker
.
RegisterMsgResp
(
n
.
nodeManager
,
n
.
worker
,
RunningModelStatusResp
,
runningModelStatusParam
)
}
}(
n
.
msgRespWorker
,
n
.
taskMsgWorker
,
taskMsg
)
}(
n
.
msgRespWorker
,
n
.
taskMsgWorker
,
taskMsg
)
continue
continue
}
}
...
@@ -175,7 +180,11 @@ func (n *NodeManagerHandler) DistributionMsgWorker(nodeManagerMsgChan chan *node
...
@@ -175,7 +180,11 @@ func (n *NodeManagerHandler) DistributionMsgWorker(nodeManagerMsgChan chan *node
HostIp
:
models
.
ZeroHost
,
HostIp
:
models
.
ZeroHost
,
HostPort
:
n
.
taskMsgWorker
.
getExternalPort
(),
HostPort
:
n
.
taskMsgWorker
.
getExternalPort
(),
}
}
containerId
,
gpuSeq
,
err
:=
dockerOp
.
CreateAndStartContainer
(
model
,
dockerCmd
)
info
:=
GetHardwareInfo
()
if
info
==
nil
{
continue
}
containerId
,
gpuSeq
,
err
:=
dockerOp
.
CreateAndStartContainer
(
info
,
model
,
dockerCmd
)
if
err
!=
nil
{
if
err
!=
nil
{
log
.
WithError
(
err
)
.
Error
(
"Error creating container"
)
log
.
WithError
(
err
)
.
Error
(
"Error creating container"
)
continue
continue
...
@@ -350,6 +359,8 @@ func (n *NodeManagerHandler) MonitorImageOp(op *nodemanagerV2.ModelOperate) {
...
@@ -350,6 +359,8 @@ func (n *NodeManagerHandler) MonitorImageOp(op *nodemanagerV2.ModelOperate) {
model
.
ContainerId
=
""
model
.
ContainerId
=
""
params
:=
utils
.
BuildParams
(
strconv
.
FormatUint
(
model
.
TaskId
,
10
))
params
:=
utils
.
BuildParams
(
strconv
.
FormatUint
(
model
.
TaskId
,
10
))
n
.
msgRespWorker
.
RegisterMsgResp
(
n
.
nodeManager
,
n
.
worker
,
DelModelRunningResp
,
params
)
n
.
msgRespWorker
.
RegisterMsgResp
(
n
.
nodeManager
,
n
.
worker
,
DelModelRunningResp
,
params
)
params
=
utils
.
BuildParams
(
strconv
.
FormatUint
(
model
.
TaskId
,
10
),
model
.
LastRunTime
)
n
.
msgRespWorker
.
RegisterMsgResp
(
n
.
nodeManager
,
n
.
worker
,
InstallModelStatusResp
,
params
)
break
break
}
}
}
}
...
...
nm/msg_resp.go
View file @
11fb03c0
package
nm
package
nm
import
(
import
(
"bytes"
"example.com/m/conf"
"example.com/m/conf"
"example.com/m/largeModel"
"example.com/m/largeModel"
"example.com/m/log"
"example.com/m/log"
...
@@ -11,6 +10,7 @@ import (
...
@@ -11,6 +10,7 @@ import (
"github.com/ethereum/go-ethereum/crypto"
"github.com/ethereum/go-ethereum/crypto"
nodemanagerV2
"github.com/odysseus/odysseus-protocol/gen/proto/go/nodemanager/v2"
nodemanagerV2
"github.com/odysseus/odysseus-protocol/gen/proto/go/nodemanager/v2"
"math/big"
"math/big"
"strconv"
"time"
"time"
)
)
...
@@ -80,28 +80,30 @@ func HeartbeatResp(params ...interface{}) *nodemanagerV2.WorkerMessage {
...
@@ -80,28 +80,30 @@ func HeartbeatResp(params ...interface{}) *nodemanagerV2.WorkerMessage {
func
RegisterInfoResp
(
params
...
interface
{})
*
nodemanagerV2
.
WorkerMessage
{
func
RegisterInfoResp
(
params
...
interface
{})
*
nodemanagerV2
.
WorkerMessage
{
log
.
Info
(
"Register info response received params:"
,
params
)
log
.
Info
(
"Register info response received params:"
,
params
)
info
:=
&
nodemanagerV2
.
NodeInfo
{
MinerPubkey
:
conf
.
GetConfig
()
.
SignPub
,
BenefitAddress
:
conf
.
GetConfig
()
.
BenefitAddress
,
}
hardwareInfo
:=
GetHardwareInfo
()
modelsInfo
:=
params
[
0
]
.
(
*
largeModel
.
ModelHandler
)
readModels
,
err
:=
modelsInfo
.
ScanModelsResp
()
if
err
!=
nil
{
log
.
WithError
(
err
)
.
Error
(
"Error scanning models response failed"
)
return
nil
}
nowTimeStamp
:=
time
.
Now
()
.
Unix
()
nowTimeStamp
:=
time
.
Now
()
.
Unix
()
nowTimeBytes
:=
big
.
NewInt
(
nowTimeStamp
)
.
Bytes
()
nowTimeBytes
:=
big
.
NewInt
(
nowTimeStamp
)
.
Bytes
()
signHash
:=
crypto
.
Keccak256Hash
(
bytes
.
NewBufferString
(
conf
.
GetConfig
()
.
GetExternalIp
())
.
Bytes
(
),
signHash
:=
crypto
.
Keccak256Hash
(
[]
byte
(
info
.
String
()
),
bytes
.
NewBufferString
(
conf
.
GetConfig
()
.
SignPub
)
.
Bytes
(
),
[]
byte
(
hardwareInfo
.
String
()
),
bytes
.
NewBufferString
(
conf
.
GetConfig
()
.
BenefitAddress
)
.
Bytes
(
),
[]
byte
(
readModels
.
String
()
),
nowTimeBytes
)
nowTimeBytes
)
log
.
WithField
(
"hash"
,
signHash
.
String
())
.
Info
(
"register message sign result"
)
log
.
WithField
(
"hash"
,
signHash
.
String
())
.
Info
(
"register message sign result"
)
sign
,
_
:=
crypto
.
Sign
(
signHash
.
Bytes
(),
conf
.
GetConfig
()
.
SignPrivateKey
)
sign
,
_
:=
crypto
.
Sign
(
signHash
.
Bytes
(),
conf
.
GetConfig
()
.
SignPrivateKey
)
log
.
Info
(
"register message sign:"
,
common
.
Bytes2Hex
(
sign
))
log
.
Info
(
"register message sign:"
,
common
.
Bytes2Hex
(
sign
))
modelsInfo
:=
params
[
0
]
.
(
*
largeModel
.
ModelHandler
)
readModels
,
err
:=
modelsInfo
.
ScanModelsResp
()
if
err
!=
nil
{
return
nil
}
hardwareInfo
:=
GetHardwareInfo
()
nodeInfoRes
:=
&
nodemanagerV2
.
WorkerMessage
{
nodeInfoRes
:=
&
nodemanagerV2
.
WorkerMessage
{
Message
:
&
nodemanagerV2
.
WorkerMessage_RegisteMessage
{
Message
:
&
nodemanagerV2
.
WorkerMessage_RegisteMessage
{
RegisteMessage
:
&
nodemanagerV2
.
RegisteMessage
{
RegisteMessage
:
&
nodemanagerV2
.
RegisteMessage
{
Info
:
&
nodemanagerV2
.
NodeInfo
{
Info
:
info
,
MinerPubkey
:
conf
.
GetConfig
()
.
SignPub
,
BenefitAddress
:
conf
.
GetConfig
()
.
BenefitAddress
,
},
Hardware
:
hardwareInfo
,
Hardware
:
hardwareInfo
,
Models
:
readModels
,
Models
:
readModels
,
Timestamp
:
nowTimeStamp
,
Timestamp
:
nowTimeStamp
,
...
@@ -355,6 +357,53 @@ func DelModelRunningResp(params ...interface{}) *nodemanagerV2.WorkerMessage {
...
@@ -355,6 +357,53 @@ func DelModelRunningResp(params ...interface{}) *nodemanagerV2.WorkerMessage {
return
delModelRunningRes
return
delModelRunningRes
}
}
func
BenefitAddrUpdateResp
(
params
...
interface
{})
*
nodemanagerV2
.
WorkerMessage
{
log
.
Info
(
"Benefit addr update response received params:"
,
params
)
addr
:=
params
[
0
]
.
(
string
)
benefitAddrUpdateResp
:=
&
nodemanagerV2
.
WorkerMessage
{
Message
:
&
nodemanagerV2
.
WorkerMessage_BenefitAddrUpdate
{
BenefitAddrUpdate
:
&
nodemanagerV2
.
BenefitAddrUpdate
{
BenefitAddress
:
addr
,
},
},
}
log
.
Info
(
"---------------------------------------Send Benefit addr update response msg ------------------------------------"
)
return
benefitAddrUpdateResp
}
func
RunningModelStatusResp
(
params
...
interface
{})
*
nodemanagerV2
.
WorkerMessage
{
log
.
Info
(
"Running Model Status response received params:"
,
params
)
info
:=
params
[
0
]
.
(
*
models
.
ModelInfo
)
runningModelStatusResp
:=
&
nodemanagerV2
.
WorkerMessage
{
Message
:
&
nodemanagerV2
.
WorkerMessage_RunningModelStatus
{
RunningModelStatus
:
&
nodemanagerV2
.
RunningModelStatus
{
ModelId
:
strconv
.
FormatUint
(
info
.
TaskId
,
10
),
LastWorkTime
:
info
.
LastWorkTime
,
TotalRunCount
:
info
.
TotalRunCount
,
ExecTime
:
info
.
EstimatExeTime
,
},
},
}
log
.
Info
(
"---------------------------------------Send running model status response msg ------------------------------------"
)
return
runningModelStatusResp
}
func
InstallModelStatusResp
(
params
...
interface
{})
*
nodemanagerV2
.
WorkerMessage
{
log
.
Info
(
"Install Model Status response received params:"
,
params
)
modelId
:=
params
[
0
]
.
(
string
)
lastRunTime
:=
params
[
1
]
.
(
int64
)
installModelStatusRes
:=
&
nodemanagerV2
.
WorkerMessage
{
Message
:
&
nodemanagerV2
.
WorkerMessage_InstalledModelStatus
{
InstalledModelStatus
:
&
nodemanagerV2
.
InstalledModelStatus
{
ModelId
:
modelId
,
LastRunTime
:
lastRunTime
,
},
},
}
log
.
Info
(
"---------------------------------------Send install model status response msg ------------------------------------"
)
return
installModelStatusRes
}
func
GetHardwareInfo
()
*
nodemanagerV2
.
HardwareInfo
{
func
GetHardwareInfo
()
*
nodemanagerV2
.
HardwareInfo
{
hardwareInfo
:=
utils
.
GetHardwareInfo
(
conf
.
GetConfig
()
.
HardwareUrl
)
hardwareInfo
:=
utils
.
GetHardwareInfo
(
conf
.
GetConfig
()
.
HardwareUrl
)
if
hardwareInfo
==
nil
{
if
hardwareInfo
==
nil
{
...
...
nm/task_handler.go
View file @
11fb03c0
...
@@ -182,7 +182,14 @@ func (t *TaskWorker) ComputeTaskHandler(taskMsg *nodemanagerV2.PushTaskMessage)
...
@@ -182,7 +182,14 @@ func (t *TaskWorker) ComputeTaskHandler(taskMsg *nodemanagerV2.PushTaskMessage)
if
!
running
{
if
!
running
{
taskOp
.
taskCmd
.
DockerCmd
.
HostIp
=
models
.
ZeroHost
taskOp
.
taskCmd
.
DockerCmd
.
HostIp
=
models
.
ZeroHost
taskOp
.
taskCmd
.
DockerCmd
.
HostPort
=
t
.
getExternalPort
()
taskOp
.
taskCmd
.
DockerCmd
.
HostPort
=
t
.
getExternalPort
()
containerId
,
gpuSeq
,
err
:=
t
.
DockerOp
.
CreateAndStartContainer
(
model
,
taskOp
.
taskCmd
.
DockerCmd
)
info
:=
GetHardwareInfo
()
if
info
==
nil
{
log
.
Error
(
"Error getting hardware info"
)
taskOp
.
taskExecResult
.
TaskExecError
=
fmt
.
Sprintf
(
"%s"
,
"Error getting hardware info"
)
t
.
ExecTaskIdIsFinished
.
Store
(
taskMsg
.
TaskId
,
true
)
return
}
containerId
,
gpuSeq
,
err
:=
t
.
DockerOp
.
CreateAndStartContainer
(
info
,
model
,
taskOp
.
taskCmd
.
DockerCmd
)
if
err
!=
nil
{
if
err
!=
nil
{
log
.
Errorf
(
"Create and start container failed: %s"
,
err
.
Error
())
log
.
Errorf
(
"Create and start container failed: %s"
,
err
.
Error
())
taskOp
.
taskExecResult
.
TaskExecError
=
fmt
.
Sprintf
(
"%s,%s"
,
"Create and start container failed"
,
err
.
Error
())
taskOp
.
taskExecResult
.
TaskExecError
=
fmt
.
Sprintf
(
"%s,%s"
,
"Create and start container failed"
,
err
.
Error
())
...
@@ -217,7 +224,7 @@ func (t *TaskWorker) ComputeTaskHandler(taskMsg *nodemanagerV2.PushTaskMessage)
...
@@ -217,7 +224,7 @@ func (t *TaskWorker) ComputeTaskHandler(taskMsg *nodemanagerV2.PushTaskMessage)
log
.
Info
(
"----------------------Compute task exec done--------------------------------"
)
log
.
Info
(
"----------------------Compute task exec done--------------------------------"
)
}
}
func
(
t
*
TaskWorker
)
GetAckResp
(
taskMsg
*
nodemanagerV2
.
PushTaskMessage
)
(
isCanExecute
bool
,
bootUpTime
,
queueWaitTime
,
executeTime
int64
)
{
func
(
t
*
TaskWorker
)
GetAckResp
(
taskMsg
*
nodemanagerV2
.
PushTaskMessage
)
(
isCanExecute
bool
,
bootUpTime
,
queueWaitTime
,
executeTime
int64
,
imageName
string
)
{
if
t
.
IsExecStandardTask
{
if
t
.
IsExecStandardTask
{
isCanExecute
=
true
isCanExecute
=
true
return
return
...
@@ -237,7 +244,7 @@ func (t *TaskWorker) GetAckResp(taskMsg *nodemanagerV2.PushTaskMessage) (isCanEx
...
@@ -237,7 +244,7 @@ func (t *TaskWorker) GetAckResp(taskMsg *nodemanagerV2.PushTaskMessage) (isCanEx
if
!
isSuccess
&&
!
t
.
lastExecTaskStartTime
.
IsZero
()
{
if
!
isSuccess
&&
!
t
.
lastExecTaskStartTime
.
IsZero
()
{
lastTaskImageInfo
,
err
:=
db
.
GetModel
(
t
.
lastExecTaskImageName
)
lastTaskImageInfo
,
err
:=
db
.
GetModel
(
t
.
lastExecTaskImageName
)
if
err
!=
nil
{
if
err
!=
nil
{
return
false
,
0
,
0
,
0
return
false
,
0
,
0
,
0
,
""
}
}
since
:=
time
.
Since
(
t
.
lastExecTaskStartTime
)
since
:=
time
.
Since
(
t
.
lastExecTaskStartTime
)
queueWaitTime
=
int64
(
lastTaskImageInfo
.
EstimatExeTime
-
int32
(
since
.
Seconds
()))
queueWaitTime
=
int64
(
lastTaskImageInfo
.
EstimatExeTime
-
int32
(
since
.
Seconds
()))
...
@@ -258,12 +265,13 @@ func (t *TaskWorker) GetAckResp(taskMsg *nodemanagerV2.PushTaskMessage) (isCanEx
...
@@ -258,12 +265,13 @@ func (t *TaskWorker) GetAckResp(taskMsg *nodemanagerV2.PushTaskMessage) (isCanEx
isCanExecute
=
true
isCanExecute
=
true
modelInfo
,
err
:=
db
.
GetModel
(
t
.
lastExecTaskImageName
)
modelInfo
,
err
:=
db
.
GetModel
(
t
.
lastExecTaskImageName
)
if
err
!=
nil
{
if
err
!=
nil
{
return
false
,
0
,
0
,
0
return
false
,
0
,
0
,
0
,
""
}
}
if
modelInfo
!=
nil
{
if
modelInfo
!=
nil
{
bootUpTime
=
modelInfo
.
StartUpTime
bootUpTime
=
modelInfo
.
StartUpTime
executeTime
=
int64
(
modelInfo
.
EstimatExeTime
)
executeTime
=
int64
(
modelInfo
.
EstimatExeTime
)
}
}
imageName
=
modelInfo
.
ImageName
return
return
}
}
...
...
operate/docker.go
View file @
11fb03c0
...
@@ -125,8 +125,8 @@ func (d *DockerOp) ListContainer() []types.Container {
...
@@ -125,8 +125,8 @@ func (d *DockerOp) ListContainer() []types.Container {
return
containers
return
containers
}
}
func
(
d
*
DockerOp
)
CreateAndStartContainer
(
modelInfo
*
models
.
ModelInfo
,
dockerCmd
*
models
.
DockerCmd
)
(
string
,
int32
,
error
)
{
func
(
d
*
DockerOp
)
CreateAndStartContainer
(
info
*
nodemanagerV2
.
HardwareInfo
,
modelInfo
*
models
.
ModelInfo
,
dockerCmd
*
models
.
DockerCmd
)
(
string
,
int32
,
error
)
{
gpuSeq
:=
d
.
checkGpuUsage
(
modelInfo
,
dockerCmd
)
gpuSeq
:=
d
.
checkGpuUsage
(
info
,
modelInfo
,
dockerCmd
)
containerId
,
err
:=
d
.
CreateContainer
(
modelInfo
.
ImageName
,
dockerCmd
)
containerId
,
err
:=
d
.
CreateContainer
(
modelInfo
.
ImageName
,
dockerCmd
)
if
err
!=
nil
{
if
err
!=
nil
{
log
.
Error
(
"Error creating container image failed: "
,
err
)
log
.
Error
(
"Error creating container image failed: "
,
err
)
...
@@ -391,11 +391,7 @@ func (d *DockerOp) getContainerInfo(id string) (types.Container, error) {
...
@@ -391,11 +391,7 @@ func (d *DockerOp) getContainerInfo(id string) (types.Container, error) {
return
types
.
Container
{},
fmt
.
Errorf
(
"get container info failed"
)
return
types
.
Container
{},
fmt
.
Errorf
(
"get container info failed"
)
}
}
func
(
d
*
DockerOp
)
checkGpuUsage
(
modelInfo
*
models
.
ModelInfo
,
dockerCmd
*
models
.
DockerCmd
)
int32
{
func
(
d
*
DockerOp
)
checkGpuUsage
(
info
*
nodemanagerV2
.
HardwareInfo
,
modelInfo
*
models
.
ModelInfo
,
dockerCmd
*
models
.
DockerCmd
)
int32
{
info
:=
nm
.
GetHardwareInfo
()
if
info
==
nil
{
return
0
}
envMap
:=
make
(
map
[
string
]
string
,
0
)
envMap
:=
make
(
map
[
string
]
string
,
0
)
gpu
:=
info
.
GPU
gpu
:=
info
.
GPU
isMatch
:=
false
isMatch
:=
false
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment