Commit d40f1e5a authored by duanjinfei

add container env

parent 97efa134
......@@ -24,6 +24,7 @@ const (
EncryptionKey = "uxhendjFYsoWFnsO"
HistoryBenefitAddressDirectory = "data/benefitList"
CudaEnv = "CUDA_VISIBLE_DEVICES"
CudaCheck = "PYTORCH_NVML_BASED_CUDA_CHECK"
BasicMode = 1
HealthMode = 2
SaveMode = 3
......
......@@ -179,6 +179,7 @@ func (n *NodeManagerHandler) DistributionMsgWorker(nodeManagerMsgChan chan *node
dockerCmd := &models.DockerCmd{
HostIp: models.ZeroHost,
HostPort: n.taskMsgWorker.getExternalPort(),
EnvMap: make(map[string]string, 0),
}
info := GetHardwareInfo()
if info == nil {
......
......@@ -182,6 +182,7 @@ func (t *TaskWorker) ComputeTaskHandler(taskMsg *nodemanagerV2.PushTaskMessage)
if !running {
taskOp.taskCmd.DockerCmd.HostIp = models.ZeroHost
taskOp.taskCmd.DockerCmd.HostPort = t.getExternalPort()
taskOp.taskCmd.DockerCmd.EnvMap = make(map[string]string, 0)
info := GetHardwareInfo()
if info == nil {
log.Error("Error getting hardware info")
......
......@@ -126,6 +126,7 @@ func (d *DockerOp) ListContainer() []types.Container {
}
func (d *DockerOp) CreateAndStartContainer(info *nodemanagerV2.HardwareInfo, modelInfo *models.ModelInfo, dockerCmd *models.DockerCmd) (string, int32, error) {
dockerCmd.EnvMap[models.CudaCheck] = "1"
gpuSeq := d.checkGpuUsage(info, modelInfo, dockerCmd)
containerId, err := d.CreateContainer(modelInfo.ImageName, dockerCmd)
if err != nil {
......@@ -405,6 +406,7 @@ func (d *DockerOp) checkGpuUsage(info *nodemanagerV2.HardwareInfo, modelInfo *mo
}
if !isMatch {
res = d.foundSeq(modelInfo)
dockerCmd.EnvMap[models.CudaEnv] = strconv.FormatInt(int64(res), 10)
}
return res
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment