Commit eb6468d9 authored by duanjinfei's avatar duanjinfei

update msg resp

parent 510c913b
...@@ -12,6 +12,7 @@ import ( ...@@ -12,6 +12,7 @@ import (
"github.com/spf13/cobra" "github.com/spf13/cobra"
"github.com/spf13/viper" "github.com/spf13/viper"
"os" "os"
"strings"
) )
var ( var (
...@@ -20,8 +21,8 @@ var ( ...@@ -20,8 +21,8 @@ var (
) )
func init() { func init() {
RootCmd.PersistentFlags().StringVarP(&rewardAddr, "reward", "r", "0x40EC4256fcBCA69CdbAc942594caeC79FBE10494", "please enter a reward address") RootCmd.PersistentFlags().StringVarP(&rewardAddr, "reward", "r", "", "please enter a reward address")
RootCmd.PersistentFlags().StringVarP(&externalIp, "externalIp", "e", "192.168.1.120", "please enter server external ip address") RootCmd.PersistentFlags().StringVarP(&externalIp, "externalIp", "e", "", "please enter server external ip address")
RootCmd.PersistentFlags().StringVarP(&opSys, "opSys", "s", "", "please enter you op sys name : win、linux") RootCmd.PersistentFlags().StringVarP(&opSys, "opSys", "s", "", "please enter you op sys name : win、linux")
RootCmd.PersistentFlags().BoolVarP(&debug, "debug", "d", false, "set log level debug") RootCmd.PersistentFlags().BoolVarP(&debug, "debug", "d", false, "set log level debug")
cobra.OnInitialize(initConfig) cobra.OnInitialize(initConfig)
...@@ -58,7 +59,7 @@ var RootCmd = &cobra.Command{ ...@@ -58,7 +59,7 @@ var RootCmd = &cobra.Command{
} }
isExist := false isExist := false
for _, acc := range nm.HistoryBenefitAcc { for _, acc := range nm.HistoryBenefitAcc {
if acc.Address == rewardAddr { if strings.ToLower(acc.Address) == strings.ToLower(rewardAddr) {
isExist = true isExist = true
} }
} }
...@@ -70,7 +71,7 @@ var RootCmd = &cobra.Command{ ...@@ -70,7 +71,7 @@ var RootCmd = &cobra.Command{
return return
} }
} }
//go nm.StartMonitor() go nm.StartMonitor()
} }
beego.Run() beego.Run()
}, },
......
...@@ -18,7 +18,8 @@ type Config struct { ...@@ -18,7 +18,8 @@ type Config struct {
ExternalIp string ExternalIp string
SignPublicAddress common.Address SignPublicAddress common.Address
SignPrivateKey *ecdsa.PrivateKey SignPrivateKey *ecdsa.PrivateKey
RunMode int `json:"init_run_mode"` HardwareUrl string `json:"hardware_url" mapstructure:"hardware_url"`
RunMode int `json:"init_run_mode" mapstructure:"init_run_mode"`
NmSeed string `json:"nm_seed" mapstructure:"nm_seed"` NmSeed string `json:"nm_seed" mapstructure:"nm_seed"`
HeartRespTimeSecond int64 `json:"heart_response" mapstructure:"heart_response"` HeartRespTimeSecond int64 `json:"heart_response" mapstructure:"heart_response"`
NodeManagerNum int64 `json:"node_manager_num" mapstructure:"node_manager_num"` NodeManagerNum int64 `json:"node_manager_num" mapstructure:"node_manager_num"`
......
{ {
"nm_seed": "43.198.29.144:10001", "nm_seed": "52.221.177.10:10001",
"api_url": "https://aigic.ai/admin/api/task/taskheat", "api_url": "https://aigic.ai/admin/api/task/taskheat",
"node_manager_num": 1, "node_manager_num": 1,
"heart_response": 60, "heart_response": 60,
...@@ -12,5 +12,6 @@ ...@@ -12,5 +12,6 @@
"replicate_image_name_suffix": "docker.aigic.ai/ai", "replicate_image_name_suffix": "docker.aigic.ai/ai",
"is_stop_last_container": true, "is_stop_last_container": true,
"disk_usage":80, "disk_usage":80,
"init_run_mode": 1 "init_run_mode": 1,
"hardware_url": "http://47.94.59.74:8005/hw"
} }
\ No newline at end of file
...@@ -9,6 +9,7 @@ import ( ...@@ -9,6 +9,7 @@ import (
"example.com/m/utils" "example.com/m/utils"
nodemanagerV2 "github.com/odysseus/odysseus-protocol/gen/proto/go/nodemanager/v2" nodemanagerV2 "github.com/odysseus/odysseus-protocol/gen/proto/go/nodemanager/v2"
"io" "io"
"strings"
"time" "time"
) )
...@@ -54,7 +55,7 @@ func (c *NodeController) SetBenefitAddress() { ...@@ -54,7 +55,7 @@ func (c *NodeController) SetBenefitAddress() {
} }
isExist := false isExist := false
for _, s := range nm.HistoryBenefitAcc { for _, s := range nm.HistoryBenefitAcc {
if s.Address == req.Address && !s.IsDel { if strings.ToLower(s.Address) == strings.ToLower(req.Address) && !s.IsDel {
isExist = true isExist = true
} }
} }
...@@ -207,7 +208,7 @@ func (c *NodeController) DelBenefitAddress() { ...@@ -207,7 +208,7 @@ func (c *NodeController) DelBenefitAddress() {
} }
isExist := false isExist := false
for _, s := range nm.HistoryBenefitAcc { for _, s := range nm.HistoryBenefitAcc {
if s.Address == req.Address { if strings.ToLower(s.Address) == strings.ToLower(req.Address) {
s.IsDel = true s.IsDel = true
isExist = true isExist = true
} }
......
...@@ -27,7 +27,7 @@ func (c *StateController) GetRunningState() { ...@@ -27,7 +27,7 @@ func (c *StateController) GetRunningState() {
} }
func (c *StateController) GetRunningTp() { func (c *StateController) GetRunningTp() {
info := utils.GetHardwareInfo() info := utils.GetHardwareInfo(conf.GetConfig().HardwareUrl)
if info == nil { if info == nil {
c.ResponseInfo(500, "get running tp failed", 0) c.ResponseInfo(500, "get running tp failed", 0)
return return
...@@ -69,7 +69,7 @@ func (c *StateController) GetWorkerInfo() { ...@@ -69,7 +69,7 @@ func (c *StateController) GetWorkerInfo() {
} }
func (c *StateController) GetListGpuInfo() { func (c *StateController) GetListGpuInfo() {
info := utils.GetHardwareInfo() info := utils.GetHardwareInfo(conf.GetConfig().HardwareUrl)
if info != nil && info.Data != nil { if info != nil && info.Data != nil {
c.ResponseInfo(200, "get list gpu info successful", info.Data.Gpus) c.ResponseInfo(200, "get list gpu info successful", info.Data.Gpus)
return return
...@@ -89,7 +89,7 @@ func (c *StateController) GetGpuUsageInfo() { ...@@ -89,7 +89,7 @@ func (c *StateController) GetGpuUsageInfo() {
c.ResponseInfo(500, "param error", "") c.ResponseInfo(500, "param error", "")
return return
} }
info := utils.GetHardwareInfo() info := utils.GetHardwareInfo(conf.GetConfig().HardwareUrl)
if info != nil { if info != nil {
for _, gpu := range info.Data.Gpus { for _, gpu := range info.Data.Gpus {
if gpu.Seq == req.Seq { if gpu.Seq == req.Seq {
...@@ -102,7 +102,7 @@ func (c *StateController) GetGpuUsageInfo() { ...@@ -102,7 +102,7 @@ func (c *StateController) GetGpuUsageInfo() {
} }
func (c *StateController) GetOtherHardwareInfo() { func (c *StateController) GetOtherHardwareInfo() {
info := utils.GetHardwareInfo() info := utils.GetHardwareInfo(conf.GetConfig().HardwareUrl)
var diskTotal, diskFree int64 var diskTotal, diskFree int64
for _, disk := range info.Data.Disk { for _, disk := range info.Data.Disk {
for _, point := range disk.MountPoints { for _, point := range disk.MountPoints {
......
...@@ -58,6 +58,11 @@ func (m *ModelHandler) MonitorModelInfo() { ...@@ -58,6 +58,11 @@ func (m *ModelHandler) MonitorModelInfo() {
log.Warn("Response data is empty") log.Warn("Response data is empty")
continue continue
} }
imageMap, err := m.dockerOp.PsImageNameMap()
if err != nil {
log.Error("Error getting image name map from client failed:", err)
continue
}
modelInfosResp := resp.Data modelInfosResp := resp.Data
for _, modelInfo := range modelInfosResp { for _, modelInfo := range modelInfosResp {
if modelInfo.ImageName == "" { if modelInfo.ImageName == "" {
...@@ -68,7 +73,7 @@ func (m *ModelHandler) MonitorModelInfo() { ...@@ -68,7 +73,7 @@ func (m *ModelHandler) MonitorModelInfo() {
if len(split) != 2 { if len(split) != 2 {
continue continue
} }
log.WithField("name", modelInfo.ImageName).Info("The image name is already") log.WithField("name", modelInfo.ImageName).Info("The image name is")
m.dockerOp.SignApi[modelInfo.ImageName] = modelInfo.SignUrl m.dockerOp.SignApi[modelInfo.ImageName] = modelInfo.SignUrl
model, _ := db.GetModel(modelInfo.ImageName) model, _ := db.GetModel(modelInfo.ImageName)
if model != nil { if model != nil {
...@@ -78,14 +83,16 @@ func (m *ModelHandler) MonitorModelInfo() { ...@@ -78,14 +83,16 @@ func (m *ModelHandler) MonitorModelInfo() {
log.WithError(err).Error("Put db error") log.WithError(err).Error("Put db error")
continue continue
} }
log.WithField("name", modelInfo.ImageName).Info("The image updated")
} else { } else {
err := db.PutModel(modelInfo.ImageName, modelInfo) err := db.PutModel(modelInfo.ImageName, modelInfo)
if err != nil { if err != nil {
log.WithError(err).Error("Put db error") log.WithError(err).Error("Put db error")
continue continue
} }
log.WithField("name", modelInfo.ImageName).Info("The image add")
} }
if modelInfo.PublishStatus == models.ModelPublishStatusYes { if !imageMap[modelInfo.ImageName] && modelInfo.PublishStatus == models.ModelPublishStatusYes {
log.WithField("model image name", modelInfo.ImageName).Info("pulling image") log.WithField("model image name", modelInfo.ImageName).Info("pulling image")
go m.dockerOp.PullImage(model.ImageName) go m.dockerOp.PullImage(model.ImageName)
} }
...@@ -180,34 +187,3 @@ func (m *ModelHandler) MonitorModelStatus() { ...@@ -180,34 +187,3 @@ func (m *ModelHandler) MonitorModelStatus() {
} }
} }
} }
func (m *ModelHandler) isResourceEnough(modelInfo *models.ModelInfo) bool {
return true
}
func (m *ModelHandler) checkGpuUsage(modelInfo *models.ModelInfo) bool {
return false
}
func (m *ModelHandler) checkDiskUsage(modelInfo *models.ModelInfo) bool {
totalSize, usedSize, availSize, usageSize, err := m.dockerOp.GetDockerInfo()
if err != nil {
log.WithError(err).Error("Disk resource is not enough")
return false
}
log.WithField("TotalSize:", totalSize).WithField("UsedSize:", usedSize).WithField("AvailSize:", availSize).Info("The disk info")
if conf.GetConfig().DiskUsage < usageSize {
return false
}
modelDiskSizeGB, err := strconv.ParseInt(modelInfo.HardwareRequire.DiskSize, 10, 64)
if err != nil {
log.Errorf("Error parsing model disk usage failed: %v", err)
return false
}
modelDiskSizeByte := modelDiskSizeGB * 1024 * 1024
if availSize < modelDiskSizeByte {
log.Error("The hard drive is running out of space")
return false
}
return true
}
...@@ -70,16 +70,16 @@ type GpuInfo struct { ...@@ -70,16 +70,16 @@ type GpuInfo struct {
} }
type Gpu struct { type Gpu struct {
Seq int64 `json:"seq"` Seq int32 `json:"seq"`
Uuid string `json:"uuid"` Uuid string `json:"uuid"`
Model string `json:"model"` Model string `json:"model"`
Performance int64 `json:"performance"` Performance int32 `json:"performance"`
PowerRating int64 `json:"power_rating"` PowerRating int32 `json:"power_rating"`
MemTotal int64 `json:"mem_total"` MemTotal int64 `json:"mem_total"`
MemFree int64 `json:"mem_free"` MemFree int64 `json:"mem_free"`
Usage int64 `json:"usage"` Usage int32 `json:"usage"`
Temp int64 `json:"temp"` Temp int32 `json:"temp"`
PowerRt int64 `json:"power_rt"` PowerRt int32 `json:"power_rt"`
} }
type Cpu struct { type Cpu struct {
......
...@@ -35,7 +35,7 @@ type WorkerAccount struct { ...@@ -35,7 +35,7 @@ type WorkerAccount struct {
} }
type GpuUsageReq struct { type GpuUsageReq struct {
Seq int64 `json:"seq"` Seq int32 `json:"seq"`
} }
type OtherHardwareInfoResp struct { type OtherHardwareInfoResp struct {
......
...@@ -362,7 +362,7 @@ func DelModelRunningResp(params ...interface{}) *nodemanagerV2.WorkerMessage { ...@@ -362,7 +362,7 @@ func DelModelRunningResp(params ...interface{}) *nodemanagerV2.WorkerMessage {
} }
func getHardwareInfo() *nodemanagerV2.HardwareInfo { func getHardwareInfo() *nodemanagerV2.HardwareInfo {
hardwareInfo := utils.GetHardwareInfo() hardwareInfo := utils.GetHardwareInfo(conf.GetConfig().HardwareUrl)
gpusInfo := make([]*nodemanagerV2.GPUInfo, 0) gpusInfo := make([]*nodemanagerV2.GPUInfo, 0)
var diskTotal, diskFree int64 var diskTotal, diskFree int64
for _, disk := range hardwareInfo.Data.Disk { for _, disk := range hardwareInfo.Data.Disk {
...@@ -383,6 +383,21 @@ func getHardwareInfo() *nodemanagerV2.HardwareInfo { ...@@ -383,6 +383,21 @@ func getHardwareInfo() *nodemanagerV2.HardwareInfo {
bandWidth = net.Speed bandWidth = net.Speed
} }
} }
for _, gpu := range hardwareInfo.Data.Gpus {
gpuInfo := &nodemanagerV2.GPUInfo{
Seq: gpu.Seq,
Uuid: gpu.Uuid,
Model: gpu.Model,
Performance: gpu.Performance,
PowerRating: gpu.PowerRating,
MemTotal: gpu.MemTotal,
MemFree: gpu.MemFree,
Usage: gpu.Usage,
Temp: gpu.Temp,
PowerRt: gpu.PowerRt,
}
gpusInfo = append(gpusInfo, gpuInfo)
}
res := &nodemanagerV2.HardwareInfo{ res := &nodemanagerV2.HardwareInfo{
CPU: &nodemanagerV2.CPUInfo{ CPU: &nodemanagerV2.CPUInfo{
Model: hardwareInfo.Data.Cpus.Model, Model: hardwareInfo.Data.Cpus.Model,
......
...@@ -34,9 +34,9 @@ func StartMonitor() { ...@@ -34,9 +34,9 @@ func StartMonitor() {
monitorNm := NewMonitorNm(dockerOp, modelHandler) monitorNm := NewMonitorNm(dockerOp, modelHandler)
go modelHandler.MonitorModelInfo() //go modelHandler.MonitorModelInfo()
go modelHandler.MonitorModelStatus() //go modelHandler.MonitorModelStatus()
go monitorNm.monitorNodeManagerSeed() go monitorNm.monitorNodeManagerSeed()
......
...@@ -310,8 +310,8 @@ func readAndDecryptFile(key []byte, filename string) ([]byte, error) { ...@@ -310,8 +310,8 @@ func readAndDecryptFile(key []byte, filename string) ([]byte, error) {
return decryptedData, nil return decryptedData, nil
} }
func GetHardwareInfo() *models.HardwareInfoRep { func GetHardwareInfo(url string) *models.HardwareInfoRep {
resp, err := http.Get("http://124.193.167.71:5000/hw") resp, err := http.Get(url)
if err != nil { if err != nil {
log.Error("Error creating request") log.Error("Error creating request")
return nil return nil
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment