Commit 142a2d16 authored by duanjinfei's avatar duanjinfei

update monitor modelInfo

parent 1bfec526
......@@ -13,10 +13,10 @@ func (d *BaseController) ResponseInfo(code int, msg interface{}, result interfac
switch code {
case 500:
logs.Error(msg, result)
d.Data["json"] = result
d.Data["json"] = map[string]interface{}{"code": "500", "msg": msg, "data": result}
case 200:
logs.Info(msg, result)
d.Data["json"] = result
d.Data["json"] = map[string]interface{}{"code": "200", "msg": msg, "data": result}
}
d.ServeJSON()
}
......@@ -73,7 +73,7 @@ func (c *NodeController) AddNodeManager() {
}
func (c *NodeController) GetNodeManager() {
func (c *NodeController) GetNodeManagers() {
manager := nm.GetNodeManagers()
res := make([]*nm.NodeManager, 0)
for _, nodeManager := range manager {
......
......@@ -3,6 +3,7 @@ package main
import (
"example.com/m/log"
"example.com/m/nm"
_ "example.com/m/routers"
"github.com/astaxie/beego"
_ "net/http/pprof"
)
......@@ -14,6 +15,6 @@ func main() {
//}()
//runtime.SetBlockProfileRate(1) // 开启对阻塞操作的跟踪,block
//runtime.SetMutexProfileFraction(1) // 开启对锁调用的跟踪,mutex
nm.StartMonitor()
go nm.StartMonitor()
beego.Run()
}
......@@ -38,7 +38,7 @@ type ModelInfo struct {
type ComputeResult struct {
Code string `json:"code"`
Msg string `json:"msg"`
Content string `json:"content"`
Data string `json:"data"`
}
type NodeManagerClient struct {
......
......@@ -12,3 +12,9 @@ type NodeManagerReq struct {
PublicKey string `json:"public_key"`
EndPoint string `json:"end_point"`
}
type Resp struct {
Code int `json:"code"`
Msg string `json:"msg"`
Data string `json:"data"`
}
package nm
import (
"bytes"
"encoding/json"
"example.com/m/log"
"example.com/m/models"
"example.com/m/operate"
nodeManagerV1 "github.com/odysseus/odysseus-protocol/gen/proto/go/nodemanager/v1"
"io"
"net/http"
"strings"
"time"
)
func monitorModelInfo(dockerOp *operate.DockerOp) {
// TODO: 向api端获取model信息
info := &models.ModelInfo{
TaskId: 1,
User: "",
Pwd: "",
Repository: "",
SignUrl: "http://192.168.1.120:8888/llm/test/get/sign",
ImageName: "onlydd/llm-server:0119",
DiskSize: 10000,
MemorySize: 10000,
IsImageExist: false,
}
client := &http.Client{}
ticker := time.NewTicker(time.Second * 1)
for {
select {
case <-ticker.C:
resp := make([]*models.ModelInfo, 0)
resp = append(resp, info)
modelInfosResp := make([]*models.ModelInfo, 0)
modelResp, err := client.Get("http://192.168.1.8:8087/api/task/taskheat")
if err != nil {
log.Error("Error getting model info from client failed:", err)
continue
}
bodyBytes, err := io.ReadAll(modelResp.Body)
if err != nil {
log.Error("Error reading model response failed:", err)
continue
}
resp := &models.Resp{}
err = json.Unmarshal(bodyBytes, resp)
if err != nil {
log.Error("Unmarshal model response failed:", err)
continue
}
dataResp, err := io.ReadAll(bytes.NewBufferString(resp.Data))
if err != nil {
log.Error("Error reading model response data failed:", err)
continue
}
err = json.Unmarshal(dataResp, &modelInfosResp)
if err != nil {
log.Error("Unmarshal model response failed:", err)
continue
}
modelInfoMap := make(map[uint64]*models.ModelInfo, 0)
for _, modelInfo := range resp {
for _, modelInfo := range modelInfosResp {
modelInfoMap[modelInfo.TaskId] = modelInfo
}
imageNameMap, err := dockerOp.PsImageNameMap()
......@@ -36,8 +55,12 @@ func monitorModelInfo(dockerOp *operate.DockerOp) {
log.Error("Docker op ps images failed:", err)
continue
}
for _, modelInfo := range modelInfos {
if modelInfo.IsImageExist {
for _, modelInfo := range modelInfosResp {
if modelInfo.ImageName == "" {
continue
}
split := strings.Split(modelInfo.ImageName, ":")
if len(split) != 2 {
continue
}
if !imageNameMap[modelInfo.ImageName] {
......@@ -48,16 +71,16 @@ func monitorModelInfo(dockerOp *operate.DockerOp) {
go dockerOp.PullImage(modelInfo)
modelInfo.IsImageExist = true
dockerOp.ModelTaskIdChan <- modelInfo.TaskId
dockerOp.IsReportModelTaskId[modelInfo.TaskId] = true
}
} else if !dockerOp.IsReportModelTaskId[modelInfo.TaskId] {
dockerOp.ModelTaskIdChan <- modelInfo.TaskId
dockerOp.IsReportModelTaskId[modelInfo.TaskId] = true
}
if modelInfoMap[modelInfo.TaskId] == nil {
dockerOp.SignApi[modelInfo.ImageName] = modelInfo.SignUrl
dockerOp.ModelsInfo = append(dockerOp.ModelsInfo, modelInfo)
}
}
ticker = time.NewTicker(time.Minute * 65)
}
}
}
......
......@@ -22,13 +22,11 @@ var (
usedNodeManagerClient []*models.NodeManagerClient
nodeManagerClientChan chan *models.NodeManagerClient
nodeManagerMsgChan chan *nodeManagerV1.ManagerMessage
modelInfos []*models.ModelInfo
)
func init() {
nodeManagerArr = make([]*NodeManager, 0)
usedNodeManagerClient = make([]*models.NodeManagerClient, 0)
modelInfos = make([]*models.ModelInfo, 0)
nodeManagerClientChan = make(chan *models.NodeManagerClient, 0)
nodeManagerMsgChan = make(chan *nodeManagerV1.ManagerMessage, 1000)
}
......
......@@ -95,7 +95,7 @@ func (d *DockerOp) GetContainerSign(taskMsg *nodemanagerv1.PushTaskMessage, task
log.Error("Failed to parse docker response body")
return nil
}
sign := res.Content
sign := res.Data
log.Info("Container sign:", sign)
return common.Hex2Bytes(sign)
}
......
......@@ -6,5 +6,5 @@ import (
)
func init() {
beego.Router("/llm/test/get/sign", &controllers.NodeController{}, "post:GetContainerSign")
beego.Router("/power/node/get/nm", &controllers.NodeController{}, "post:GetNodeManagers")
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment