Commit 44642bba authored by duanjinfei

update task handler

parent f2771c8b
...@@ -13,9 +13,13 @@ const ( ...@@ -13,9 +13,13 @@ const (
UseRedirect = "Use-Redirect" UseRedirect = "Use-Redirect"
Prefer = "Prefer" Prefer = "Prefer"
Async = "respond-async" Async = "respond-async"
MaxExecTime = "MaxExecTime"
HealthCheckAPI = "/health-check" HealthCheckAPI = "/health-check"
ReplicateImageNameSuffix = "docker.agicoin.ai/agicoin" ReplicateImageNameSuffix = "docker.agicoin.ai/agicoin"
READY = "READY" READY = "READY"
ZeroHost = "0.0.0.0"
ModelPublishStatusYes = 1 ModelPublishStatusYes = 1
ModelPublishStatusNo = 2 ModelPublishStatusNo = 2
DefaultMaxExecTime = 300
DefaultTaskTimer = 2
) )
...@@ -25,14 +25,13 @@ import ( ...@@ -25,14 +25,13 @@ import (
"time" "time"
) )
type TaskHandler struct { type TaskWorker struct {
Wg *sync.WaitGroup Wg *sync.WaitGroup
Mutex *sync.Mutex Mutex *sync.Mutex
LruCache *lru.Cache LruCache *lru.Cache
DockerOp *operate.DockerOp DockerOp *operate.DockerOp
CmdOp *operate.Command CmdOp *operate.Command
TaskMsg chan *nodeManagerV1.PushTaskMessage TaskMsg chan *nodeManagerV1.PushTaskMessage
HttpClient *http.Client
IsExecAiTask bool IsExecAiTask bool
IsExecStandardTask bool IsExecStandardTask bool
ExecTaskIdIsSuccess *sync.Map ExecTaskIdIsSuccess *sync.Map
...@@ -40,22 +39,32 @@ type TaskHandler struct { ...@@ -40,22 +39,32 @@ type TaskHandler struct {
oldTaskId string oldTaskId string
} }
func NewTaskWorker(op *operate.DockerOp) *TaskHandler { type TaskOp struct {
return &TaskHandler{ taskMsg *nodeManagerV1.PushTaskMessage
taskCmd *models.TaskCmd
taskExecResult *models.TaskResult
taskParam *models.TaskParam
httpClient *http.Client
request *http.Request
ticker *time.Ticker
startBeforeTaskTime time.Time
}
func NewTaskWorker(op *operate.DockerOp) *TaskWorker {
return &TaskWorker{
Wg: &sync.WaitGroup{}, Wg: &sync.WaitGroup{},
Mutex: &sync.Mutex{}, Mutex: &sync.Mutex{},
LruCache: lru.New(100), LruCache: lru.New(100),
DockerOp: op, DockerOp: op,
TaskMsg: make(chan *nodeManagerV1.PushTaskMessage, 0), TaskMsg: make(chan *nodeManagerV1.PushTaskMessage, 0),
HttpClient: &http.Client{},
IsExecAiTask: false, IsExecAiTask: false,
ExecTaskIdIsSuccess: &sync.Map{}, ExecTaskIdIsSuccess: &sync.Map{},
} }
} }
func (t *TaskHandler) DistributionTaskWorker(runCount int) { func (t *TaskWorker) DistributionTaskWorker(runCount int) {
for i := 0; i < runCount; i++ { for i := 0; i < runCount; i++ {
go func(t *TaskHandler) { go func(t *TaskWorker) {
for { for {
select { select {
case taskMsg := <-t.TaskMsg: case taskMsg := <-t.TaskMsg:
...@@ -86,7 +95,7 @@ func (t *TaskHandler) DistributionTaskWorker(runCount int) { ...@@ -86,7 +95,7 @@ func (t *TaskHandler) DistributionTaskWorker(runCount int) {
} }
} }
func (t *TaskHandler) GetMinerSign(msg *nodeManagerV1.PushTaskMessage, taskResult []byte) ([]byte, []byte, []byte) { func (t *TaskWorker) GetMinerSign(msg *nodeManagerV1.PushTaskMessage, taskResult []byte) ([]byte, []byte, []byte) {
reqHash := crypto.Keccak256Hash(msg.TaskParam) reqHash := crypto.Keccak256Hash(msg.TaskParam)
respHash := crypto.Keccak256Hash(taskResult) respHash := crypto.Keccak256Hash(taskResult)
signHash := crypto.Keccak256Hash(bytes.NewBufferString(msg.TaskId).Bytes(), reqHash.Bytes(), respHash.Bytes()) signHash := crypto.Keccak256Hash(bytes.NewBufferString(msg.TaskId).Bytes(), reqHash.Bytes(), respHash.Bytes())
...@@ -99,167 +108,448 @@ func (t *TaskHandler) GetMinerSign(msg *nodeManagerV1.PushTaskMessage, taskResul ...@@ -99,167 +108,448 @@ func (t *TaskHandler) GetMinerSign(msg *nodeManagerV1.PushTaskMessage, taskResul
return reqHash.Bytes(), respHash.Bytes(), sign return reqHash.Bytes(), respHash.Bytes(), sign
} }
func (t *TaskHandler) SystemTaskHandler(taskMsg *nodeManagerV1.PushTaskMessage) { func (t *TaskWorker) SystemTaskHandler(taskMsg *nodeManagerV1.PushTaskMessage) {
defer t.Wg.Done() defer t.Wg.Done()
log.Info("received systemTask--------------------------------") log.Info("received systemTask--------------------------------")
} }
func (t *TaskHandler) ComputeTaskHandler(taskMsg *nodeManagerV1.PushTaskMessage) { func (t *TaskWorker) CustomTaskHandler(taskMsg *nodeManagerV1.PushTaskMessage) {
defer t.Wg.Done()
_, err := t.DockerOp.PsImages()
if err != nil {
log.Error("custom task handler docker op ps images failed: ", err)
return
}
log.Info("received customTask--------------------------------")
}
func (t *TaskWorker) ComputeTaskHandler(taskMsg *nodeManagerV1.PushTaskMessage) {
defer t.Wg.Done() defer t.Wg.Done()
t.checkLastTaskExecStatus(taskMsg) t.checkLastTaskExecStatus(taskMsg)
log.Info("check last task exec status successful") log.Info("check last task exec status successful")
taskExecResult := &models.TaskResult{ taskOp := &TaskOp{
taskMsg: taskMsg,
taskCmd: &models.TaskCmd{},
taskExecResult: &models.TaskResult{
TaskHttpStatusCode: 200, TaskHttpStatusCode: 200,
TaskRespBody: nil, TaskRespBody: nil,
TaskHttpHeaders: nil, TaskHttpHeaders: nil,
TaskIsSuccess: false, TaskIsSuccess: false,
TaskExecTime: 0, TaskExecTime: 0,
TaskExecError: "", TaskExecError: "",
} },
t.LruCache.Add(taskMsg.TaskId, taskExecResult) taskParam: &models.TaskParam{},
taskCmd := &models.TaskCmd{} httpClient: &http.Client{},
err := json.Unmarshal(bytes.NewBufferString(taskMsg.TaskCmd).Bytes(), taskCmd) request: &http.Request{},
ticker: time.NewTicker(time.Second * models.DefaultTaskTimer),
startBeforeTaskTime: time.Now(),
}
t.LruCache.Add(taskMsg.TaskId, taskOp.taskExecResult)
err := json.Unmarshal(bytes.NewBufferString(taskMsg.TaskCmd).Bytes(), taskOp.taskCmd)
if err != nil { if err != nil {
log.Errorf("failed to unmarshal task cmd: %s", err.Error()) log.Errorf("failed to unmarshal task cmd: %s", err.Error())
taskExecResult.TaskExecError = fmt.Sprintf("%s,%s", "failed to unmarshal task cmd: %s", err.Error()) taskOp.taskExecResult.TaskExecError = fmt.Sprintf("%s,%s", "failed to unmarshal task cmd: %s", err.Error())
t.ExecTaskIdIsSuccess.Store(taskMsg.TaskId, true) t.ExecTaskIdIsSuccess.Store(taskMsg.TaskId, true)
return return
} }
taskCmd.ImageName = fmt.Sprintf("%s-%s", taskCmd.ImageName, conf.GetConfig().OpSys) taskOp.taskCmd.ImageName = fmt.Sprintf("%s-%s", taskOp.taskCmd.ImageName, conf.GetConfig().OpSys)
log.Info("received task cmd :", taskCmd) log.Info("received task cmd :", taskOp.taskCmd)
log.WithField("t.oldTaskImageName", t.oldTaskImageName).WithField("newTaskImageName", taskCmd.ImageName).Info("task image info") log.WithField("t.oldTaskImageName", t.oldTaskImageName).WithField("newTaskImageName", taskOp.taskCmd.ImageName).Info("task image info")
if taskMsg.TaskKind != baseV1.TaskKind_StandardTask { if taskMsg.TaskKind != baseV1.TaskKind_StandardTask {
t.checkIsStopContainer(taskCmd) t.checkIsStopContainer(taskOp.taskCmd)
} }
log.Info("check is stop container finished") log.Info("check is stop container finished")
isFound, imageId := t.foundTaskImage(taskCmd) imageId := t.foundTaskImage(taskOp.taskCmd)
log.Info("found task image finished") log.Info("found task image finished")
if !isFound || imageId == "" { if imageId == "" {
log.Error("The image is not found:", taskCmd.ImageName) log.Error("The image is not found:", taskOp.taskCmd.ImageName)
taskExecResult.TaskExecError = fmt.Sprintf("%s,%s", "The image is not found:", taskCmd.ImageName) taskOp.taskExecResult.TaskExecError = fmt.Sprintf("%s,%s", "The image is not found:", taskOp.taskCmd.ImageName)
t.ExecTaskIdIsSuccess.Store(taskMsg.TaskId, true)
return
}
err = json.Unmarshal(taskMsg.TaskParam, taskOp.taskParam)
if err != nil {
log.WithField("err", err).Error("Error unmarshalling task parameter")
taskOp.taskExecResult.TaskExecError = fmt.Sprintf("%s,%s", "Error unmarshalling task parameter", err.Error())
t.ExecTaskIdIsSuccess.Store(taskMsg.TaskId, true) t.ExecTaskIdIsSuccess.Store(taskMsg.TaskId, true)
return return
} }
running, internalIp, internalPort := t.foundImageIsRunning(imageId) running, internalIp, internalPort := t.foundImageIsRunning(imageId)
if !running { if !running {
var externalPort int64 taskOp.taskCmd.DockerCmd.HostIp = models.ZeroHost
for { taskOp.taskCmd.DockerCmd.HostPort = t.getExternalPort()
// 设置种子以确保每次运行时生成不同的随机数序列 containerId, err := t.DockerOp.CreateAndStartContainer(taskOp.taskCmd.ImageName, taskOp.taskCmd.DockerCmd)
rand.Seed(time.Now().UnixNano())
// 生成一个介于 0 和 100 之间的随机整数
externalPort = rand.Int63n(10001) + 10000
log.Info("DockerOp UsedExternalPort :", t.DockerOp.UsedExternalPort[externalPort])
if t.DockerOp.UsedExternalPort[externalPort] {
continue
}
break
}
taskCmd.DockerCmd.HostIp = "0.0.0.0"
taskCmd.DockerCmd.HostPort = strconv.FormatInt(externalPort, 10)
containerId, err := t.DockerOp.CreateAndStartContainer(taskCmd.ImageName, taskCmd.DockerCmd)
if err != nil { if err != nil {
log.Errorf("Create and start container failed: %s", err.Error()) log.Errorf("Create and start container failed: %s", err.Error())
taskExecResult.TaskExecError = fmt.Sprintf("%s,%s", "Create and start container failed", err.Error()) taskOp.taskExecResult.TaskExecError = fmt.Sprintf("%s,%s", "Create and start container failed", err.Error())
t.ExecTaskIdIsSuccess.Store(taskMsg.TaskId, true) t.ExecTaskIdIsSuccess.Store(taskMsg.TaskId, true)
return return
} }
log.Infof("Started container with ID %s", containerId) log.Infof("Started container with ID %s", containerId)
time.Sleep(time.Second * 70) if err = taskOp.waitContainerRunning(t, imageId); err != nil {
running, internalIp, internalPort = t.foundImageIsRunning(imageId) taskOp.taskExecResult.TaskExecError = fmt.Sprintf("%s", err.Error())
if running {
isMatch := strings.HasPrefix(taskCmd.ImageName, models.ReplicateImageNameSuffix)
if isMatch {
if !t.checkContainerHealthy(internalIp, internalPort, taskMsg, taskExecResult) {
t.ExecTaskIdIsSuccess.Store(taskMsg.TaskId, true) t.ExecTaskIdIsSuccess.Store(taskMsg.TaskId, true)
return return
} }
}
taskCmd.ApiUrl = fmt.Sprintf("http://%s:%d%s", internalIp, internalPort, taskCmd.ApiUrl)
log.Info("Container ports:", internalPort)
log.WithField("ApiUrl", taskCmd.ApiUrl).Info("The image is not running")
}
} else { } else {
taskCmd.ApiUrl = fmt.Sprintf("http://%s:%d%s", internalIp, internalPort, taskCmd.ApiUrl) taskOp.taskCmd.ApiUrl = fmt.Sprintf("http://%s:%d%s", internalIp, internalPort, taskOp.taskCmd.ApiUrl)
log.Info("Container ports:", internalPort) log.Info("Container ports:", internalPort)
log.WithField("ApiUrl", taskCmd.ApiUrl).Info("The image is running") log.WithField("ApiUrl", taskOp.taskCmd.ApiUrl).Info("The image is running")
} }
startBeforeTaskTime := time.Now() reqContainerBody := bytes.NewReader(taskOp.taskParam.Body)
taskParam := &models.TaskParam{} if len(taskOp.taskParam.Queries) > 0 {
err = json.Unmarshal(taskMsg.TaskParam, taskParam) queryString := utils.MatchContainerQueryString(taskOp.taskParam.Queries)
taskOp.taskCmd.ApiUrl = fmt.Sprintf("%s?%s", taskOp.taskCmd.ApiUrl, queryString)
log.WithField("ApiUrl", taskOp.taskCmd.ApiUrl).Info("The task param query str not empty")
}
taskOp.request, err = http.NewRequest("POST", taskOp.taskCmd.ApiUrl, reqContainerBody)
if err != nil { if err != nil {
log.WithField("err", err).Error("Error unmarshalling task parameter") log.WithField("error:", err).Error("New container request failed")
taskExecResult.TaskExecError = fmt.Sprintf("%s,%s", "Error unmarshalling task parameter", err.Error()) taskOp.taskExecResult.TaskExecError = fmt.Sprintf("%s,%s", "Http client new container request failed", err.Error())
t.ExecTaskIdIsSuccess.Store(taskMsg.TaskId, true)
return
}
taskOp.request.Header.Set("Content-Type", "application/json")
if err = taskOp.validateWebHook(); err != nil {
taskOp.taskExecResult.TaskExecError = fmt.Sprintf("%s", err.Error())
t.ExecTaskIdIsSuccess.Store(taskMsg.TaskId, true) t.ExecTaskIdIsSuccess.Store(taskMsg.TaskId, true)
return return
} }
reqContainerBody := bytes.NewReader(taskParam.Body) if err = taskOp.waitReqContainerOk(t.DockerOp); err != nil {
if len(taskParam.Queries) > 0 { taskOp.taskExecResult.TaskExecError = fmt.Sprintf("%s", err.Error())
queryString := utils.MatchContainerQueryString(taskParam.Queries) t.ExecTaskIdIsSuccess.Store(taskMsg.TaskId, true)
taskCmd.ApiUrl = fmt.Sprintf("%s?%s", taskCmd.ApiUrl, queryString) return
}
endAfterTaskTime := time.Since(taskOp.startBeforeTaskTime)
taskOp.taskExecResult.TaskExecTime = endAfterTaskTime.Microseconds()
log.WithField("time", endAfterTaskTime.Seconds()).WithField("taskId", taskMsg.TaskId).Info("Exec task end (second is units) :")
if taskMsg.TaskKind == baseV1.TaskKind_ComputeTask {
t.IsExecAiTask = false
} else if taskMsg.TaskKind == baseV1.TaskKind_StandardTask {
t.IsExecStandardTask = false
} }
request, err := http.NewRequest("POST", taskCmd.ApiUrl, reqContainerBody)
if err != nil {
log.WithField("error:", err).Error("New container request failed")
taskExecResult.TaskExecError = fmt.Sprintf("%s,%s", "Http client new container request failed", err.Error())
t.ExecTaskIdIsSuccess.Store(taskMsg.TaskId, true) t.ExecTaskIdIsSuccess.Store(taskMsg.TaskId, true)
//log.WithField("result", taskExecResult).Info("lru cache storage task result")
log.Info("----------------------Compute task exec done--------------------------------")
}
// foundTaskImage returns the ID of the local Docker image that carries a repo
// tag equal to taskCmd.ImageName. It returns an empty string when listing the
// images fails or when no image has a matching tag.
func (t *TaskWorker) foundTaskImage(taskCmd *models.TaskCmd) (imageId string) {
	imageList, err := t.DockerOp.PsImages()
	if err != nil {
		log.Error("Ps images failed:", err)
		return ""
	}
	for _, img := range imageList {
		for _, repoTag := range img.RepoTags {
			if repoTag != taskCmd.ImageName {
				continue
			}
			// First match wins; no need to look at the remaining images.
			log.Info("The image found success:", img.ID)
			return img.ID
		}
	}
	return ""
}
// foundImageIsRunning looks for a running container created from imageId.
//
// On success it returns true, the container's network IP address and the
// private port of the first entry in the container's port list. When the
// container is attached to several networks, the address of whichever network
// is visited last is returned (map iteration order is unspecified).
//
// It returns (false, "", 0) when no running container for the image exposes a
// port.
func (t *TaskWorker) foundImageIsRunning(imageId string) (bool, string, uint16) {
	for _, container := range t.DockerOp.ListContainer() {
		if container.ImageID != imageId || container.State != "running" {
			continue
		}
		// Indexing Ports[0] on a container without any listed port would
		// panic; such a container cannot be addressed, so skip it.
		if len(container.Ports) == 0 {
			log.Warn("Running container has no ports:", container.ID)
			continue
		}
		ip := ""
		for _, endPoint := range container.NetworkSettings.Networks {
			ip = endPoint.IPAddress
			log.Warn("Container network ip:", ip)
		}
		return true, ip, container.Ports[0].PrivatePort
	}
	return false, "", 0
}
// checkLastTaskExecStatus updates the "currently executing" flags for the kind
// of task in taskMsg and then blocks until the previously tracked task
// (t.oldTaskId) has been recorded as finished in t.ExecTaskIdIsSuccess.
//
// Waiting ends as soon as one of the following holds:
//   - more than conf.GetConfig().WaitLastTaskExecTime seconds have elapsed,
//   - there is no previous task,
//   - the previous task's entry in ExecTaskIdIsSuccess is true.
//
// In every case t.oldTaskId is set to taskMsg.TaskId before returning.
func (t *TaskWorker) checkLastTaskExecStatus(taskMsg *nodeManagerV1.PushTaskMessage) {
	// Pause between polls so the wait loop does not spin a CPU core.
	const pollInterval = 100 * time.Millisecond

	if taskMsg.TaskKind == baseV1.TaskKind_ComputeTask {
		t.IsExecAiTask = true
		// TODO: stop the standard-task container before the compute task
		// takes over; for now only the flag is cleared.
		t.IsExecStandardTask = false
	} else if taskMsg.TaskKind == baseV1.TaskKind_StandardTask {
		t.IsExecStandardTask = true
	}

	if t.oldTaskId == taskMsg.TaskId {
		return
	}

	waitStart := time.Now()
	for {
		if int64(time.Since(waitStart).Seconds()) > conf.GetConfig().WaitLastTaskExecTime {
			log.WithField("taskId", t.oldTaskId).Info("Waiting for last task execution ending")
			t.oldTaskId = taskMsg.TaskId
			break
		}
		if t.oldTaskId == "" {
			t.oldTaskId = taskMsg.TaskId
			break
		}
		if value, ok := t.ExecTaskIdIsSuccess.Load(t.oldTaskId); ok {
			// Checked assertion: a non-bool entry is treated as "not
			// finished" instead of panicking.
			if finished, isBool := value.(bool); isBool && finished {
				t.oldTaskId = taskMsg.TaskId
				log.WithField("taskId", t.oldTaskId).Info("Task exec success")
				break
			}
		}
		time.Sleep(pollInterval)
	}
}
// checkIsStopContainer stops the running container of the previously used
// image when the incoming task uses a different image, then records
// taskCmd.ImageName as the current image name.
//
// Container image names without an explicit tag are compared as
// "<name>:latest". Only the first matching running container is stopped.
func (t *TaskWorker) checkIsStopContainer(taskCmd *models.TaskCmd) {
	imageChanged := t.oldTaskImageName != "" && t.oldTaskImageName != taskCmd.ImageName
	if imageChanged {
		// TODO: stop the standard-task container.
		for _, ctr := range t.DockerOp.ListContainer() {
			if !strings.Contains(ctr.Image, ":") {
				ctr.Image = fmt.Sprintf("%s:%s", ctr.Image, "latest")
			}
			log.WithField("containerImageName", ctr.Image).WithField("t.oldTaskImageName", t.oldTaskImageName).Info("match image")
			if ctr.State != "running" || ctr.Image != t.oldTaskImageName {
				continue
			}
			t.DockerOp.StopContainer(ctr.ID)
			log.WithField("Image name", ctr.Image).Info("Stopping container")
			break
		}
	}
	// Both the "changed" and "unchanged" paths end by remembering the new name.
	t.oldTaskImageName = taskCmd.ImageName
}
func (t *TaskWorker) getExternalPort() (externalPort string) {
for {
// 设置种子以确保每次运行时生成不同的随机数序列
rand.Seed(time.Now().UnixNano())
// 生成一个介于 0 和 100 之间的随机整数
externalPortInt := rand.Int63n(10001) + 10000
log.WithField("externalPortInt", externalPortInt).Info("DockerOp UsedExternalPort :", t.DockerOp.UsedExternalPort[externalPortInt])
if t.DockerOp.UsedExternalPort[externalPortInt] {
continue
}
externalPort = strconv.FormatInt(externalPortInt, 10)
break
}
return return
}
// uploadOSS uploads decodedImage to the file-cache service configured in
// conf.GetConfig().OssUrl as a multipart form file named "<taskId>.<suffix>"
// and returns the URL reported by the service.
//
// queries is appended verbatim as the query string of the upload URL.
//
// An error is returned when the request cannot be built or sent, when the
// response cannot be read or decoded, when the returned URL does not parse,
// or when the service answers with a non-OK code or an empty data field.
func (op *TaskOp) uploadOSS(taskId string, queries string, decodedImage []byte, suffix string) (string, error) {
	var requestBody bytes.Buffer
	writer := multipart.NewWriter(&requestBody)
	// Create the file form field.
	fileField, err := writer.CreateFormFile("file", fmt.Sprintf("%s.%s", taskId, suffix))
	if err != nil {
		log.WithError(err).Error("Error creating form file")
		return "", err
	}
	if _, err = fileField.Write(decodedImage); err != nil {
		log.WithError(err).Error("Error copying file contents")
		return "", err
	}
	// Closing the multipart writer emits the trailing boundary.
	if err = writer.Close(); err != nil {
		log.WithError(err).Error("Error closing writer")
		return "", err
	}

	ossUrl := fmt.Sprintf("%s?%s", conf.GetConfig().OssUrl, queries)
	request, err := http.NewRequest("POST", ossUrl, &requestBody)
	if err != nil {
		return "", err
	}
	request.Header.Set("Content-Type", writer.FormDataContentType())

	response, err := op.httpClient.Do(request)
	if err != nil {
		log.WithError(err).Error("Error request oss failed")
		return "", err
	}
	// The body must be closed so the underlying connection is released.
	defer response.Body.Close()

	ossRespBody, err := io.ReadAll(response.Body)
	if err != nil {
		log.WithError(err).Error("Error read oss resp body failed")
		return "", err
	}
	log.WithField("res", string(ossRespBody)).Info("file cache resp body")

	fileCacheRes := &models.FileCacheResult{}
	if err = json.Unmarshal(ossRespBody, fileCacheRes); err != nil {
		log.WithError(err).Error("Json unmarshal file cache result failed")
		return "", err
	}
	log.WithField("code", fileCacheRes.Code).WithField("msg", fileCacheRes.Msg).WithField("data", fileCacheRes.Data).Info("file cache result")

	if fileCacheRes.Code != http.StatusOK || fileCacheRes.Data == "" {
		// Report the rejection explicitly; returning a nil error here would
		// make a failed upload indistinguishable from success with no URL.
		return "", fmt.Errorf("file cache upload rejected: code=%v msg=%v", fileCacheRes.Code, fileCacheRes.Msg)
	}
	if _, err = url.Parse(fileCacheRes.Data); err != nil {
		log.WithError(err).Error("url parse file cache data error")
		return "", err
	}
	return fileCacheRes.Data, nil
}
// getFileCache uploads respStr to the file cache when
// utils.IsBase64ImageStr reports it as a base64 image, and returns the
// resulting URI.
//
// It returns ("", nil) when respStr is not a base64 image. When the upload
// fails or yields an empty URI it returns "" together with whatever error
// uploadOSS reported.
func (op *TaskOp) getFileCache(respStr string, dockerOp *operate.DockerOp) (string, error) {
	isBase64, decodeByte, respFormat, suffix := utils.IsBase64ImageStr(respStr)
	log.WithField("isBase64", isBase64).Info("resp str info")
	if !isBase64 {
		return "", nil
	}

	taskId := op.taskMsg.TaskId
	log.WithField("taskId", taskId).WithField("format", respFormat).WithField("suffix", suffix).Info("Parse container resp")

	queryString := utils.MatchFileCacheQueryString(op.taskParam.Headers, op.taskCmd.ImageName, dockerOp.ModelsInfo, respFormat)
	ossUri, err := op.uploadOSS(taskId, queryString, decodeByte, suffix)
	if ossUri != "" && err == nil {
		log.WithField("uri", ossUri).Info("upload image OSS successful")
		return ossUri, nil
	}
	log.WithError(err).Error("upload image into file cache failed")
	return "", err
}
// checkContainerHealthy queries the container's health-check endpoint
// (models.HealthCheckAPI) at internalIp:internalPort.
//
// It returns nil when the endpoint answers 404 (the container does not
// implement a health check) or when the decoded status equals models.READY.
// It returns an error when the request fails, the body cannot be read or
// decoded, or the reported status is anything other than READY.
func (op *TaskOp) checkContainerHealthy(internalIp string, internalPort uint16) error {
	healthCheckUrl := fmt.Sprintf("http://%s:%d%s", internalIp, internalPort, models.HealthCheckAPI)
	healthyCheckResp, err := op.httpClient.Get(healthCheckUrl)
	if err != nil {
		log.Errorf("Request container healthy failed: %s", err.Error())
		return fmt.Errorf("%s-%s", "The container is not ready", err)
	}
	// Always release the response body, including on the 404 early return.
	defer healthyCheckResp.Body.Close()

	if healthyCheckResp.StatusCode == http.StatusNotFound {
		return nil
	}
	body, err := io.ReadAll(healthyCheckResp.Body)
	if err != nil {
		// Surface read failures instead of decoding a truncated body.
		log.Errorf("Read container healthy body failed: %s", err.Error())
		return fmt.Errorf("%s,%s", "Read container healthy body failed", err.Error())
	}
	m := &models.HealthyCheck{}
	if err = json.Unmarshal(body, m); err != nil {
		log.Errorf("Json unmarshal container healthy body failed: %s", err.Error())
		return fmt.Errorf("%s,%s", "Json unmarshal container healthy body failed", err.Error())
	}
	if m.Status != models.READY {
		log.Errorf("The container is not ready")
		return fmt.Errorf("%s", "The container is not ready")
	}
	return nil
}
func (op *TaskOp) waitContainerRunning(handler *TaskWorker, imageId string) error {
maxExecTime, err := strconv.ParseInt(op.taskParam.Headers[models.MaxExecTime][0], 10, 64)
if err != nil {
log.Errorf("%s-%s", "Parse max exec time", err.Error())
return fmt.Errorf("%s-%s", "Parse max exec time", err.Error())
}
if maxExecTime == 0 {
maxExecTime = models.DefaultMaxExecTime
}
for {
select {
case <-op.ticker.C:
if int64(time.Since(op.startBeforeTaskTime).Seconds()) > maxExecTime-50 {
log.Errorf("%s", "The maximum execution time for this task has been exceeded")
return fmt.Errorf("%s", "The maximum execution time for this task has been exceeded")
}
running, internalIp, internalPort := handler.foundImageIsRunning(imageId)
if !running {
continue
}
if isMatch := strings.HasPrefix(op.taskCmd.ImageName, models.ReplicateImageNameSuffix); isMatch {
if err := op.checkContainerHealthy(internalIp, internalPort); err != nil {
log.WithField("err", err).Errorf("check container healthy failed")
return fmt.Errorf("%s-%s", "check container healthy failed", err.Error())
}
}
op.taskCmd.ApiUrl = fmt.Sprintf("http://%s:%d%s", internalIp, internalPort, op.taskCmd.ApiUrl)
log.Info("Container ports:", internalPort)
log.WithField("ApiUrl", op.taskCmd.ApiUrl).Info("The image is not running")
return nil
} }
request.Header.Set("Content-Type", "application/json") }
for key, value := range taskParam.Headers { }
func (op *TaskOp) validateWebHook() error {
for key, value := range op.taskParam.Headers {
if key == models.Prefer { if key == models.Prefer {
if value[0] == models.Async { if value[0] == models.Async {
request.Header.Set(models.Prefer, models.Async) op.request.Header.Set(models.Prefer, models.Async)
m := &models.ContainerRequest{} m := &models.ContainerRequest{}
err := json.Unmarshal(taskParam.Body, m) err := json.Unmarshal(op.taskParam.Body, m)
if err != nil { if err != nil {
log.WithError(err).Error("json unmarshal task body failed") log.WithError(err).Error("json unmarshal task body failed")
taskExecResult.TaskExecError = fmt.Sprintf("%s,%s", "Json unmarshal task body failed", err.Error()) return fmt.Errorf("%s,%s", "Json unmarshal task body failed", err.Error())
t.ExecTaskIdIsSuccess.Store(taskMsg.TaskId, true)
return
} }
if m.WebHook == "" { if m.WebHook == "" {
log.Error("Request webhook is nil") log.Error("Request webhook is nil")
taskExecResult.TaskExecError = fmt.Sprintf("%s", "Request webhook is nil") return fmt.Errorf("%s", "Request webhook is nil")
t.ExecTaskIdIsSuccess.Store(taskMsg.TaskId, true)
return
} else { } else {
_, err := url.Parse(m.WebHook) _, err := url.Parse(m.WebHook)
if err != nil { if err != nil {
log.WithError(err).Error("web hook url parse failed") log.WithError(err).Error("web hook url parse failed")
taskExecResult.TaskExecError = fmt.Sprintf("%s,%s", "Web hook url parse failed", err.Error()) return fmt.Errorf("%s,%s", "Web hook url parse failed", err.Error())
t.ExecTaskIdIsSuccess.Store(taskMsg.TaskId, true)
return
} }
} }
break break
} }
} }
} }
post, err := t.HttpClient.Do(request) return nil
}
func (op *TaskOp) waitReqContainerOk(dockerOp *operate.DockerOp) error {
maxExecTime, err := strconv.ParseInt(op.taskParam.Headers[models.MaxExecTime][0], 10, 64)
if err != nil {
log.Errorf("%s-%s", "Parse max exec time", err.Error())
return fmt.Errorf("%s-%s", "Parse max exec time", err.Error())
}
if maxExecTime == 0 {
maxExecTime = models.DefaultMaxExecTime
}
for {
select {
case <-op.ticker.C:
if int64(time.Since(op.startBeforeTaskTime).Seconds()) > maxExecTime-50 {
log.Errorf("%s", "The maximum execution time for this task has been exceeded")
return fmt.Errorf("%s", "The maximum execution time for this task has been exceeded")
}
post, err := op.httpClient.Do(op.request)
if err != nil { if err != nil {
log.WithField("error:", err).Error("Http client post request container failed") log.WithField("error:", err).Error("Http client post request container failed")
taskExecResult.TaskExecError = fmt.Sprintf("%s,%s", "Http client post request container failed", err.Error()) return fmt.Errorf("%s,%s", "Http client post request container failed", err.Error())
t.ExecTaskIdIsSuccess.Store(taskMsg.TaskId, true)
return
} }
endAfterTaskTime := time.Since(startBeforeTaskTime) log.WithField("StatusCode", post.StatusCode).WithField("taskId", op.taskMsg.TaskId).Info("Exec task result")
log.WithField("time", endAfterTaskTime.Seconds()).WithField("taskId", taskMsg.TaskId).Info("Exec task end (second is units) :")
log.WithField("StatusCode", post.StatusCode).WithField("taskId", taskMsg.TaskId).Info("Exec task result")
if post.StatusCode == http.StatusOK { if post.StatusCode == http.StatusOK {
taskExecResult.TaskHttpStatusCode = http.StatusOK op.taskExecResult.TaskHttpStatusCode = http.StatusOK
readBody, err := io.ReadAll(post.Body) readBody, err := io.ReadAll(post.Body)
if err != nil { if err != nil {
taskExecResult.TaskExecError = fmt.Sprintf("%s,%s,Container Http Code:%d", "Read container body failed", err.Error(), post.StatusCode) log.Errorf("%s,%s,Container Http Code:%d", "Read container body failed", err.Error(), post.StatusCode)
log.Error("Read container body failed", err) return fmt.Errorf("%s,%s,Container Http Code:%d", "Read container body failed", err.Error(), post.StatusCode)
t.ExecTaskIdIsSuccess.Store(taskMsg.TaskId, true)
return
} }
if op.taskMsg.TaskKind != baseV1.TaskKind_StandardTask {
if taskMsg.TaskKind != baseV1.TaskKind_StandardTask {
isUseFileCache := true isUseFileCache := true
isUseRedirect := false isUseRedirect := false
for key, value := range taskParam.Headers { for key, value := range op.taskParam.Headers {
log.WithField("key", key).WithField("val", value).Debug("Headers Info") log.WithField("key", key).WithField("val", value).Debug("Headers Info")
if key == models.UseRedirect { if key == models.UseRedirect {
log.WithField("UseRedirect", value[0]).Info("Headers info") log.WithField("UseRedirect", value[0]).Info("Headers info")
...@@ -298,10 +588,10 @@ func (t *TaskHandler) ComputeTaskHandler(taskMsg *nodeManagerV1.PushTaskMessage) ...@@ -298,10 +588,10 @@ func (t *TaskHandler) ComputeTaskHandler(taskMsg *nodeManagerV1.PushTaskMessage)
isSuccess = true isSuccess = true
continue continue
} }
ossUri, err := t.getFileCache(respStr, taskMsg, taskParam, taskCmd) ossUri, err := op.getFileCache(respStr, dockerOp)
if err != nil || ossUri == "" { if err != nil || ossUri == "" {
if err != nil { if err != nil {
taskExecResult.TaskExecError = fmt.Sprintf("%s,%s", "Get file cache uri failed", err) op.taskExecResult.TaskExecError = fmt.Sprintf("%s,%s", "Get file cache uri failed", err)
} }
if ossUri == "" { if ossUri == "" {
apiResOneArr = append(apiResOneArr, respStr) apiResOneArr = append(apiResOneArr, respStr)
...@@ -310,9 +600,9 @@ func (t *TaskHandler) ComputeTaskHandler(taskMsg *nodeManagerV1.PushTaskMessage) ...@@ -310,9 +600,9 @@ func (t *TaskHandler) ComputeTaskHandler(taskMsg *nodeManagerV1.PushTaskMessage)
continue continue
} }
if isUseRedirect && ossUri != "" && len(res) == 1 && len(innerSlice) == 1 { if isUseRedirect && ossUri != "" && len(res) == 1 && len(innerSlice) == 1 {
taskExecResult.TaskHttpStatusCode = models.RedirectCode op.taskExecResult.TaskHttpStatusCode = models.RedirectCode
apiResBody := utils.EncodeJsonEscapeHTML(ossUri) apiResBody := utils.EncodeJsonEscapeHTML(ossUri)
taskExecResult.TaskRespBody = apiResBody op.taskExecResult.TaskRespBody = apiResBody
post.Header.Set("Location", ossUri) post.Header.Set("Location", ossUri)
isSuccess = true isSuccess = true
break break
...@@ -324,13 +614,11 @@ func (t *TaskHandler) ComputeTaskHandler(taskMsg *nodeManagerV1.PushTaskMessage) ...@@ -324,13 +614,11 @@ func (t *TaskHandler) ComputeTaskHandler(taskMsg *nodeManagerV1.PushTaskMessage)
apiRes = append(apiRes, apiResOneArr) apiRes = append(apiRes, apiResOneArr)
} }
if !isSuccess { if !isSuccess {
taskExecResult.TaskExecError = fmt.Sprintf("%s-%s", "Container output is nil", string(readBody)) return fmt.Errorf("%s-%s", "Container output is nil", string(readBody))
t.ExecTaskIdIsSuccess.Store(taskMsg.TaskId, true)
return
} }
if !isUseRedirect { if !isUseRedirect {
apiResBody := utils.EncodeJsonEscapeHTML(apiRes) apiResBody := utils.EncodeJsonEscapeHTML(apiRes)
taskExecResult.TaskRespBody = apiResBody op.taskExecResult.TaskRespBody = apiResBody
} }
} }
case []string: case []string:
...@@ -347,10 +635,10 @@ func (t *TaskHandler) ComputeTaskHandler(taskMsg *nodeManagerV1.PushTaskMessage) ...@@ -347,10 +635,10 @@ func (t *TaskHandler) ComputeTaskHandler(taskMsg *nodeManagerV1.PushTaskMessage)
isSuccess = true isSuccess = true
continue continue
} }
ossUri, err := t.getFileCache(respStr, taskMsg, taskParam, taskCmd) ossUri, err := op.getFileCache(respStr, dockerOp)
if err != nil || ossUri == "" { if err != nil || ossUri == "" {
if err != nil { if err != nil {
taskExecResult.TaskExecError = fmt.Sprintf("%s,%s", "Get file cache uri failed", err) op.taskExecResult.TaskExecError = fmt.Sprintf("%s,%s", "Get file cache uri failed", err)
} }
if ossUri == "" { if ossUri == "" {
apiRes = append(apiRes, respStr) apiRes = append(apiRes, respStr)
...@@ -359,10 +647,10 @@ func (t *TaskHandler) ComputeTaskHandler(taskMsg *nodeManagerV1.PushTaskMessage) ...@@ -359,10 +647,10 @@ func (t *TaskHandler) ComputeTaskHandler(taskMsg *nodeManagerV1.PushTaskMessage)
continue continue
} }
if isUseRedirect && ossUri != "" && len(res) == 1 { if isUseRedirect && ossUri != "" && len(res) == 1 {
taskExecResult.TaskHttpStatusCode = models.RedirectCode op.taskExecResult.TaskHttpStatusCode = models.RedirectCode
post.Header.Set("Location", ossUri) post.Header.Set("Location", ossUri)
apiResBody := utils.EncodeJsonEscapeHTML(ossUri) apiResBody := utils.EncodeJsonEscapeHTML(ossUri)
taskExecResult.TaskRespBody = apiResBody op.taskExecResult.TaskRespBody = apiResBody
isSuccess = true isSuccess = true
break break
} else { } else {
...@@ -371,13 +659,11 @@ func (t *TaskHandler) ComputeTaskHandler(taskMsg *nodeManagerV1.PushTaskMessage) ...@@ -371,13 +659,11 @@ func (t *TaskHandler) ComputeTaskHandler(taskMsg *nodeManagerV1.PushTaskMessage)
} }
} }
if !isSuccess { if !isSuccess {
taskExecResult.TaskExecError = fmt.Sprintf("%s-%s", "Container output is nil", string(readBody)) return fmt.Errorf("%s-%s", "Container output is nil", string(readBody))
t.ExecTaskIdIsSuccess.Store(taskMsg.TaskId, true)
return
} }
if !isUseRedirect { if !isUseRedirect {
apiResBody := utils.EncodeJsonEscapeHTML(apiRes) apiResBody := utils.EncodeJsonEscapeHTML(apiRes)
taskExecResult.TaskRespBody = apiResBody op.taskExecResult.TaskRespBody = apiResBody
} }
} }
case string: case string:
...@@ -395,10 +681,10 @@ func (t *TaskHandler) ComputeTaskHandler(taskMsg *nodeManagerV1.PushTaskMessage) ...@@ -395,10 +681,10 @@ func (t *TaskHandler) ComputeTaskHandler(taskMsg *nodeManagerV1.PushTaskMessage)
isSuccess = true isSuccess = true
continue continue
} }
ossUri, err := t.getFileCache(respStr, taskMsg, taskParam, taskCmd) ossUri, err := op.getFileCache(respStr, dockerOp)
if err != nil || ossUri == "" { if err != nil || ossUri == "" {
if err != nil { if err != nil {
taskExecResult.TaskExecError = fmt.Sprintf("%s,%s", "Get file cache uri failed", err) op.taskExecResult.TaskExecError = fmt.Sprintf("%s,%s", "Get file cache uri failed", err)
} }
if ossUri == "" { if ossUri == "" {
apiRes = append(apiRes, respStr) apiRes = append(apiRes, respStr)
...@@ -407,10 +693,10 @@ func (t *TaskHandler) ComputeTaskHandler(taskMsg *nodeManagerV1.PushTaskMessage) ...@@ -407,10 +693,10 @@ func (t *TaskHandler) ComputeTaskHandler(taskMsg *nodeManagerV1.PushTaskMessage)
continue continue
} }
if isUseRedirect && ossUri != "" && len(resArr) == 1 { if isUseRedirect && ossUri != "" && len(resArr) == 1 {
taskExecResult.TaskHttpStatusCode = models.RedirectCode op.taskExecResult.TaskHttpStatusCode = models.RedirectCode
post.Header.Set("Location", ossUri) post.Header.Set("Location", ossUri)
apiResBody := utils.EncodeJsonEscapeHTML(ossUri) apiResBody := utils.EncodeJsonEscapeHTML(ossUri)
taskExecResult.TaskRespBody = apiResBody op.taskExecResult.TaskRespBody = apiResBody
isSuccess = true isSuccess = true
break break
} else { } else {
...@@ -419,291 +705,51 @@ func (t *TaskHandler) ComputeTaskHandler(taskMsg *nodeManagerV1.PushTaskMessage) ...@@ -419,291 +705,51 @@ func (t *TaskHandler) ComputeTaskHandler(taskMsg *nodeManagerV1.PushTaskMessage)
} }
} }
if !isSuccess { if !isSuccess {
taskExecResult.TaskExecError = fmt.Sprintf("%s-%s", "Container output is nil", string(readBody)) return fmt.Errorf("%s-%s", "Container output is nil", string(readBody))
t.ExecTaskIdIsSuccess.Store(taskMsg.TaskId, true)
return
} }
if !isUseRedirect { if !isUseRedirect {
apiResBody := utils.EncodeJsonEscapeHTML(apiRes) apiResBody := utils.EncodeJsonEscapeHTML(apiRes)
taskExecResult.TaskRespBody = apiResBody op.taskExecResult.TaskRespBody = apiResBody
} }
} }
default: default:
log.Error("data is unknown type", v) log.Error("data is unknown type", v)
taskExecResult.TaskExecError = fmt.Sprintf("%s", "Container resp data is unknown type") return fmt.Errorf("%s", "Container resp data is unknown type")
t.ExecTaskIdIsSuccess.Store(taskMsg.TaskId, true)
return
} }
} else { } else {
log.Error("Container resp output is nil") log.Error("Container resp output is nil")
taskExecResult.TaskExecError = fmt.Sprintf("%s", "Container resp output is nil") op.taskExecResult.TaskRespBody = readBody
t.ExecTaskIdIsSuccess.Store(taskMsg.TaskId, true) return fmt.Errorf("%s", "Container resp output is nil")
taskExecResult.TaskRespBody = readBody
return
} }
} }
} }
if taskMsg.TaskKind == baseV1.TaskKind_StandardTask { if op.taskMsg.TaskKind == baseV1.TaskKind_StandardTask {
taskExecResult.TaskRespBody = readBody op.taskExecResult.TaskRespBody = readBody
} }
headers, err := json.Marshal(post.Header) headers, err := json.Marshal(post.Header)
if err != nil { if err != nil {
log.WithError(err).Error("JSON marshal container header failed") log.WithError(err).Error("JSON marshal container header failed")
taskExecResult.TaskExecError = fmt.Sprintf("%s,%s", "JSON marshal container header failed", err.Error()) return fmt.Errorf("%s,%s", "JSON marshal container header failed", err.Error())
t.ExecTaskIdIsSuccess.Store(taskMsg.TaskId, true)
return
} }
log.WithField("headers", post.Header).Info("return task http headers") log.WithField("headers", post.Header).Info("return task http headers")
taskExecResult.TaskHttpHeaders = headers op.taskExecResult.TaskHttpHeaders = headers
taskExecResult.TaskIsSuccess = true op.taskExecResult.TaskIsSuccess = true
taskExecResult.TaskExecTime = endAfterTaskTime.Microseconds()
} else {
taskExecResult.TaskHttpStatusCode = int32(post.StatusCode)
if post.Body != nil {
all, err := io.ReadAll(post.Body)
if err != nil {
log.Error("JSON read error: ", err)
taskExecResult.TaskExecError = fmt.Sprintf("%s,Container Http Code:%d,err:%s", "Read container body failed", post.StatusCode, err)
t.ExecTaskIdIsSuccess.Store(taskMsg.TaskId, true)
return
}
if taskExecResult.TaskHttpStatusCode == http.StatusConflict {
taskExecResult.TaskExecError = fmt.Sprintf("%s,Container Http Code:%d", "Already running a prediction", post.StatusCode)
} else {
taskExecResult.TaskExecError = fmt.Sprintf("%s,Container Http Code:%d,body:%s", "Container is exec failed", post.StatusCode, string(all))
}
} else { } else {
taskExecResult.TaskExecError = fmt.Sprintf("%s,Container Http Code:%d,body:%s", "Container resp body is nil", post.StatusCode, "") op.taskExecResult.TaskHttpStatusCode = int32(post.StatusCode)
} if op.taskExecResult.TaskHttpStatusCode == http.StatusConflict {
log.WithField("error", post.Body).WithField("taskId", taskMsg.TaskId).Error("Exec task result is failed") log.Errorf("%s,Container Http Code:%d", "Already running a prediction", post.StatusCode)
}
if taskMsg.TaskKind == baseV1.TaskKind_ComputeTask {
t.IsExecAiTask = false
} else if taskMsg.TaskKind == baseV1.TaskKind_StandardTask {
t.IsExecStandardTask = false
}
t.ExecTaskIdIsSuccess.Store(taskMsg.TaskId, true)
//log.WithField("result", taskExecResult).Info("lru cache storage task result")
log.Info("received computeTask--------------------------------")
}
// CustomTaskHandler handles a pushed custom task. At present it only calls
// DockerOp.PsImages and logs the outcome; taskMsg itself is not inspected.
// The handler's WaitGroup is released when the method returns.
func (t *TaskHandler) CustomTaskHandler(taskMsg *nodeManagerV1.PushTaskMessage) {
	defer t.Wg.Done()
	if _, listErr := t.DockerOp.PsImages(); listErr != nil {
		log.Error("custom task handler docker op ps images failed: ", listErr)
		return
	}
	log.Info("received customTask--------------------------------")
}
// foundTaskImage looks through the images returned by DockerOp.PsImages for
// one whose repo tags contain taskCmd.ImageName.
//
// It returns (true, imageID) for the first match, and (false, "") when no
// image matches or when the image listing fails.
func (t *TaskHandler) foundTaskImage(taskCmd *models.TaskCmd) (isSuccess bool, imageId string) {
	localImages, listErr := t.DockerOp.PsImages()
	if listErr != nil {
		log.Error("Ps images failed:", listErr)
		return false, ""
	}
	for _, candidate := range localImages {
		for _, repoTag := range candidate.RepoTags {
			if repoTag != taskCmd.ImageName {
				continue
			}
			// First matching tag wins; no need to look any further.
			log.Info("The image found success:", candidate.ID)
			return true, candidate.ID
		}
	}
	return false, ""
}
// foundImageIsRunning reports whether a container created from imageId is in
// the "running" state.
//
// On success it returns true together with an IP address taken from the
// container's networks and the private port of the container's first port
// entry. When several networks are attached, the address of whichever network
// is iterated last is returned (map iteration order is not defined).
// It returns (false, "", 0) when no suitable container exists.
func (t *TaskHandler) foundImageIsRunning(imageId string) (bool, string, uint16) {
	for _, container := range t.DockerOp.ListContainer() {
		if container.ImageID != imageId || container.State != "running" {
			continue
		}
		// A running container may publish no ports at all; indexing Ports[0]
		// would panic, and such a container cannot be reached anyway.
		if len(container.Ports) == 0 {
			log.Warn("Running container has no ports, skip:", container.ID)
			continue
		}
		ip := ""
		for _, endPoint := range container.NetworkSettings.Networks {
			ip = endPoint.IPAddress
			log.Warn("Container network ip:", ip)
		}
		return true, ip, container.Ports[0].PrivatePort
	}
	return false, "", 0
}
// uploadOSS posts decodedImage as a multipart form file named
// "<taskId>.<suffix>" to the configured OSS URL (with queries appended as the
// query string) and returns the URI reported by the file cache service.
//
// It returns a non-nil error when building the form, performing the request,
// reading the response or decoding the JSON result fails, or when the
// returned URI cannot be parsed. When the service answers with a non-OK code
// or an empty data field, it returns ("", nil); callers must therefore treat
// an empty URI as "not uploaded".
func (t *TaskHandler) uploadOSS(taskId string, queries string, decodedImage []byte, suffix string) (string, error) {
	var requestBody bytes.Buffer
	writer := multipart.NewWriter(&requestBody)
	// Create the file form field.
	fileField, err := writer.CreateFormFile("file", fmt.Sprintf("%s.%s", taskId, suffix))
	if err != nil {
		log.WithError(err).Error("Error creating form file")
		return "", err
	}
	if _, err = fileField.Write(decodedImage); err != nil {
		log.WithError(err).Error("Error copying file contents")
		return "", err
	}
	// Close the multipart writer so the terminating boundary is written.
	if err = writer.Close(); err != nil {
		log.WithError(err).Error("Error closing writer")
		return "", err
	}
	ossUrl := fmt.Sprintf("%s?%s", conf.GetConfig().OssUrl, queries)
	request, err := http.NewRequest("POST", ossUrl, &requestBody)
	if err != nil {
		return "", err
	}
	request.Header.Set("Content-Type", writer.FormDataContentType())
	response, err := t.HttpClient.Do(request)
	if err != nil {
		log.WithError(err).Error("Error request oss failed")
		return "", err
	}
	// The body must be closed, otherwise the underlying connection leaks.
	defer response.Body.Close()
	ossRespBody, err := io.ReadAll(response.Body)
	if err != nil {
		log.WithError(err).Error("Error read oss resp body failed")
		return "", err
	}
	log.WithField("res", string(ossRespBody)).Info("file cache resp body")
	fileCacheRes := &models.FileCacheResult{}
	if err = json.Unmarshal(ossRespBody, fileCacheRes); err != nil {
		log.WithError(err).Error("Json unmarshal file cache result failed")
		return "", err
	}
	log.WithField("code", fileCacheRes.Code).WithField("msg", fileCacheRes.Msg).WithField("data", fileCacheRes.Data).Info("file cache result")
	if fileCacheRes.Code == http.StatusOK && fileCacheRes.Data != "" {
		if _, err := url.Parse(fileCacheRes.Data); err != nil {
			log.WithError(err).Error("url parse file cache data error")
			return "", err
		}
		return fileCacheRes.Data, nil
	}
	// The service rejected the upload or returned no URI; this is reported
	// as an empty result without an error, matching what callers expect.
	return "", nil
}
// getFileCache uploads respStr to the file cache when utils.IsBase64ImageStr
// recognises it as a base64 image, and returns the resulting URI.
//
// It returns ("", nil) when respStr is not a base64 image. When the upload
// fails or yields an empty URI, it returns "" together with the error from
// uploadOSS, which may be nil.
func (t *TaskHandler) getFileCache(respStr string, taskMsg *nodeManagerV1.PushTaskMessage, taskParam *models.TaskParam, taskCmd *models.TaskCmd) (string, error) {
	isBase64, imageBytes, format, ext := utils.IsBase64ImageStr(respStr)
	log.WithField("isBase64", isBase64).Info("resp str info")
	if !isBase64 {
		return "", nil
	}
	log.WithField("taskId", taskMsg.TaskId).WithField("format", format).WithField("suffix", ext).Info("Parse container resp")
	query := utils.MatchFileCacheQueryString(taskParam.Headers, taskCmd.ImageName, t.DockerOp.ModelsInfo, format)
	uri, uploadErr := t.uploadOSS(taskMsg.TaskId, query, imageBytes, ext)
	if uploadErr == nil && uri != "" {
		log.WithField("uri", uri).Info("upload image OSS successful")
		return uri, nil
	}
	log.WithError(uploadErr).Error("upload image into file cache failed")
	return "", uploadErr
}
func (t *TaskHandler) checkLastTaskExecStatus(taskMsg *nodeManagerV1.PushTaskMessage) {
if taskMsg.TaskKind == baseV1.TaskKind_ComputeTask {
t.IsExecAiTask = true
if t.IsExecStandardTask {
//todo: 停止标准任务容器
//containers := t.DockerOp.ListContainer()
//for _, container := range containers {
// if container.Image == taskCmd.ImageName && container.State == "running" {
// t.DockerOp.StopContainer(container.ID)
// }
//}
t.IsExecStandardTask = false
}
} else if taskMsg.TaskKind == baseV1.TaskKind_StandardTask {
t.IsExecStandardTask = true
}
if t.oldTaskId != taskMsg.TaskId {
now := time.Now()
for {
since := time.Since(now)
if int64(since.Seconds()) > conf.GetConfig().WaitLastTaskExecTime {
log.WithField("taskId", t.oldTaskId).Info("Waiting for last task execution ending")
t.oldTaskId = taskMsg.TaskId
break
}
if t.oldTaskId == "" {
t.oldTaskId = taskMsg.TaskId
break
}
value, ok := t.ExecTaskIdIsSuccess.Load(t.oldTaskId)
//log.WithField("isSuccess", value).Info("Task id exec info")
if !ok {
//log.WithField("task id", t.oldTaskId).Warn("task exec is not finished")
continue continue
} }
isSuccess := value.(bool) log.WithField("taskId", op.taskMsg.TaskId).Error("Exec task result is failed")
if isSuccess { if post.Body != nil {
t.oldTaskId = taskMsg.TaskId all, _ := io.ReadAll(post.Body)
log.WithField("taskId", t.oldTaskId).Info("Task exec success") return fmt.Errorf("%s,Container Http Code:%d,body:%s", "Container is exec failed", post.StatusCode, string(all))
break } else {
} return fmt.Errorf("%s,Container Http Code:%d,body:%s", "Container resp body is nil", post.StatusCode, "")
}
}
}
// checkContainerHealthy queries the container's health-check endpoint at
// http://internalIp:internalPort + models.HealthCheckAPI.
//
// It returns true when the endpoint answers 404 (treated as "no health check
// available") or when the decoded status equals models.READY. In every other
// case it returns false and stores the reason in taskExecResult.TaskExecError.
func (t *TaskHandler) checkContainerHealthy(internalIp string, internalPort uint16, taskMsg *nodeManagerV1.PushTaskMessage, taskExecResult *models.TaskResult) bool {
	healthCheckUrl := fmt.Sprintf("http://%s:%d%s", internalIp, internalPort, models.HealthCheckAPI)
	healthyCheckResp, err := t.HttpClient.Get(healthCheckUrl)
	if err != nil {
		log.Errorf("Request container healthy failed: %s", err.Error())
		taskExecResult.TaskExecError = fmt.Sprintf("%s,%s", "Request container healthy failed", err.Error())
		return false
	}
	// Close the body on every path, including the 404 shortcut below, so the
	// connection is released.
	defer healthyCheckResp.Body.Close()
	if healthyCheckResp.StatusCode == http.StatusNotFound {
		return true
	}
	body, err := io.ReadAll(healthyCheckResp.Body)
	if err != nil {
		log.Errorf("Read container healthy body failed: %s", err.Error())
		taskExecResult.TaskExecError = fmt.Sprintf("%s,%s", "Read container healthy body failed", err.Error())
		return false
	}
	m := &models.HealthyCheck{}
	if err = json.Unmarshal(body, m); err != nil {
		log.Errorf("Json unmarshal container healthy body failed: %s", err.Error())
		taskExecResult.TaskExecError = fmt.Sprintf("%s,%s", "Json unmarshal container healthy body failed", err.Error())
		return false
	}
	if m.Status != models.READY {
		log.Errorf("The container is not ready")
		taskExecResult.TaskExecError = fmt.Sprintf("%s", "The container is not ready")
		return false
	}
	return true
}
func (t *TaskHandler) checkIsStopContainer(taskCmd *models.TaskCmd) {
if t.oldTaskImageName != "" && t.oldTaskImageName != taskCmd.ImageName {
//todo: 停止标准任务容器
containers := t.DockerOp.ListContainer()
for _, container := range containers {
split := strings.Split(container.Image, ":")
if len(split) == 1 {
container.Image = fmt.Sprintf("%s:%s", container.Image, "latest")
} }
log.WithField("containerImageName", container.Image).WithField("t.oldTaskImageName", t.oldTaskImageName).Info("match image")
if container.Image == t.oldTaskImageName && container.State == "running" {
t.DockerOp.StopContainer(container.ID)
log.WithField("Image name", container.Image).Info("Stopping container")
//t.DockerOp.RunningImages[t.oldTaskImageName] = false
break
} }
return nil
} }
t.oldTaskImageName = taskCmd.ImageName
} else {
t.oldTaskImageName = taskCmd.ImageName
} }
} }
......
...@@ -111,12 +111,11 @@ func TestTaskHandler_computeTaskHandler(t1 *testing.T) { ...@@ -111,12 +111,11 @@ func TestTaskHandler_computeTaskHandler(t1 *testing.T) {
} }
for _, tt := range tests { for _, tt := range tests {
t1.Run(tt.name, func(t1 *testing.T) { t1.Run(tt.name, func(t1 *testing.T) {
t := &nm.TaskHandler{ t := &nm.TaskWorker{
Wg: tt.fields.wg, Wg: tt.fields.wg,
LruCache: tt.fields.lruCache, LruCache: tt.fields.lruCache,
DockerOp: tt.fields.DockerOp, DockerOp: tt.fields.DockerOp,
TaskMsg: tt.fields.TaskMsg, TaskMsg: tt.fields.TaskMsg,
HttpClient: tt.fields.HttpClient,
} }
tt.fields.wg.Add(1) tt.fields.wg.Add(1)
t.ComputeTaskHandler(tt.args.taskMsg) t.ComputeTaskHandler(tt.args.taskMsg)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment