Commit 3fff37f1 authored by Your Name

update to monitor.dcgm:v.0.0.8

parent a9dd8628
...@@ -44,6 +44,9 @@ func CpuSmi() (Cpu, error) { ...@@ -44,6 +44,9 @@ func CpuSmi() (Cpu, error) {
} }
percent, err := cpu.Percent(time.Second, false) percent, err := cpu.Percent(time.Second, false)
if err != nil {
return Cpu{}, err
}
percentAsInt := int64(0) percentAsInt := int64(0)
...@@ -55,6 +58,10 @@ func CpuSmi() (Cpu, error) { ...@@ -55,6 +58,10 @@ func CpuSmi() (Cpu, error) {
c, err := cpu.Counts(false) c, err := cpu.Counts(false)
if err != nil {
return Cpu{}, err
}
return Cpu{ return Cpu{
Model: model, Model: model,
Number: int64(len(physicalMap)), Number: int64(len(physicalMap)),
......
...@@ -2,7 +2,7 @@ version: "3.9" ...@@ -2,7 +2,7 @@ version: "3.9"
services: services:
monitor.dcgm: monitor.dcgm:
image: docker.ai.17xuexi.com/monitorapi/monitor.dcgm:v.0.0.7 image: docker.ai.17xuexi.com/monitorapi/monitor.dcgm:v.0.0.8
ports: ports:
- 9093:9090 - 9093:9090
- 4000:4000 - 4000:4000
......
...@@ -18,4 +18,5 @@ services: ...@@ -18,4 +18,5 @@ services:
restart: unless-stopped restart: unless-stopped
#docker run --rm -it --net=host nicolaka/netshoot nc -lkv 0.0.0.0 8000 #docker run --rm -it --net=host nicolaka/netshoot nc -lkv 0.0.0.0 8000
\ No newline at end of file
...@@ -117,10 +117,10 @@ func (c *ProApi) GpuUtil(inGpus map[string]Gpu) (map[string]Gpu, error) { ...@@ -117,10 +117,10 @@ func (c *ProApi) GpuUtil(inGpus map[string]Gpu) (map[string]Gpu, error) {
// outGpus := make(map[string]Gpu, 8) // outGpus := make(map[string]Gpu, 8)
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel() defer cancel()
gpuResult, gpuWarnings, err := c.API.Query(ctx, "DCGM_FI_DEV_GPU_UTIL", time.Now(), v1.WithTimeout(5*time.Second)) gpuResult, gpuWarnings, err := c.API.Query(ctx, "DCGM_FI_DEV_GPU_UTIL", time.Now(), v1.WithTimeout(time.Second))
if err != nil { if err != nil {
return nil, err return nil, err
} }
...@@ -241,10 +241,10 @@ func (c *ProApi) GpuPowerUsage(inGpus map[string]Gpu) (map[string]Gpu, error) { ...@@ -241,10 +241,10 @@ func (c *ProApi) GpuPowerUsage(inGpus map[string]Gpu) (map[string]Gpu, error) {
// outGpus := make(map[string]Gpu, 8) // outGpus := make(map[string]Gpu, 8)
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel() defer cancel()
gpuResult, gpuWarnings, err := c.API.Query(ctx, "DCGM_FI_DEV_POWER_USAGE", time.Now(), v1.WithTimeout(5*time.Second)) gpuResult, gpuWarnings, err := c.API.Query(ctx, "DCGM_FI_DEV_POWER_USAGE", time.Now(), v1.WithTimeout(time.Second))
if err != nil { if err != nil {
return nil, err return nil, err
} }
...@@ -301,10 +301,10 @@ func (c *ProApi) GpuPowerUsage(inGpus map[string]Gpu) (map[string]Gpu, error) { ...@@ -301,10 +301,10 @@ func (c *ProApi) GpuPowerUsage(inGpus map[string]Gpu) (map[string]Gpu, error) {
func (c *ProApi) GpuMemFree(inGpus map[string]Gpu) (map[string]Gpu, error) { func (c *ProApi) GpuMemFree(inGpus map[string]Gpu) (map[string]Gpu, error) {
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel() defer cancel()
gpuResult, gpuWarnings, err := c.API.Query(ctx, "DCGM_FI_DEV_FB_FREE", time.Now(), v1.WithTimeout(5*time.Second)) gpuResult, gpuWarnings, err := c.API.Query(ctx, "DCGM_FI_DEV_FB_FREE", time.Now(), v1.WithTimeout(time.Second))
if err != nil { if err != nil {
return nil, err return nil, err
} }
...@@ -361,10 +361,10 @@ func (c *ProApi) GpuMemFree(inGpus map[string]Gpu) (map[string]Gpu, error) { ...@@ -361,10 +361,10 @@ func (c *ProApi) GpuMemFree(inGpus map[string]Gpu) (map[string]Gpu, error) {
func (c *ProApi) GpuMemTotal(inGpus map[string]Gpu) (map[string]Gpu, error) { func (c *ProApi) GpuMemTotal(inGpus map[string]Gpu) (map[string]Gpu, error) {
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel() defer cancel()
gpuResult, gpuWarnings, err := c.API.Query(ctx, "DCGM_FI_DEV_FB_USED+DCGM_FI_DEV_FB_FREE", time.Now(), v1.WithTimeout(5*time.Second)) gpuResult, gpuWarnings, err := c.API.Query(ctx, "DCGM_FI_DEV_FB_USED+DCGM_FI_DEV_FB_FREE", time.Now(), v1.WithTimeout(time.Second))
if err != nil { if err != nil {
return nil, err return nil, err
} }
...@@ -421,10 +421,10 @@ func (c *ProApi) GpuMemTotal(inGpus map[string]Gpu) (map[string]Gpu, error) { ...@@ -421,10 +421,10 @@ func (c *ProApi) GpuMemTotal(inGpus map[string]Gpu) (map[string]Gpu, error) {
func (c *ProApi) GpuTemp(inGpus map[string]Gpu) (map[string]Gpu, error) { func (c *ProApi) GpuTemp(inGpus map[string]Gpu) (map[string]Gpu, error) {
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel() defer cancel()
gpuResult, gpuWarnings, err := c.API.Query(ctx, "DCGM_FI_DEV_GPU_TEMP", time.Now(), v1.WithTimeout(5*time.Second)) gpuResult, gpuWarnings, err := c.API.Query(ctx, "DCGM_FI_DEV_GPU_TEMP", time.Now(), v1.WithTimeout(time.Second))
if err != nil { if err != nil {
return nil, err return nil, err
} }
...@@ -483,10 +483,10 @@ func (c *ProApi) GpuTemp(inGpus map[string]Gpu) (map[string]Gpu, error) { ...@@ -483,10 +483,10 @@ func (c *ProApi) GpuTemp(inGpus map[string]Gpu) (map[string]Gpu, error) {
func (c *ProApi) GpuPowerUsageOld() ([]DeviceInfo, error) { func (c *ProApi) GpuPowerUsageOld() ([]DeviceInfo, error) {
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel() defer cancel()
result, warnings, err := c.API.Query(ctx, "DCGM_FI_DEV_POWER_USAGE", time.Now(), v1.WithTimeout(5*time.Second)) result, warnings, err := c.API.Query(ctx, "DCGM_FI_DEV_POWER_USAGE", time.Now(), v1.WithTimeout(time.Second))
if err != nil { if err != nil {
return nil, err return nil, err
} }
...@@ -534,10 +534,10 @@ func (c *ProApi) GpuPowerUsageOld() ([]DeviceInfo, error) { ...@@ -534,10 +534,10 @@ func (c *ProApi) GpuPowerUsageOld() ([]DeviceInfo, error) {
func (c *ProApi) GpuMemTemp() ([]DeviceInfo, error) { func (c *ProApi) GpuMemTemp() ([]DeviceInfo, error) {
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel() defer cancel()
result, warnings, err := c.API.Query(ctx, "DCGM_FI_DEV_MEMORY_TEMP", time.Now(), v1.WithTimeout(5*time.Second)) result, warnings, err := c.API.Query(ctx, "DCGM_FI_DEV_MEMORY_TEMP", time.Now(), v1.WithTimeout(time.Second))
if err != nil { if err != nil {
return nil, err return nil, err
} }
...@@ -585,10 +585,10 @@ func (c *ProApi) GpuMemTemp() ([]DeviceInfo, error) { ...@@ -585,10 +585,10 @@ func (c *ProApi) GpuMemTemp() ([]DeviceInfo, error) {
func (c *ProApi) GpuTempOld() ([]DeviceInfo, error) { func (c *ProApi) GpuTempOld() ([]DeviceInfo, error) {
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel() defer cancel()
result, warnings, err := c.API.Query(ctx, "DCGM_FI_DEV_GPU_TEMP", time.Now(), v1.WithTimeout(5*time.Second)) result, warnings, err := c.API.Query(ctx, "DCGM_FI_DEV_GPU_TEMP", time.Now(), v1.WithTimeout(time.Second))
if err != nil { if err != nil {
return nil, err return nil, err
} }
...@@ -636,10 +636,10 @@ func (c *ProApi) GpuTempOld() ([]DeviceInfo, error) { ...@@ -636,10 +636,10 @@ func (c *ProApi) GpuTempOld() ([]DeviceInfo, error) {
func (c *ProApi) GpuUtilOld() ([]DeviceInfo, error) { func (c *ProApi) GpuUtilOld() ([]DeviceInfo, error) {
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel() defer cancel()
result, warnings, err := c.API.Query(ctx, "DCGM_FI_DEV_GPU_UTIL", time.Now(), v1.WithTimeout(5*time.Second)) result, warnings, err := c.API.Query(ctx, "DCGM_FI_DEV_GPU_UTIL", time.Now(), v1.WithTimeout(time.Second))
if err != nil { if err != nil {
return nil, err return nil, err
} }
...@@ -688,10 +688,10 @@ func (c *ProApi) GpuUtilOld() ([]DeviceInfo, error) { ...@@ -688,10 +688,10 @@ func (c *ProApi) GpuUtilOld() ([]DeviceInfo, error) {
// DCGM_FI_DEV_MEM_COPY_UTIL // DCGM_FI_DEV_MEM_COPY_UTIL
func (c *ProApi) GpuMemUtilOld() ([]DeviceInfo, error) { func (c *ProApi) GpuMemUtilOld() ([]DeviceInfo, error) {
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel() defer cancel()
result, warnings, err := c.API.Query(ctx, "DCGM_FI_DEV_MEM_COPY_UTIL", time.Now(), v1.WithTimeout(5*time.Second)) result, warnings, err := c.API.Query(ctx, "DCGM_FI_DEV_MEM_COPY_UTIL", time.Now(), v1.WithTimeout(time.Second))
if err != nil { if err != nil {
return nil, err return nil, err
} }
...@@ -739,10 +739,10 @@ func (c *ProApi) GpuMemUtilOld() ([]DeviceInfo, error) { ...@@ -739,10 +739,10 @@ func (c *ProApi) GpuMemUtilOld() ([]DeviceInfo, error) {
func (c *ProApi) GpuInfo() ([]DeviceInfo, error) { func (c *ProApi) GpuInfo() ([]DeviceInfo, error) {
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel() defer cancel()
result, warnings, err := c.API.Query(ctx, "DCGM_FI_DEV_GPU_UTIL", time.Now(), v1.WithTimeout(5*time.Second)) result, warnings, err := c.API.Query(ctx, "DCGM_FI_DEV_GPU_UTIL", time.Now(), v1.WithTimeout(time.Second))
if err != nil { if err != nil {
return nil, err return nil, err
} }
......
package main package main
func main() { func main() {
//DCGM() DCGM()
Smi() //Smi()
} }
global: global:
scrape_interval: 10s scrape_interval: 1s
evaluation_interval: 10s evaluation_interval: 1s
scrape_configs: scrape_configs:
- job_name: prometheus - job_name: prometheus
......
No preview for this file type
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment