Commit 3fff37f1 authored by Your Name

update to monitor.dcgm:v.0.0.8

parent a9dd8628
......@@ -44,6 +44,9 @@ func CpuSmi() (Cpu, error) {
}
percent, err := cpu.Percent(time.Second, false)
if err != nil {
return Cpu{}, err
}
percentAsInt := int64(0)
......@@ -55,6 +58,10 @@ func CpuSmi() (Cpu, error) {
c, err := cpu.Counts(false)
if err != nil {
return Cpu{}, err
}
return Cpu{
Model: model,
Number: int64(len(physicalMap)),
......
......@@ -2,7 +2,7 @@ version: "3.9"
services:
monitor.dcgm:
image: docker.ai.17xuexi.com/monitorapi/monitor.dcgm:v.0.0.7
image: docker.ai.17xuexi.com/monitorapi/monitor.dcgm:v.0.0.8
ports:
- 9093:9090
- 4000:4000
......
......@@ -19,3 +19,4 @@ services:
#docker run --rm -it --net=host nicolaka/netshoot nc -lkv 0.0.0.0 8000
......@@ -117,10 +117,10 @@ func (c *ProApi) GpuUtil(inGpus map[string]Gpu) (map[string]Gpu, error) {
// outGpus := make(map[string]Gpu, 8)
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel()
gpuResult, gpuWarnings, err := c.API.Query(ctx, "DCGM_FI_DEV_GPU_UTIL", time.Now(), v1.WithTimeout(5*time.Second))
gpuResult, gpuWarnings, err := c.API.Query(ctx, "DCGM_FI_DEV_GPU_UTIL", time.Now(), v1.WithTimeout(time.Second))
if err != nil {
return nil, err
}
......@@ -241,10 +241,10 @@ func (c *ProApi) GpuPowerUsage(inGpus map[string]Gpu) (map[string]Gpu, error) {
// outGpus := make(map[string]Gpu, 8)
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel()
gpuResult, gpuWarnings, err := c.API.Query(ctx, "DCGM_FI_DEV_POWER_USAGE", time.Now(), v1.WithTimeout(5*time.Second))
gpuResult, gpuWarnings, err := c.API.Query(ctx, "DCGM_FI_DEV_POWER_USAGE", time.Now(), v1.WithTimeout(time.Second))
if err != nil {
return nil, err
}
......@@ -301,10 +301,10 @@ func (c *ProApi) GpuPowerUsage(inGpus map[string]Gpu) (map[string]Gpu, error) {
func (c *ProApi) GpuMemFree(inGpus map[string]Gpu) (map[string]Gpu, error) {
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel()
gpuResult, gpuWarnings, err := c.API.Query(ctx, "DCGM_FI_DEV_FB_FREE", time.Now(), v1.WithTimeout(5*time.Second))
gpuResult, gpuWarnings, err := c.API.Query(ctx, "DCGM_FI_DEV_FB_FREE", time.Now(), v1.WithTimeout(time.Second))
if err != nil {
return nil, err
}
......@@ -361,10 +361,10 @@ func (c *ProApi) GpuMemFree(inGpus map[string]Gpu) (map[string]Gpu, error) {
func (c *ProApi) GpuMemTotal(inGpus map[string]Gpu) (map[string]Gpu, error) {
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel()
gpuResult, gpuWarnings, err := c.API.Query(ctx, "DCGM_FI_DEV_FB_USED+DCGM_FI_DEV_FB_FREE", time.Now(), v1.WithTimeout(5*time.Second))
gpuResult, gpuWarnings, err := c.API.Query(ctx, "DCGM_FI_DEV_FB_USED+DCGM_FI_DEV_FB_FREE", time.Now(), v1.WithTimeout(time.Second))
if err != nil {
return nil, err
}
......@@ -421,10 +421,10 @@ func (c *ProApi) GpuMemTotal(inGpus map[string]Gpu) (map[string]Gpu, error) {
func (c *ProApi) GpuTemp(inGpus map[string]Gpu) (map[string]Gpu, error) {
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel()
gpuResult, gpuWarnings, err := c.API.Query(ctx, "DCGM_FI_DEV_GPU_TEMP", time.Now(), v1.WithTimeout(5*time.Second))
gpuResult, gpuWarnings, err := c.API.Query(ctx, "DCGM_FI_DEV_GPU_TEMP", time.Now(), v1.WithTimeout(time.Second))
if err != nil {
return nil, err
}
......@@ -483,10 +483,10 @@ func (c *ProApi) GpuTemp(inGpus map[string]Gpu) (map[string]Gpu, error) {
func (c *ProApi) GpuPowerUsageOld() ([]DeviceInfo, error) {
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel()
result, warnings, err := c.API.Query(ctx, "DCGM_FI_DEV_POWER_USAGE", time.Now(), v1.WithTimeout(5*time.Second))
result, warnings, err := c.API.Query(ctx, "DCGM_FI_DEV_POWER_USAGE", time.Now(), v1.WithTimeout(time.Second))
if err != nil {
return nil, err
}
......@@ -534,10 +534,10 @@ func (c *ProApi) GpuPowerUsageOld() ([]DeviceInfo, error) {
func (c *ProApi) GpuMemTemp() ([]DeviceInfo, error) {
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel()
result, warnings, err := c.API.Query(ctx, "DCGM_FI_DEV_MEMORY_TEMP", time.Now(), v1.WithTimeout(5*time.Second))
result, warnings, err := c.API.Query(ctx, "DCGM_FI_DEV_MEMORY_TEMP", time.Now(), v1.WithTimeout(time.Second))
if err != nil {
return nil, err
}
......@@ -585,10 +585,10 @@ func (c *ProApi) GpuMemTemp() ([]DeviceInfo, error) {
func (c *ProApi) GpuTempOld() ([]DeviceInfo, error) {
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel()
result, warnings, err := c.API.Query(ctx, "DCGM_FI_DEV_GPU_TEMP", time.Now(), v1.WithTimeout(5*time.Second))
result, warnings, err := c.API.Query(ctx, "DCGM_FI_DEV_GPU_TEMP", time.Now(), v1.WithTimeout(time.Second))
if err != nil {
return nil, err
}
......@@ -636,10 +636,10 @@ func (c *ProApi) GpuTempOld() ([]DeviceInfo, error) {
func (c *ProApi) GpuUtilOld() ([]DeviceInfo, error) {
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel()
result, warnings, err := c.API.Query(ctx, "DCGM_FI_DEV_GPU_UTIL", time.Now(), v1.WithTimeout(5*time.Second))
result, warnings, err := c.API.Query(ctx, "DCGM_FI_DEV_GPU_UTIL", time.Now(), v1.WithTimeout(time.Second))
if err != nil {
return nil, err
}
......@@ -688,10 +688,10 @@ func (c *ProApi) GpuUtilOld() ([]DeviceInfo, error) {
// DCGM_FI_DEV_MEM_COPY_UTIL
func (c *ProApi) GpuMemUtilOld() ([]DeviceInfo, error) {
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel()
result, warnings, err := c.API.Query(ctx, "DCGM_FI_DEV_MEM_COPY_UTIL", time.Now(), v1.WithTimeout(5*time.Second))
result, warnings, err := c.API.Query(ctx, "DCGM_FI_DEV_MEM_COPY_UTIL", time.Now(), v1.WithTimeout(time.Second))
if err != nil {
return nil, err
}
......@@ -739,10 +739,10 @@ func (c *ProApi) GpuMemUtilOld() ([]DeviceInfo, error) {
func (c *ProApi) GpuInfo() ([]DeviceInfo, error) {
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel()
result, warnings, err := c.API.Query(ctx, "DCGM_FI_DEV_GPU_UTIL", time.Now(), v1.WithTimeout(5*time.Second))
result, warnings, err := c.API.Query(ctx, "DCGM_FI_DEV_GPU_UTIL", time.Now(), v1.WithTimeout(time.Second))
if err != nil {
return nil, err
}
......
package main
// main is the program entry point. As listed here it calls Smi and then DCGM;
// neither function is defined in this excerpt.
// NOTE(review): this listing looks like a rendered diff that interleaves the
// pre-commit body (//DCGM(), Smi()) with the post-commit body (DCGM(), //Smi()).
// Confirm which single call is meant to be active before relying on it.
func main() {
//DCGM()
Smi()
DCGM()
//Smi()
}
global:
scrape_interval: 10s
evaluation_interval: 10s
scrape_interval: 1s
evaluation_interval: 1s
scrape_configs:
- job_name: prometheus
......
No preview for this file type
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment