Commit d04e85f4 authored by Carlos Bermudez Porto's avatar Carlos Bermudez Porto Committed by GitHub

feat: add generic prometheus endpoints (#209)

- feat: add execution and additional metrics jobs to prometheus service
- feat: add beacon metrics gazer to prometheus
- fix: ignore not defined metrics info
parent 251b34f7
......@@ -83,6 +83,7 @@ def run(plan, args={}):
prometheus_config_template = read_file(
static_files.PROMETHEUS_CONFIG_TEMPLATE_FILEPATH
)
prometheus_additional_metrics_jobs = []
plan.print("Read the prometheus, grafana templates")
......@@ -229,6 +230,7 @@ def run(plan, args={}):
if not args_with_right_defaults.launch_additional_services:
return
launch_prometheus_grafana = False
for additional_service in args_with_right_defaults.additional_services:
if additional_service == "tx_spammer":
plan.print("Launching transaction spammer")
......@@ -283,12 +285,18 @@ def run(plan, args={}):
beacon_metrics_gazer_config_template = read_file(
static_files.BEACON_METRICS_GAZER_CONFIG_TEMPLATE_FILEPATH
)
beacon_metrics_gazer.launch_beacon_metrics_gazer(
plan,
beacon_metrics_gazer_config_template,
all_cl_client_contexts,
args_with_right_defaults.participants,
network_params,
beacon_metrics_gazer_prometheus_metrics_job = (
beacon_metrics_gazer.launch_beacon_metrics_gazer(
plan,
beacon_metrics_gazer_config_template,
all_cl_client_contexts,
args_with_right_defaults.participants,
network_params,
)
)
launch_prometheus_grafana = True
prometheus_additional_metrics_jobs.append(
beacon_metrics_gazer_prometheus_metrics_job
)
plan.print("Succesfully launched beacon metrics gazer")
elif additional_service == "light_beaconchain_explorer":
......@@ -301,25 +309,28 @@ def run(plan, args={}):
)
plan.print("Succesfully light-beaconchain-explorer")
elif additional_service == "prometheus_grafana":
plan.print("Launching prometheus...")
prometheus_private_url = prometheus.launch_prometheus(
plan,
prometheus_config_template,
all_cl_client_contexts,
all_el_client_contexts,
)
plan.print("Successfully launched Prometheus")
plan.print("Launching grafana...")
grafana.launch_grafana(
plan,
grafana_datasource_config_template,
grafana_dashboards_config_template,
prometheus_private_url,
)
plan.print("Succesfully launched grafana")
# Allow prometheus to be launched last so it is able to collect metrics from other services
launch_prometheus_grafana = True
else:
fail("Invalid additional service %s" % (additional_service))
if launch_prometheus_grafana:
plan.print("Launching prometheus...")
prometheus_private_url = prometheus.launch_prometheus(
plan,
prometheus_config_template,
all_el_client_contexts,
all_cl_client_contexts,
prometheus_additional_metrics_jobs,
)
plan.print("Launching grafana...")
grafana.launch_grafana(
plan,
grafana_datasource_config_template,
grafana_dashboards_config_template,
prometheus_private_url,
)
plan.print("Succesfully launched grafana")
if args_with_right_defaults.wait_for_finalization:
plan.print("Waiting for the first finalized epoch")
......
shared_utils = import_module(
"github.com/kurtosis-tech/ethereum-package/src/shared_utils/shared_utils.star"
)
prometheus = import_module(
"github.com/kurtosis-tech/ethereum-package/src/prometheus/prometheus_launcher.star"
)
SERVICE_NAME = "beacon-metrics-gazer"
......@@ -9,6 +12,8 @@ IMAGE_NAME = "ethpandaops/beacon-metrics-gazer:master"
HTTP_PORT_ID = "http"
HTTP_PORT_NUMBER = 8080
METRICS_PATH = "/metrics"
BEACON_METRICS_GAZER_CONFIG_FILENAME = "validator-ranges.yaml"
BEACON_METRICS_GAZER_CONFIG_MOUNT_DIRPATH_ON_SERVICE = "/config"
......@@ -59,7 +64,18 @@ def launch_beacon_metrics_gazer(
cl_client_contexts[0].http_port_num,
)
plan.add_service(SERVICE_NAME, config)
beacon_metrics_gazer_service = plan.add_service(SERVICE_NAME, config)
return prometheus.new_metrics_job(
job_name=SERVICE_NAME,
endpoint="{0}:{1}".format(
beacon_metrics_gazer_service.ip_address, HTTP_PORT_NUMBER
),
metrics_path=METRICS_PATH,
labels={
"service": SERVICE_NAME,
},
)
def get_config(config_files_artifact_name, ip_addr, http_port_num):
......
......@@ -4,6 +4,14 @@ shared_utils = import_module(
SERVICE_NAME = "prometheus"
EXECUTION_CLIENT_TYPE = "execution"
BEACON_CLIENT_TYPE = "beacon"
VALIDATOR_CLIENT_TYPE = "validator"
METRICS_INFO_NAME_KEY = "name"
METRICS_INFO_URL_KEY = "url"
METRICS_INFO_PATH_KEY = "path"
# TODO(old) I'm not sure if we should use the latest version or pin a specific version instead
IMAGE_NAME = "prom/prometheus:latest"
......@@ -22,17 +30,18 @@ USED_PORTS = {
}
def launch_prometheus(plan, config_template, cl_client_contexts, el_client_contexts):
all_nodes_metrics_info = []
for client in cl_client_contexts:
all_nodes_metrics_info.extend(client.cl_nodes_metrics_info)
for client in el_client_contexts:
# ethereumjs doesn't populate metrics just yet
if client.el_metrics_info != [None]:
all_nodes_metrics_info.extend(client.el_metrics_info)
template_data = new_config_template_data(all_nodes_metrics_info)
def launch_prometheus(
plan,
config_template,
el_client_contexts,
cl_client_contexts,
additional_metrics_jobs,
):
template_data = new_config_template_data(
el_client_contexts,
cl_client_contexts,
additional_metrics_jobs,
)
template_and_data = shared_utils.new_template_and_data(
config_template, template_data
)
......@@ -75,5 +84,87 @@ def get_config(config_files_artifact_name):
)
def new_config_template_data(cl_nodes_metrics_info):
    """Wraps the metrics info list in the mapping handed to the config template.

    Args:
        cl_nodes_metrics_info: value stored, unchanged, under the
            "CLNodesMetricsInfo" key.

    Returns:
        A dict with the single key "CLNodesMetricsInfo".
    """
    template_data = {}
    template_data["CLNodesMetricsInfo"] = cl_nodes_metrics_info
    return template_data
def new_config_template_data(
    el_client_contexts,
    cl_client_contexts,
    additional_metrics_jobs,
):
    """Builds the data rendered into the Prometheus config template.

    Args:
        el_client_contexts: execution client contexts. For each one, entry 0
            of `el_metrics_info` becomes a job when it exists and is not None.
        cl_client_contexts: consensus client contexts. Entry 0 of
            `cl_nodes_metrics_info` is used as the beacon node metrics and
            entry 1 as the validator node metrics; each becomes a job when it
            exists and is not None.
        additional_metrics_jobs: extra job dicts (e.g. built with
            `new_metrics_job`) appended after the client jobs. None entries
            are ignored.

    Returns:
        A dict with the single key "MetricsJobs" holding the list of jobs, in
        the order: execution clients, consensus clients, additional jobs.
    """
    metrics_jobs = []

    # Adding execution clients metrics jobs
    for context in el_client_contexts:
        el_metrics_info = context.el_metrics_info
        # Clients without metrics support report a missing / None entry.
        if len(el_metrics_info) >= 1 and el_metrics_info[0] != None:
            metrics_jobs.append(
                _new_client_metrics_job(
                    el_metrics_info[0],
                    context.service_name,
                    EXECUTION_CLIENT_TYPE,
                    context.client_name,
                )
            )

    # Adding consensus clients metrics jobs
    for context in cl_client_contexts:
        cl_metrics_info = context.cl_nodes_metrics_info

        # Adding beacon node metrics
        if len(cl_metrics_info) >= 1 and cl_metrics_info[0] != None:
            metrics_jobs.append(
                _new_client_metrics_job(
                    cl_metrics_info[0],
                    context.beacon_service_name,
                    BEACON_CLIENT_TYPE,
                    context.client_name,
                )
            )

        # Adding validator node metrics
        if len(cl_metrics_info) >= 2 and cl_metrics_info[1] != None:
            metrics_jobs.append(
                _new_client_metrics_job(
                    cl_metrics_info[1],
                    context.validator_service_name,
                    VALIDATOR_CLIENT_TYPE,
                    context.client_name,
                )
            )

    # Adding additional metrics jobs, ignoring the ones that are not defined
    metrics_jobs.extend([job for job in additional_metrics_jobs if job != None])

    return {
        "MetricsJobs": metrics_jobs,
    }


def _new_client_metrics_job(metrics_info, service_name, client_type, client_name):
    """Turns one client metrics info entry into a labelled metrics job."""
    return new_metrics_job(
        job_name=metrics_info[METRICS_INFO_NAME_KEY],
        endpoint=metrics_info[METRICS_INFO_URL_KEY],
        metrics_path=metrics_info[METRICS_INFO_PATH_KEY],
        labels={
            "service": service_name,
            "client_type": client_type,
            "client_name": client_name,
        },
    )
def new_metrics_job(
    job_name,
    endpoint,
    metrics_path,
    labels,
    scrape_interval="15s",
):
    """Describes one scrape job in the shape the config template iterates over.

    Args:
        job_name: stored under "Name".
        endpoint: stored under "Endpoint".
        metrics_path: stored under "MetricsPath".
        labels: mapping stored as-is under "Labels".
        scrape_interval: stored under "ScrapeInterval"; defaults to "15s".

    Returns:
        A dict with the keys "Name", "Endpoint", "MetricsPath", "Labels" and
        "ScrapeInterval".
    """
    metrics_job = {}
    metrics_job["Name"] = job_name
    metrics_job["Endpoint"] = endpoint
    metrics_job["MetricsPath"] = metrics_path
    metrics_job["Labels"] = labels
    metrics_job["ScrapeInterval"] = scrape_interval
    return metrics_job
global:
scrape_interval: 15s # By default, scrape targets every 15 seconds.
# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_interval: 15s
scrape_configs:
{{ range $clNode := .CLNodesMetricsInfo }}
- job_name: '{{ $clNode.name }}'
metrics_path: {{ $clNode.path }}
static_configs:
- targets: ['{{ $clNode.url }}']
{{ end }}
- job_name: 'beacon-metrics-gazer'
metrics_path: '/metrics'
static_configs:
- targets: ['beacon-metrics-gazer:8080']
{{- range $job := .MetricsJobs }}
- job_name: "{{ $job.Name }}"
metrics_path: "{{ $job.MetricsPath }}"
{{- if $job.ScrapeInterval }}
scrape_interval: {{ $job.ScrapeInterval }}
{{- end }}
static_configs:
- targets: ['{{ $job.Endpoint }}']
labels:{{ range $labelName, $labelValue := $job.Labels }}
{{ $labelName }}: "{{ $labelValue }}"
{{- end }}
{{- end }}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment