Commit d04e85f4, authored by Carlos Bermudez Porto and committed by GitHub

feat: add generic prometheus endpoints (#209)

- feat: add execution and additional metrics jobs to prometheus service
- feat: add beacon metrics gazer to prometheus
- fix: ignore not defined metrics info
parent 251b34f7
...@@ -83,6 +83,7 @@ def run(plan, args={}): ...@@ -83,6 +83,7 @@ def run(plan, args={}):
prometheus_config_template = read_file( prometheus_config_template = read_file(
static_files.PROMETHEUS_CONFIG_TEMPLATE_FILEPATH static_files.PROMETHEUS_CONFIG_TEMPLATE_FILEPATH
) )
prometheus_additional_metrics_jobs = []
plan.print("Read the prometheus, grafana templates") plan.print("Read the prometheus, grafana templates")
...@@ -229,6 +230,7 @@ def run(plan, args={}): ...@@ -229,6 +230,7 @@ def run(plan, args={}):
if not args_with_right_defaults.launch_additional_services: if not args_with_right_defaults.launch_additional_services:
return return
launch_prometheus_grafana = False
for additional_service in args_with_right_defaults.additional_services: for additional_service in args_with_right_defaults.additional_services:
if additional_service == "tx_spammer": if additional_service == "tx_spammer":
plan.print("Launching transaction spammer") plan.print("Launching transaction spammer")
...@@ -283,12 +285,18 @@ def run(plan, args={}): ...@@ -283,12 +285,18 @@ def run(plan, args={}):
beacon_metrics_gazer_config_template = read_file( beacon_metrics_gazer_config_template = read_file(
static_files.BEACON_METRICS_GAZER_CONFIG_TEMPLATE_FILEPATH static_files.BEACON_METRICS_GAZER_CONFIG_TEMPLATE_FILEPATH
) )
beacon_metrics_gazer.launch_beacon_metrics_gazer( beacon_metrics_gazer_prometheus_metrics_job = (
plan, beacon_metrics_gazer.launch_beacon_metrics_gazer(
beacon_metrics_gazer_config_template, plan,
all_cl_client_contexts, beacon_metrics_gazer_config_template,
args_with_right_defaults.participants, all_cl_client_contexts,
network_params, args_with_right_defaults.participants,
network_params,
)
)
launch_prometheus_grafana = True
prometheus_additional_metrics_jobs.append(
beacon_metrics_gazer_prometheus_metrics_job
) )
plan.print("Succesfully launched beacon metrics gazer") plan.print("Succesfully launched beacon metrics gazer")
elif additional_service == "light_beaconchain_explorer": elif additional_service == "light_beaconchain_explorer":
...@@ -301,25 +309,28 @@ def run(plan, args={}): ...@@ -301,25 +309,28 @@ def run(plan, args={}):
) )
plan.print("Succesfully light-beaconchain-explorer") plan.print("Succesfully light-beaconchain-explorer")
elif additional_service == "prometheus_grafana": elif additional_service == "prometheus_grafana":
plan.print("Launching prometheus...") # Allow prometheus to be launched last so is able to collect metrics from other services
prometheus_private_url = prometheus.launch_prometheus( launch_prometheus_grafana = True
plan,
prometheus_config_template,
all_cl_client_contexts,
all_el_client_contexts,
)
plan.print("Successfully launched Prometheus")
plan.print("Launching grafana...")
grafana.launch_grafana(
plan,
grafana_datasource_config_template,
grafana_dashboards_config_template,
prometheus_private_url,
)
plan.print("Succesfully launched grafana")
else: else:
fail("Invalid additional service %s" % (additional_service)) fail("Invalid additional service %s" % (additional_service))
if launch_prometheus_grafana:
plan.print("Launching prometheus...")
prometheus_private_url = prometheus.launch_prometheus(
plan,
prometheus_config_template,
all_el_client_contexts,
all_cl_client_contexts,
prometheus_additional_metrics_jobs,
)
plan.print("Launching grafana...")
grafana.launch_grafana(
plan,
grafana_datasource_config_template,
grafana_dashboards_config_template,
prometheus_private_url,
)
plan.print("Succesfully launched grafana")
if args_with_right_defaults.wait_for_finalization: if args_with_right_defaults.wait_for_finalization:
plan.print("Waiting for the first finalized epoch") plan.print("Waiting for the first finalized epoch")
......
shared_utils = import_module( shared_utils = import_module(
"github.com/kurtosis-tech/ethereum-package/src/shared_utils/shared_utils.star" "github.com/kurtosis-tech/ethereum-package/src/shared_utils/shared_utils.star"
) )
prometheus = import_module(
"github.com/kurtosis-tech/ethereum-package/src/prometheus/prometheus_launcher.star"
)
SERVICE_NAME = "beacon-metrics-gazer" SERVICE_NAME = "beacon-metrics-gazer"
...@@ -9,6 +12,8 @@ IMAGE_NAME = "ethpandaops/beacon-metrics-gazer:master" ...@@ -9,6 +12,8 @@ IMAGE_NAME = "ethpandaops/beacon-metrics-gazer:master"
HTTP_PORT_ID = "http" HTTP_PORT_ID = "http"
HTTP_PORT_NUMBER = 8080 HTTP_PORT_NUMBER = 8080
METRICS_PATH = "/metrics"
BEACON_METRICS_GAZER_CONFIG_FILENAME = "validator-ranges.yaml" BEACON_METRICS_GAZER_CONFIG_FILENAME = "validator-ranges.yaml"
BEACON_METRICS_GAZER_CONFIG_MOUNT_DIRPATH_ON_SERVICE = "/config" BEACON_METRICS_GAZER_CONFIG_MOUNT_DIRPATH_ON_SERVICE = "/config"
...@@ -59,7 +64,18 @@ def launch_beacon_metrics_gazer( ...@@ -59,7 +64,18 @@ def launch_beacon_metrics_gazer(
cl_client_contexts[0].http_port_num, cl_client_contexts[0].http_port_num,
) )
plan.add_service(SERVICE_NAME, config) beacon_metrics_gazer_service = plan.add_service(SERVICE_NAME, config)
return prometheus.new_metrics_job(
job_name=SERVICE_NAME,
endpoint="{0}:{1}".format(
beacon_metrics_gazer_service.ip_address, HTTP_PORT_NUMBER
),
metrics_path=METRICS_PATH,
labels={
"service": SERVICE_NAME,
},
)
def get_config(config_files_artifact_name, ip_addr, http_port_num): def get_config(config_files_artifact_name, ip_addr, http_port_num):
......
...@@ -4,6 +4,14 @@ shared_utils = import_module( ...@@ -4,6 +4,14 @@ shared_utils = import_module(
SERVICE_NAME = "prometheus" SERVICE_NAME = "prometheus"
EXECUTION_CLIENT_TYPE = "execution"
BEACON_CLIENT_TYPE = "beacon"
VALIDATOR_CLIENT_TYPE = "validator"
METRICS_INFO_NAME_KEY = "name"
METRICS_INFO_URL_KEY = "url"
METRICS_INFO_PATH_KEY = "path"
# TODO(old) I'm not sure if we should use latest version or ping an specific version instead # TODO(old) I'm not sure if we should use latest version or ping an specific version instead
IMAGE_NAME = "prom/prometheus:latest" IMAGE_NAME = "prom/prometheus:latest"
...@@ -22,17 +30,18 @@ USED_PORTS = { ...@@ -22,17 +30,18 @@ USED_PORTS = {
} }
def launch_prometheus(plan, config_template, cl_client_contexts, el_client_contexts): def launch_prometheus(
all_nodes_metrics_info = [] plan,
for client in cl_client_contexts: config_template,
all_nodes_metrics_info.extend(client.cl_nodes_metrics_info) el_client_contexts,
cl_client_contexts,
for client in el_client_contexts: additional_metrics_jobs,
# etheruemjs doesn't populate metrics just yet ):
if client.el_metrics_info != [None]: template_data = new_config_template_data(
all_nodes_metrics_info.extend(client.el_metrics_info) el_client_contexts,
cl_client_contexts,
template_data = new_config_template_data(all_nodes_metrics_info) additional_metrics_jobs,
)
template_and_data = shared_utils.new_template_and_data( template_and_data = shared_utils.new_template_and_data(
config_template, template_data config_template, template_data
) )
...@@ -75,5 +84,87 @@ def get_config(config_files_artifact_name): ...@@ -75,5 +84,87 @@ def get_config(config_files_artifact_name):
) )
def new_config_template_data(
    el_client_contexts,
    cl_client_contexts,
    additional_metrics_jobs,
):
    """Assemble the data dict rendered into the Prometheus config template.

    A scrape job is created for every execution client, beacon node and
    validator node whose metrics info entry is present and not None. Jobs
    supplied by the caller are appended after the client jobs.

    Args:
        el_client_contexts: execution client contexts; `el_metrics_info`,
            `service_name` and `client_name` are read from each one.
        cl_client_contexts: consensus client contexts; `cl_nodes_metrics_info`,
            `beacon_service_name`, `validator_service_name` and `client_name`
            are read from each one.
        additional_metrics_jobs: list of extra jobs; None entries are dropped.

    Returns:
        A dict with the single key "MetricsJobs" mapping to the list of jobs.
    """
    jobs = []

    # Execution clients: only the first metrics info entry is considered.
    for el_context in el_client_contexts:
        el_infos = el_context.el_metrics_info
        if len(el_infos) < 1 or el_infos[0] == None:
            continue
        jobs.append(
            _client_metrics_job(
                el_infos[0],
                el_context.service_name,
                EXECUTION_CLIENT_TYPE,
                el_context.client_name,
            )
        )

    # Consensus clients: entry 0 is used for the beacon node and entry 1 for
    # the validator node; each is checked on its own.
    for cl_context in cl_client_contexts:
        cl_infos = cl_context.cl_nodes_metrics_info
        if len(cl_infos) >= 1 and cl_infos[0] != None:
            jobs.append(
                _client_metrics_job(
                    cl_infos[0],
                    cl_context.beacon_service_name,
                    BEACON_CLIENT_TYPE,
                    cl_context.client_name,
                )
            )
        if len(cl_infos) >= 2 and cl_infos[1] != None:
            jobs.append(
                _client_metrics_job(
                    cl_infos[1],
                    cl_context.validator_service_name,
                    VALIDATOR_CLIENT_TYPE,
                    cl_context.client_name,
                )
            )

    # Caller-provided jobs go last; None placeholders are ignored.
    jobs.extend([job for job in additional_metrics_jobs if job != None])

    return {
        "MetricsJobs": jobs,
    }


def _client_metrics_job(metrics_info, service_name, client_type, client_name):
    """Build the scrape job for one client from its metrics info dict."""
    return new_metrics_job(
        job_name=metrics_info[METRICS_INFO_NAME_KEY],
        endpoint=metrics_info[METRICS_INFO_URL_KEY],
        metrics_path=metrics_info[METRICS_INFO_PATH_KEY],
        labels={
            "service": service_name,
            "client_type": client_type,
            "client_name": client_name,
        },
    )
def new_metrics_job(
    job_name,
    endpoint,
    metrics_path,
    labels,
    scrape_interval="15s",
):
    """Describe one Prometheus scrape job as a dict.

    The keys are the field names the Prometheus config template reads from
    each job: Name, Endpoint, MetricsPath, Labels and ScrapeInterval.

    Args:
        job_name: value stored under "Name".
        endpoint: value stored under "Endpoint".
        metrics_path: value stored under "MetricsPath".
        labels: value stored under "Labels".
        scrape_interval: value stored under "ScrapeInterval"; defaults to "15s".

    Returns:
        A dict holding the five values under the keys listed above.
    """
    return dict(
        Name=job_name,
        Endpoint=endpoint,
        MetricsPath=metrics_path,
        Labels=labels,
        ScrapeInterval=scrape_interval,
    )
global: global:
scrape_interval: 15s # By default, scrape targets every 15 seconds. scrape_interval: 15s
# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs: scrape_configs:
{{ range $clNode := .CLNodesMetricsInfo }} {{- range $job := .MetricsJobs }}
- job_name: '{{ $clNode.name }}' - job_name: "{{ $job.Name }}"
metrics_path: {{ $clNode.path }} metrics_path: "{{ $job.MetricsPath }}"
static_configs: {{- if $job.ScrapeInterval }}
- targets: ['{{ $clNode.url }}'] scrape_interval: {{ $job.ScrapeInterval }}
{{ end }} {{- end }}
- job_name: 'beacon-metrics-gazer' static_configs:
metrics_path: '/metrics' - targets: ['{{ $job.Endpoint }}']
static_configs: labels:{{ range $labelName, $labelValue := $job.Labels }}
- targets: ['beacon-metrics-gazer:8080'] {{ $labelName }}: "{{ $labelValue }}"
{{- end }}
{{- end }}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment