diff options
author | Rebeccah Hunter <rhunter@redhat.com> | 2022-10-12 20:08:17 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-10-12 20:08:17 +0200 |
commit | 50614b961ea4868ba4c437f69ba511302f46acfd (patch) | |
tree | efa615c5592070e7e0231515f13bacf7cc665aff | |
parent | Merge pull request #12974 from kialam/new-health-check-started (diff) | |
parent | Updates to Grafana Dashboard and example alerts (diff) | |
download | awx-50614b961ea4868ba4c437f69ba511302f46acfd.tar.xz awx-50614b961ea4868ba4c437f69ba511302f46acfd.zip |
Merge pull request #13001 from kdelee/moooore-dashboard
Moooore 🐮 dashboard
-rw-r--r-- | tools/docker-compose/ansible/roles/sources/templates/docker-compose.yml.j2 | 2 | ||||
-rw-r--r-- | tools/grafana/README.md | 11 | ||||
-rw-r--r-- | tools/grafana/alerting/alerts.yml | 301 | ||||
-rw-r--r-- | tools/grafana/dashboards/demo_dashboard.json | 2114 |
4 files changed, 1463 insertions, 965 deletions
diff --git a/tools/docker-compose/ansible/roles/sources/templates/docker-compose.yml.j2 b/tools/docker-compose/ansible/roles/sources/templates/docker-compose.yml.j2 index db4988b207..ee2e79f990 100644 --- a/tools/docker-compose/ansible/roles/sources/templates/docker-compose.yml.j2 +++ b/tools/docker-compose/ansible/roles/sources/templates/docker-compose.yml.j2 @@ -180,7 +180,7 @@ services: image: postgres:12 container_name: tools_postgres_1 # additional logging settings for postgres can be found https://www.postgresql.org/docs/current/runtime-config-logging.html - command: postgres -c log_destination=stderr -c log_min_messages=info -c log_min_duration_statement={{ pg_log_min_duration_statement|default(1000) }} + command: postgres -c log_destination=stderr -c log_min_messages=info -c log_min_duration_statement={{ pg_log_min_duration_statement|default(1000) }} -c max_connections={{ pg_max_connections|default(1024) }} environment: POSTGRES_HOST_AUTH_METHOD: trust POSTGRES_USER: {{ pg_username }} diff --git a/tools/grafana/README.md b/tools/grafana/README.md index f6e17c256b..a2fc38e69c 100644 --- a/tools/grafana/README.md +++ b/tools/grafana/README.md @@ -36,9 +36,18 @@ GRAFANA=true PROMETHEUS=true EXTRA_SOURCES_ANSIBLE_OPTS="-e scrape_interval=1 ad We are configuring alerts in grafana using the provisioning files method. This feature is new in Grafana as of August, 2022. Documentation can be found: https://grafana.com/docs/grafana/latest/administration/provisioning/#alerting however it does not fully show all parameters to the config. -One way to understand how to build rules is to build them in the UI and use chrometools to inspect the payload as you save the rules. It appears that the "data" portion of the payload for each rule is the same syntax as needed in the provisioning file config. To reload the alerts without restarting the container, from within the container you can send a POST with `curl -X POST http://admin:admin@localhost:3000/api/admin/provisioning/alerting/reload`. Keep in mind the grafana container does not contain `curl`. You can install it with the command `apk add curl`. +One way to understand how to build rules is to build them in the UI and use chrometools to inspect the payload as you save the rules. It appears that the "data" portion of the payload for each rule is the same syntax as needed in the provisioning file config. To reload the alerts without restarting the container, from your terminal you can send a POST with `curl -X POST http://admin:admin@localhost:3001/api/admin/provisioning/alerting/reload`. Another way to export rules is explore the api. 1. Get all the folders: `GET` to `/api/folders` 2. Get the rules `GET` to `/api/ruler/grafana/api/v1/rules/{{ Folder }}` +You can do this via curl or in the web browser. + +### Included Alerts + +#### Alert if remaining capacity low and pending jobs exist + +We want to know if jobs are in pending but we lack capacity in the cluster to run them. Our approach is to sum all remaining capacity in the cluster and compare it to the total capacity of the cluster. If less than 10% of our capacity is remaining and we have pending jobs, and this is true for more than 180s, we will fire the alert. + +This alert is named "capacity_below_10_percent" and can be found in this directory in https://github.com/ansible/awx/blob/devel/tools/grafana/alerting/alerts.yml diff --git a/tools/grafana/alerting/alerts.yml b/tools/grafana/alerting/alerts.yml index a6679e2989..d3e0f32189 100644 --- a/tools/grafana/alerting/alerts.yml +++ b/tools/grafana/alerting/alerts.yml @@ -2,15 +2,21 @@ apiVersion: 1 groups: - folder: awx - interval: 60s + interval: 10s name: awx_rules orgId: 1 + exec_err_state: Alerting + no_data_state: NoData rules: - - condition: if_failures_too_high - dashboardUid: awx + - for: 5m + noDataState: OK + panelId: 2 + title: failure_rate_exceeded_20_percent + uid: failure_rate_exceeded_20_percent + condition: compare data: - refId: total_errors - queryType: '' + queryType: "" relativeTimeRange: from: 600 to: 0 @@ -19,7 +25,7 @@ groups: editorMode: code expr: >- max(delta(awx_instance_status_total{instance="awx1:8013", - status="failed|error"}[30m])) + status=~"failed|error"}[30m])) hide: false intervalMs: 1000 legendFormat: __auto @@ -27,11 +33,11 @@ groups: range: true refId: total_errors - refId: max_errors - queryType: '' + queryType: "" relativeTimeRange: from: 0 to: 0 - datasourceUid: '-100' + datasourceUid: "-100" model: conditions: - evaluator: @@ -60,7 +66,7 @@ groups: refId: max_errors type: reduce - refId: total_success - queryType: '' + queryType: "" relativeTimeRange: from: 600 to: 0 @@ -80,11 +86,11 @@ groups: range: true refId: total_success - refId: max_success - queryType: '' + queryType: "" relativeTimeRange: from: 0 to: 0 - datasourceUid: '-100' + datasourceUid: "-100" model: conditions: - evaluator: @@ -113,11 +119,11 @@ groups: refId: max_success type: reduce - refId: compare - queryType: '' + queryType: "" relativeTimeRange: from: 0 to: 0 - datasourceUid: '-100' + datasourceUid: "-100" model: conditions: - evaluator: @@ -158,15 +164,19 @@ groups: maxDataPoints: 43200 refId: compare type: math - for: 30m + - for: 60s noDataState: OK - panelId: 2 - title: failure_rate_exceeded_20_percent - uid: failure_rate_exceeded_20_percent - - condition: if_redis_queue_too_large + panelId: 1 + title: redis_queue_too_large_to_clear_in_2_min + uid: redis_queue_too_large_to_clear_in_2_min + condition: redis_queue_growing_faster_than_insertion_rate dashboardUid: awx data: - - datasourceUid: awx_prometheus + - refId: events_insertion_rate_per_second + relativeTimeRange: + from: 300 + to: 0 + datasourceUid: awx_prometheus model: editorMode: code expr: irate(callback_receiver_events_insert_db{node='awx_1'}[1m]) @@ -177,11 +187,11 @@ groups: range: true refId: events_insertion_rate_per_second queryType: "" - refId: events_insertion_rate_per_second + - refId: mean_event_insertion_rate relativeTimeRange: - from: 300 + from: 0 to: 0 - - datasourceUid: -100 + datasourceUid: -100 model: conditions: - evaluator: @@ -208,11 +218,11 @@ groups: refId: mean_event_insertion_rate type: reduce queryType: "" - refId: mean_event_insertion_rate + - refId: redis_queue_size relativeTimeRange: - from: 0 + from: 300 to: 0 - - datasourceUid: awx_prometheus + datasourceUid: awx_prometheus model: datasource: type: prometheus @@ -226,11 +236,11 @@ groups: range: true refId: redis_queue_size queryType: "" - refId: redis_queue_size + - refId: last_redis_queue_size relativeTimeRange: - from: 300 - to: 0 - - datasourceUid: -100 + from: 0 + to: 0 + datasourceUid: -100 model: conditions: - evaluator: @@ -257,11 +267,12 @@ groups: refId: last_redis_queue_size type: reduce queryType: "" - refId: last_redis_queue_size + - refId: redis_queue_growing_faster_than_insertion_rate + queryType: "" relativeTimeRange: from: 0 to: 0 - - datasourceUid: -100 + datasourceUid: -100 model: conditions: - evaluator: @@ -282,44 +293,167 @@ groups: name: Expression type: __expr__ uid: __expr__ - expression: '($last_redis_queue_size > ($mean_event_insertion_rate * 120))' + expression: "($last_redis_queue_size > ($mean_event_insertion_rate * 120))" hide: false intervalMs: 1000 maxDataPoints: 43200 - refId: redis_queue_growing_faster_than_insertion_rate type: math + - for: 60s + noDataState: OK + panelId: 3 + uid: capacity_below_10_percent + title: capacity_below_10_percent + condition: pending_jobs_and_capacity_compare + data: + - refId: remaining_capacity + queryType: "" + relativeTimeRange: + from: 300 + to: 0 + datasourceUid: awx_prometheus + model: + editorMode: code + expr: sum(awx_instance_remaining_capacity) + hide: false + intervalMs: 1000 + legendFormat: __auto + maxDataPoints: 43200 + range: true + refId: remaining_capacity + - refId: last_remaining_capacity queryType: "" - refId: redis_queue_growing_faster_than_insertion_rate relativeTimeRange: from: 0 to: 0 - for: 60s - noDataState: OK - panelId: 1 - title: redis_queue_too_large_to_clear_in_2_min - uid: redis_queue_too_large_to_clear_in_2_min - - condition: if_capacity_is_too_low - dashboardUid: awx - no_data_state: OK - exec_err_state: Error - data: - - refId: remaining_capacity - queryType: '' + datasourceUid: "-100" + model: + conditions: + - evaluator: + params: + - 3 + type: outside_range + operator: + type: and + query: + params: + - total_capacity + reducer: + params: [] + type: percent_diff + type: query + datasource: + type: __expr__ + uid: "-100" + expression: remaining_capacity + hide: false + intervalMs: 1000 + maxDataPoints: 43200 + reducer: last + refId: last_remaining_capacity + type: reduce + - refId: total_capacity + queryType: "" + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: awx_prometheus + model: + datasource: + type: prometheus + uid: awx_prometheus + editorMode: code + expr: sum(awx_instance_capacity{instance="awx1:8013"}) + hide: false + intervalMs: 1000 + legendFormat: __auto + maxDataPoints: 43200 + range: true + refId: total_capacity + - refId: last_total_capacity + queryType: "" + relativeTimeRange: + from: 0 + to: 0 + datasourceUid: "-100" + model: + conditions: + - evaluator: + params: + - 0 + - 0 + type: gt + operator: + type: and + query: + params: + - capacity_below_10% + reducer: + params: [] + type: avg + type: query + datasource: + name: Expression + type: __expr__ + uid: __expr__ + expression: total_capacity + hide: false + intervalMs: 1000 + maxDataPoints: 43200 + reducer: last + refId: last_total_capacity + type: reduce + - refId: 10_percent_total_capacity + queryType: "" relativeTimeRange: - from: 1800 + from: 0 + to: 0 + datasourceUid: "-100" + model: + conditions: + - evaluator: + params: + - 0 + - 0 + type: gt + operator: + type: and + query: + params: + - last_total_capacity + reducer: + params: [] + type: avg + type: query + datasource: + name: Expression + type: __expr__ + uid: __expr__ + expression: "$last_total_capacity*.10" + hide: false + intervalMs: 1000 + maxDataPoints: 43200 + refId: 10_percent_total_capacity + type: math + - refId: pending_jobs + queryType: "" + relativeTimeRange: + from: 600 to: 0 datasourceUid: awx_prometheus model: + datasource: + type: prometheus + uid: awx_prometheus editorMode: builder - expr: awx_instance_remaining_capacity{instance="awx1:8013"} + expr: awx_pending_jobs_total{instance="awx1:8013"} hide: false intervalMs: 1000 legendFormat: __auto maxDataPoints: 43200 range: true - refId: remaining_capacity - - refId: if_capacity_is_too_low - queryType: '' + refId: pending_jobs + - refId: last_pending_jobs + queryType: "" relativeTimeRange: from: 0 to: 0 @@ -328,14 +462,14 @@ groups: conditions: - evaluator: params: - - 20 - 0 - type: lt + - 0 + type: gt operator: - type: when + type: and query: params: - - remaining_capacity + - pending_jobs_and_capacity_compare reducer: params: [] type: avg @@ -344,12 +478,59 @@ groups: name: Expression type: __expr__ uid: __expr__ - expression: remaining_capacity + expression: pending_jobs + hide: false + intervalMs: 1000 + maxDataPoints: 43200 + reducer: last + refId: last_pending_jobs + type: reduce + - refId: pending_jobs_and_capacity_compare + queryType: "" + relativeTimeRange: + from: 0 + to: 0 + datasourceUid: "-100" + model: + conditions: + - evaluator: + params: + - 0 + - 0 + type: gt + operator: + type: and + query: + params: + - 10_percent_total_capacity + reducer: + params: [] + type: last + type: query + - evaluator: + params: + - 0 + - 0 + type: gt + operator: + type: and + query: + params: + - pending_jobs + reducer: + params: [] + type: last + type: query + datasource: + name: Expression + type: __expr__ + uid: __expr__ + expression: + "($10_percent_total_capacity > $last_remaining_capacity) && $last_pending_jobs + > 1" hide: false intervalMs: 1000 maxDataPoints: 43200 - refId: if_capacity_is_too_low - type: classic_conditions - for: 30m - title: if_capacity_is_too_low - uid: if_capacity_is_too_low + reducer: mean + refId: pending_jobs_and_capacity_compare + type: math diff --git a/tools/grafana/dashboards/demo_dashboard.json b/tools/grafana/dashboards/demo_dashboard.json index 3589794499..f654bc8e6f 100644 --- a/tools/grafana/dashboards/demo_dashboard.json +++ b/tools/grafana/dashboards/demo_dashboard.json @@ -29,6 +29,19 @@ "liveNow": false, "panels": [ { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 37, + "panels": [], + "title": "System", + "type": "row" + }, + { "datasource": { "type": "prometheus", "uid": "awx_prometheus" @@ -73,7 +86,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -82,61 +96,15 @@ ] } }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "semi-dark-red", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "running" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "dark-green", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "canceled" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "super-light-yellow", - "mode": "fixed" - } - } - ] - } - ] + "overrides": [] }, "gridPos": { - "h": 6, + "h": 8, "w": 12, "x": 0, - "y": 0 + "y": 1 }, - "id": 8, + "id": 14, "options": { "legend": { "calcs": [], @@ -155,106 +123,90 @@ "type": "prometheus", "uid": "awx_prometheus" }, - "editorMode": "code", - "expr": "awx_status_total", - "legendFormat": "{{status}}", + "editorMode": "builder", + "expr": "awx_database_connections_total", + "legendFormat": "__auto", "range": true, "refId": "A" } ], - "title": "job status", + "title": "Database", "type": "timeseries" }, { - "datasource": { - "type": "prometheus", - "uid": "awx_prometheus" - }, + "datasource": {}, "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" } - }, - "mappings": [], + ], "thresholds": { "mode": "absolute", "steps": [ { - "color": "green" + "color": "light-blue", + "value": null }, { "color": "red", "value": 80 } ] - } + }, + "unit": "short" }, "overrides": [] }, "gridPos": { - "h": 6, - "w": 12, + "h": 4, + "w": 5, "x": 12, - "y": 0 + "y": 1 }, - "id": 27, + "id": 25, + "links": [], + "maxDataPoints": 100, "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^tower_version$/", + "values": false }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "textMode": "auto" }, + "pluginVersion": "9.1.6", "targets": [ { "datasource": { "type": "prometheus", - "uid": "awx_prometheus" + "uid": "000000021" }, - "editorMode": "builder", - "expr": "awx_instance_remaining_capacity", - "legendFormat": "remaining_capacity_{{hostname}}", - "range": true, + "editorMode": "code", + "exemplar": false, + "expr": "awx_system_info", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", "refId": "A" } ], - "title": "Remaining Instance Capacity", - "type": "timeseries" + "title": "Controller Version", + "type": "stat" }, { "datasource": { @@ -263,49 +215,14 @@ }, "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, + "displayName": "Instances", "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { - "color": "green" - }, - { - "color": "red", - "value": 80 + "color": "light-blue", + "value": null } ] } @@ -313,886 +230,1277 @@ "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 6 + "h": 4, + "w": 5, + "x": 12, + "y": 5 }, - "id": 20, + "id": 13, "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "textMode": "auto" }, + "pluginVersion": "9.1.6", "targets": [ { "datasource": { "type": "prometheus", "uid": "awx_prometheus" }, - "editorMode": "builder", - "expr": "awx_instance_consumed_capacity", - "legendFormat": "__auto", + "editorMode": "code", + "expr": "count(awx_instance_info)", + "interval": "", + "legendFormat": " ", "range": true, "refId": "A" } ], - "title": "Consumed Instance Capacity", - "type": "timeseries" + "title": "Controller Node Count", + "type": "stat" }, { - "datasource": { - "type": "prometheus", - "uid": "awx_prometheus" + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 9 }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 32, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "id": 35, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "awx_prometheus" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - } - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "{instance=\"awx1:8013\", job=\"awx\", node=\"awx_1\"}" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "blue", - "mode": "fixed" + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } }, - { - "id": "displayName", - "value": "Job Events Processed/Minute" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] } - ] - }, - { - "matcher": { - "id": "byName", - "options": "{__name__=\"callback_receiver_events_queue_size_redis\", instance=\"awx1:8013\", job=\"awx\", node=\"awx_1\"}" }, - "properties": [ + "overrides": [ { - "id": "color", - "value": { - "fixedColor": "semi-dark-red", - "mode": "fixed" - } + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "semi-dark-red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "running" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "dark-green", + "mode": "fixed" + } + } + ] }, { - "id": "displayName", - "value": "Redis Queue Size" + "matcher": { + "id": "byName", + "options": "canceled" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "super-light-yellow", + "mode": "fixed" + } + } + ] } ] - } - ] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 6 - }, - "id": 24, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "awx_prometheus" }, - "editorMode": "builder", - "expr": "callback_receiver_events_queue_size_redis", - "legendFormat": "__auto", - "range": true, - "refId": "A" + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 10 + }, + "id": 8, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "awx_prometheus" + }, + "editorMode": "code", + "expr": "awx_status_total", + "legendFormat": "{{status}}", + "range": true, + "refId": "A" + } + ], + "title": "job status", + "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "awx_prometheus" }, - "editorMode": "code", - "expr": "irate(callback_receiver_events_insert_db[1m])*60", - "hide": false, - "legendFormat": "__auto", - "range": true, - "refId": "B" - } - ], - "title": "Redis Queue Size vs. Job Events Processed/Minute", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "awx_prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ + "overrides": [ { - "color": "green" + "matcher": { + "id": "byName", + "options": "awx_1-error" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "dark-red", + "mode": "fixed" + } + } + ] }, { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [ - { - "__systemRef": "hideSeriesFrom", - "matcher": { - "id": "byNames", - "options": { - "mode": "exclude", - "names": [ - "task_manager_pending_processed{instance=\"awx1:8013\", job=\"awx\", node=\"awx_1\"}", - "task_manager_running_processed{instance=\"awx1:8013\", job=\"awx\", node=\"awx_1\"}", - "task_manager_tasks_started{instance=\"awx1:8013\", job=\"awx\", node=\"awx_1\"}" - ], - "prefix": "All except:", - "readOnly": true - } - }, - "properties": [ + "matcher": { + "id": "byName", + "options": "awx_1-successful" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "light-purple", + "mode": "fixed" + } + } + ] + }, { - "id": "custom.hideFrom", - "value": { - "legend": false, - "tooltip": false, - "viz": true - } + "matcher": { + "id": "byName", + "options": "awx_3-failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "light-orange", + "mode": "fixed" + } + } + ] } ] - } - ] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 14 - }, - "id": 12, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "awx_prometheus" }, - "expr": "task_manager_running_processed", - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "awx_prometheus" + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 10 }, - "expr": "task_manager_pending_processed", - "hide": false, - "refId": "B" - }, - { - "datasource": { - "type": "prometheus", - "uid": "awx_prometheus" + "id": 29, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } }, - "expr": "task_manager_tasks_blocked", - "hide": false, - "refId": "D" + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "awx_prometheus" + }, + "editorMode": "builder", + "expr": "awx_instance_status_total", + "legendFormat": "{{node}}-{{status}}", + "range": true, + "refId": "A" + } + ], + "title": "Job Status per Instance", + "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "awx_prometheus" }, - "expr": "task_manager_tasks_started", - "hide": false, - "refId": "C" - } - ], - "title": "Task manager workload", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "awx_prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 18, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "smooth", - "lineStyle": { - "fill": "solid" - }, - "lineWidth": 1, - "pointSize": 6, - "scaleDistribution": { - "type": "linear" + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 18 + }, + "id": 16, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "thresholdsStyle": { - "mode": "off" + "tooltip": { + "mode": "single", + "sort": "none" } }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "awx_prometheus" }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "{__name__=\"callback_receiver_event_processing_avg_seconds\", instance=\"awx1:8013\", job=\"awx\", node=\"awx_1\"}" + "editorMode": "builder", + "expr": "dependency_manager__schedule_seconds", + "legendFormat": "__auto", + "range": true, + "refId": "A" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "orange", - "mode": "fixed" - } + { + "datasource": { + "type": "prometheus", + "uid": "awx_prometheus" }, - { - "id": "displayName", - "value": "Avg Job Event Processing Lag Time" - } - ] - } - ] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 14 - }, - "id": 26, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "editorMode": "builder", + "expr": "dependency_manager_generate_dependencies_seconds", + "hide": false, + "legendFormat": "__auto", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "awx_prometheus" + }, + "editorMode": "builder", + "expr": "dependency_manager_get_tasks_seconds", + "hide": false, + "legendFormat": "__auto", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "awx_prometheus" + }, + "hide": false, + "refId": "D" + } + ], + "title": "Dependency Manager Timings", + "type": "timeseries" }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ { "datasource": { "type": "prometheus", "uid": "awx_prometheus" }, - "editorMode": "builder", - "expr": "callback_receiver_event_processing_avg_seconds", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "Avg Job Event Processing Lag Time", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "awx_prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 18 + }, + "id": 18, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "thresholdsStyle": { - "mode": "off" + "tooltip": { + "mode": "single", + "sort": "none" } }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "awx_prometheus" }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 22 - }, - "id": 10, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "editorMode": "builder", + "expr": "workflow_manager__schedule_seconds", + "legendFormat": "__auto", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "awx_prometheus" + }, + "hide": false, + "refId": "B" + } + ], + "title": "Workflow Manager Timings", + "type": "timeseries" }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ { "datasource": { "type": "prometheus", "uid": "awx_prometheus" }, - "editorMode": "builder", - "expr": "task_manager_process_pending_tasks_seconds", - "legendFormat": "__auto", - "range": true, - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "awx_prometheus" + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] }, - "editorMode": "builder", - "expr": "task_manager_process_running_tasks_seconds", - "hide": false, - "legendFormat": "__auto", - "range": true, - "refId": "B" - }, - { - "datasource": { - "type": "prometheus", - "uid": "awx_prometheus" + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 26 }, - "editorMode": "builder", - "expr": "task_manager_get_tasks_seconds", - "hide": false, - "legendFormat": "__auto", - "range": true, - "refId": "D" - }, - { - "datasource": { - "type": "prometheus", - "uid": "awx_prometheus" + "id": 27, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } }, - "editorMode": "builder", - "expr": "task_manager_commit_seconds", - "hide": false, - "legendFormat": "__auto", - "range": true, - "refId": "C" + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "awx_prometheus" + }, + "editorMode": "builder", + "expr": "awx_instance_remaining_capacity", + "legendFormat": "remaining_capacity_{{hostname}}", + "range": true, + "refId": "A" + } + ], + "title": "Remaining Instance Capacity", + "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "awx_prometheus" }, - "editorMode": "builder", - "expr": "task_manager__schedule_seconds", - "hide": false, - "legendFormat": "__auto", - "range": true, - "refId": "E" - } - ], - "title": "Task manager timings", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "awx_prometheus" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 26 + }, + "id": 20, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "thresholdsStyle": { - "mode": "off" + "tooltip": { + "mode": "single", + "sort": "none" } }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "awx_prometheus" }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 22 - }, - "id": 16, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" + "editorMode": "builder", + "expr": "awx_instance_consumed_capacity", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Consumed Instance Capacity", + "type": "timeseries" } + ], + "title": "Jobs and Capacity", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 10 }, - "targets": [ + "id": 33, + "panels": [ { "datasource": { "type": "prometheus", "uid": "awx_prometheus" }, - "editorMode": "builder", - "expr": "dependency_manager__schedule_seconds", - "legendFormat": "__auto", - "range": true, - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "awx_prometheus" + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "__systemRef": "hideSeriesFrom", + "matcher": { + "id": "byNames", + "options": { + "mode": "exclude", + "names": [ + "task_manager_pending_processed{instance=\"awx1:8013\", job=\"awx\", node=\"awx_1\"}", + "task_manager_running_processed{instance=\"awx1:8013\", job=\"awx\", node=\"awx_1\"}", + "task_manager_tasks_started{instance=\"awx1:8013\", job=\"awx\", node=\"awx_1\"}" + ], + "prefix": "All except:", + "readOnly": true + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": false, + "tooltip": false, + "viz": true + } + } + ] + } + ] }, - "editorMode": "builder", - "expr": "dependency_manager_generate_dependencies_seconds", - "hide": false, - "legendFormat": "__auto", - "range": true, - "refId": "B" - }, - { - "datasource": { - "type": "prometheus", - "uid": "awx_prometheus" + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 2 }, - "editorMode": "builder", - "expr": "dependency_manager_get_tasks_seconds", - "hide": false, - "legendFormat": "__auto", - "range": true, - "refId": "C" + "id": 12, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "awx_prometheus" + }, + "expr": "task_manager_running_processed", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "awx_prometheus" + }, + "expr": "task_manager_pending_processed", + "hide": false, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "awx_prometheus" + }, + "expr": "task_manager_tasks_blocked", + "hide": false, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "awx_prometheus" + }, + "expr": "task_manager_tasks_started", + "hide": false, + "refId": "C" + } + ], + "title": "Task manager workload", + "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "awx_prometheus" }, - "hide": false, - "refId": "D" - } - ], - "title": "Dependency Manager Timings", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "awx_prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 2 + }, + "id": 10, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "thresholdsStyle": { - "mode": "off" + "tooltip": { + "mode": "single", + "sort": "none" } }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "awx_prometheus" }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, + "editorMode": "builder", + "expr": "task_manager_process_pending_tasks_seconds", + "legendFormat": "__auto", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "awx_prometheus" + }, + "editorMode": "builder", + "expr": "task_manager_process_running_tasks_seconds", + "hide": false, + "legendFormat": "__auto", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "awx_prometheus" + }, + "editorMode": "builder", + "expr": "task_manager_get_tasks_seconds", + "hide": false, + "legendFormat": "__auto", + "range": true, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "awx_prometheus" + }, + "editorMode": "builder", + "expr": "task_manager_commit_seconds", + "hide": false, + "legendFormat": "__auto", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "awx_prometheus" + }, + "editorMode": "builder", + "expr": "task_manager__schedule_seconds", + "hide": false, + "legendFormat": "__auto", + "range": true, + "refId": "E" + } + ], + "title": "Task manager timings", + "type": "timeseries" + } + ], + "title": "Task Manager", + "type": "row" + }, + { + "collapsed": true, "gridPos": { - "h": 8, - "w": 12, + "h": 1, + "w": 24, "x": 0, - "y": 30 - }, - "id": 14, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "y": 11 }, - "targets": [ + "id": 31, + "panels": [ { "datasource": { "type": "prometheus", "uid": "awx_prometheus" }, - "editorMode": "builder", - "expr": "awx_database_connections_total", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "Database", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "awx_prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 18, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "smooth", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 6, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 36 + }, + "id": 26, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "thresholdsStyle": { - "mode": "off" + "tooltip": { + "mode": "single", + "sort": "none" } }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "awx_prometheus" }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 30 - }, - "id": 18, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "editorMode": "builder", + "expr": "callback_receiver_event_processing_avg_seconds", + "legendFormat": "{{node}}-processing-avg-seconds", + "range": true, + "refId": "A" + } + ], + "title": "Avg Job Event Processing Lag Time", + "type": "timeseries" }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ { "datasource": { "type": "prometheus", "uid": "awx_prometheus" }, - "editorMode": "builder", - "expr": "workflow_manager__schedule_seconds", - "legendFormat": "__auto", - "range": true, - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "awx_prometheus" + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 31, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "awx_2-job-event-processing-rate" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "light-green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "awx_1-redis-queue" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "semi-dark-red", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 36 + }, + "id": 24, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } }, - "hide": false, - "refId": "B" + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "awx_prometheus" + }, + "editorMode": "builder", + "expr": "callback_receiver_events_queue_size_redis", + "legendFormat": "{{node}}-redis-queue", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "awx_prometheus" + }, + "editorMode": "code", + "expr": "irate(callback_receiver_events_insert_db[1m])*60", + "hide": false, + "legendFormat": "{{node}}-job-event-processing-rate", + "range": true, + "refId": "B" + } + ], + "title": "Redis Queue Size vs. Job Events Processed/Minute", + "type": "timeseries" } ], - "title": "Workflow Manager Timings", - "type": "timeseries" + "title": "Job Event Processing", + "type": "row" } ], "refresh": "5s", @@ -1210,6 +1518,6 @@ "timezone": "", "title": "awx-demo", "uid": "GISWZOXnk", - "version": 2, + "version": 12, "weekStart": "" } |