diff --git a/ansible/roles/prometheus/tasks/main.yml b/ansible/roles/prometheus/tasks/main.yml
index c1c9453..d6a1b7c 100644
--- a/ansible/roles/prometheus/tasks/main.yml
+++ b/ansible/roles/prometheus/tasks/main.yml
@@ -20,6 +20,18 @@
   notify:
     - restart prometheus
 
+# Render the Alertmanager configuration. The template embeds the SMTP password
+# fetched from passwordstore, so the rendered file holds a secret.
+# NOTE(review): owner/group/mode are not set here; confirm which account runs
+# Alertmanager and restrict the file accordingly (e.g. mode '0640').
+- name: Add alertmanager configuration
+  tags: alertmanager-configure
+  ansible.builtin.template:
+    src: alertmanager.yml.j2
+    dest: '{{ alertmanager_config }}'
+  notify:
+    - restart alertmanager
+
 - name: Create folder for rules definition
   ansible.builtin.file:
     path: /etc/prometheus/rules
diff --git a/ansible/roles/prometheus/templates/alertmanager.yml.j2 b/ansible/roles/prometheus/templates/alertmanager.yml.j2
new file mode 100644
index 0000000..7934f50
--- /dev/null
+++ b/ansible/roles/prometheus/templates/alertmanager.yml.j2
@@ -0,0 +1,68 @@
+{{ ansible_managed | comment }}
+# See https://prometheus.io/docs/alerting/configuration/ for documentation.
+
+global:
+  # The smarthost and SMTP sender used for mail notifications.
+  smtp_smarthost: 'mail.labolyon.fr:587'
+  smtp_from: 'alerts@labolyon.fr'
+  smtp_auth_username: 'alerts@labolyon.fr'
+  # The password comes from the passwordstore lookup; to_json renders it as a
+  # double-quoted scalar so special characters cannot break the YAML.
+  smtp_auth_password: {{ lookup('community.general.passwordstore', 'monitoring/e-mail/alerts@labolyon.fr') | to_json }}
+
+# The directory from which notification templates are read.
+templates:
+  - '/etc/prometheus/alertmanager_templates/*.tmpl'
+
+# The root route on which each incoming alert enters.
+route:
+  # The labels by which incoming alerts are grouped together. For example,
+  # multiple alerts coming in for cluster=A and alertname=LatencyHigh would
+  # be batched into a single group.
+  #
+  # To aggregate by all possible labels use '...' as the sole label name.
+  # This effectively disables aggregation entirely, passing through all
+  # alerts as-is. This is unlikely to be what you want, unless you have
+  # a very low alert volume or your upstream notification system performs
+  # its own grouping. Example: group_by: [...]
+  group_by: ['alertname', 'cluster', 'service']
+
+  # When a new group of alerts is created by an incoming alert, wait at
+  # least 'group_wait' to send the initial notification.
+  # This ensures that multiple alerts for the same group that start
+  # firing shortly after one another are batched together on the first
+  # notification.
+  group_wait: 30s
+
+  # When the first notification was sent, wait 'group_interval' to send a batch
+  # of new alerts that started firing for that group.
+  group_interval: 5m
+
+  # If an alert has successfully been sent, wait 'repeat_interval' to
+  # resend it.
+  repeat_interval: 3h
+
+  # A default receiver
+  receiver: all-admins-email
+
+  # All the above attributes are inherited by all child routes and can
+  # be overwritten on each.
+
+# Inhibition rules allow muting a set of alerts given that another alert is
+# firing.
+# We use this to mute any warning-level notifications if the same alert is
+# already critical.
+inhibit_rules:
+  - source_matchers: ['severity="critical"']
+    target_matchers: ['severity="warning"']
+    # Apply inhibition only if alertname, cluster and service are the same.
+    # CAUTION:
+    # If all label names listed in `equal` are missing
+    # from both the source and target alerts,
+    # the inhibition rule will apply!
+    equal: ['alertname', 'cluster', 'service']
+
+receivers:
+  - name: 'all-admins-email'
+    email_configs:
+      - to: 'mirsal@mirsal.fr'