Setup
Используется версия 1.80.0 - https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.80.0
https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v1.80.0/victoria-metrics-linux-amd64-v1.80.0-cluster.tar.gz
https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v1.80.0/vmutils-linux-amd64-v1.80.0.tar.gz
Для vmalert придется использовать enterprise-версию
Указанные выше архивы содержат бинари с приложениями
Все приложения из обоих архивов распакованы в /usr/local/bin/*
Из имен бинарей убрано окончание -prod
Например: vmstorage-prod -> vmstorage
root@mon-vm-01:/usr/local/bin# ls -1
vmagent-prod
vmalert-prod
vmauth-prod
vmbackup-prod
vmctl-prod
vminsert-prod
vmrestore-prod
vmselect-prod
vmstorage-prod
root@mon-vm-01:/usr/local/bin# ls -1 | cut -d'-' -f1 | xargs -I{} mv {}-prod {}
root@mon-vm-01:/usr/local/bin# ls -1
vmagent
vmalert
vmauth
vmbackup
vmctl
vminsert
vmrestore
vmselect
vmstorage
Создан пользователь victoriametrics
с группой victoriametrics
useradd -d /nonexistent -M -r -s /usr/sbin/nologin -U victoriametrics
systemd service взят отсюда - https://github.com/VictoriaMetrics/ansible-playbooks/blob/master/roles/vmagent/templates/vmagent.service.j2
Вот такой шаблон для списка сервисов:
- vmagent
- vmalert
- vmauth
- vminsert
- vmselect
- vmstorage
[Unit]
Description=VictoriaMetrics ${app}
After=network.target
[Service]
Type=simple
User=victoriametrics
Group=victoriametrics
EnvironmentFile=/etc/${app}/${app}.conf
ExecStart=/usr/local/bin/${app} -envflag.enable
ExecReload=/bin/kill -SIGHUP $MAINPID
SyslogIdentifier=${app}
Restart=always
PrivateTmp=yes
ProtectHome=yes
NoNewPrivileges=yes
ProtectSystem=full
ProtectControlGroups=true
ProtectKernelModules=true
ProtectKernelTunables=yes
[Install]
WantedBy=multi-user.target
Создать сервисы:
root@mon-vm-01:~# echo 'vmagent
vmalert
vmauth
vminsert
vmselect
vmstorage' | xargs -I{} sh -c "sed -e 's/\${app}/{}/g' app.service > {}.service"
root@mon-vm-01:~# cat vmagent.service
[Unit]
Description=VictoriaMetrics vmagent
After=network.target
[Service]
Type=simple
User=victoriametrics
Group=victoriametrics
EnvironmentFile=/etc/vmagent/vmagent.conf
ExecStart=/usr/local/bin/vmagent -envflag.enable
SyslogIdentifier=vmagent
Restart=always
PrivateTmp=yes
ProtectHome=yes
NoNewPrivileges=yes
ProtectSystem=full
ProtectControlGroups=true
ProtectKernelModules=true
ProtectKernelTunables=yes
[Install]
WantedBy=multi-user.target
root@mon-vm-01:~# mv !(app.service) /etc/systemd/system/.
(паттерн !(...) требует bash с включенной опцией extglob: shopt -s extglob)
root@mon-vm-01:~# ls
app.service
root@mon-vm-01:~# systemctl daemon-reload
root@mon-vm-01:~# systemctl status vmagent
● vmagent.service - Description=VictoriaMetrics vmagent
Loaded: loaded (/etc/systemd/system/vmagent.service; disabled; vendor preset: enabled)
Active: inactive (dead)
Заэнейблить сервисы:
root@mon-vm-01:~# echo 'vmagent
vmalert
vmauth
vminsert
vmselect
vmstorage' | xargs -I{} systemctl enable {}
Created symlink /etc/systemd/system/multi-user.target.wants/vmagent.service → /etc/systemd/system/vmagent.service.
Created symlink /etc/systemd/system/multi-user.target.wants/vmalert.service → /etc/systemd/system/vmalert.service.
Created symlink /etc/systemd/system/multi-user.target.wants/vmauth.service → /etc/systemd/system/vmauth.service.
Created symlink /etc/systemd/system/multi-user.target.wants/vminsert.service → /etc/systemd/system/vminsert.service.
Created symlink /etc/systemd/system/multi-user.target.wants/vmselect.service → /etc/systemd/system/vmselect.service.
Created symlink /etc/systemd/system/multi-user.target.wants/vmstorage.service → /etc/systemd/system/vmstorage.service.
Приложения конфигурируются через переменные окружения (https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#environment-variables)
В файле /etc/${app}/${app}.conf
лежит список переменных в формате:
insert_maxQueueDuration=<duration>
...
То есть это просто имя флага, где точка заменена на подчеркивание
Создать папки для конфигов приложений:
root@mon-vm-01:~# echo 'vmagent
vmalert
vmauth
vminsert
vmselect
vmstorage' | xargs -I{} sh -c "mkdir /etc/{}; chmod 755 /etc/{}"
root@mon-vm-01:~# ls -1 /etc/ | grep vm
vmagent
vmalert
vmauth
vminsert
vmselect
vmstorage
vmalert взят из enterprise версии https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v1.80.0/vmutils-linux-amd64-v1.80.0-enterprise.tar.gz
Конфигурирование:
- vmstorage:
-storageDataPath
-/var/lib/vmstorage
mkdir /var/lib/vmstorage; chown -R victoriametrics:victoriametrics /var/lib/vmstorage
- vmselect:
-cacheDataPath
-/var/lib/vmselect
node_exporter версии 1.3.1 - https://github.com/prometheus/node_exporter/releases/download/v1.3.1/node_exporter-1.3.1.linux-amd64.tar.gz
Пользователь:
useradd -d /nonexistent -M -r -s /usr/sbin/nologin -U node_exporter
systemd service:
[Unit]
Description=Node Exporter
After=network.target
[Service]
Type=simple
User=node_exporter
Group=node_exporter
ExecStart=/usr/local/bin/node_exporter
SyslogIdentifier=node_exporter
Restart=always
PrivateTmp=yes
ProtectHome=yes
NoNewPrivileges=yes
ProtectSystem=full
ProtectControlGroups=true
ProtectKernelModules=true
ProtectKernelTunables=yes
[Install]
WantedBy=multi-user.target
windows_exporter 0.19.0
https://github.com/prometheus-community/windows_exporter/releases/tag/v0.19.0
https://github.com/prometheus-community/windows_exporter
mon-proxy-01
аналогичным всем остальным утилитам способом установлен vmauth
Порты везде открываются через AWX
Для решения вот этой проблемы https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3061 используется nginx
Со следующей конфигурацией:
root@mon-proxy-01x:/etc/vmauth# cat /etc/nginx/sites-enabled/default
upstream vminsert {
server mon-vm-01.g01.i-free.ru:8480;
server mon-vm-02.g01.i-free.ru:8480;
server mon-vm-03.g01.i-free.ru:8480;
}
upstream vmselect {
server mon-vm-01.g01.i-free.ru:8481;
server mon-vm-02.g01.i-free.ru:8481;
server mon-vm-03.g01.i-free.ru:8481;
}
server {
listen 8428;
location /insert {
proxy_pass http://vminsert;
}
location /select {
proxy_pass http://vmselect;
}
}
Alertmanager version - 0.24.0
https://github.com/prometheus/alertmanager/releases/download/v0.24.0/alertmanager-0.24.0.linux-amd64.tar.gz
Бинари alertmanager и amtool кладутся в /usr/local/bin/.
Пользователь:
useradd -d /nonexistent -M -r -s /usr/sbin/nologin -U alertmanager
Создать папку для хранения стейтов алертов (что акнуто, что отправлено, итд):
mkdir /var/lib/alertmanager; chown -R alertmanager:alertmanager /var/lib/alertmanager
Папка для конфигов:
mkdir /etc/alertmanager; chown -R alertmanager:alertmanager /etc/alertmanager/
systemd service:
root@mon-vm-01:~# cat /etc/systemd/system/alertmanager.service
[Unit]
Description=Alertmanager
After=network.target
[Service]
Type=simple
User=alertmanager
Group=alertmanager
ExecStart=/usr/local/bin/alertmanager --config.file=/etc/alertmanager/main.yaml --storage.path=/var/lib/alertmanager/ --log.level=debug --cluster.peer=mon-vm-02.g01.i-free.ru:9094 --cluster.peer=mon-vm-03.g01.i-free.ru:9094
ExecReload=/bin/kill -SIGHUP $MAINPID
SyslogIdentifier=alertmanager
Restart=always
PrivateTmp=yes
ProtectHome=yes
NoNewPrivileges=yes
ProtectSystem=full
ProtectControlGroups=true
ProtectKernelModules=true
ProtectKernelTunables=yes
[Install]
WantedBy=multi-user.target
grafana
stable релиз по инструкции - https://grafana.com/docs/grafana/latest/setup-grafana/installation/debian/
apt-get install -y apt-transport-https; apt-get install -y software-properties-common wget; wget -q -O /usr/share/keyrings/grafana.key https://packages.grafana.com/gpg.key; echo "deb [signed-by=/usr/share/keyrings/grafana.key] https://packages.grafana.com/oss/deb stable main" | sudo tee -a /etc/apt/sources.list.d/grafana.list; apt-get update; apt-get install grafana
Prometheus-msteams version - 1.5.1 https://github.com/prometheus-msteams/prometheus-msteams/releases/download/v1.5.1/prometheus-msteams-linux-amd64
бинарь в /usr/local/bin/
юзер
useradd -d /nonexistent -M -r -s /usr/sbin/nologin -U prometheus-msteams
папка для конфигов
mkdir /etc/prometheus-msteams; chown -R prometheus-msteams:prometheus-msteams /etc/prometheus-msteams
конфиг и шаблон для алерта
vim /etc/prometheus-msteams/config.yml
connectors:
- alert_channel: "WEBHOOK"
vim /etc/prometheus-msteams/card.tmpl
{{ define "teams.card" }}
{
"@type": "MessageCard",
"@context": "http://schema.org/extensions",
"themeColor": "{{- if eq .Status "resolved" -}}2DC72D
{{- else if eq .Status "firing" -}}
{{- if eq .CommonLabels.severity "critical" -}}8C1A1A
{{- else if eq .CommonLabels.severity "warning" -}}FFA500
{{- else -}}808080{{- end -}}
{{- else -}}808080{{- end -}}",
"summary": "VictoriaMetrics Alerts",
"title": "VictoriaMetrics ({{ .Status }})",
"sections": [ {{$externalUrl := .ExternalURL}}
{{- range $index, $alert := .Alerts }}{{- if $index }},{{- end }}
{
"facts": [
{{- range $key, $value := $alert.Annotations }}
{
"name": "{{ reReplaceAll "_" "\\\\_" $key }}",
"value": "{{ reReplaceAll "_" "\\\\_" $value }}"
},
{{- end -}}
{{$c := counter}}{{ range $key, $value := $alert.Labels }}{{if call $c}},{{ end }}
{
"name": "{{ reReplaceAll "_" "\\\\_" $key }}",
"value": "{{ reReplaceAll "_" "\\\\_" $value }}"
}
{{- end }}
],
"markdown": true
}
{{- end }}
]
}
{{ end }}
создаем юнит
vim /etc/systemd/system/prometheus-msteams.service
[Unit]
Description=Prometheus-msteams
Wants=network-online.target
After=network-online.target
[Service]
User=prometheus-msteams
Group=prometheus-msteams
Type=simple
ExecStart=/usr/local/bin/prometheus-msteams -config-file /etc/prometheus-msteams/config.yml -template-file /etc/prometheus-msteams/card.tmpl
[Install]
WantedBy=multi-user.target
systemctl daemon-reload
systemctl start prometheus-msteams.service
systemctl enable prometheus-msteams.service
OpenResty установлен по этой статье https://www.installing.in/how-to-install-openresty-on-debian-10/
Изменения в systemd сервисе
root@mon-proxy-01y:/usr/local/openresty# diff /tmp/openresty.service.old /lib/systemd/system/openresty.service
21,24c21,24
< ExecStartPre=/usr/local/openresty/nginx/sbin/nginx -t -q -g 'daemon on; master_process on;'
< ExecStart=/usr/local/openresty/nginx/sbin/nginx -g 'daemon on; master_process on;'
< ExecReload=/usr/local/openresty/nginx/sbin/nginx -g 'daemon on; master_process on;' -s reload
< ExecStop=-/sbin/start-stop-daemon --quiet --stop --retry QUIT/5 --pidfile /usr/local/openresty/nginx/logs/nginx.pid
---
> ExecStartPre=/usr/local/openresty/nginx/sbin/nginx -t -q -g 'daemon on; master_process on;' -c /etc/nginx/nginx.conf
> ExecStart=/usr/local/openresty/nginx/sbin/nginx -g 'daemon on; master_process on;' -c /etc/nginx/nginx.conf
> ExecReload=/usr/local/openresty/nginx/sbin/nginx -g 'daemon on; master_process on;' -s reload -c /etc/nginx/nginx.conf
> ExecStop=-/sbin/start-stop-daemon --quiet --stop --retry QUIT/5 --pidfile /usr/local/openresty/nginx/logs/nginx.pid -c /etc/nginx/nginx.conf
Так же нужно перенести дефолтный конфиг openresty в /etc/nginx:
root@mon-proxy-01y:/etc/nginx# rm -rf !(sites-enabled)
root@mon-proxy-01y:/etc/nginx# cp -R /usr/local/openresty/nginx/conf/. /etc/nginx/.
И привести секцию http в nginx.conf к следующему виду:
http {
include mime.types;
default_type application/octet-stream;
#log_format main '$remote_addr - $remote_user [$time_local] "$request" '
# '$status $body_bytes_sent "$http_referer" '
# '"$http_user_agent" "$http_x_forwarded_for"';
#access_log logs/access.log main;
sendfile on;
#tcp_nopush on;
#keepalive_timeout 0;
keepalive_timeout 65;
#gzip on;
include /etc/nginx/sites-enabled/*;
}
apt install libjq-dev
Вместо alertmanager'a используется cortex-alertmanager (потому что у него есть multitenancy)
Взята версия Cortex 1.13.0
https://github.com/cortexproject/cortex/releases/download/v1.13.0/cortex-linux-amd64
Директория для конфигов - /etc/cortex-alertmanager
root@mon-vm-01:/etc/cortex-alertmanager# tree
.
├── configs
│ └── 1.yml
├── fallback.yaml
└── main.yaml
1 directory, 3 files
root@mon-vm-01:/etc/cortex-alertmanager# cat main.yaml
target: alertmanager
server:
http_listen_port: 9093
alertmanager:
external_url: http://mon-vm-01.test.i-free.ru:9093/alertmanager
data_dir: /var/lib/cortex-alertmanager
retention: 120h
poll_interval: 15s
sharding_enabled: false
fallback_config_file: /etc/cortex-alertmanager/fallback.yaml
# [auto_webhook_root: <string> | default = ""]
cluster:
listen_address: "0.0.0.0:9094"
advertise_address: ""
peers: "mon-vm-01.test.i-free.ru:9094,mon-vm-02.test.i-free.ru:9094,mon-vm-03.test.i-free.ru:9094"
peer_timeout: 15s
gossip_interval: 200ms
push_pull_interval: 1m
storage:
type: local
local:
path: /etc/cortex-alertmanager/configs
wget https://github.com/cortexproject/cortex/releases/download/v1.13.0/cortex-linux-amd64 -O /usr/local/bin/cortex-alertmanager
chmod 755 /usr/local/bin/cortex-alertmanager
useradd -d /nonexistent -M -r -s /usr/sbin/nologin -U cortex-alertmanager
systemd unit:
cat <<EOF > /etc/systemd/system/cortex-alertmanager.service
[Unit]
Description=Horizontally scalable, highly available, multi-tenant, long term Prometheus.
Documentation=https://cortexmetrics.io/docs
Wants=network-online.target
After=network-online.target
[Service]
Restart=always
User=cortex-alertmanager
ExecStart=/usr/local/bin/cortex-alertmanager --config.file /etc/cortex-alertmanager/main.yaml
ExecReload=/bin/kill -HUP $MAINPID
TimeoutStopSec=20s
SendSIGKILL=no
WorkingDirectory=/var/lib/cortex-alertmanager
[Install]
WantedBy=multi-user.target
EOF
systemctl daemon-reload
mkdir /var/lib/cortex-alertmanager; chown -R cortex-alertmanager:cortex-alertmanager /var/lib/cortex-alertmanager
mkdir /etc/cortex-alertmanager; chown -R cortex-alertmanager:cortex-alertmanager /etc/cortex-alertmanager
mkdir /etc/cortex-alertmanager/configs; chown -R cortex-alertmanager:cortex-alertmanager /etc/cortex-alertmanager/configs/
systemctl enable cortex-alertmanager.service
Grafana Provisioning
Организации можно создавать только вручную (или через API)
Поэтому пока будем делать их вручную
LDAP
группы в ldap под тенанты кроме первого тенанта
в первый тенант (monitoring) добавляем по группе techsupp@i-free.com
Для ансиблизирования keepalived использована эта роль https://github.com/Oefenweb/ansible-keepalived (немного модифицирована)
Переналив машин
mon-proxy-01x.g01.i-free.ru (hoster-kvm-23.g01.i-free.ru)
virsh shutdown mon-proxy-01x.g01.i-free.ru
virsh undefine --remove-all-storage mon-proxy-01x.g01.i-free.ru
vm_create.sh -m 1024 -c 1 -s 20 -b vlanbr294 -i 172.27.245.237 -n 255.255.255.0 -g 172.27.247.254 ifree-buster-2.tar mon-proxy-01x.g01.i-free.ru -d "172.27.245.100" -e "Monitoring (prod)" --cpu-model SandyBridge
virsh -c qemu:///system start mon-proxy-01x.g01.i-free.ru
virsh attach-interface mon-proxy-01x.g01.i-free.ru bridge vlanbr300 --model virtio --persistent
mon-proxy-01y.g01.i-free.ru (hoster-kvm-24.g01.i-free.ru)
virsh shutdown mon-proxy-01y.g01.i-free.ru
virsh undefine --remove-all-storage mon-proxy-01y.g01.i-free.ru
vm_create.sh -m 1024 -c 1 -s 20 -b vlanbr294 -i 172.27.245.227 -n 255.255.255.0 -g 172.27.247.254 ifree-buster-2.tar mon-proxy-01y.g01.i-free.ru -d "172.27.245.100" -e "Monitoring (prod)" --cpu-model SandyBridge
virsh -c qemu:///system start mon-proxy-01y.g01.i-free.ru
virsh attach-interface mon-proxy-01y.g01.i-free.ru bridge vlanbr300 --model virtio --persistent
mon-exporters-01x.g01.i-free.ru (hoster-kvm-25.g01.i-free.ru)
vm_create.sh -m 4096 -c 4 -s 40 -b vlanbr294 -i 172.27.245.248 -n 255.255.255.0 -g 172.27.247.254 ifree-buster-2.tar mon-exporters-01x.g01.i-free.ru -d "172.27.245.100" -e "Monitoring (prod)" --cpu-model SandyBridge
virsh attach-interface mon-exporters-01x.g01.i-free.ru bridge vlanbr300 --model virtio --persistent
mon-exporters-01y.g01.i-free.ru (hoster-kvm-24.g01.i-free.ru)
vm_create.sh -m 4096 -c 4 -s 40 -b vlanbr294 -i 172.27.245.250 -n 255.255.255.0 -g 172.27.247.254 ifree-buster-2.tar mon-exporters-01y.g01.i-free.ru -d "172.27.245.100" -e "Monitoring (prod)" --cpu-model SandyBridge
virsh attach-interface mon-exporters-01y.g01.i-free.ru bridge vlanbr300 --model virtio --persistent
mtail
https://github.com/google/mtail/releases/download/v3.0.0-rc50/mtail_3.0.0-rc50_Linux_x86_64.tar.gz
No Comments