From 5020e66a58ffb920815b463f59fd232f6545727d Mon Sep 17 00:00:00 2001
From: Leonard Penzer <leonard@penzer.de>
Date: Sun, 8 Jun 2025 09:58:35 +0200
Subject: [PATCH 01/13] Update Inventory

- add gw01n03 to external_vm
- add firmware to ffspveguests
- add gw05n01 and gw05n02 to external_vm
- fix dhcp4 (use dhcp04.vm)
- remove wiki-testing - id is used by pbs01
- remove dns03.vm.freifunk-stuttgart.de from ffspveguests
- add prometheus01 - selfnet-vm to external_vm
- update gws to use vm-entries
---
 inventory/external_vm  | 6 ++++++
 inventory/ffspveguests | 4 +---
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/inventory/external_vm b/inventory/external_vm
index dc466f1..63d6dc8 100644
--- a/inventory/external_vm
+++ b/inventory/external_vm
@@ -7,3 +7,9 @@ external_vm:
     dhcp03.freifunk-stuttgart.de:
       ansible_ssh_port: 44353
     dns02.as208772.net:
+    gw01n03.vm.freifunk-stuttgart.de:
+      ansible_ssh_port: 44353
+    gw05n01.vm.freifunk-stuttgart.de:
+    gw05n02.vm.freifunk-stuttgart.de:
+    prometheus01.vm.freifunk-stuttgart.de:
+      ansible_ssh_host: freifunk.ext.selfnet.de
diff --git a/inventory/ffspveguests b/inventory/ffspveguests
index 3cca7f9..f94fb9b 100644
--- a/inventory/ffspveguests
+++ b/inventory/ffspveguests
@@ -16,7 +16,6 @@ ffspveguests:
       ffspve_id: 8194
     ripe-atlas01.vm.freifunk-stuttgart.de:
       ffspve_id: 8187
-    dhcp04.vm.freifunk-stuttgart.de:
     revproxy-05.vm.freifunk-stuttgart.de:
     openslides.vm.freifunk-stuttgart.de:
     glrunner-ffs05.vm.freifunk-stuttgart.de:
@@ -31,7 +30,6 @@ ffspveguests:
     nodealarm01.vm.freifunk-stuttgart.de:
     prometheus02.vm.freifunk-stuttgart.de:
     prometheus03.vm.freifunk-stuttgart.de:
-    wiki-testing.vm.freifunk-stuttgart.de:
     mailexpand.vm.freifunk-stuttgart.de:
     pad.vm.freifunk-stuttgart.de:
     revproxy-03.vm.freifunk-stuttgart.de:
@@ -64,10 +62,10 @@ ffspveguests:
     vaultwarden.vm.freifunk-stuttgart.de:
     nextbox.vm.freifunk-stuttgart.de:
     ffs13r.vm.freifunk-stuttgart.de:
-    dns03.vm.freifunk-stuttgart.de:
     ffs10.vm.freifunk-stuttgart.de:
     dhcp02.vm.freifunk-stuttgart.de:
     jumphost01.vm.freifunk-stuttgart.de:
     nrb-backbonetest2.vm.freifunk-stuttgart.de:
       ansible_ssh_host: 2a01:4f8:172:feff:be24:11ff:fe8b:8979
       ansible_ssh_user: root
+    firmware.vm.freifunk-stuttgart.de:
-- 
GitLab


From 6b5a0cd13bf3e1d90691d04409429e4ea30afa99 Mon Sep 17 00:00:00 2001
From: Leonard Penzer <leonard@penzer.de>
Date: Sun, 15 Jun 2025 21:43:30 +0200
Subject: [PATCH 02/13] Inventory: Add gws to role_gw

---
 inventory/role_gw | 9 +++++++++
 1 file changed, 9 insertions(+)
 create mode 100644 inventory/role_gw

diff --git a/inventory/role_gw b/inventory/role_gw
new file mode 100644
index 0000000..667c2b8
--- /dev/null
+++ b/inventory/role_gw
@@ -0,0 +1,9 @@
+---
+role_gw:
+  hosts:
+    gw01n03.vm.freifunk-stuttgart.de:
+      ansible_ssh_port: 44353
+    gw04n06.vm.freifunk-stuttgart.de:
+    gw05n02.vm.freifunk-stuttgart.de:
+    gw09n03.vm.freifunk-stuttgart.de:
+    gw09n04.vm.freifunk-stuttgart.de:
-- 
GitLab


From 3329bb8c39d9c7223e09fdb8a83a33f5efa56f62 Mon Sep 17 00:00:00 2001
From: Leonard Penzer <leonard@penzer.de>
Date: Sat, 7 Jun 2025 18:12:43 +0200
Subject: [PATCH 03/13] Initial prometheus-exporters role

- open nft firewall for prometheus-exporter-exporter
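
For reference: with the prometheus_ips list from group_vars (added in a
later commit of this series), the two blockinfile tasks render roughly
the following into /etc/nftables.conf (a sketch, not verbatim output):

  # BEGIN ANSIBLE MANAGED PROMETHEUS IPS
  define prometheus_exporter_ips = {
    2a0f:d607:e:2::137,
    2001:7c7:2110::21,
  }
  # END ANSIBLE MANAGED PROMETHEUS IPS

  chain input {
  # BEGIN ANSIBLE MANAGED PROMETHEUS EXPORTER
  ip6 saddr $prometheus_exporter_ips ct state new tcp dport { 9998 } accept;
  # END ANSIBLE MANAGED PROMETHEUS EXPORTER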
---
 group_vars/all/prometheus_ca_vault           | 270 ++++++++++++++
 roles/prometheus-exporters/handlers/main.yml |  16 +
 roles/prometheus-exporters/tasks/main.yml    | 354 +++++++++++++++++++
 3 files changed, 640 insertions(+)
 create mode 100644 group_vars/all/prometheus_ca_vault
 create mode 100644 roles/prometheus-exporters/handlers/main.yml
 create mode 100644 roles/prometheus-exporters/tasks/main.yml

diff --git a/group_vars/all/prometheus_ca_vault b/group_vars/all/prometheus_ca_vault
new file mode 100644
index 0000000..0f8f183
--- /dev/null
+++ b/group_vars/all/prometheus_ca_vault
@@ -0,0 +1,270 @@
+$ANSIBLE_VAULT;1.2;AES256;ansible-ffs
+30636138643961623138666666663630653034653639323234336630633332393632396339396562
+6135313732663438653030666461376463366137303162380a643262613235336236666335633030
+37336139646262333532653134653737343964353530383066323664353465313530356336643861
+3839356565373565380a626662663265323962333839373339663361363262616263656337353139
+61646236623836633832376663363331616531623739313333333035393333356437613939626161
+31356330393964663565353762363733356562646664363065323835633036383434353739653433
+39346264386134353364323733383664303034616562346661326135646464643238316461313064
+36356365376263646134363637336336306436616534663737386533376132623434626462363763
+39613966656434613830373231313263306433613234666434366166343333313064326262383264
+66613731643633636134366230663562393263626237353865333934613538353532623237386331
+31373638386663323361356436383733303631323962303766663463656666306231363030633735
+35306366653037313931623233326133393032613464373531383366376434353133346566383166
+31366130363464343466336337666465396436356531626664383961623131663435313632613866
+39363236343737663834656130643965653236623862353663376365613334663363383338663463
+62306564353232663631393534663061316362643531656530636165666666396133333865626538
+33623563393866623563373333336666383732326537323363336134326632623363623666623162
+65346539323331663236383336623863343135376635626633353139623434663331656635386662
+36376634663863623762643862396339326635653933383739396465333061386166616339623430
+66326565303936393239386339356363663130656431396566663366386664353730393733336366
+64363665313631373038633365373334636261653465353964656139343962323465633835303831
+31313562663530316435646362613465356266376266616430376132663530356139633330366637
+30343931346333303230363065636432613036393262333666343530326537633162646339666536
+63663065386265383336333732306236633134393061343833343233326439313361643930613863
+66346335653666313335356564616530303663383966663035356565356666313561366633346539
+33356564373535313062303437323637393835616233666130633434643966343937393738386132
+39356538353965376139303438616463393061626238326436376634636335376566333761653461
+38663662633434386439666636323164643433626362626134646264396364316238636664633434
+62643966313934353263346361303033353966313139643238633335646164356337393334623766
+34373566366662373634663933343561343433343165343135663766393963333731643032326235
+37616663336438306536386463653865323135383637643162383964633638336632356166633033
+37643165346437303730306136363636643231393134666662326138336532363132393433613533
+37343430346335653365633834383164353232323634613437643938353530303239643831386564
+37636464316464396233323538633930613264633366353233623965376363616334623032343761
+32646435356364633332313562663265346465306635393337383364653366356239613239613264
+39353864373866363339636339333639656334636433393139353662323362616531343738626136
+35386335636539376330326366656338373232363434633063353136303930386234326139613535
+33633731343166636534633533393935353831363166363334376230663537313362646166376235
+61336335323838366539626631343531383561313531646534666236313036346562333334373262
+64383563386337653938663965623637646165633330656134393662643732653466363866306237
+62633731663631373263373564663637343630373830646336616132636632646135623537343865
+64666433346538666366613439643639663266633237353130316433353664663766343339336561
+63366334343733646631663531303263376164643765306261323434636338306132363634373032
+37363233373964613166386537633536346662393661333238633539333933623465663135336365
+37393134353038656365363230663462343132366135363336363964393534353632336461383939
+33306637633730623566303533653761613131363936653632656361363265643535313330356335
+63306132623734666234613365346336303565623630623865653832386465303736633130383632
+66336432633534343663343234346239613733623965376634613265383465633464616562633661
+33663231616133613931333330313663313531313137663730623462333139396330646531333231
+35366364316235616532633233363339303862613934306435376536613735326133306261613866
+30653666623039643035346531623130366238633066613631346432343364316237383532656132
+62653262343736356330386238343566656362633461633334373261343464373065656464373361
+63623665393339383431356634326566646237356565356238613233373831646534373065363862
+32373131626461316134353962323331386331366463343636643261373361383062303230393064
+37666434666533626162643635663634623932623330656162363138663833393433373533323364
+64643064633334643331373231623033633035326232336362313462623431326237366563323430
+32393136653436653134633333613930373433653337303464363962373731646535613966663531
+63303833343338343162393239623535646537636339333539326632636162383733623636373039
+37303766383832313738346266383232313265333261336465653961666261666539623230393964
+36646336346539313462356339343465383261343638613261333261366461303837323566656565
+35373461313662323431663235353030326538393261633437616666643137346364396661616166
+34316336333731373538353061646664353632626235373366656532653837343633363337363534
+65346330323731633437303964353838333165386664616134663535393538363639313032393435
+31343765643338633438386364303739366332333866373436653639396562303562666564653035
+62313236383039616366666533353835306131363233643131613734666135376533383333343363
+38663838316163653737643630636565633865303334363938316435376565323933336163326432
+63663033653931343366616435636565383033643661336366616535656539343062666632653631
+30373564643165653264313133373763393864616662396631333365303638376562356366303736
+63306631343165373565653330656234623437643863356265383166646434656632323266653632
+32363838306539636666623466353637306237373832353532353732643266646335366438643765
+37303739386433383039373533383334613137326235376137663062633237303239363638636134
+61633764643534316163633530613334323339646338313563626665343237666134636432376237
+36623536643639343836376165376431636139386561313034363363373664303364333162613866
+63623764643166383961646335326431623735326235653738353037346366356539626136323532
+38396633353136393136316630666631623465653537396366313938356661626461353965666334
+34643664373830613132643738646562373162373461313835656131653165653562343436363538
+37616139333265396338363635363532393333346438616661666639626137373038633537313465
+31336665653631666539386264396531346561363932363234346564343430343437396661643637
+38356439653037643436353938373138636634643932613163376266386138613439323635333539
+61326238393361333234373033316537613535663730386338616436643463376337376132383437
+34663835343831646664396161336536613066373135643636363539616565613832643462356335
+38656534666463356234373839313930646465336330356364663338663833333430383339366562
+31626131326465313039333737353437306230353034393536326364663862613732613631633031
+31356335656338373739386535383066666235383836356261626230623931636564663166376262
+37326239663038333366333135306238666365356635326430366539396232366263663066653363
+30366361323565336361306338666361613737326663366263643937393933666262646265353464
+62306133376235616231326465373032636563356133633466383763666232643931356661633630
+36353339323261623831356262363662313734363636386262323265653861303433643137383136
+33386432323361333937623139313363646463303638366266393733653433333436343863393833
+65393437373761636631616465363330383666626466613061376262663665316130633432363861
+61386239333536333731326636653965316334393436303139306138663338353562306463376364
+34626165613636373435326532653237616230373034383563616564653137633361653666653034
+39313038306639646132393465356334646266323064383134383631653834613863313931333235
+63653533616230383138326566353034363561303263616530366130383430633164366331393534
+32623661663865663862353339613761363466613433313763373464643263383634386330376232
+33376366646137616136343066353261643338356634313138303030336431356332653531333037
+32323063626237623532363834666233313934656333653964636465393631376333643130393562
+64623937633961353364306466383264646633343134626336356432323161333534623837336664
+30636666373930393239303031643032613064616633366638346234383931616663336662396336
+64303637313132303232353035313934316635383863373135343830343232646530623735656133
+32666365316439633636653332623839303934343563633639363030316563373531306261396638
+36613438373734646133653234626631363631306434656366636531363335373461363236326531
+66393932343337623162633437313939666334633665356438363137656534366231613830386638
+39313531666162366539613366623631326331613434633162303466613366313137386338626239
+30383534623835626332656564633639316137323866336335636338616361363534353835333465
+63643732383962656532613738383034656265373033353064353038363565656662626638373161
+36336564663733666461623331323338653932353632313439636662353664653739636335306363
+34313032326364363038353061616134363161643466653166373430326439346664383831313232
+65653464386466386232666635393735313030356266633133656166366536356665343031396562
+61653539306332646538376135343861636334383736313363393366363534643761623832363862
+30633930623764626263643263636331633764616330636431336163323166663965386530636665
+38366161613239643635383139663864366232633633336363356662343262336364376135613062
+61336262646336626133613734353939666533393531373732333132383263626566346234366438
+65663466376363623033303463343435626366633836366233323538386633323739313966383431
+32643764613537316139356132646436353661653432346562303263396531653132336537646362
+63356462346436343833346234353933383766386166363132323938646563646239303535383863
+66656334656562643636386531376438336639663362303536343264313430643962326164396134
+38666330316236353539306338393831333763626437643935303436653566383333323431666438
+63353737623933313939623633636430303636636464663031316263343038316161396161366432
+34636334333964613761396233636136656630643561623937636336323439336236656461616533
+31373566363038333133323866313335666139373961373131646463633831323063356532363533
+62343036343737653737336431323235623164303934353630623535333430326636663531386464
+38616136373264626534376234376431666532353636653731363964333538383639633034623262
+31333031323839373663633238356436656236383933616561356439323265306631653232643633
+37316139343831313933633335376438373663633837323565373939643533333832353232323765
+61306361666238663861343361623837626235663833393539323962343032343538356430633635
+33613465363965376133306339303061356438376662393432356664363536303732346434636533
+38363064313431333530333233373134333064636538363262316138323139393037626636613237
+30633663653237316231646363336163633032663338633534373061386263383061396336363161
+66363334656636346566363536613537666330333235353837303231666262356166376332613036
+35313834393331643664363133396635646633393935623236653738396565663633373733396662
+37363234316463326333653136343133393530363562326136616166386438383838313362386665
+65646262663830613135306562613130616434393563386464396338653130616366653966366239
+32393133616432346666366534333333366336633133366537613864656266626337633563653837
+34313935656637633632383035616430326533393464666363346636316235363333353633323733
+36373137626638336139373631303733363838383132373365353964666437666639646365393961
+36366339346166656364383236396461613233353331643034636666663732306234643662363338
+62636566376662356363356434623863656435376637366561633633616237346431343365663361
+33303736366133633936633233623430363265326232383033313165663039303764373464376336
+61653733643231323862323738333936323433383730373139633333636335653632316466386338
+34303139313838663861636264333966376633313661373637376138383865613764383166393537
+63386137363464356561383935366438373337383031633030373232623466333438353364353933
+34653563383731663231666365626434333235346637333462363334353365363733323936623565
+31313639383065396133333134346335323162313736663565656330353366313434373236306633
+34646264636533316533383833393466356630326330306464366537623632376363303861363364
+61393461386637363230653662373163316663313338383965626234383138633837336265393661
+32653034333764613537633931373366313632323335333365303930386165313662306132666430
+39346364653936356463643663633862326539373037363330323731373661353233396465376539
+30386262353638623930313763643234386262393237663731383533336466346133353766306463
+38626638333439353862373931633065656438663839626563386161366132366236323861616535
+37666361313238383637356633326162306233326462333938363233613431343538373239636230
+66363765613761646466376333303831636230346538386339373165333163396266643562343064
+32663762653339323339313166336132366537303535633038633436313361326438616637346162
+62313732326334653330326565363366623531616562633238336330663839303163393162346565
+61616431663637646534663033366261313934373239313335333563373962346563323537373163
+64633963626235323564353536393931393764363934613265336437643064346463386638323233
+32663238343833346663656135353864373639333364343066396235613961356662393665363630
+38336136386138316535326236333565663136653563303835353964303433636163643866643461
+64396333656630633933393538303863376462363565306564353766663763616166633332393338
+34653465363439323536383163366364306362313865633465303437653935643764326532363362
+62343266333464373963653265323463376363623261346364333132653432353937383632303538
+30373037323236373862643430666239613863646531653962653437333063646165393033656332
+34656536303239353661626339643761623262653835356533343037633937383934306233613734
+39626565393433346562643239306330623662623163643136303366656630363930396539343434
+31393033613139613162393339366132633230656464333839663737643164383761303730316538
+62623934366233626564333066326432366239383330353732646431383533336436663864653132
+37616562316234353834346134653634363738313966643831623766313032313762346230613030
+35383439326466396462346638646165666263363231363435653132326562616562636430643365
+38626439396237363830643831333037353931663064653731316534343661663935633433313737
+61653839316632386564313236663362313761623065663466656535303164643465343434363462
+64656334633266616335366361623630373335386663373830306166646439323636663266613037
+66633338326135653338663837613830633937633236353039383865363662313738393462623239
+65363938323038386465656535386431383864346130333961333436343262636232323033396435
+36303065333365613035636465343865333732363461653963643736623464356561373164313031
+33643037323063366364616363633534623830383866313033396138313335383661346666396537
+38613530653732336535343934326537343566393231333462313663623437343538636665386261
+39633732366138383363663634303532636633363333396532343736613734623962333865656265
+32386431373463626435386633663432323933373631353664336162326564363534643661383434
+64333133323961306633373362323863666233643663333535626531623032323932396536383161
+61646565623564336365343739623331356661393762326439383138353037393965303531313932
+62363464636564643939383661653166303163626435653538646365313064336664613336616138
+34653631663536303432663731333165396663333564363338326530353265656262636138653534
+62346437383636353834356561346663363832666339303864373435653331613437363939613239
+66393539356531303234613835303938353461373762306563356532653038386461383737313737
+39666361613762373134646161353539386665656635666537656266383035656530393730643665
+30326438383066363338303866333464333339376631633531393161353230353032356435663338
+30303834366465623861643130653064643464303366396666646237643237373962636530663863
+63313130396466386562363435376461666164313337373838646536616166623536663135343263
+64323261633133373366306665643638613930616331343064366532313131636362373431343639
+66393830666133656365393730343931636439626162616163393131633634396536343030626336
+30663266623432643234366337653838643630383736643330666436386335336431653662363964
+36323737303232356466373137323734316238383165316631343639393862343765313935646166
+61613734646539363638613865386534306238383464323639333066326565656665623937666162
+34316537393736356436646665313533303838393666653465386162346336306462303438313735
+37656338363335336232333638653038363030383533353032343133363662633531353338663438
+38326664643236336136343266653462386464343363616637393530393166323639343261356635
+64343438306630633538376235313637306664643236383532646633343635353931663561623566
+35616135316466656662336130633963386266336261353631306536376438323235366434346530
+36616161636561316661633662386562653030643264303533386463366132306434343735313139
+62623135323338366461353034623836626330643932303337323266636430643530656337323031
+62653335623565346333653630356565393738356231383366353639653064356230663432383736
+32663463616563643061323639393063343132623435643339363935346437653233663665666162
+37316263663139653930656537376434323464623030366538333161623434623061343462623261
+65313161393537353833313538643138656135626133373634313139333131393864643331333462
+33616166653661396439666366376333393239323931316438333430313463343834643466616265
+39623637323761646566396536386635393239653562363837393532356330393532663361323736
+64653264663064323239373932643565306561313038356461656162376466366335346136333233
+32313233316638353564646162613135646335373765356332636234663038343031646137393736
+62613937323664313463633637393966613230303365666563363363346262633537313837633035
+65376630613235616264626565313931643130363434316438393235343663646162393264666339
+30656137393563613933363639626663643936363039316239623261623438313061343634323037
+34656563613135363463636534366639343863326134376662386138653066353036653835633334
+37393032313539613338613438306434383763313336636137303964633766383537346665646139
+37343638376361623035633832633932316265626131636564323866393933636264643363626331
+64663131336130663538663464373738373461393237383866613931313662386233316261373563
+65353865353662633630336432373065643564613137393966613465363165373463613831306630
+32356665643266616566393539396230666661373964393737353433643735363535306364386336
+63353761643230663732383263643865323635353261666262656232343930373039363865366438
+36643564313737336234396531313837396332373834396464356238303739663732626537336337
+33313065363133333661616433353734343430636331376332336432646563653863366466623864
+36663564616232313731336363646536656263633564383165383965653936376464333663333465
+64356462343837343933383837303939393435626538626262303561346338613362366134636437
+39306466643563333361303163346564656631363462366164663630356338306138636266353633
+64363831353330376135386237333338396330333234633362663262323832633961353832303032
+31343637393432666661653531303134323364363264636539633565323232363739323937333838
+64346631393436373937636166393334303632316636396563643630653933366233663663633632
+34313037653038313437343663663432353234353032343439373865666462666664323261303933
+39343832363435336165613631396362303462323261376466386631636533303765363632626334
+35346461653464633734353265663238646164653466306436643565353566636138333565353438
+35316430646434643961353831326535396262646466396332656339306436633034626664666137
+34643664393136333935623963633631336233656430373665396136613938396332633065333137
+62653939633063616533646430636463336264376634393035623137626637636166353337363833
+61653233373330396232313939336135613862666135306235636135633035386262613339356333
+37356235383731396466636135613831643239373237346463353563646131333033363339343231
+31666162326632663032626130656565653334323633636565653263373336323438316430616436
+62663665343966646231616132653235373836616661333539313339343361653666613639326538
+38323463393333376265653936326632313037346261653934343634326530663338383264306134
+30316137383538353035653536663864356433643234343931323230656633656363376330346234
+39353362623533643861336264663662653863393435396336653334303830653466336236303530
+32356234313130613739346632373865633364323634303739353434383636643563333963383639
+38323039396230623838653234336339396164333933656463313064333262333863316266653732
+32623664353364656565316538373861336166643064646337646364333766653032373064646133
+34633663393935306134386536333363646233653033663139353838396135366131386366333539
+64303035356236326563613937616364346265616664646232663063326261333830356434306536
+63643533306465623866633230653430656236636538643038656139643265346265343835636130
+36393566623830313062303733393032353061326132353031373831386630626531626463643432
+36383232316639343033656132656663666134383962373561653539306466343435376436393666
+37353439633362373131373932656266343761616665646430373539373162616235376239333930
+34306138326263383666643065306564303062393031633563366662656336336163626166356330
+61663164353534303432353061386639633531303863306238336463666236313032306636616135
+37323864613030643937643634653738313861326538313334316231363236623936613337383465
+33353063623136336465643736356338343066306230303866633836653432386365366439396434
+31383533653838373266643436306237306266303261353964306134393033323366643937383138
+35633363383261316638623135393465363664363938323737643430313761303763383533303463
+61636536386165326432646266343764346130623735613439333633613662343735383737376261
+30663537623664633464383235633630653463353432353536643437333137383538343233303039
+36663333623736363635613134616330623564306434373361366264356337623162623363343736
+32616337626564336661626432646165663233643166356133666263366339386435363664303937
+39346439386639643066633063373939363737323862316138373738343330363163613062393233
+39376135353765313133636238623035383235336639663932353861653165636631353463623838
+30646633636539646633623632303761643338363438663663356337616235623766323930346433
+66333736313239306266373130616236313366663537333135656230373234326531333135366236
+38376165336166613563613337353935613632313762356530353465313136663337396630303836
+39336662646634316339653130363332613636613536343366343639363464653532663763353961
+63643935333530626534656666303465303939313032613363363463313366653937343438356630
+66313664336631363834376439643061396136376437663333393833663034343165363034336632
+66646239376134346432343766323564383534613837393165643562363538343562346564303661
+36396433633737396533353138653831643562303538346235313037353362376538626635363332
+37343435316264616239643635613263366531323363336565633261616534643366333264646364
+3731
diff --git a/roles/prometheus-exporters/handlers/main.yml b/roles/prometheus-exporters/handlers/main.yml
new file mode 100644
index 0000000..7ff6bd3
--- /dev/null
+++ b/roles/prometheus-exporters/handlers/main.yml
@@ -0,0 +1,16 @@
+- name: Restart prometheus-node-exporter
+  systemd:
+    name: prometheus-node-exporter
+    state: restarted
+    enabled: true
+
+- name: Restart prometheus-exporter-exporter
+  systemd:
+    name: prometheus-exporter-exporter
+    state: restarted
+    enabled: true
+
+- name: Restart nftables
+  systemd:
+    name: nftables.service
+    state: restarted
diff --git a/roles/prometheus-exporters/tasks/main.yml b/roles/prometheus-exporters/tasks/main.yml
new file mode 100644
index 0000000..4f3af55
--- /dev/null
+++ b/roles/prometheus-exporters/tasks/main.yml
@@ -0,0 +1,354 @@
+---
+# This does not respect hosts that still use ip6tables
+# like monitor01 and monitor02
+- name: Check whether nftables is enabled
+  shell: systemctl is-enabled nftables.service
+  register: nft_enabled
+  failed_when: false
+
+- name: Check whether nftables is active
+  shell: systemctl is-active nftables.service
+  register: nft_active
+  failed_when: false
+
+- name: Write Prometheus IPs to /etc/nftables.conf
+  when: nft_enabled.stdout == 'enabled' and nft_active.stdout == 'active'
+  ansible.builtin.blockinfile:
+    path: /etc/nftables.conf
+    marker: "# {mark} ANSIBLE MANAGED PROMETHEUS IPS"
+    insertafter: "^flush ruleset"
+    block: |
+      define prometheus_exporter_ips = {
+      {% for ip in prometheus_ips %}
+        {{ ip }},
+      {% endfor %}
+      }
+  notify: Restart nftables
+
+- name: Insert Prometheus access block into chain input
+  when: nft_enabled.stdout == 'enabled' and nft_active.stdout == 'active'
+  ansible.builtin.blockinfile:
+    path: /etc/nftables.conf
+    marker: "# {mark} ANSIBLE MANAGED PROMETHEUS EXPORTER"
+    insertafter: '^\s*chain input \{'
+    block: |
+      ip6 saddr $prometheus_exporter_ips ct state new tcp dport { 9998 } accept;
+  notify: Restart nftables
+
+
+- name: Set Prometheus Node Exporter options for containers Debian 12 and later
+  copy:
+    content: |
+      ARGS="--no-collector.cpufreq --no-collector.thermal_zone \
+            --no-collector.hwmon --no-collector.diskstats \
+            --no-collector.vmstat --no-collector.mdadm \
+            --web.listen-address=[::1]:9100 \
+            --web.listen-address=127.0.0.1:9100"
+    dest: /etc/default/prometheus-node-exporter
+    owner: root
+    group: root
+    mode: '0644'
+  notify: Restart prometheus-node-exporter
+  when:
+    - ansible_facts['virtualization_type'] == "lxc" | default(false)
+    - ansible_facts['virtualization_role'] == "guest" | default(false)
+    - ansible_facts['distribution'] == "Debian" | default(false)
+    - (ansible_facts['distribution_version'] | int ) >= 12 | default(false)
+
+- name: Set Prometheus Node Exporter options for containers Debian 11
+  copy:
+    content: |
+      ARGS="--no-collector.cpufreq --no-collector.thermal_zone \
+            --no-collector.hwmon --no-collector.diskstats \
+            --no-collector.vmstat --no-collector.mdadm \
+            --web.listen-address=127.0.0.1:9100"
+    dest: /etc/default/prometheus-node-exporter
+    owner: root
+    group: root
+    mode: '0644'
+  notify: Restart prometheus-node-exporter
+  when:
+    - ansible_facts['virtualization_type'] == "lxc" | default(false)
+    - ansible_facts['virtualization_role'] == "guest" | default(false)
+    - ansible_facts['distribution'] == "Debian" | default(false)
+    - (ansible_facts['distribution_version'] | int ) < 12 | default(false)
+
+- name: Set Prometheus Node Exporter options for VMs Debian >= 12
+  copy:
+    content: |
+      ARGS="--no-collector.thermal_zone --no-collector.hwmon \
+            --no-collector.diskstats --no-collector.mdadm \
+            --web.listen-address=[::1]:9100 \
+            --web.listen-address=127.0.0.1:9100"
+    dest: /etc/default/prometheus-node-exporter
+    owner: root
+    group: root
+    mode: '0644'
+  notify: Restart prometheus-node-exporter
+  when:
+    - ansible_facts['virtualization_type'] == "kvm" | default(false)
+    - ansible_facts['virtualization_role'] == "guest" | default(false)
+    - ansible_facts['distribution'] == "Debian" | default(false)
+    - (ansible_facts['distribution_version'] | int ) >= 12 | default(false)
+
+- name: Set Prometheus Node Exporter options for VMs Debian 11
+  copy:
+    content: |
+      ARGS="--no-collector.thermal_zone --no-collector.hwmon \
+            --no-collector.diskstats --no-collector.mdadm \
+            --web.listen-address=127.0.0.1:9100"
+    dest: /etc/default/prometheus-node-exporter
+    owner: root
+    group: root
+    mode: '0644'
+  notify: Restart prometheus-node-exporter
+  when:
+    - ansible_facts['virtualization_type'] == "kvm" | default(false)
+    - ansible_facts['virtualization_role'] == "guest" | default(false)
+    - ansible_facts['distribution'] == "Debian" | default(false)
+    - (ansible_facts['distribution_version'] | int ) < 12 | default(false)
+
+- name: Set Prometheus Node Exporter options for hosts
+  copy:
+    content: |
+      ARGS="--web.listen-address=[::1]:9100 --web.listen-address=127.0.0.1:9100"
+    dest: /etc/default/prometheus-node-exporter
+    owner: root
+    group: root
+    mode: '0644'
+  notify: Restart prometheus-node-exporter
+  when:
+    - ansible_facts["virtualization_role"] == "host" | default(false)
+    - ansible_facts['distribution'] == "Debian" | default(false)
+    - not ansible_facts['distribution_version'] == "11" | default(false)
+
+- name: Installing prometheus exporter packages
+  apt:
+    name:
+      - openssl
+      - prometheus-exporter-exporter
+      - prometheus-node-exporter
+      - prometheus-node-exporter-collectors
+    state: present
+    install_recommends: false
+
+- name: Ensure /etc/prometheus/ssl exists
+  file:
+    path: /etc/prometheus/ssl
+    state: directory
+    owner: root
+    group: root
+    mode: '0755'
+
+- name: Copy ca_cert.pem to target
+  copy:
+    dest: /etc/prometheus/ssl/ca_cert.pem
+    content: "{{ ca_cert_pem }}"
+    owner: root
+    group: root
+    mode: '0644'
+
+- name: Check if exporter_key exists
+  stat:
+    path: "/etc/prometheus/ssl/exporter.key.pem"
+  register: exporter_key
+
+- name: create key
+  command: >
+    openssl genrsa -out /etc/prometheus/ssl/exporter.key.pem 2048
+  args:
+    creates: /etc/prometheus/ssl/exporter.key.pem
+  notify: Restart prometheus-exporter-exporter
+  when: not exporter_key.stat.exists
+
+- name: Change file ownership, group and permissions
+  ansible.builtin.file:
+    path: /etc/prometheus/ssl/exporter.key.pem
+    owner: prometheus
+    group: prometheus
+    mode: '0600'
+
+- name: create csr config
+  when: not exporter_key.stat.exists
+  copy:
+    dest: /etc/prometheus/ssl/csr_config.cnf
+    content: |
+      [req]
+      distinguished_name = dn
+      req_extensions = req_ext
+      prompt = no
+
+      [dn]
+      CN = {{ inventory_hostname }}
+
+      [req_ext]
+      subjectAltName = @alt_names
+
+      [alt_names]
+      DNS.1 = {{ inventory_hostname }}
+
+- name: create csr config on controller
+  when: not exporter_key.stat.exists
+  delegate_to: localhost
+  copy:
+    dest: /tmp/{{ inventory_hostname }}.csr_config.cnf
+    content: |
+      [req]
+      distinguished_name = dn
+      req_extensions = req_ext
+      prompt = no
+
+      [dn]
+      CN = {{ inventory_hostname }}
+
+      [req_ext]
+      subjectAltName = @alt_names
+
+      [alt_names]
+      DNS.1 = {{ inventory_hostname }}
+
+- name: create csr
+  when: not exporter_key.stat.exists
+  command: >
+    openssl req -new -key /etc/prometheus/ssl/exporter.key.pem
+      -out /etc/prometheus/ssl/exporter.csr.pem
+      -config /etc/prometheus/ssl/csr_config.cnf
+
+- name: Fetch CSR from exporter
+  when: not exporter_key.stat.exists
+  fetch:
+    src: /etc/prometheus/ssl/exporter.csr.pem
+    dest: /tmp/{{ inventory_hostname }}.csr.pem
+    flat: true
+    mode: '0600'
+
+- name: Write private key to RAM-backed file
+  when: not exporter_key.stat.exists
+  delegate_to: localhost
+  copy:
+    dest: /dev/shm/ca_key.pem
+    content: "{{ ca_key_pem }}"
+    mode: '0600'
+  no_log: true
+
+- name: Write ca_cert.pem to RAM-backed file
+  when: not exporter_key.stat.exists
+  delegate_to: localhost
+  copy:
+    dest: /dev/shm/ca_cert.pem
+    content: "{{ ca_cert_pem }}"
+    mode: '0600'
+  no_log: false
+
+- name: sign cert
+  when: not exporter_key.stat.exists
+  delegate_to: localhost
+  shell: >
+    openssl x509 -req -in /tmp/{{ inventory_hostname }}.csr.pem
+      -CA /dev/shm/ca_cert.pem -CAkey /dev/shm/ca_key.pem
+      -CAcreateserial -out /tmp/{{ inventory_hostname }}.cert.pem -days 8250
+      -extensions req_ext -extfile /tmp/{{ inventory_hostname }}.csr_config.cnf
+
+- name: Delete RAM-backed temp files on the controller
+  delegate_to: localhost
+  file:
+    path: "{{ item }}"
+    state: absent
+  loop:
+    - /dev/shm/ca_cert.pem
+    - /dev/shm/ca_key.pem
+    - /tmp/{{ inventory_hostname }}.csr_config.cnf
+    - /tmp/{{ inventory_hostname }}.csr.pem
+
+- name: Copy cert to exporter
+  when: not exporter_key.stat.exists
+  copy:
+    src: /tmp/{{ inventory_hostname }}.cert.pem
+    dest: /etc/prometheus/ssl/exporter.cert.pem
+    mode: '0644'
+  notify: Restart prometheus-exporter-exporter
+
+- name: Delete files in /tmp on the controller
+  delegate_to: localhost
+  file:
+    path: "{{ item }}"
+    state: absent
+  loop:
+    - /tmp/{{ inventory_hostname }}.cert.pem
+
+- name: Delete temp files on the remote host
+  file:
+    path: "{{ item }}"
+    state: absent
+  loop:
+    - /etc/prometheus/ssl/csr_config.cnf
+    - /etc/prometheus/ssl/exporter.csr.pem
+
+- name: Configure prometheus-exporter-exporter config
+  copy:
+    dest: /etc/prometheus/exporter-exporter.yml
+    content: |
+      modules:
+        prometheus:
+          method: http
+          http:
+            port: 9090
+        alertmanager:
+          method: http
+          http:
+            port: 9093
+        node:
+          method: http
+          http:
+            port: 9100
+        respondd:
+          method: http
+          http:
+            port: 9104
+        blackbox:
+          method: http
+          http:
+            port: 9115
+            path: '/probe'
+        bind:
+          method: http
+          http:
+            port: 9119
+        postfix:
+          method: http
+          http:
+            port: 9154
+        process:
+          method: http
+          http:
+            port: 9256
+        pve:
+          method: http
+          http:
+            port: 9221
+            path: /pve
+        bird:
+          method: http
+          http:
+            port: 9324
+        kea:
+          method: http
+          http:
+            port: 9547
+    owner: root
+    group: root
+    mode: '0644'
+  notify: Restart prometheus-exporter-exporter
+
+- name: Configure prometheus-exporter-exporter params
+  copy:
+    dest: /etc/default/prometheus-exporter-exporter
+    content: |
+      ARGS="-web.listen-address= -web.tls.listen-address=:9998 \
+      -web.tls.cert=/etc/prometheus/ssl/exporter.cert.pem \
+      -web.tls.key=/etc/prometheus/ssl/exporter.key.pem \
+      -web.tls.ca=/etc/prometheus/ssl/ca_cert.pem -web.tls.verify"
+    owner: root
+    group: root
+    mode: '0644'
+  notify: Restart prometheus-exporter-exporter
-- 
GitLab


From e1d6eeb3dae2928fe42e32795f900562e22376f7 Mon Sep 17 00:00:00 2001
From: Leonard Penzer <leonard@penzer.de>
Date: Sat, 7 Jun 2025 21:02:10 +0200
Subject: [PATCH 04/13] Add prometheus client tls cert config role

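The role leaves client.key.pem, client.cert.pem and ca_cert.pem under
/etc/prometheus/ssl on the prometheus host. A scrape job can then
authenticate against the exporter-exporter TLS port along these lines
(an illustrative sketch, not part of this commit; exporter-exporter
proxies its modules via /proxy?module=...):

  scrape_configs:
    - job_name: node
      scheme: https
      metrics_path: /proxy
      params:
        module: [node]
      tls_config:
        ca_file: /etc/prometheus/ssl/ca_cert.pem
        cert_file: /etc/prometheus/ssl/client.cert.pem
        key_file: /etc/prometheus/ssl/client.key.pem
      file_sd_configs:
        - files: ['/etc/prometheus/node_exporter_targets.yml']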
---
 roles/prometheus/handlers/main.yml            |   5 +
 roles/prometheus/tasks/main.yml               | 137 ++++++++++++++++++
 .../templates/node_exporter_targets.yml.j2    |   4 +
 3 files changed, 146 insertions(+)
 create mode 100644 roles/prometheus/handlers/main.yml
 create mode 100644 roles/prometheus/tasks/main.yml
 create mode 100644 roles/prometheus/templates/node_exporter_targets.yml.j2

diff --git a/roles/prometheus/handlers/main.yml b/roles/prometheus/handlers/main.yml
new file mode 100644
index 0000000..8a8df0d
--- /dev/null
+++ b/roles/prometheus/handlers/main.yml
@@ -0,0 +1,5 @@
+- name: Restart prometheus
+  service:
+    name: prometheus
+    state: restarted
+
diff --git a/roles/prometheus/tasks/main.yml b/roles/prometheus/tasks/main.yml
new file mode 100644
index 0000000..cf95926
--- /dev/null
+++ b/roles/prometheus/tasks/main.yml
@@ -0,0 +1,137 @@
+---
+- name: Check if /etc/prometheus/prometheus.yml exists
+  stat:
+    path: "/etc/prometheus/prometheus.yml"
+  register: is_prometheus
+
+- name: Fail if host is not prometheus host
+  fail:
+    msg: "This role must only be run on prometheus hosts"
+  when: not is_prometheus.stat.exists
+
+    #- name: Create node_exporter_targets.yml file
+    #  template:
+    #    src: node_exporter_targets.yml.j2
+    #    dest: /etc/prometheus/node_exporter_targets.yml
+    #  notify:
+    #    - Restart prometheus
+
+- name: Check if client-cert exists
+  stat:
+    path: "/etc/prometheus/ssl/client.cert.pem"
+  register: client_cert_exists
+
+- name: Ensure /etc/prometheus/ssl exists
+  when: not client_cert_exists.stat.exists
+  file:
+    path: /etc/prometheus/ssl
+    state: directory
+    owner: root
+    group: root
+    mode: '0755'
+
+- name: Copy ca_cert.pem to target
+  copy:
+    dest: /etc/prometheus/ssl/ca_cert.pem
+    content: "{{ ca_cert_pem }}"
+    owner: root
+    group: root
+    mode: '0644'
+
+- name: create key
+  when: not client_cert_exists.stat.exists
+  command: >
+    openssl genrsa -out /etc/prometheus/ssl/client.key.pem 2048
+  args:
+    creates: /etc/prometheus/ssl/client.key.pem
+
+- name: Change file ownership, group and permissions
+  ansible.builtin.file:
+    path: /etc/prometheus/ssl/client.key.pem
+    owner: prometheus
+    group: prometheus
+    mode: '0600'
+
+- name: create csr
+  when: not client_cert_exists.stat.exists
+  command: >
+    openssl req -new -key /etc/prometheus/ssl/client.key.pem -out /tmp/client.csr.pem
+    -subj "/CN={{ inventory_hostname }}"
+  args:
+    creates: /tmp/client.csr.pem
+
+- name: Fetch CSR from prometheus host
+  when: not client_cert_exists.stat.exists
+  fetch:
+    src: /tmp/client.csr.pem
+    dest: /tmp/{{ inventory_hostname }}.csr.pem
+    flat: true
+    mode: '0600'
+
+- name: Delete csr.pem on the remote host
+  file:
+    path: /tmp/client.csr.pem
+    state: absent
+
+- name: create csr conf
+  when: not client_cert_exists.stat.exists
+  delegate_to: localhost
+  copy:
+    content: |
+      extendedKeyUsage = clientAuth
+      subjectAltName = @alt_names
+      [alt_names]
+      DNS.1 = {{ inventory_hostname }}
+    dest: /tmp/{{ inventory_hostname }}_ext.cnf
+
+- name: Write private key to RAM-backed file
+  when: not client_cert_exists.stat.exists
+  delegate_to: localhost
+  copy:
+    dest: /dev/shm/ca_key.pem
+    content: "{{ ca_key_pem }}"
+    mode: '0600'
+  no_log: true
+
+- name: Write ca_cert.pem to RAM-backed file
+  when: not client_cert_exists.stat.exists
+  delegate_to: localhost
+  copy:
+    dest: /dev/shm/ca_cert.pem
+    content: "{{ ca_cert_pem }}"
+    mode: '0600'
+  no_log: false
+
+- name: create client cert
+  when: not client_cert_exists.stat.exists
+  delegate_to: localhost
+  command: >
+    openssl x509 -req -in /tmp/{{ inventory_hostname }}.csr.pem -CA /dev/shm/ca_cert.pem -CAkey /dev/shm/ca_key.pem
+    -CAcreateserial -out /tmp/{{ inventory_hostname }}.cert.pem -days 3650 -sha256
+    -extfile /tmp/{{ inventory_hostname }}_ext.cnf
+  args:
+    creates: /tmp/{{ inventory_hostname }}.cert.pem
+
+- name: Delete CA and ext-cnf files
+  delegate_to: localhost
+  file:
+    path: "{{ item }}"
+    state: absent
+  loop:
+    - /dev/shm/ca_cert.pem
+    - /dev/shm/ca_key.pem
+    - /tmp/{{ inventory_hostname }}_ext.cnf
+    - /tmp/{{ inventory_hostname }}.csr.pem
+
+- name: Copy cert to prometheus host
+  when: not client_cert_exists.stat.exists
+  copy:
+    src: /tmp/{{ inventory_hostname }}.cert.pem
+    dest: /etc/prometheus/ssl/client.cert.pem
+    mode: '0644'
+
+- name: Delete local copy of client cert
+  delegate_to: localhost
+  file:
+    path: /tmp/{{ inventory_hostname }}.cert.pem
+    state: absent
diff --git a/roles/prometheus/templates/node_exporter_targets.yml.j2 b/roles/prometheus/templates/node_exporter_targets.yml.j2
new file mode 100644
index 0000000..45f6755
--- /dev/null
+++ b/roles/prometheus/templates/node_exporter_targets.yml.j2
@@ -0,0 +1,4 @@
+- targets:
+    {% for container in groups['ffspveguests'] %}
+    - '{{ hostvars[container].ansible_host | default(container) }}.vm.freifunk-stuttgart.de:9998'
+    {% endfor %}
-- 
GitLab


From 362a31c7c89ead1d443ce3bbbbe7f026cadf1da5 Mon Sep 17 00:00:00 2001
From: Leonard Penzer <leonard@penzer.de>
Date: Sun, 15 Jun 2025 17:15:00 +0200
Subject: [PATCH 05/13] Exclude hosts from scrape rules if
 prometheus_exporters_ignore is set

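A host opts out via host_vars exactly as in the files added here
(prometheus_exporters_ignore: true); every other host gets one entry in
node_exporter_targets.yml, rendered roughly like:

  - targets:
      - 'gw01n03.vm.freifunk-stuttgart.de:9998'
    labels:
      instance: gw01n03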
---
 host_vars/bbnrb1/prometheus-exporters.yml     |  2 ++
 .../prometheus-exporters.yml                  |  2 ++
 .../prometheus-exporters.yml                  |  2 ++
 .../prometheus-exporters.yml                  |  2 ++
 .../prometheus-exporters.yml                  |  2 ++
 inventory/role_core.yml                       |  5 +++
 roles/prometheus/tasks/main.yml               | 36 +++++++++++++++----
 .../templates/bird_exporter_targets.yml.j2    |  6 ++++
 .../templates/kea_exporter_targets.yml.j2     |  6 ++++
 .../templates/node_exporter_targets.yml.j2    | 10 ++++--
 10 files changed, 64 insertions(+), 9 deletions(-)
 create mode 100644 host_vars/bbnrb1/prometheus-exporters.yml
 create mode 100644 host_vars/mailgw01.vm.freifunk-stuttgart.de/prometheus-exporters.yml
 create mode 100644 host_vars/mailgw02.vm.freifunk-stuttgart.de/prometheus-exporters.yml
 create mode 100644 host_vars/nrb-backbonetest2.vm.freifunk-stuttgart.de/prometheus-exporters.yml
 create mode 100644 host_vars/openslides.vm.freifunk-stuttgart.de/prometheus-exporters.yml
 create mode 100644 inventory/role_core.yml
 create mode 100644 roles/prometheus/templates/bird_exporter_targets.yml.j2
 create mode 100644 roles/prometheus/templates/kea_exporter_targets.yml.j2

diff --git a/host_vars/bbnrb1/prometheus-exporters.yml b/host_vars/bbnrb1/prometheus-exporters.yml
new file mode 100644
index 0000000..f56ca1f
--- /dev/null
+++ b/host_vars/bbnrb1/prometheus-exporters.yml
@@ -0,0 +1,2 @@
+---
+prometheus_exporters_ignore: true
diff --git a/host_vars/mailgw01.vm.freifunk-stuttgart.de/prometheus-exporters.yml b/host_vars/mailgw01.vm.freifunk-stuttgart.de/prometheus-exporters.yml
new file mode 100644
index 0000000..f56ca1f
--- /dev/null
+++ b/host_vars/mailgw01.vm.freifunk-stuttgart.de/prometheus-exporters.yml
@@ -0,0 +1,2 @@
+---
+prometheus_exporters_ignore: true
diff --git a/host_vars/mailgw02.vm.freifunk-stuttgart.de/prometheus-exporters.yml b/host_vars/mailgw02.vm.freifunk-stuttgart.de/prometheus-exporters.yml
new file mode 100644
index 0000000..f56ca1f
--- /dev/null
+++ b/host_vars/mailgw02.vm.freifunk-stuttgart.de/prometheus-exporters.yml
@@ -0,0 +1,2 @@
+---
+prometheus_exporters_ignore: true
diff --git a/host_vars/nrb-backbonetest2.vm.freifunk-stuttgart.de/prometheus-exporters.yml b/host_vars/nrb-backbonetest2.vm.freifunk-stuttgart.de/prometheus-exporters.yml
new file mode 100644
index 0000000..f56ca1f
--- /dev/null
+++ b/host_vars/nrb-backbonetest2.vm.freifunk-stuttgart.de/prometheus-exporters.yml
@@ -0,0 +1,2 @@
+---
+prometheus_exporters_ignore: true
diff --git a/host_vars/openslides.vm.freifunk-stuttgart.de/prometheus-exporters.yml b/host_vars/openslides.vm.freifunk-stuttgart.de/prometheus-exporters.yml
new file mode 100644
index 0000000..f56ca1f
--- /dev/null
+++ b/host_vars/openslides.vm.freifunk-stuttgart.de/prometheus-exporters.yml
@@ -0,0 +1,2 @@
+---
+prometheus_exporters_ignore: true
diff --git a/inventory/role_core.yml b/inventory/role_core.yml
new file mode 100644
index 0000000..e895327
--- /dev/null
+++ b/inventory/role_core.yml
@@ -0,0 +1,5 @@
+---
+role_core:
+  hosts:
+    core01-z10a.vm.freifunk-stuttgart.de:
+    core02-z10a.vm.freifunk-stuttgart.de:
diff --git a/roles/prometheus/tasks/main.yml b/roles/prometheus/tasks/main.yml
index cf95926..1be87d7 100644
--- a/roles/prometheus/tasks/main.yml
+++ b/roles/prometheus/tasks/main.yml
@@ -9,12 +9,26 @@
     msg: "This role must only be run on prometheus hosts"
   when: not is_prometheus.stat.exists
 
-    #- name: Create node_exporter_targets.yml file
-    #  template:
-    #    src: node_exporter_targets.yml.j2
-    #    dest: /etc/prometheus/node_exporter_targets.yml
-    #  notify:
-    #    - Restart prometheus
+- name: Create node_exporter_targets.yml file
+  template:
+    src: node_exporter_targets.yml.j2
+    dest: /etc/prometheus/node_exporter_targets.yml
+  notify:
+    - Restart prometheus
+
+- name: Create kea_exporter_targets.yml file
+  template:
+    src: kea_exporter_targets.yml.j2
+    dest: /etc/prometheus/kea_exporter_targets.yml
+  notify:
+    - Restart prometheus
+
+- name: Create bird_exporter_targets.yml file
+  template:
+    src: bird_exporter_targets.yml.j2
+    dest: /etc/prometheus/bird_exporter_targets.yml
+  notify:
+    - Restart prometheus
 
 - name: Check if client-cert exists
   stat:
@@ -135,3 +149,13 @@
   file:
     path: /tmp/{{ inventory_hostname }}.cert.pem
     state: absent
+
+- name: Installing prometheus exporter packages
+  apt:
+    name:
+      - prometheus
+      - prometheus-alertmanager
+      - prometheus-blackbox-exporter
+      - yamllint
+    state: present
+    install_recommends: false
diff --git a/roles/prometheus/templates/bird_exporter_targets.yml.j2 b/roles/prometheus/templates/bird_exporter_targets.yml.j2
new file mode 100644
index 0000000..4db77e9
--- /dev/null
+++ b/roles/prometheus/templates/bird_exporter_targets.yml.j2
@@ -0,0 +1,6 @@
+{% for container in groups['role_core'] %}
+- targets:
+    - '{{ hostvars[container].ansible_host | default(container) }}:9998'
+  labels:
+    instance: {{ (hostvars[container].ansible_host | default(container)).split('.')[0] }}
+{% endfor %}
diff --git a/roles/prometheus/templates/kea_exporter_targets.yml.j2 b/roles/prometheus/templates/kea_exporter_targets.yml.j2
new file mode 100644
index 0000000..471c336
--- /dev/null
+++ b/roles/prometheus/templates/kea_exporter_targets.yml.j2
@@ -0,0 +1,6 @@
+{% for container in groups['role_dhcpserver'] %}
+- targets:
+    - '{{ hostvars[container].ansible_host | default(container) }}:9998'
+  labels:
+    instance: {{ (hostvars[container].ansible_host | default(container)).split('.')[0] }}
+{% endfor %}
diff --git a/roles/prometheus/templates/node_exporter_targets.yml.j2 b/roles/prometheus/templates/node_exporter_targets.yml.j2
index 45f6755..b2f629b 100644
--- a/roles/prometheus/templates/node_exporter_targets.yml.j2
+++ b/roles/prometheus/templates/node_exporter_targets.yml.j2
@@ -1,4 +1,8 @@
+{% for host in groups['all'] %}
+{% if not hostvars[host].prometheus_exporters_ignore | default(false) %}
 - targets:
-    {% for container in groups['ffspveguests'] %}
-    - '{{ hostvars[container].ansible_host | default(container) }}.vm.freifunk-stuttgart.de:9998'
-    {% endfor %}
+    - '{{ hostvars[host].ansible_host | default(host) }}:9998'
+  labels:
+    instance: {{ (hostvars[host].ansible_host | default(host)).split('.')[0] }}
+{% endif %}
+{% endfor %}
-- 
GitLab


From f72f5f51bda8951fc1c6d8f31febae5059e438eb Mon Sep 17 00:00:00 2001
From: Leonard Penzer <leonard@penzer.de>
Date: Sun, 15 Jun 2025 21:44:26 +0200
Subject: [PATCH 06/13] Add prometheus IPs to group_vars

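These are the source addresses the prometheus-exporters role whitelists
on port 9998. After a rollout, the rule can be spot-checked on a target
host with, for example:

  nft list ruleset | grep -B2 -A2 prometheus_exporter_ips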
---
 group_vars/all/promeheus_hosts | 4 ++++
 1 file changed, 4 insertions(+)
 create mode 100644 group_vars/all/promeheus_hosts

diff --git a/group_vars/all/promeheus_hosts b/group_vars/all/promeheus_hosts
new file mode 100644
index 0000000..315c1e7
--- /dev/null
+++ b/group_vars/all/promeheus_hosts
@@ -0,0 +1,4 @@
+---
+prometheus_ips:
+  - 2a0f:d607:e:2::137
+  - 2001:7c7:2110::21
-- 
GitLab


From 0d60888ebf9f541b52ea12682ecc4c6486c0a8b4 Mon Sep 17 00:00:00 2001
From: Leonard Penzer <leonard@penzer.de>
Date: Sun, 15 Jun 2025 21:45:52 +0200
Subject: [PATCH 07/13] Make prometheus01 a revproxy

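The vhost terminates TLS via Let's Encrypt and proxies to the Prometheus
web UI on 127.0.0.1:9090. An illustrative smoke test:

  curl -sI https://prometheus01.vm.freifunk-stuttgart.de/ | head -n1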
---
 host_vars/prometheus01.vm.freifunk-stuttgart.de/revproxy | 7 +++++++
 host_vars/prometheus01.vm.freifunk-stuttgart.de/ssh      | 2 ++
 inventory/role_revproxy                                  | 1 +
 3 files changed, 10 insertions(+)
 create mode 100644 host_vars/prometheus01.vm.freifunk-stuttgart.de/revproxy
 create mode 100644 host_vars/prometheus01.vm.freifunk-stuttgart.de/ssh

diff --git a/host_vars/prometheus01.vm.freifunk-stuttgart.de/revproxy b/host_vars/prometheus01.vm.freifunk-stuttgart.de/revproxy
new file mode 100644
index 0000000..3dfde3e
--- /dev/null
+++ b/host_vars/prometheus01.vm.freifunk-stuttgart.de/revproxy
@@ -0,0 +1,7 @@
+---
+letsencrypt_account_email: "hostmaster@freifunk-stuttgart.de"
+letsencrypt_acme_directory: "https://acme-v02.api.letsencrypt.org/directory"
+vhosts:
+  - domain: prometheus01.vm.freifunk-stuttgart.de
+    backend: "http://127.0.0.1:9090/"
+    type: proxy
diff --git a/host_vars/prometheus01.vm.freifunk-stuttgart.de/ssh b/host_vars/prometheus01.vm.freifunk-stuttgart.de/ssh
new file mode 100644
index 0000000..a74af17
--- /dev/null
+++ b/host_vars/prometheus01.vm.freifunk-stuttgart.de/ssh
@@ -0,0 +1,2 @@
+---
+ssh_disable_password_login: yes
diff --git a/inventory/role_revproxy b/inventory/role_revproxy
index d7b6d87..2a9d66b 100644
--- a/inventory/role_revproxy
+++ b/inventory/role_revproxy
@@ -4,3 +4,4 @@ role_revproxy:
     revproxy-05.vm.freifunk-stuttgart.de:
     revproxy-03.vm.freifunk-stuttgart.de:
     revproxy-z10a.vm.freifunk-stuttgart.de:
+    prometheus01.vm.freifunk-stuttgart.de:
-- 
GitLab


From 990a099e95dc93d90dab9a0d24ea6225b5ddba85 Mon Sep 17 00:00:00 2001
From: Leonard Penzer <leonard@penzer.de>
Date: Sun, 13 Jul 2025 15:25:20 +0200
Subject: [PATCH 08/13] Add inventory/role_prometheus

---
 inventory/role_prometheus | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 inventory/role_prometheus

diff --git a/inventory/role_prometheus b/inventory/role_prometheus
new file mode 100644
index 0000000..e1da387
--- /dev/null
+++ b/inventory/role_prometheus
@@ -0,0 +1,5 @@
+---
+role_prometheus:
+  hosts:
+    prometheus01.vm.freifunk-stuttgart.de:
+    prometheus02.vm.freifunk-stuttgart.de:
-- 
GitLab


From 87852e3b1d4684ce86be4f3d8d498e55fe266af2 Mon Sep 17 00:00:00 2001
From: Leonard Penzer <leonard@penzer.de>
Date: Sun, 13 Jul 2025 15:28:06 +0200
Subject: [PATCH 09/13] Add prometheus-alertmanager

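The alertmanager configuration and rule files added here can be checked
before a reload with the stock tooling, e.g. (paths illustrative):

  amtool check-config /etc/prometheus/alertmanager.yml
  promtool check rules roles/prometheus/files/alerts/*.yml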
---
 roles/prometheus/files/alertmanager.yml       | 113 ++++++
 .../files/alerts/alert_healthchecks.yml       |   9 +
 .../files/alerts/alert_loadbalancing.yml      |  11 +
 .../files/alerts/blackbox-exporter.yml        |  68 ++++
 roles/prometheus/files/alerts/general.yml     |  39 ++
 .../prometheus/files/alerts/node-exporter.yml | 366 ++++++++++++++++++
 .../files/alerts/smartctl-exporter.yml        |  50 +++
 .../prometheus/templates/alertmanager.yml.j2  | 113 ++++++
 8 files changed, 769 insertions(+)
 create mode 100644 roles/prometheus/files/alertmanager.yml
 create mode 100644 roles/prometheus/files/alerts/alert_healthchecks.yml
 create mode 100644 roles/prometheus/files/alerts/alert_loadbalancing.yml
 create mode 100644 roles/prometheus/files/alerts/blackbox-exporter.yml
 create mode 100644 roles/prometheus/files/alerts/general.yml
 create mode 100644 roles/prometheus/files/alerts/node-exporter.yml
 create mode 100644 roles/prometheus/files/alerts/smartctl-exporter.yml
 create mode 100644 roles/prometheus/templates/alertmanager.yml.j2

diff --git a/roles/prometheus/files/alertmanager.yml b/roles/prometheus/files/alertmanager.yml
new file mode 100644
index 0000000..0cba29e
--- /dev/null
+++ b/roles/prometheus/files/alertmanager.yml
@@ -0,0 +1,113 @@
+# Sample configuration.
+# See https://prometheus.io/docs/alerting/configuration/ for documentation.
+
+global:
+  # The smarthost and SMTP sender used for mail notifications.
+  smtp_smarthost: 'localhost:25'
+  smtp_from: 'alertmanager@freifunk-stuttgart.de'
+
+# The directory from which notification templates are read.
+templates: 
+- '/etc/prometheus/alertmanager_templates/*.tmpl'
+
+# The root route on which each incoming alert enters.
+route:
+  # The labels by which incoming alerts are grouped together. For example,
+  # multiple alerts coming in for cluster=A and alertname=LatencyHigh would
+  # be batched into a single group.
+  group_by: ['alertname', 'cluster', 'service', 'severity']
+
+  # When a new group of alerts is created by an incoming alert, wait at
+  # least 'group_wait' to send the initial notification.
+  # This way ensures that you get multiple alerts for the same group that start
+  # firing shortly after another are batched together on the first 
+  # notification.
+  group_wait: 30s
+
+  # When the first notification was sent, wait 'group_interval' to send a batch
+  # of new alerts that started firing for that group.
+  group_interval: 5m
+
+  # If an alert has successfully been sent, wait 'repeat_interval' to resend it.
+  repeat_interval: 24h
+
+  # A default receiver
+  receiver: 'null'
+
+  routes:
+### leonard monitoring ###
+  - receiver: 'leonard_healthchecks'
+    repeat_interval: 5m
+    continue: false
+    #group_wait: 1s
+    #group_interval: 1m
+    matchers:
+      - alertname = SelfMonitoringAlwaysFiring 
+      - severity = info
+### leonard ###
+  - receiver: 'leonard_pushover'
+    repeat_interval: 4h
+    continue: true
+    matchers:
+      - severity =~ "warning|critical"
+  - receiver: 'leonard_selfhosted'
+    repeat_interval: 4h
+    continue: true
+    matchers:
+      - severity =~ "warning|critical"
+  - receiver: 'leonard_selfhosted'
+    repeat_interval: 24h
+    continue: true
+    matchers:
+      - severity = info
+### nrb ###
+  - receiver: 'nrb'
+    repeat_interval: 4h
+    continue: true
+    matchers:
+      - severity =~ "warning|critical"
+  - receiver: 'nrb'
+    repeat_interval: 24h
+    continue: true
+    matchers:
+      - severity =~ "info"
+
+
+# Inhibition rules allow muting a set of alerts while another alert is
+# firing.
+# We use this to mute any warning-level notifications if the same alert is 
+# already critical.
+inhibit_rules:
+- source_match:
+    severity: 'critical'
+  target_match:
+    severity: 'warning'
+  # Apply inhibition if the alertname is the same.
+  equal: ['alertname', 'cluster', 'service']
+
+
+receivers:
+#- name: 'ffs-gw-admins'
+#  email_configs:
+#  - to: 'gw-admins@freifunk-stuttgart.de'
+#  webhook_configs:
+#  - url: 'http://localhost:9199/alert'
+- name: 'leonard_healthchecks'
+  email_configs:
+  - to: 'f133a6c2-eea4-4723-ae0e-45859fa34471@healthchecks.selfhosted.de'
+- name: 'leonard_selfhosted'
+  email_configs:
+  - to: 'leonard@selfhosted.de'
+    send_resolved: true
+- name: 'null'
+  email_configs: []  # no mail sent
+- name: leonard_pushover
+  pushover_configs:
+    - token: aRd3o4cy1sEoPqXaoDnzHZsMgLLdWW
+      user_key: ueyxtapXg7Mw84vjsgQKLGZQkheNHd
+      priority: 0
+      send_resolved: true
+- name: 'nrb'
+  email_configs:
+  - to: 'ffs-alerts@nicoboehr.de'
+    send_resolved: true
diff --git a/roles/prometheus/files/alerts/alert_healthchecks.yml b/roles/prometheus/files/alerts/alert_healthchecks.yml
new file mode 100644
index 0000000..7741c9a
--- /dev/null
+++ b/roles/prometheus/files/alerts/alert_healthchecks.yml
@@ -0,0 +1,9 @@
+groups:
+- name: Selfmonitoring
+  rules:
+  - alert: 'SelfMonitoringAlwaysFiring'
+    expr: minute() >= 0
+    for: 1s
+    labels:
+      severity: info
+      application: leonard_healthchecks
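+# Dead man's switch: this alert fires permanently; the 5m repeat_interval
+# route in alertmanager.yml mails it to the healthchecks receiver, so a
+# silence on that side most likely means the alerting pipeline is down.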
diff --git a/roles/prometheus/files/alerts/alert_loadbalancing.yml b/roles/prometheus/files/alerts/alert_loadbalancing.yml
new file mode 100644
index 0000000..1fa7242
--- /dev/null
+++ b/roles/prometheus/files/alerts/alert_loadbalancing.yml
@@ -0,0 +1,11 @@
+groups:
+- name: lowpref
+  rules:
+  - alert: LowGatewayPreference
+    expr: gw_loadbalancing_pref{segment="1"} < 10
+    for: 1d
+    labels:
+      severity: warning
+    annotations:
+      summary: |
+        {{ .Labels.gateway }} has low gateway preference ({{ .Value }})
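+# gw_loadbalancing_pref is produced by the json_exporter gwpref module
+# (files/json-exporter.yml) and scraped via the json_gwpref job in
+# templates/prometheus.yml.j2.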
diff --git a/roles/prometheus/files/alerts/blackbox-exporter.yml b/roles/prometheus/files/alerts/blackbox-exporter.yml
new file mode 100644
index 0000000..b83797a
--- /dev/null
+++ b/roles/prometheus/files/alerts/blackbox-exporter.yml
@@ -0,0 +1,68 @@
+groups:
+
+- name: BlackboxExporter
+
+  rules:
+
+    - alert: BlackboxProbeFailed
+      expr: 'probe_success{job!~"node_pve01|blackbox_tls_pve01"} == 0'
+      for: 10m
+      labels:
+        severity: critical
+      annotations:
+        summary: Blackbox probe failed (instance {{ $labels.instance }})
+        description: "Probe failed\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: BlackboxConfigurationReloadFailure
+      expr: 'blackbox_exporter_config_last_reload_successful != 1'
+      for: 0m
+      labels:
+        severity: warning
+      annotations:
+        summary: Blackbox configuration reload failure (instance {{ $labels.instance }})
+        description: "Blackbox configuration reload failure\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: BlackboxSslCertificateWillExpireSoon
+      expr: '3 <= round((last_over_time(probe_ssl_earliest_cert_expiry[10m]) - time()) / 86400, 0.1) < 20'
+      for: 0m
+      labels:
+        severity: warning
+      annotations:
+        summary: Blackbox SSL certificate will expire soon (instance {{ $labels.instance }})
+        description: "SSL certificate expires in less than 20 days\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: BlackboxSslCertificateWillExpireSoon
+      expr: '0 <= round((last_over_time(probe_ssl_earliest_cert_expiry[10m]) - time()) / 86400, 0.1) < 3'
+      for: 0m
+      labels:
+        severity: critical
+      annotations:
+        summary: Blackbox SSL certificate will expire soon (instance {{ $labels.instance }})
+        description: "SSL certificate expires in less than 3 days\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: BlackboxSslCertificateExpired
+      expr: 'round((last_over_time(probe_ssl_earliest_cert_expiry[10m]) - time()) / 86400, 0.1) < 0'
+      for: 0m
+      labels:
+        severity: critical
+      annotations:
+        summary: Blackbox SSL certificate expired (instance {{ $labels.instance }})
+        description: "SSL certificate has expired already\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: BlackboxProbeSlowHttp
+      expr: 'avg_over_time(probe_http_duration_seconds[1m]) > 1'
+      for: 1m
+      labels:
+        severity: warning
+      annotations:
+        summary: Blackbox probe slow HTTP (instance {{ $labels.instance }})
+        description: "HTTP request took more than 1s\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: BlackboxProbeSlowPing
+      expr: 'avg_over_time(probe_icmp_duration_seconds[1m]) > 1'
+      for: 1m
+      labels:
+        severity: warning
+      annotations:
+        summary: Blackbox probe slow ping (instance {{ $labels.instance }})
+        description: "Blackbox ping took more than 1s\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
diff --git a/roles/prometheus/files/alerts/general.yml b/roles/prometheus/files/alerts/general.yml
new file mode 100644
index 0000000..267589c
--- /dev/null
+++ b/roles/prometheus/files/alerts/general.yml
@@ -0,0 +1,39 @@
+groups:
+- name: up_success
+  rules:
+  - alert: UP_FAILED
+    expr: up{ignore_down!="1"} < 1
+    for: 15m
+    labels:
+      severity: warning
+      application: prometheus
+    annotations:
+      summary: "Scrapes not functional"
+- name: reload_success
+  rules:
+  - alert: PROMETHEUS_RELOAD_FAILED
+    expr: prometheus_config_last_reload_successful < 1
+    for: 1m
+    labels:
+      severity: warning
+      application: prometheus
+    annotations:
+      summary: "Reload of prometheus config failed"
+  - alert: ALERTMANAGER_RELOAD_FAILED
+    expr: alertmanager_config_last_reload_successful < 1
+    for: 1m
+    labels:
+      severity: warning
+      application: prometheus
+    annotations:
+      summary: "Reload of alertmanager config failed"
+- name: probe_success
+  rules:
+  - alert: PROBE_FAILED_TCP
+    expr: probe_success{} < 1
+    for: 5m
+    labels:
+      severity: warning
+    annotations:
+      summary: "Blackbox probe failed"
+
diff --git a/roles/prometheus/files/alerts/node-exporter.yml b/roles/prometheus/files/alerts/node-exporter.yml
new file mode 100644
index 0000000..fe3e8d7
--- /dev/null
+++ b/roles/prometheus/files/alerts/node-exporter.yml
@@ -0,0 +1,366 @@
+groups:
+
+- name: NodeExporter
+
+  rules:
+
+    - alert: HostOutOfMemory
+      expr: '(node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 < 10) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
+      for: 2m
+      labels:
+        severity: warning
+      annotations:
+        summary: Host out of memory (instance {{ $labels.instance }})
+        description: "Node memory is filling up (< 10% left)\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: HostMemoryUnderMemoryPressure
+      expr: '(rate(node_vmstat_pgmajfault[1m]) > 2000) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
+      for: 30m
+      labels:
+        severity: warning
+      annotations:
+        summary: Host memory under memory pressure (instance {{ $labels.instance }})
+        description: "The node is under heavy memory pressure. High rate of major page faults\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+# Many containers
+#    - alert: HostMemoryIsUnderutilized
+#      expr: '(100 - (avg_over_time(node_memory_MemAvailable_bytes[30m]) / node_memory_MemTotal_bytes * 100) < 20) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
+#      for: 1w
+#      labels:
+#        severity: info
+#      annotations:
+#        summary: Host Memory is underutilized (instance {{ $labels.instance }})
+#        description: "Node memory is < 20% for 1 week. Consider reducing memory space. (instance {{ $labels.instance }})\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+#    - alert: HostUnusualNetworkThroughputIn
+#      expr: '(sum by (instance) (rate(node_network_receive_bytes_total[2m])) / 1024 / 1024 > 100) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
+#      for: 5m
+#      labels:
+#        severity: warning
+#      annotations:
+#        summary: Host unusual network throughput in (instance {{ $labels.instance }})
+#        description: "Host network interfaces are probably receiving too much data (> 100 MB/s)\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+#
+#    - alert: HostUnusualNetworkThroughputOut
+#      expr: '(sum by (instance) (rate(node_network_transmit_bytes_total[2m])) / 1024 / 1024 > 100) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
+#      for: 5m
+#      labels:
+#        severity: warning
+#      annotations:
+#        summary: Host unusual network throughput out (instance {{ $labels.instance }})
+#        description: "Host network interfaces are probably sending too much data (> 100 MB/s)\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+# All hosts during backup
+#    - alert: HostUnusualDiskReadRate
+#      expr: '(sum by (instance) (rate(node_disk_read_bytes_total[2m])) / 1024 / 1024 > 50) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
+#      for: 5m
+#      labels:
+#        severity: warning
+#      annotations:
+#        summary: Host unusual disk read rate (instance {{ $labels.instance }})
+#        description: "Disk is probably reading too much data (> 50 MB/s)\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+#  pve01 and backup01, maybe also pbs at some point?
+#    - alert: HostUnusualDiskWriteRate
+#      expr: '(sum by (instance) (rate(node_disk_written_bytes_total[2m])) / 1024 / 1024 > 50) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
+#      for: 2m
+#      labels:
+#        severity: warning
+#      annotations:
+#        summary: Host unusual disk write rate (instance {{ $labels.instance }})
+#        description: "Disk is probably writing too much data (> 50 MB/s)\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: HostOutOfDiskSpace
+      expr: '((node_filesystem_avail_bytes * 100) / node_filesystem_size_bytes < 10 and ON (instance, device, mountpoint) node_filesystem_readonly == 0) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
+      for: 2m
+      labels:
+        severity: warning
+      annotations:
+        summary: Host out of disk space (instance {{ $labels.instance }})
+        description: "Disk is almost full (< 10% left)\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: HostDiskWillFillIn24Hours
+      expr: '((node_filesystem_avail_bytes * 100) / node_filesystem_size_bytes < 10 and ON (instance, device, mountpoint) predict_linear(node_filesystem_avail_bytes{fstype!~"tmpfs"}[1h], 24 * 3600) < 0 and ON (instance, device, mountpoint) node_filesystem_readonly == 0) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
+      for: 2m
+      labels:
+        severity: warning
+      annotations:
+        summary: Host disk will fill in 24 hours (instance {{ $labels.instance }})
+        description: "Filesystem is predicted to run out of space within the next 24 hours at current write rate\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: HostOutOfInodes
+      expr: '(node_filesystem_files_free{fstype!="msdosfs"} / node_filesystem_files{fstype!="msdosfs"} * 100 < 10 and ON (instance, device, mountpoint) node_filesystem_readonly == 0) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
+      for: 2m
+      labels:
+        severity: warning
+      annotations:
+        summary: Host out of inodes (instance {{ $labels.instance }})
+        description: "Disk is almost running out of available inodes (< 10% left)\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: HostFilesystemDeviceError
+      expr: 'node_filesystem_device_error == 1'
+      for: 2m
+      labels:
+        severity: critical
+      annotations:
+        summary: Host filesystem device error (instance {{ $labels.instance }})
+        description: "{{ $labels.instance }}: Device error with the {{ $labels.mountpoint }} filesystem\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: HostInodesWillFillIn24Hours
+      expr: '(node_filesystem_files_free{fstype!="msdosfs"} / node_filesystem_files{fstype!="msdosfs"} * 100 < 10 and predict_linear(node_filesystem_files_free{fstype!="msdosfs"}[1h], 24 * 3600) < 0 and ON (instance, device, mountpoint) node_filesystem_readonly{fstype!="msdosfs"} == 0) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
+      for: 2m
+      labels:
+        severity: warning
+      annotations:
+        summary: Host inodes will fill in 24 hours (instance {{ $labels.instance }})
+        description: "Filesystem is predicted to run out of inodes within the next 24 hours at current write rate\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: HostUnusualDiskReadLatency
+      expr: '(rate(node_disk_read_time_seconds_total[1m]) / rate(node_disk_reads_completed_total[1m]) > 0.1 and rate(node_disk_reads_completed_total[1m]) > 0) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
+      for: 2m
+      labels:
+        severity: warning
+      annotations:
+        summary: Host unusual disk read latency (instance {{ $labels.instance }})
+        description: "Disk latency is growing (read operations > 100ms)\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: HostUnusualDiskWriteLatency
+      expr: '(rate(node_disk_write_time_seconds_total{nodename!="gw05n02"}[1m]) / rate(node_disk_writes_completed_total{nodename!="gw05n02"}[1m]) > 0.1 and rate(node_disk_writes_completed_total{nodename!="gw05n02"}[1m]) > 0) * on(instance) group_left (nodename) node_uname_info{nodename!="gw05n02"}'
+      for: 5m
+      labels:
+        severity: warning
+      annotations:
+        summary: Host unusual disk write latency (instance {{ $labels.instance }})
+        description: "Disk latency is growing (write operations > 100ms)\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+# Can occur on all containers during backup
+#    - alert: HostHighCpuLoad
+#      expr: '(sum by (instance) (avg by (mode, instance) (rate(node_cpu_seconds_total{mode!="idle"}[2m]))) > 0.8) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
+#      for: 10m
+#      labels:
+#        severity: warning
+#      annotations:
+#        summary: Host high CPU load (instance {{ $labels.instance }})
+#        description: "CPU load is > 80%\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+#    nas/nextcloud vm
+#    - alert: HostCpuIsUnderutilized
+#      expr: '(100 - (rate(node_cpu_seconds_total{mode="idle"}[30m]) * 100) < 20) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
+#      for: 1w
+#      labels:
+#        severity: info
+#      annotations:
+#        summary: Host CPU is underutilized (instance {{ $labels.instance }})
+#        description: "CPU load is < 20% for 1 week. Consider reducing the number of CPUs.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: HostCpuStealNoisyNeighbor
+      expr: '(avg by(instance) (rate(node_cpu_seconds_total{mode="steal"}[5m])) * 100 > 10) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
+      for: 10m
+      labels:
+        severity: warning
+      annotations:
+        summary: Host CPU steal noisy neighbor (instance {{ $labels.instance }})
+        description: "CPU steal is > 10%. A noisy neighbor is killing VM performances or a spot instance may be out of credit.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+# pve01 und backup01
+#    - alert: HostCpuHighIowait
+#      expr: '(avg by (instance) (rate(node_cpu_seconds_total{mode="iowait"}[5m])) * 100 > 10) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
+#      for: 0m
+#      labels:
+#        severity: warning
+#      annotations:
+#        summary: Host CPU high iowait (instance {{ $labels.instance }})
+#        description: "CPU iowait > 10%. A high iowait means that you are disk or network bound.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: HostUnusualDiskIo
+      expr: '(rate(node_disk_io_time_seconds_total[1m]) > 0.5) * on(instance) group_left (nodename) node_uname_info{nodename!="gw05n02"}'
+      for: 15m
+      labels:
+        severity: warning
+      annotations:
+        summary: Host unusual disk IO (instance {{ $labels.instance }})
+        description: "Time spent in IO is too high on {{ $labels.instance }}. Check storage for issues.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+# All hosts during backup
+#    - alert: HostContextSwitchingHigh
+#      expr: '(rate(node_context_switches_total[15m])/count without(mode,cpu) (node_cpu_seconds_total{mode="idle"}))
+#/
+#(rate(node_context_switches_total[1d])/count without(mode,cpu) (node_cpu_seconds_total{mode="idle"})) > 2
+#'
+#      for: 0m
+#      labels:
+#        severity: warning
+#      annotations:
+#        summary: Host context switching high (instance {{ $labels.instance }})
+#        description: "Context switching is growing on the node (twice the daily average during the last 15m)\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: HostSwapIsFillingUp
+      expr: '((1 - (node_memory_SwapFree_bytes / node_memory_SwapTotal_bytes)) * 100 > 80) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
+      for: 2m
+      labels:
+        severity: warning
+      annotations:
+        summary: Host swap is filling up (instance {{ $labels.instance }})
+        description: "Swap is filling up (>80%)\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: HostSystemdServiceCrashed
+      expr: '(node_systemd_unit_state{state="failed"} == 1) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
+      for: 10m
+      labels:
+        severity: warning
+      annotations:
+        summary: Host systemd service crashed (instance {{ $labels.instance }})
+        description: "systemd service crashed\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: CpuTooHot
+      expr: '((node_hwmon_temp_celsius * ignoring(label) group_left(instance, job, node, sensor) node_hwmon_sensor_label{label!="tctl",chip=~"pci0000:00_0000:00:18_3"} > 98)) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
+      for: 5m
+      labels:
+        severity: warning
+      annotations:
+        summary: CPU too hot (instance {{ $labels.instance }})
+        description: "CPU temperature above 98°C\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: HostPhysicalComponentTooHot
+      expr: '((node_hwmon_temp_celsius * ignoring(label) group_left(instance, job, node, sensor) node_hwmon_sensor_label{label!="tctl",chip!="pci0000:00_0000:00:18_3"} > 75)) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
+      for: 5m
+      labels:
+        severity: warning
+      annotations:
+        summary: Host physical component too hot (instance {{ $labels.instance }})
+        description: "Physical hardware component too hot\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: HostNodeOvertemperatureAlarm
+      expr: '((node_hwmon_temp_crit_alarm_celsius == 1) or (node_hwmon_temp_alarm == 1)) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
+      for: 0m
+      labels:
+        severity: critical
+      annotations:
+        summary: Host node overtemperature alarm (instance {{ $labels.instance }})
+        description: "Physical node temperature alarm triggered\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: HostRaidArrayGotInactive
+      expr: '(node_md_state{state="inactive"} > 0) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
+      for: 0m
+      labels:
+        severity: critical
+      annotations:
+        summary: Host RAID array got inactive (instance {{ $labels.instance }})
+        description: "RAID array {{ $labels.device }} is in a degraded state due to one or more disk failures. The number of spare drives is insufficient to fix the issue automatically.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: HostRaidDiskFailure
+      expr: '(node_md_disks{state="failed"} > 0) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
+      for: 2m
+      labels:
+        severity: warning
+      annotations:
+        summary: Host RAID disk failure (instance {{ $labels.instance }})
+        description: "At least one device in RAID array on {{ $labels.instance }} failed. Array {{ $labels.md_device }} needs attention and possibly a disk swap\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: HostKernelVersionDeviations
+      expr: '(count(sum(label_replace(node_uname_info, "kernel", "$1", "release", "([0-9]+.[0-9]+.[0-9]+).*")) by (kernel)) > 1) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
+      for: 6h
+      labels:
+        severity: warning
+      annotations:
+        summary: Host kernel version deviations (instance {{ $labels.instance }})
+        description: "Different kernel versions are running\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: HostOomKillDetected
+      expr: '(increase(node_vmstat_oom_kill[1m]) > 0) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
+      for: 0m
+      labels:
+        severity: warning
+      annotations:
+        summary: Host OOM kill detected (instance {{ $labels.instance }})
+        description: "OOM kill detected\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: HostEdacCorrectableErrorsDetected
+      expr: '(increase(node_edac_correctable_errors_total[1m]) > 0) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
+      for: 0m
+      labels:
+        severity: info
+      annotations:
+        summary: Host EDAC Correctable Errors detected (instance {{ $labels.instance }})
+        description: "Host {{ $labels.instance }} has had {{ printf \"%.0f\" $value }} correctable memory errors reported by EDAC in the last 5 minutes.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: HostEdacUncorrectableErrorsDetected
+      expr: '(node_edac_uncorrectable_errors_total > 0) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
+      for: 0m
+      labels:
+        severity: warning
+      annotations:
+        summary: Host EDAC Uncorrectable Errors detected (instance {{ $labels.instance }})
+        description: "Host {{ $labels.instance }} has had {{ printf \"%.0f\" $value }} uncorrectable memory errors reported by EDAC in the last 5 minutes.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: HostNetworkReceiveErrors
+      expr: '(rate(node_network_receive_errs_total[2m]) / rate(node_network_receive_packets_total[2m]) > 0.01) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
+      for: 2m
+      labels:
+        severity: warning
+      annotations:
+        summary: Host Network Receive Errors (instance {{ $labels.instance }})
+        description: "Host {{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf \"%.0f\" $value }} receive errors in the last two minutes.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: HostNetworkTransmitErrors
+      expr: '(rate(node_network_transmit_errs_total{device!~"^g09n03abbtesta|^g09n03amobrtra|^g09n03bbbtestb"}[2m]) / rate(node_network_transmit_packets_total[2m]) > 0.01) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
+      for: 2m
+      labels:
+        severity: warning
+      annotations:
+        summary: Host Network Transmit Errors (instance {{ $labels.instance }})
+        description: "Host {{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf \"%.0f\" $value }} transmit errors in the last two minutes.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: HostNetworkInterfaceSaturated
+      expr: '((rate(node_network_receive_bytes_total{device!~"^bb.*|^tap.*|^vnet.*|^veth.*|^tun.*|^vp.*"}[1m]) + rate(node_network_transmit_bytes_total{device!~"^bb.*|^tap.*|^vnet.*|^veth.*|^tun.*|^vp.*"}[1m])) / node_network_speed_bytes{device!~"^bb.*|^tap.*|^vnet.*|^veth.*|^tun.*|^vp.*"} > 0.8 < 10000) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
+      for: 5m
+      labels:
+        severity: warning
+      annotations:
+        summary: Host Network Interface Saturated (instance {{ $labels.instance }})
+        description: "The network interface \"{{ $labels.device }}\" on \"{{ $labels.instance }}\" is getting overloaded.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: HostNetworkBondDegraded
+      expr: '((node_bonding_active - node_bonding_slaves) != 0) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
+      for: 2m
+      labels:
+        severity: warning
+      annotations:
+        summary: Host Network Bond Degraded (instance {{ $labels.instance }})
+        description: "Bond \"{{ $labels.device }}\" degraded on \"{{ $labels.instance }}\".\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: HostConntrackLimit
+      expr: '(node_nf_conntrack_entries / node_nf_conntrack_entries_limit > 0.8) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
+      for: 5m
+      labels:
+        severity: warning
+      annotations:
+        summary: Host conntrack limit (instance {{ $labels.instance }})
+        description: "The number of conntrack is approaching limit\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: HostClockSkew
+      expr: '((node_timex_offset_seconds > 0.05 and deriv(node_timex_offset_seconds[5m]) >= 0) or (node_timex_offset_seconds < -0.05 and deriv(node_timex_offset_seconds[5m]) <= 0)) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
+      for: 10m
+      labels:
+        severity: warning
+      annotations:
+        summary: Host clock skew (instance {{ $labels.instance }})
+        description: "Clock skew detected. Clock is out of sync. Ensure NTP is configured correctly on this host.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: HostClockNotSynchronising
+      expr: '(min_over_time(node_timex_sync_status[1m]) == 0 and node_timex_maxerror_seconds >= 16) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
+      for: 2m
+      labels:
+        severity: warning
+      annotations:
+        summary: Host clock not synchronising (instance {{ $labels.instance }})
+        description: "Clock not synchronising. Ensure NTP is configured on this host.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: HostRequiresReboot
+      expr: '(node_reboot_required > 0) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
+      for: 4h
+      labels:
+        severity: info
+      annotations:
+        summary: Host requires reboot (instance {{ $labels.instance }})
+        description: "{{ $labels.instance }} requires a reboot.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
diff --git a/roles/prometheus/files/alerts/smartctl-exporter.yml b/roles/prometheus/files/alerts/smartctl-exporter.yml
new file mode 100644
index 0000000..1946c38
--- /dev/null
+++ b/roles/prometheus/files/alerts/smartctl-exporter.yml
@@ -0,0 +1,50 @@
+groups:
+
+- name: SmartctlExporter
+
+  rules:
+
+    - alert: SmartDeviceTemperatureWarning
+      expr: 'smartctl_device_temperature > 60'
+      for: 2m
+      labels:
+        severity: warning
+      annotations:
+        summary: Smart device temperature warning (instance {{ $labels.instance }})
+        description: "Device temperature  warning (instance {{ $labels.instance }})\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: SmartDeviceTemperatureCritical
+      expr: 'smartctl_device_temperature > 80'
+      for: 2m
+      labels:
+        severity: critical
+      annotations:
+        summary: Smart device temperature critical (instance {{ $labels.instance }})
+        description: "Device temperature critical  (instance {{ $labels.instance }})\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: SmartCriticalWarning
+      expr: 'smartctl_device_critical_warning > 0'
+      for: 15m
+      labels:
+        severity: critical
+      annotations:
+        summary: Smart critical warning (instance {{ $labels.instance }})
+        description: "device has critical warning (instance {{ $labels.instance }})\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: SmartMediaErrors
+      expr: 'smartctl_device_media_errors > 0'
+      for: 15m
+      labels:
+        severity: critical
+      annotations:
+        summary: Smart media errors (instance {{ $labels.instance }})
+        description: "device has media errors (instance {{ $labels.instance }})\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    - alert: SmartNvmeWearoutIndicator
+      expr: 'smartctl_device_available_spare{device=~"nvme.*"} < smartctl_device_available_spare_threshold{device=~"nvme.*"}'
+      for: 15m
+      labels:
+        severity: critical
+      annotations:
+        summary: Smart NVME Wearout Indicator (instance {{ $labels.instance }})
+        description: "NVMe device is wearing out (instance {{ $labels.instance }})\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
diff --git a/roles/prometheus/templates/alertmanager.yml.j2 b/roles/prometheus/templates/alertmanager.yml.j2
new file mode 100644
index 0000000..0cba29e
--- /dev/null
+++ b/roles/prometheus/templates/alertmanager.yml.j2
@@ -0,0 +1,113 @@
+# Sample configuration.
+# See https://prometheus.io/docs/alerting/configuration/ for documentation.
+
+global:
+  # The smarthost and SMTP sender used for mail notifications.
+  smtp_smarthost: 'localhost:25'
+  smtp_from: 'alertmanager@freifunk-stuttgart.de'
+
+# The directory from which notification templates are read.
+templates: 
+- '/etc/prometheus/alertmanager_templates/*.tmpl'
+
+# The root route on which each incoming alert enters.
+route:
+  # The labels by which incoming alerts are grouped together. For example,
+  # multiple alerts coming in for cluster=A and alertname=LatencyHigh would
+  # be batched into a single group.
+  group_by: ['alertname', 'cluster', 'service', 'severity']
+
+  # When a new group of alerts is created by an incoming alert, wait at
+  # least 'group_wait' to send the initial notification.
+  # This ensures that multiple alerts for the same group that start firing
+  # shortly after one another are batched together in the first
+  # notification.
+  group_wait: 30s
+
+  # When the first notification was sent, wait 'group_interval' to send a batch
+  # of new alerts that started firing for that group.
+  group_interval: 5m
+
+  # If an alert has successfully been sent, wait 'repeat_interval' to
+  # resend it.
+  repeat_interval: 24h
+
+  # A default receiver
+  receiver: 'null'
+
+  routes:
+### leonard monitoring ###
+  - receiver: 'leonard_healthchecks'
+    repeat_interval: 5m
+    continue: false
+    #group_wait: 1s
+    #group_interval: 1m
+    matchers:
+      - alertname = SelfMonitoringAlwaysFiring 
+      - severity = info
+### leonard ###
+  - receiver: 'leonard_pushover'
+    repeat_interval: 4h
+    continue: true
+    matchers:
+      - severity =~ "warning|critical"
+  - receiver: 'leonard_selfhosted'
+    repeat_interval: 4h
+    continue: true
+    matchers:
+      - severity =~ "warning|critical"
+  - receiver: 'leonard_selfhosted'
+    repeat_interval: 24h
+    continue: true
+    matchers:
+      - severity = info
+### nrb ###
+  - receiver: 'nrb'
+    repeat_interval: 4h
+    continue: true
+    matchers:
+      - severity =~ "warning|critical"
+  - receiver: 'nrb'
+    repeat_interval: 24h
+    continue: true
+    matchers:
+      - severity =~ "info"
+
+
+# Inhibition rules allow to mute a set of alerts given that another alert is
+# firing.
+# We use this to mute any warning-level notifications if the same alert is 
+# already critical.
+inhibit_rules:
+- source_match:
+    severity: 'critical'
+  target_match:
+    severity: 'warning'
+  # Apply inhibition if the alertname is the same.
+  equal: ['alertname', 'cluster', 'service']
+
+
+receivers:
+#- name: 'ffs-gw-admins'
+#  email_configs:
+#  - to: 'gw-admins@freifunk-stuttgart.de'
+#  webhook_configs:
+#  - url: 'http://localhost:9199/alert'
+- name: 'leonard_healthchecks'
+  email_configs:
+  - to: 'f133a6c2-eea4-4723-ae0e-45859fa34471@healthchecks.selfhosted.de'
+- name: 'leonard_selfhosted'
+  email_configs:
+  - to: 'leonard@selfhosted.de'
+    send_resolved: true
+- name: 'null'
+  email_configs: []  # no mail sent
+- name: leonard_pushover
+  pushover_configs:
+    - token: aRd3o4cy1sEoPqXaoDnzHZsMgLLdWW
+      user_key: ueyxtapXg7Mw84vjsgQKLGZQkheNHd
+      priority: 0
+      send_resolved: true
+- name: 'nrb'
+  email_configs:
+  - to: 'ffs-alerts@nicoboehr.de'
+    send_resolved: true
-- 
GitLab


From f4074fe8bc48d4bb016f7d6e236af3f7bc9e620b Mon Sep 17 00:00:00 2001
From: Leonard Penzer <leonard@penzer.de>
Date: Sun, 13 Jul 2025 15:50:14 +0200
Subject: [PATCH 10/13] Copy alertmanager.yml and alerting rules

---
 roles/prometheus/tasks/main.yml | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/roles/prometheus/tasks/main.yml b/roles/prometheus/tasks/main.yml
index 1be87d7..677d739 100644
--- a/roles/prometheus/tasks/main.yml
+++ b/roles/prometheus/tasks/main.yml
@@ -30,6 +30,22 @@
   notify:
     - Restart prometheus
 
+- name: Copy all alerting rules
+  copy:
+    src: alerts/
+    dest: /etc/prometheus/alerts
+    mode: preserve
+    owner: root
+    group: root
+
+- name: Copy alertmanager.yml
+  copy:
+    src: alertmanager.yml
+    dest: /etc/prometheus/
+    mode: preserve
+    owner: root
+    group: root
+
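+# Hedged sketch (not part of this patch): the copied rules could be
+# validated with promtool, which ships with the Debian prometheus package:
+#
+# - name: Validate alerting rules
+#   shell: promtool check rules /etc/prometheus/alerts/*.yml
+#   changed_when: false
+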
 - name: Check if client-cert exists
   stat:
     path: "/etc/prometheus/ssl/client.cert.pem"
-- 
GitLab


From a881f696c02f7ba286d671b8d0f305456cf87689 Mon Sep 17 00:00:00 2001
From: Leonard Penzer <leonard@penzer.de>
Date: Sun, 13 Jul 2025 15:38:49 +0200
Subject: [PATCH 11/13] Add prometheus reload handler and use it where
 appropriate

---
 roles/prometheus/handlers/main.yml |  5 ++++-
 roles/prometheus/tasks/main.yml    | 10 +++++++---
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/roles/prometheus/handlers/main.yml b/roles/prometheus/handlers/main.yml
index 8a8df0d..a05d19c 100644
--- a/roles/prometheus/handlers/main.yml
+++ b/roles/prometheus/handlers/main.yml
@@ -2,4 +2,7 @@
   service:
     name: prometheus
     state: restarted
-
+- name: Reload prometheus
+  service:
+    name: prometheus
+    state: reloaded
diff --git a/roles/prometheus/tasks/main.yml b/roles/prometheus/tasks/main.yml
index 677d739..75f0d17 100644
--- a/roles/prometheus/tasks/main.yml
+++ b/roles/prometheus/tasks/main.yml
@@ -14,21 +14,21 @@
     src: node_exporter_targets.yml.j2
     dest: /etc/prometheus/node_exporter_targets.yml
   notify:
-    - Restart prometheus
+    - Reload prometheus
 
 - name: Create kea_exporter_targets.yml file
   template:
     src: kea_exporter_targets.yml.j2
     dest: /etc/prometheus/kea_exporter_targets.yml
   notify:
-    - Restart prometheus
+    - Reload prometheus
 
 - name: Create bird_exporter_targets.yml file
   template:
     src: bird_exporter_targets.yml.j2
     dest: /etc/prometheus/bird_exporter_targets.yml
   notify:
-    - Restart prometheus
+    - Reload prometheus
 
 - name: Copy all alerting rules
   copy:
@@ -37,6 +37,8 @@
     mode: preserve
     owner: root
     group: root
+  notify:
+    - Reload prometheus
 
 - name: Copy alertmanager.yml
   copy:
@@ -175,3 +177,5 @@
       - yamllint
     state: present
     install_recommends: false
+  notify:
+    - Reload prometheus
-- 
GitLab


From d382bb60dee6befd7ec3115ef71e4cace2a77056 Mon Sep 17 00:00:00 2001
From: Leonard Penzer <leonard@penzer.de>
Date: Sun, 13 Jul 2025 15:30:17 +0200
Subject: [PATCH 12/13] Install json-exporter on prometheus-hosts

---
 roles/prometheus/files/json-exporter.service | 16 +++++++
 roles/prometheus/files/json-exporter.yml     |  9 ++++
 roles/prometheus/tasks/main.yml              | 50 ++++++++++++++++++++
 3 files changed, 75 insertions(+)
 create mode 100644 roles/prometheus/files/json-exporter.service
 create mode 100644 roles/prometheus/files/json-exporter.yml

diff --git a/roles/prometheus/files/json-exporter.service b/roles/prometheus/files/json-exporter.service
new file mode 100644
index 0000000..74716db
--- /dev/null
+++ b/roles/prometheus/files/json-exporter.service
@@ -0,0 +1,16 @@
+[Unit]
+Description=Prometheus Json Exporter
+Wants=network-online.target
+After=network-online.target
+
+[Service]
+User=prometheus
+Group=prometheus
+StandardError=syslog
+Restart=on-failure
+KillSignal=SIGQUIT
+ExecStart=/opt/json-exporter/json_exporter --web.listen-address=localhost:7979 --config.file /etc/prometheus/json-exporter.yml
+
+[Install]
+WantedBy=multi-user.target
diff --git a/roles/prometheus/files/json-exporter.yml b/roles/prometheus/files/json-exporter.yml
new file mode 100644
index 0000000..62c4395
--- /dev/null
+++ b/roles/prometheus/files/json-exporter.yml
@@ -0,0 +1,9 @@
+---
+modules:
+  gwpref:
+    metrics:
+    - name: gw_loadbalancing_pref
+      help: "Current Preference. Range -inf to 100, where 100 is most willing to accept more nodes."
+      path: '{ .segments.1.preference }'
+      labels:
+        segment: '1'
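+# The JSONPath above assumes gwstatus.json is shaped roughly like
+#   {"segments": {"1": {"preference": 42}}}
+# (illustrative payload, not captured from a real gateway).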
diff --git a/roles/prometheus/tasks/main.yml b/roles/prometheus/tasks/main.yml
index 75f0d17..28f7ef5 100644
--- a/roles/prometheus/tasks/main.yml
+++ b/roles/prometheus/tasks/main.yml
@@ -179,3 +179,53 @@
     install_recommends: false
   notify:
     - Reload prometheus
+
+- name: Create target directory
+  file:
+    path: /opt/json-exporter
+    state: directory
+    mode: '0755'
+
+- name: Download json_exporter
+  get_url:
+    url: https://github.com/prometheus-community/json_exporter/releases/download/v0.7.0/json_exporter-0.7.0.linux-amd64.tar.gz
+    dest: /opt/json-exporter/json_exporter.tar.gz
+    mode: '0644'
+
+- name: Extract json_exporter
+  unarchive:
+    src: /opt/json-exporter/json_exporter.tar.gz
+    dest: /opt/json-exporter
+    remote_src: yes
+
+- name: Move binary to /opt/json-exporter
+  command: mv /opt/json-exporter/json_exporter-0.7.0.linux-amd64/json_exporter /opt/json-exporter/json_exporter
+  args:
+    creates: /opt/json-exporter/json_exporter
+
+- name: Make json_exporter executable
+  file:
+    path: /opt/json-exporter/json_exporter
+    mode: '0755'
+
+- name: Copy json-exporter.yml to /etc/prometheus
+  copy:
+    src: files/json-exporter.yml
+    dest: /etc/prometheus/json-exporter.yml
+    mode: '0644'
+
+- name: Copy systemd service file for json_exporter
+  copy:
+    src: files/json-exporter.service
+    dest: /etc/systemd/system/json-exporter.service
+    mode: '0644'
+
+- name: Reload systemd to pick up new unit files
+  systemd:
+    daemon_reload: yes
+
+- name: Enable and start json_exporter
+  systemd:
+    name: json-exporter
+    enabled: yes
+    state: started
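+
+# Manual smoke test (hedged example; the target URL pattern is the one used
+# by the json_gwpref job in templates/prometheus.yml.j2):
+#   curl 'http://localhost:7979/probe?module=gwpref&target=http://<gw-host>/data/gwstatus.json'
+# should return a gw_loadbalancing_pref sample.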
-- 
GitLab


From 666ac0cd441528f9db17229992a21c6dd119ab49 Mon Sep 17 00:00:00 2001
From: Leonard Penzer <leonard@penzer.de>
Date: Sun, 13 Jul 2025 15:41:58 +0200
Subject: [PATCH 13/13] Generate prometheus.yml from template

---
 roles/prometheus/tasks/main.yml              |   7 +
 roles/prometheus/templates/prometheus.yml.j2 | 172 +++++++++++++++++++
 2 files changed, 179 insertions(+)
 create mode 100644 roles/prometheus/templates/prometheus.yml.j2

diff --git a/roles/prometheus/tasks/main.yml b/roles/prometheus/tasks/main.yml
index 28f7ef5..dbc59fb 100644
--- a/roles/prometheus/tasks/main.yml
+++ b/roles/prometheus/tasks/main.yml
@@ -9,6 +9,13 @@
     msg: "This role must only be run on prometheus hosts"
   when: not is_prometheus | default(false)
 
+- name: Create prometheus.yml file
+  template:
+    src: prometheus.yml.j2
+    dest: /etc/prometheus/prometheus.yml
+  notify:
+    - Reload prometheus
+
 - name: Create node_exporter_targets.yml file
   template:
     src: node_exporter_targets.yml.j2
diff --git a/roles/prometheus/templates/prometheus.yml.j2 b/roles/prometheus/templates/prometheus.yml.j2
new file mode 100644
index 0000000..2700986
--- /dev/null
+++ b/roles/prometheus/templates/prometheus.yml.j2
@@ -0,0 +1,172 @@
+---
+global:
+  scrape_interval: 15s
+  evaluation_interval: 15s
+  external_labels:
+    monitor: '{{ inventory_hostname.split('.')[0] }}'
+
+alerting:
+  alertmanagers:
+    - static_configs:
+        - targets: ['localhost:9093']
+
+# Load rules once and periodically evaluate them
+# according to the global 'evaluation_interval'.
+rule_files:
+  - 'alerts/*.yml'
+
+scrape_configs:
+  - job_name: 'prometheus'
+    static_configs:
+{% for host in groups['role_prometheus'] %}
+      - targets: ['{{ hostvars[host].ansible_host | default(host) }}:9998']
+        labels:
+          instance: '{{ (hostvars[host].ansible_host | default(host)).split('.')[0] }}'
+{% endfor %}
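+# Rendered example, assuming a role_prometheus member
+# monitor01.vm.freifunk-stuttgart.de with no ansible_host override
+# (hypothetical membership):
+#   - targets: ['monitor01.vm.freifunk-stuttgart.de:9998']
+#     labels:
+#       instance: 'monitor01'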
+    scheme: https
+    metrics_path: /proxy
+    params:
+      module:
+        - prometheus
+    tls_config:
+      ca_file: /etc/prometheus/ssl/ca_cert.pem
+      cert_file: /etc/prometheus/ssl/client.cert.pem
+      key_file: /etc/prometheus/ssl/client.key.pem
+      insecure_skip_verify: false  # only true for debugging
+
+  - job_name: 'alertmanager'
+    static_configs:
+      - targets: ['localhost:9093']
+
+  - job_name: 'collectd'
+    scrape_interval: 60s
+    static_configs:
+      - targets:
+          - 'yanic01.vm.freifunk-stuttgart.de:9998'
+        labels:
+          instance: "10.0.3.236:9104"
+    scheme: https
+    metrics_path: /proxy
+    params:
+      module:
+        - respondd
+    tls_config:
+      ca_file: /etc/prometheus/ssl/ca_cert.pem
+      cert_file: /etc/prometheus/ssl/client.cert.pem
+      key_file: /etc/prometheus/ssl/client.key.pem
+      insecure_skip_verify: false  # only true for debugging
+
+  - job_name: 'bird'
+    scrape_interval: 15s
+    file_sd_configs:
+      - files:
+          - /etc/prometheus/bird_exporter_targets.yml
+    scheme: https
+    metrics_path: /proxy
+    params:
+      module:
+        - bird
+    tls_config:
+      ca_file: /etc/prometheus/ssl/ca_cert.pem
+      cert_file: /etc/prometheus/ssl/client.cert.pem
+      key_file: /etc/prometheus/ssl/client.key.pem
+      insecure_skip_verify: false  # only true for debugging
+
+  - job_name: monitor01_blackbox
+    scrape_interval: 15s
+    metrics_path: /proxy
+    scheme: https
+    tls_config:
+      ca_file: /etc/prometheus/ssl/ca_cert.pem
+      cert_file: /etc/prometheus/ssl/client.cert.pem
+      key_file: /etc/prometheus/ssl/client.key.pem
+      insecure_skip_verify: false  # only true for debugging
+    params:
+      module:
+        - blackbox
+        - icmp
+    static_configs:
+      - targets:
+          - 10.190.0.93
+          - 10.190.176.93
+    relabel_configs:
+      - source_labels: [__address__]
+        target_label: __param_target
+      - source_labels: [__param_target]
+        target_label: instance
+      - target_label: __address__
+        replacement: monitor01.vm.freifunk-stuttgart.de:9998
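+      # Relabel example: target 10.190.0.93 is scraped as
+      # https://monitor01.vm.freifunk-stuttgart.de:9998/proxy?module=blackbox&module=icmp&target=10.190.0.93
+      # while the instance label stays 10.190.0.93.
+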
+  - job_name: 'node'
+    scrape_interval: 15s
+    file_sd_configs:
+      - files:
+          - /etc/prometheus/node_exporter_targets.yml
+    scheme: https
+    metrics_path: /proxy
+    params:
+      module:
+        - node
+    tls_config:
+      ca_file: /etc/prometheus/ssl/ca_cert.pem
+      cert_file: /etc/prometheus/ssl/client.cert.pem
+      key_file: /etc/prometheus/ssl/client.key.pem
+      insecure_skip_verify: false  # only true for debugging
+
+  - job_name: 'kea'
+    scrape_interval: 15s
+    file_sd_configs:
+      - files:
+          - /etc/prometheus/kea_exporter_targets.yml
+    scheme: https
+    metrics_path: /proxy
+    params:
+      module:
+        - kea
+    tls_config:
+      ca_file: /etc/prometheus/ssl/ca_cert.pem
+      cert_file: /etc/prometheus/ssl/client.cert.pem
+      key_file: /etc/prometheus/ssl/client.key.pem
+      insecure_skip_verify: false  # only true for debugging
+
+      # Re-activate when fastd-exporter is installed on gws
+      #  - job_name: bb_fastd
+      #    scrape_interval: 15s
+      #    file_sd_configs:
+      #    - files:
+      #      - 'target-fastd.json'
+
+  - job_name: json_gwpref
+    metrics_path: /probe
+    params:
+      module: [gwpref]
+    static_configs:
+{% for host in groups['role_gw'] %}
+      - targets: ['http://{{ hostvars[host].ansible_host | default(host) }}/data/gwstatus.json']
+        labels:
+          instance: '{{ (hostvars[host].ansible_host | default(host)).split('.')[0] }}'
+{% endfor %}
+    relabel_configs:
+      - source_labels: [__address__]
+        target_label: __param_target
+      - target_label: __address__
+        ## Location of the json exporter's real <hostname>:<port>
+        replacement: localhost:7979
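+        # e.g. __param_target=http://<gw-host>/data/gwstatus.json, fetched via
+        # http://localhost:7979/probe?module=gwpref&target=<that URL>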
+
+  - job_name: 'federate'
+    scrape_interval: 15s
+    honor_labels: true
+    metrics_path: '/federate'
+    params:
+      'match[]':
+        - '{job="blackbox"}'
+        - '{job="blackbox-5g"}'
+        - '{job="blackbox-starlink"}'
+        - '{job="zyxel"}'
+        - '{job="node"}'
+        - '{job="snmp"}'
+        - '{job="unifi"}'
+    static_configs:
+      - targets:
+          - '10.191.255.172:9090'
+        labels:
+          ignore_down: "1"
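+          # ignore_down: "1" exempts this federate target from the UP_FAILED
+          # alert in alerts/general.yml (expr: up{ignore_down!="1"} < 1).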
-- 
GitLab