From 5390f3ac3c5e5aec516031e0e9abeaa14e1abc09 Mon Sep 17 00:00:00 2001
From: mwiegand <mwiegand@seibert-media.net>
Date: Sat, 30 Oct 2021 18:57:06 +0200
Subject: [PATCH] smartctl

---
 bundles/hdparm/items.py         | 18 ------------------
 bundles/hdparm/metadata.py      |  8 --------
 bundles/smartctl/README.md      | 15 +++++++++++++++
 bundles/smartctl/files/hdd_temp | 18 ++++++++++++++++++
 bundles/smartctl/items.py       | 25 +++++++++++++++++++++++++
 bundles/smartctl/metadata.py    | 27 +++++++++++++++++++++++++++
 data/grafana/rows/smartctl.py   | 17 +++++++++++++++++
 nodes/home.backups.py           | 10 +++++-----
 nodes/home.server.py            |  3 ++-
 nodes/wb.offsite-backups.py     |  6 ++++++
 10 files changed, 115 insertions(+), 32 deletions(-)
 delete mode 100644 bundles/hdparm/items.py
 delete mode 100644 bundles/hdparm/metadata.py
 create mode 100644 bundles/smartctl/README.md
 create mode 100644 bundles/smartctl/files/hdd_temp
 create mode 100644 bundles/smartctl/items.py
 create mode 100644 bundles/smartctl/metadata.py
 create mode 100644 data/grafana/rows/smartctl.py

diff --git a/bundles/hdparm/items.py b/bundles/hdparm/items.py
deleted file mode 100644
index c6c4d16..0000000
--- a/bundles/hdparm/items.py
+++ /dev/null
@@ -1,18 +0,0 @@
-previous_action = []
-
-for device, options in node.metadata.get('hdparm').items():
-    for option, value in options.items():
-        if option == 'power_management':
-            name = f'hdparm_{option}_{device}'
-            actions[name] = {
-                'command': f'hdparm -B {value} "{device}"',
-                'unless': f'hdparm -B "{device}" | grep APM_level | cut -d= -f2 | xargs | grep -q "^{value}$"',
-                'needs': [
-                    'pkg_apt:hdparm',
-                    *previous_action,
-                ],
-            }
-        else:
-            raise ValueError(f'unsupported hdparm option: {option}')
-            
-    previous_action = [f'action:{name}']
diff --git a/bundles/hdparm/metadata.py b/bundles/hdparm/metadata.py
deleted file mode 100644
index 313cec5..0000000
--- a/bundles/hdparm/metadata.py
+++ /dev/null
@@ -1,8 +0,0 @@
-defaults = {
-    'apt': {
-        'packages': {
-            'hdparm': {},
-        },
-    },
-    'hdparm': {},
-}
diff --git a/bundles/smartctl/README.md b/bundles/smartctl/README.md
new file mode 100644
index 0000000..1df7adc
--- /dev/null
+++ b/bundles/smartctl/README.md
@@ -0,0 +1,15 @@
+# state
+smartctl -n idle /dev/sda
+
+# temp
+smartctl -n idle -A /dev/sdb --json=c | jq .temperature.current
+
+# apm
+smartctl --get apm /dev/sdb --json=c | jq .ata_apm.level
+smartctl --set apm,20 /dev/sdb --json=c
+
+# power state
+smartctl -n idle /dev/sdb
+
+# devices
+smartctl --scan | cut -d' ' -f1
diff --git a/bundles/smartctl/files/hdd_temp b/bundles/smartctl/files/hdd_temp
new file mode 100644
index 0000000..01a1cd2
--- /dev/null
+++ b/bundles/smartctl/files/hdd_temp
@@ -0,0 +1,18 @@
+#!/bin/bash
+# Print one influx line-protocol point per disk for telegraf's exec input.
+# This file is a mako template: the host tag is filled in when it is rendered.
+
+for device in $(smartctl --scan | cut -d' ' -f1)
+do
+  # -n idle: per smartctl(8), drives idle or in a lower power mode are not queried
+  temp=$(smartctl -n idle -A --json=c "$device" | jq .temperature.current)
+
+  # report only integer readings (optional minus sign followed by digits)
+  if [[ $temp == ?(-)+([0-9]) ]]
+  then
+    echo "smartctl,host=${node.name},device=$device temperature=$temp $(date --utc +%s%N)"
+  else
+    # "null": hdd might be sleeping; anything else: hdd might be unsupported
+    continue
+  fi
+done
diff --git a/bundles/smartctl/items.py b/bundles/smartctl/items.py
new file mode 100644
index 0000000..5922f7d
--- /dev/null
+++ b/bundles/smartctl/items.py
@@ -0,0 +1,25 @@
+# Telegraf check script; content_type mako renders the node name into it.
+files = {
+    '/usr/local/share/icinga/plugins/hdd_temp': {
+        'content_type': 'mako',
+        'mode': '0755',
+    },
+}
+
+# Every action 'needs' the one created before it, forming a single chain.
+previous_action = []
+
+# Metadata shape: {device path: {option: value}}; only 'apm' is supported.
+for device, conf in node.metadata.get('smartctl').items():
+    for option, value in conf.items():
+        if option != 'apm':
+            raise ValueError(f'{node.name}: unknown smartctl option: {option}')
+
+        action_name = f'smartctl_apm_{device}'
+        actions[action_name] = {
+            'command': f'smartctl --set apm,{value} "{device}"',
+            # skip the action when the drive already reports the wanted level
+            'unless': f'smartctl --get apm "{device}" --json=c | jq .ata_apm.level | grep -q "^{value}$"',
+            'needs': ['pkg_apt:smartmontools', *previous_action],
+        }
+        previous_action = [f'action:{action_name}']
diff --git a/bundles/smartctl/metadata.py b/bundles/smartctl/metadata.py
new file mode 100644
index 0000000..3079b77
--- /dev/null
+++ b/bundles/smartctl/metadata.py
@@ -0,0 +1,27 @@
+# Defaults for the smartctl bundle: package, grafana row and telegraf input.
+defaults = {
+    'apt': {
+        'packages': {
+            'smartmontools': {},
+        },
+    },
+    'grafana_rows': {
+        'smartctl',
+    },
+    'smartctl': {},
+    'telegraf': {
+        'config': {
+            'inputs': {
+                'exec': [{
+                    # run via sudo; the matching entry is under 'sudoers' below
+                    'commands': ['sudo /usr/local/share/icinga/plugins/hdd_temp'],
+                    'data_format': 'influx',
+                    'interval': '60s',
+                }],
+            },
+        },
+    },
+    'sudoers': {
+        'telegraf': ['/usr/local/share/icinga/plugins/hdd_temp'],
+    },
+}
diff --git a/data/grafana/rows/smartctl.py b/data/grafana/rows/smartctl.py
new file mode 100644
index 0000000..7c5fae5
--- /dev/null
+++ b/data/grafana/rows/smartctl.py
@@ -0,0 +1,17 @@
+{
+    # single panel; measurement and field match what hdd_temp emits
+    'temperature': {
+        'queries': {
+            # NOTE(review): key 'usage' looks copied from another row - confirm
+            'usage': {
+                'filters': {
+                    '_measurement': 'smartctl',
+                    '_field': ['temperature'],
+                },
+                'function': 'mean',
+            },
+        },
+        'min': 0,
+        'unit': 'degrees',
+    },
+}
diff --git a/nodes/home.backups.py b/nodes/home.backups.py
index cf01408..d0b0087 100644
--- a/nodes/home.backups.py
+++ b/nodes/home.backups.py
@@ -6,9 +6,9 @@
         'monitored',
     ],
     'bundles': [
+        'smartctl',
         'zfs',
         'zfs-mirror',
-        'hdparm',
     ],
     'metadata': {
         'id': '9cf52515-63a1-4659-a8ec-6c3c881727e5',
@@ -22,15 +22,15 @@
         'backup-server': {
             'hostname': 'backups.sublimity.de',
         },
-        'hdparm': {
+        'smartctl': {
             '/dev/disk/by-id/ata-HGST_HDN726040ALE614_K3GV6TPL': {
-                'power_management': 1,
+                'apm': 32,
             },
             '/dev/disk/by-id/ata-HGST_HDN726040ALE614_K4KAJXEB': {
-                'power_management': 1,
+                'apm': 32,
             },
             '/dev/disk/by-id/ata-TOSHIBA_HDWQ140_19VZK0EMFAYG': {
-                'power_management': 1,
+                'apm': 32,
             },
         },
         'zfs-mirror': {
diff --git a/nodes/home.server.py b/nodes/home.server.py
index fd8ad6d..d33df89 100644
--- a/nodes/home.server.py
+++ b/nodes/home.server.py
@@ -9,14 +9,15 @@
     ],
     'bundles': [
         'gitea',
+        'gollum',
         'grafana',
         'influxdb2',
         'mirror',
         'postgresql',
         'redis',
+        'smartctl',
         'wireguard',
         'zfs',
-        'gollum',
     ],
     'metadata': {
         'id': 'af96709e-b13f-4965-a588-ef2cd476437a',
diff --git a/nodes/wb.offsite-backups.py b/nodes/wb.offsite-backups.py
index ce54812..60a3d23 100644
--- a/nodes/wb.offsite-backups.py
+++ b/nodes/wb.offsite-backups.py
@@ -7,6 +7,7 @@
     ],
     'bundles': [
         'dm-crypt',
+        'smartctl',
         'wireguard',
         'zfs',
     ],
@@ -53,6 +54,11 @@
                 'device': '/dev/disk/by-id/ata-TOSHIBA_MG06ACA10TE_61C0A1B1FKQE',
             },
         },
+        'smartctl': {
+            '/dev/disk/by-id/ata-TOSHIBA_MG06ACA10TE_61C0A1B1FKQE': {
+                'apm': 32,
+            },
+        },
         'zfs': {
             'pools': {
                 'tank': {