smartctl: split the telegraf plugin into power_mode and errors plugins

Adds a second telegraf exec plugin (smartctl_errors, polled every 6h) that
reports the SMART attributes most predictive of drive failure, renames the
existing plugin to smartctl_power_mode, and adds a Grafana panel for the new
smart_errors measurement.

Note: the blob ids on the "index" lines are those of the original commit; the
content of telegraf_plugin_errors below has since been corrected (raw values
instead of flag bitmasks, tolerant of smartctl's non-zero exit status).

diff --git a/bundles/smartctl/files/telegraf_plugin_errors b/bundles/smartctl/files/telegraf_plugin_errors
new file mode 100644
index 0000000..f275b4b
--- /dev/null
+++ b/bundles/smartctl/files/telegraf_plugin_errors
@@ -0,0 +1,33 @@
+#!/usr/bin/env python3
+
+from subprocess import PIPE, check_output, run
+from json import loads
+import time
+
+
+nanosecond = time.time_ns()
+
+for line in check_output(['/usr/sbin/smartctl', '--scan', '-d', 'scsi']).splitlines():
+    device = line.split()[0].decode()
+    # smartctl's exit status is a bitmask that is non-zero for failing disks,
+    # so check_output() would raise exactly when the data matters most
+    smart = loads(run(['/usr/sbin/smartctl', '-a', device, '-j'], stdout=PIPE).stdout)
+    attributes = {
+        # 'raw' holds the event count; 'flags' is only the attribute's flag bitmask
+        attribute['name']: attribute['raw']['value']
+        for attribute in smart.get('ata_smart_attributes', {}).get('table', [])
+        if int(attribute['id']) in [
+            # https://www.backblaze.com/blog/what-smart-stats-indicate-hard-drive-failures/
+            5,
+            187,
+            188,
+            197,
+            198,
+        ]
+    }
+    if not attributes:
+        # no ATA attribute table or no tracked ids: a line without fields is invalid
+        continue
+    attributes_string = ','.join(f'{k}={v}' for k, v in attributes.items())
+
+    print(f"smart_errors,host=${node.name},device={device} {attributes_string} {nanosecond}")
diff --git a/bundles/smartctl/files/telegraf_plugin b/bundles/smartctl/files/telegraf_plugin_power_mode
similarity index 100%
rename from bundles/smartctl/files/telegraf_plugin
rename to bundles/smartctl/files/telegraf_plugin_power_mode
diff --git a/bundles/smartctl/items.py b/bundles/smartctl/items.py
index 3e73575..c7b220e 100644
--- a/bundles/smartctl/items.py
+++ b/bundles/smartctl/items.py
@@ -1,6 +1,11 @@
 files = {
-    '/usr/local/share/telegraf/smartctl': {
-        'source': 'telegraf_plugin',
+    '/usr/local/share/telegraf/smartctl_power_mode': {
+        'source': 'telegraf_plugin_power_mode',
+        'content_type': 'mako',
+        'mode': '0755',
+    },
+    '/usr/local/share/telegraf/smartctl_errors': {
+        'source': 'telegraf_plugin_errors',
         'content_type': 'mako',
         'mode': '0755',
     },
@@ -22,5 +27,5 @@ for device, conf in node.metadata.get('smartctl').items():
             }
         else:
             raise ValueError(f'{node.name}: unkown smartctl option: {option}')
-        
+
         previous_action = [f'action:{action_name}']
diff --git a/bundles/smartctl/metadata.py b/bundles/smartctl/metadata.py
index f221b95..db15ad2 100644
--- a/bundles/smartctl/metadata.py
+++ b/bundles/smartctl/metadata.py
@@ -13,17 +13,29 @@ defaults = {
     'telegraf': {
         'config': {
             'inputs': {
-                'exec': {h({
-                    'commands': [
-                        f'sudo /usr/local/share/telegraf/smartctl',
-                    ],
-                    'data_format': 'influx',
-                    'interval': '20s',
-                })},
+                'exec': {
+                    h({
+                        'commands': [
+                            f'sudo /usr/local/share/telegraf/smartctl_power_mode',
+                        ],
+                        'data_format': 'influx',
+                        'interval': '20s',
+                    }),
+                    h({
+                        'commands': [
+                            f'sudo /usr/local/share/telegraf/smartctl_errors',
+                        ],
+                        'data_format': 'influx',
+                        'interval': '6h',
+                    })
+                },
             },
         },
     },
     'sudoers': {
-        'telegraf': {'/usr/local/share/telegraf/smartctl'},
+        'telegraf': {
+            '/usr/local/share/telegraf/smartctl_power_mode',
+            '/usr/local/share/telegraf/smartctl_errors',
+        },
     },
 }
diff --git a/data/grafana/rows/smartctl.py b/data/grafana/rows/smartctl.py
index 4511e35..9689cf4 100644
--- a/data/grafana/rows/smartctl.py
+++ b/data/grafana/rows/smartctl.py
@@ -33,4 +33,20 @@
         'min': 0,
         'tooltip': 'multi',
     },
+    'errors': {
+        'stacked': True,
+        'queries': {
+            'power_level': {
+                'filters': {
+                    '_measurement': 'smart_errors',
+                },
+            },
+        },
+        'display_name': '__field.labels.device} ${__field.name',
+        'min': 0,
+        'tooltip': 'multi',
+        'legend': {
+            'displayMode': 'hidden',
+        },
+    },
 }