From 1dc6fab755783e62fc5beaf84cfb4405fa69e573 Mon Sep 17 00:00:00 2001
From: CroneKorkN
Date: Tue, 16 Dec 2025 17:53:03 +0100
Subject: [PATCH] routeros better port error monitoring

Rename the MikroTik-specific telegraf SNMP table from
"interface_mikrotik" to "interface_errors" and collect additional
MIKROTIK-MIB counters (length/framing anomalies, collisions, pause
frames).

Fold the former routeros_discards grafana row into routeros_errors,
which now has a "critical" and a "warning" panel. The MikroTik queries
of both panels filter on the "interface_errors" measurement so that
they match the renamed telegraf table.
---
 bundles/routeros-monitoring/metadata.py | 86 ++++++++++++++++++++++---
 data/grafana/rows/routeros_discards.py  | 42 ------------
 data/grafana/rows/routeros_errors.py    | 71 +++++++++++++++++---
 groups/os/routeros.py                   |  1 -
 4 files changed, 139 insertions(+), 61 deletions(-)
 delete mode 100644 data/grafana/rows/routeros_discards.py

diff --git a/bundles/routeros-monitoring/metadata.py b/bundles/routeros-monitoring/metadata.py
index 68628d5..bda29e5 100644
--- a/bundles/routeros-monitoring/metadata.py
+++ b/bundles/routeros-monitoring/metadata.py
@@ -171,61 +171,131 @@ def routeros_monitoring_telegraf_inputs(metadata):
                         },
                         # Interface statistics (MikroTik-specific mib)
                         {
-                            "name": "interface_mikrotik",
+                            "name": "interface_errors",
                             "oid": "MIKROTIK-MIB::mtxrInterfaceStatsTable",
                             "field": [
-                                # Join / label
+                                # Join key / label (usually identical to IF-MIB ifName)
                                 {
                                     "name": "ifName",
                                     "oid": "MIKROTIK-MIB::mtxrInterfaceStatsName",
                                     "is_tag": True,
                                 },
 
-                                # RX errors (physisch + framing)
+                                # =========================
+                                # Physical layer (L1/L2)
+                                # =========================
+                                # CRC/FCS errors → very often cabling, connectors, SFPs, signal quality (EMI)
                                 {
                                     "name": "rx_fcs_errors",
                                     "oid": "MIKROTIK-MIB::mtxrInterfaceStatsRxFCSError",
                                 },
+                                # Alignment errors → typically duplex mismatch or PHY problems
                                 {
                                     "name": "rx_align_errors",
                                     "oid": "MIKROTIK-MIB::mtxrInterfaceStatsRxAlignError",
                                 },
+                                # Code errors → PHY encoding errors (signal/SFP/PHY)
                                 {
                                     "name": "rx_code_errors",
                                     "oid": "MIKROTIK-MIB::mtxrInterfaceStatsRxCodeError",
                                 },
+                                # Carrier errors → carrier lost (copper issues, autoneg, PHY instability)
                                 {
                                     "name": "rx_carrier_errors",
                                     "oid": "MIKROTIK-MIB::mtxrInterfaceStatsRxCarrierError",
                                 },
+                                # Jabber → extremely long invalid frames (faulty NIC/PHY, very severe)
                                 {
                                     "name": "rx_jabber",
                                     "oid": "MIKROTIK-MIB::mtxrInterfaceStatsRxJabber",
                                 },
 
-                                # RX drops
+                                # ==================================
+                                # Length / framing anomalies (diagnostic)
+                                # ==================================
+                                # Frames shorter than minimum (noise, collisions, broken sender)
+                                {
+                                    "name": "rx_too_short",
+                                    "oid": "MIKROTIK-MIB::mtxrInterfaceStatsRxTooShort",
+                                },
+                                # Frames longer than allowed (MTU mismatch, framing errors)
+                                {
+                                    "name": "rx_too_long",
+                                    "oid": "MIKROTIK-MIB::mtxrInterfaceStatsRxTooLong",
+                                },
+                                # Fragments (often collision-related or duplex mismatch)
+                                {
+                                    "name": "rx_fragment",
+                                    "oid": "MIKROTIK-MIB::mtxrInterfaceStatsRxFragment",
+                                },
+                                # Generic length errors
+                                {
+                                    "name": "rx_length_errors",
+                                    "oid": "MIKROTIK-MIB::mtxrInterfaceStatsRxLengthError",
+                                },
+
+                                # ==================
+                                # Drops (real packet loss)
+                                # ==================
+                                # RX drops (queue/ASIC/policy/overload) → highly alert-worthy
                                 {
                                     "name": "rx_drop",
                                     "oid": "MIKROTIK-MIB::mtxrInterfaceStatsRxDrop",
                                 },
-
-                                # TX drops
+                                # TX drops (buffer/queue exhaustion, scheduling, ASIC limits)
                                 {
                                     "name": "tx_drop",
                                     "oid": "MIKROTIK-MIB::mtxrInterfaceStatsTxDrop",
                                 },
 
-                                # Duplex / collision (sollten 0 sein)
+                                # =========================================
+                                # Duplex / collision indicators
+                                # (should be zero on full-duplex links)
+                                # =========================================
+                                # Total collisions (relevant only for half-duplex or misconfigurations)
+                                {
+                                    "name": "tx_collisions",
+                                    "oid": "MIKROTIK-MIB::mtxrInterfaceStatsTxCollision",
+                                },
+                                # Late collisions → almost always duplex mismatch / bad autoneg
                                 {
                                     "name": "tx_late_collisions",
                                     "oid": "MIKROTIK-MIB::mtxrInterfaceStatsTxLateCollision",
                                 },
+                                # Aggregate collision counter (context)
+                                {
+                                    "name": "tx_total_collisions",
+                                    "oid": "MIKROTIK-MIB::mtxrInterfaceStatsTxTotalCollision",
+                                },
+                                # Excessive collisions → persistent duplex problems
                                 {
                                     "name": "tx_excessive_collisions",
                                     "oid": "MIKROTIK-MIB::mtxrInterfaceStatsTxExcessiveCollision",
                                 },
 
-                                # Stabilität
+                                # ==================
+                                # Flow control (diagnostic)
+                                # ==================
+                                # Pause frames received (peer throttling you)
+                                {
+                                    "name": "rx_pause",
+                                    "oid": "MIKROTIK-MIB::mtxrInterfaceStatsRxPause",
+                                },
+                                # Pause frames sent (you throttling the peer)
+                                {
+                                    "name": "tx_pause",
+                                    "oid": "MIKROTIK-MIB::mtxrInterfaceStatsTxPause",
+                                },
+                                # Pause frames actually honored
+                                {
+                                    "name": "tx_pause_honored",
+                                    "oid": "MIKROTIK-MIB::mtxrInterfaceStatsTxPauseHonored",
+                                },
+
+                                # ==========
+                                # Stability
+                                # ==========
+                                # Link-down events (loose cables, bad SFPs, PoE power drops, reboots)
                                 {
                                     "name": "link_downs",
                                     "oid": "MIKROTIK-MIB::mtxrInterfaceStatsLinkDowns",
diff --git a/data/grafana/rows/routeros_discards.py b/data/grafana/rows/routeros_discards.py
deleted file mode 100644
index 7974da4..0000000
--- a/data/grafana/rows/routeros_discards.py
+++ /dev/null
@@ -1,42 +0,0 @@
-{
-    'in': {
-        'stacked': True,
-        'queries': {
-            'in': {
-                'filters': {
-                    '_measurement': 'interface',
-                    '_field': ['in_discards'],
-                    'operating_system': 'routeros',
-                },
-                'function': 'derivative',
-            },
-        },
-        'min': 0,
-        'unit': 'pps',
-        'tooltip': 'multi',
-        'display_name': '${__field.labels.ifName} - ${__field.labels.ifAlias}',
-        'legend': {
-            'displayMode': 'hidden',
-        },
-    },
-    'out': {
-        'stacked': True,
-        'queries': {
-            'out': {
-                'filters': {
-                    '_measurement': 'interface',
-                    '_field': ['out_discards'],
-                    'operating_system': 'routeros',
-                },
-                'function': 'derivative',
-            },
-        },
-        'min': 0,
-        'unit': 'pps',
-        'tooltip': 'multi',
-        'display_name': '${__field.labels.ifName} - ${__field.labels.ifAlias}',
-        'legend': {
-            'displayMode': 'hidden',
-        },
-    },
-}
diff --git a/data/grafana/rows/routeros_errors.py b/data/grafana/rows/routeros_errors.py
index e7a3f3c..d36beb9 100644
--- a/data/grafana/rows/routeros_errors.py
+++ b/data/grafana/rows/routeros_errors.py
@@ -1,11 +1,33 @@
 {
-    'in': {
+    'critical': {
         'stacked': True,
         'queries': {
-            'in': {
+            'generic': {
                 'filters': {
                     '_measurement': 'interface',
-                    '_field': ['in_errors'],
+                    '_field': [
+                        'in_errors',
+                        'out_errors',
+                    ],
+                    'operating_system': 'routeros',
+                },
+                'function': 'derivative',
+            },
+            'mikrotik': {
+                'filters': {
+                    '_measurement': 'interface_errors',
+                    '_field': [
+                        'rx_fcs_errors',
+                        'rx_align_errors',
+                        'rx_code_errors',
+                        'rx_carrier_errors',
+                        'rx_jabber',
+                        'rx_fragment',
+                        'rx_length_errors',
+                        'tx_late_collisions',
+                        'tx_excessive_collisions',
+                        'link_downs',
+                    ],
                     'operating_system': 'routeros',
                 },
                 'function': 'derivative',
@@ -14,18 +36,43 @@
         'min': 0,
         'unit': 'pps',
         'tooltip': 'multi',
-        'display_name': '${__field.labels.ifName} - ${__field.labels.ifAlias}',
+        'display_name': '${__field.name} ${__field.labels.ifName}',
         'legend': {
-            'displayMode': 'hidden',
+            'displayMode': 'table',
+            'placement': 'right',
+            'calcs': [
+                'max',
+            ],
         },
     },
-    'out': {
+    'warning': {
         'stacked': True,
         'queries': {
-            'out': {
+            'generic': {
                 'filters': {
                     '_measurement': 'interface',
-                    '_field': ['out_errors'],
+                    '_field': [
+                        'in_discards',
+                        'out_discards',
+                    ],
+                    'operating_system': 'routeros',
+                },
+                'function': 'derivative',
+            },
+            'mikrotik': {
+                'filters': {
+                    '_measurement': 'interface_errors',
+                    '_field': [
+                        'rx_too_short',
+                        'rx_too_long',
+                        'rx_drop',
+                        'tx_drop',
+                        'rx_pause',
+                        'tx_pause',
+                        'tx_pause_honored',
+                        'tx_collisions',
+                        'tx_total_collisions',
+                    ],
                     'operating_system': 'routeros',
                 },
                 'function': 'derivative',
@@ -34,9 +81,13 @@
         'min': 0,
         'unit': 'pps',
         'tooltip': 'multi',
-        'display_name': '${__field.labels.ifName} - ${__field.labels.ifAlias}',
+        'display_name': '${__field.name} ${__field.labels.ifName}',
         'legend': {
-            'displayMode': 'hidden',
+            'displayMode': 'table',
+            'placement': 'right',
+            'calcs': [
+                'max',
+            ],
         },
     },
 }
diff --git a/groups/os/routeros.py b/groups/os/routeros.py
index 264f2bb..b44aed7 100644
--- a/groups/os/routeros.py
+++ b/groups/os/routeros.py
@@ -10,7 +10,6 @@
     ],
     'metadata': {
         'grafana_rows': {
-            'routeros_discards',
             'routeros_errors',
             'routeros_throughput',
             'routeros_poe',