From 61d3487f8ae8332d15ec1eb04f0228113d1d19c4 Mon Sep 17 00:00:00 2001
From: Simon Smith
Date: Thu, 2 Jan 2025 23:38:13 +0000
Subject: [PATCH] add prometheus metrics (#6654)

Signed-off-by: si458
---
 meshcentral-config-schema.json |   8 +++
 meshcentral.js                 |   4 ++
 monitoring.js                  | 124 +++++++++++++++++++++++++++++++++
 3 files changed, 136 insertions(+)
 create mode 100644 monitoring.js

diff --git a/meshcentral-config-schema.json b/meshcentral-config-schema.json
index 462895b3..aebe15dc 100644
--- a/meshcentral-config-schema.json
+++ b/meshcentral-config-schema.json
@@ -767,6 +767,14 @@
           "default": false,
           "description": "When set to true, the MPS server will only accept TLS 1.2 and 1.3 connections. Older Intel AMT devices will not be able to connect."
         },
+        "prometheus": {
+          "type": [
+            "boolean",
+            "number"
+          ],
+          "default": false,
+          "description": "When set to true, a prometheus metrics endpoint will be available \"0.0.0.0:9464/metrics\". If you specify a number instead, the prometheus metrics will listen on this port instead of the default 9464."
+        },
         "no2FactorAuth": {
           "type": "boolean",
           "default": false
diff --git a/meshcentral.js b/meshcentral.js
index fe6c3548..abc864f1 100644
--- a/meshcentral.js
+++ b/meshcentral.js
@@ -2011,6 +2011,9 @@ function CreateMeshCentralServer(config, args) {
                         obj.firebase = require('./firebase').CreateFirebaseRelay(obj, 'https://alt.meshcentral.com/firebaserelay.aspx');
                     }
 
+                    // Setup monitoring
+                    obj.monitoring = require('./monitoring.js').CreateMonitoring(obj, obj.args);
+
                     // Start periodic maintenance
                     obj.maintenanceTimer = setInterval(obj.maintenanceActions, 1000 * 60 * 60); // Run this every hour
 
@@ -4260,6 +4263,7 @@ function mainStart() {
         if (sendgrid || (config.sendgrid != null)) { modules.push('@sendgrid/mail'); } // Add SendGrid support
         if ((args.translate || args.dev) && (Number(process.version.match(/^v(\d+\.\d+)/)[1]) >= 16)) { modules.push('jsdom@22.1.0'); modules.push('esprima@4.0.1'); modules.push('html-minifier@4.0.0'); } // Translation support
         if (typeof config.settings.crowdsec == 'object') { modules.push('@crowdsec/express-bouncer@0.1.0'); } // Add CrowdSec bounser module (https://www.npmjs.com/package/@crowdsec/express-bouncer)
+        if ((config.settings.prometheus != null) && (config.settings.prometheus !== false)) { modules.push('prom-client'); } // Add Prometheus Metrics support, only when the endpoint is enabled
 
         if (typeof config.settings.autobackup == 'object') {
             // Setup encrypted zip support if needed
diff --git a/monitoring.js b/monitoring.js
new file mode 100644
index 00000000..9fc1e422
--- /dev/null
+++ b/monitoring.js
@@ -0,0 +1,124 @@
+/**
+* @description MeshCentral monitoring module
+* @author Simon Smith
+* @license Apache-2.0
+* @version v0.0.1
+*/
+
+"use strict";
+
+/**
+ * Create the monitoring object and, when enabled in the configuration, start a Prometheus metrics endpoint.
+ * The endpoint is enabled when settings.prometheus is true (port 9464) or a port number between 1 and 65535.
+ * @param {object} parent - The MeshCentral server object; config.settings, webserver and mpsserver are read from it.
+ * @param {object} args - Server arguments; response compression is used unless args.compression is false.
+ * @returns {object} The monitoring object (express app, metric definitions and, when enabled, the prom-client module).
+ */
+module.exports.CreateMonitoring = function (parent, args) {
+    var obj = {};
+    obj.args = args;
+    obj.parent = parent;
+    obj.express = require('express');
+    obj.app = obj.express();
+    obj.prometheus = null;
+    if (args.compression !== false) { obj.app.use(require('compression')()); }
+    obj.app.disable('x-powered-by');
+    obj.counterMetrics = { // Counter Metrics always start at 0 and increase but never decrease
+        RelayErrors: { description: "Relay Errors" }, // parent.webserver.relaySessionErrorCount
+        UnknownGroup: { description: "Unknown Group" }, // meshDoesNotExistCount
+        InvalidPKCSsignature: { description: "Invalid PKCS signature" }, // invalidPkcsSignatureCount
+        InvalidRSAsignature: { description: "Invalid RSA signature" }, // invalidRsaSignatureCount
+        InvalidJSON: { description: "Invalid JSON" }, // invalidJsonCount
+        UnknownAction: { description: "Unknown Action" }, // unknownAgentActionCount
+        BadWebCertificate: { description: "Bad Web Certificate" }, // agentBadWebCertHashCount
+        BadSignature: { description: "Bad Signature" }, // (agentBadSignature1Count + agentBadSignature2Count)
+        MaxSessionsReached: { description: "Max Sessions Reached" }, // agentMaxSessionHoldCount
+        UnknownDeviceGroup: { description: "Unknown Device Group" }, // (invalidDomainMeshCount + invalidDomainMesh2Count)
+        InvalidDeviceGroupType: { description: "Invalid Device Group Type" }, // (invalidMeshTypeCount + invalidMeshType2Count)
+        DuplicateAgent: { description: "Duplicate Agent" }, // duplicateAgentCount
+        blockedUsers: { description: "Blocked Users" }, // blockedUsers
+        blockedAgents: { description: "Blocked Agents" }, // blockedAgents
+    };
+    obj.guageMetrics = { // Gauge Metrics always start at 0 and can increase and decrease
+        ConnectedIntelAMT: { description: "Connected Intel AMT" }, // parent.mpsserver.ciraConnections[i].length
+        UserAccounts: { description: "User Accounts" }, // Object.keys(parent.webserver.users).length
+        DeviceGroups: { description: "Device Groups" }, // parent.webserver.meshes (ONLY WHERE deleted=null)
+        AgentSessions: { description: "Agent Sessions" }, // Object.keys(parent.webserver.wsagents).length
+        ConnectedUsers: { description: "Connected Users" }, // Object.keys(parent.webserver.wssessions).length
+        UsersSessions: { description: "Users Sessions" }, // Object.keys(parent.webserver.wssessions2).length
+        RelaySessions: { description: "Relay Sessions" }, // parent.webserver.relaySessionCount
+        RelayCount: { description: "Relay Count" } // Object.keys(parent.webserver.wsrelays).length
+    };
+    // The endpoint is opt-in: a missing value or an explicit false (the schema default) leaves it disabled
+    if ((parent.config.settings.prometheus != null) && (parent.config.settings.prometheus !== false)) { // Create Prometheus Monitoring Endpoint
+        if ((typeof parent.config.settings.prometheus == 'number') && ((parent.config.settings.prometheus < 1) || (parent.config.settings.prometheus > 65535))) {
+            console.log('Prometheus port number is invalid, Prometheus metrics endpoint has been disabled');
+            delete parent.config.settings.prometheus;
+        } else {
+            const port = ((typeof parent.config.settings.prometheus == 'number') ? parent.config.settings.prometheus : 9464);
+            obj.prometheus = require('prom-client');
+            const collectDefaultMetrics = obj.prometheus.collectDefaultMetrics;
+            collectDefaultMetrics();
+            for (const key in obj.guageMetrics) {
+                obj.guageMetrics[key].prometheus = new obj.prometheus.Gauge({ name: 'meshcentral_' + String(key).toLowerCase(), help: obj.guageMetrics[key].description });
+            }
+            for (const key in obj.counterMetrics) {
+                obj.counterMetrics[key].prometheus = new obj.prometheus.Counter({ name: 'meshcentral_' + String(key).toLowerCase(), help: obj.counterMetrics[key].description });
+            }
+            obj.app.get('/', function (req, res) { res.send('MeshCentral Prometheus server.'); });
+            obj.app.listen(port, function () {
+                console.log('MeshCentral Prometheus server running on port ' + port + '.');
+                obj.parent.updateServerState('prometheus-port', port);
+            }).on('error', function (err) { console.log('MeshCentral Prometheus server failed to listen on port ' + port + ': ' + err.message); }); // Without a listener, a bind failure (e.g. port already in use) is thrown as an unhandled 'error' event
+            obj.app.get('/metrics', async (req, res) => {
+                try {
+                    // Count the number of device groups that are not deleted
+                    var activeDeviceGroups = 0;
+                    for (var i in parent.webserver.meshes) { if (parent.webserver.meshes[i].deleted == null) { activeDeviceGroups++; } } // This is not ideal for performance, we want to do something better.
+                    var guages = {
+                        UserAccounts: Object.keys(parent.webserver.users).length,
+                        DeviceGroups: activeDeviceGroups,
+                        AgentSessions: Object.keys(parent.webserver.wsagents).length,
+                        ConnectedUsers: Object.keys(parent.webserver.wssessions).length,
+                        UsersSessions: Object.keys(parent.webserver.wssessions2).length,
+                        RelaySessions: parent.webserver.relaySessionCount,
+                        RelayCount: Object.keys(parent.webserver.wsrelays).length,
+                        ConnectedIntelAMT: 0
+                    };
+                    if (parent.mpsserver != null) {
+                        for (var i in parent.mpsserver.ciraConnections) {
+                            guages.ConnectedIntelAMT += parent.mpsserver.ciraConnections[i].length;
+                        }
+                    }
+                    for (const key in guages) { obj.guageMetrics[key].prometheus.set(guages[key]); }
+                    // Take a look at agent errors
+                    var agentstats = parent.webserver.getAgentStats();
+                    const counters = {
+                        RelayErrors: parent.webserver.relaySessionErrorCount,
+                        UnknownGroup: agentstats.meshDoesNotExistCount,
+                        InvalidPKCSsignature: agentstats.invalidPkcsSignatureCount,
+                        InvalidRSAsignature: agentstats.invalidRsaSignatureCount,
+                        InvalidJSON: agentstats.invalidJsonCount,
+                        UnknownAction: agentstats.unknownAgentActionCount,
+                        BadWebCertificate: agentstats.agentBadWebCertHashCount,
+                        BadSignature: (agentstats.agentBadSignature1Count + agentstats.agentBadSignature2Count),
+                        MaxSessionsReached: agentstats.agentMaxSessionHoldCount,
+                        UnknownDeviceGroup: (agentstats.invalidDomainMeshCount + agentstats.invalidDomainMesh2Count),
+                        InvalidDeviceGroupType: (agentstats.invalidMeshTypeCount + agentstats.invalidMeshType2Count),
+                        DuplicateAgent: agentstats.duplicateAgentCount,
+                        blockedUsers: parent.webserver.blockedUsers,
+                        blockedAgents: parent.webserver.blockedAgents
+                    };
+                    // The server already keeps running totals, so each scrape resets the counter and re-applies the current total
+                    for (const key in counters) { obj.counterMetrics[key].prometheus.reset(); obj.counterMetrics[key].prometheus.inc(counters[key]); }
+                    res.set('Content-Type', obj.prometheus.register.contentType);
+                    res.end(await obj.prometheus.register.metrics());
+                } catch (ex) {
+                    console.log(ex);
+                    res.status(500).end();
+                }
+            });
+        }
+    }
+    return obj;
+};
\ No newline at end of file