NVIDIA tested and working; added options for the web and API ports in the Jenkinsfile

This commit is contained in:
2026-03-19 00:30:12 -07:00
parent d7d2507d43
commit cf269b83af
11 changed files with 172 additions and 60 deletions

View File

@ -6,3 +6,7 @@ The web dashboard is built from a stack of Node.js, PHP, and nginx. The Node.js
The docker stack also uses a network on the 192.168.37.0/24 subnet, and secures all traffic to be within this subnet. The dashboard can only be accessed locally at port 80 on 192.168.37.1 when the secure_api variable is set to true. The docker stack also uses a network on the 192.168.37.0/24 subnet, and secures all traffic to be within this subnet. The dashboard can only be accessed locally at port 80 on 192.168.37.1 when the secure_api variable is set to true.
Working on the network descriptor, and I think that to make it easier on myself I will use the JSON output from `ip`.
The problem is that if an interface has multiple IPs, then I get multiple lines of IP data for that one interface.
I think perhaps I will update my code to be aware of properties that may be arrays.
I do think this makes more sense than having multiple cards for a single interface...

View File

@ -13,6 +13,7 @@ cosmostat_packages:
- lm-sensors - lm-sensors
- jc - jc
- smartmontools - smartmontools
- inxi
# python venv packages # python venv packages
cosmostat_venv_packages: | cosmostat_venv_packages: |
@ -47,10 +48,12 @@ api_service_name: "cosmostat_api"
api_service_folder: "{{ service_folder }}/api" api_service_folder: "{{ service_folder }}/api"
venv_folder: "{{ service_folder }}/venv" venv_folder: "{{ service_folder }}/venv"
api_service_exe: "{{ venv_folder }}/bin/python -u {{ api_service_folder }}/app.py" api_service_exe: "{{ venv_folder }}/bin/python -u {{ api_service_folder }}/app.py"
custom_api_port: "5000"
# dashboard vars # dashboard vars
service_control_web_folder: "{{ service_folder }}/web" service_control_web_folder: "{{ service_folder }}/web"
public_dashboard: true public_dashboard: true
custom_port: "80"
# will skip init when true # will skip init when true
quick_refresh: false quick_refresh: false

View File

@ -49,16 +49,25 @@ class Component:
# store static properties # store static properties
self.multi_check = self.is_multi() self.multi_check = self.is_multi()
self.virt_ignore = self._descriptor.get('virt_ignore', []) self.virt_ignore = self._descriptor.get('virt_ignore', [])
self.multi_metrics = self._descriptor.get('multi_metrics', [])
#if 'precheck' in self._descriptor:
# precheck_command = self._descriptor.get('precheck', [])
# precheck_value = int(run_command(precheck_command, zero_only = True))
# if precheck_value == 0:
# raise ValueError(f"No devices of type {self.type}")
if self.is_virtual: if self.is_virtual:
self.virt_ignore = [] self.virt_ignore = []
self._properties: Dict[str, str] = {} self._properties: Dict[str, str | list[str]] = {}
for key, command in descriptor.get('properties', {}).items(): for key, command in descriptor.get('properties', {}).items():
return_string = True
if key in self.multi_metrics:
return_string = False
if self.this_device != "None": if self.this_device != "None":
# this means this component type is a multi and the commands need templating for each device # this means this component type is a multi and the commands need templating for each device
formatted_command = command.format(this_device=self.this_device) formatted_command = command.format(this_device=self.this_device)
self._properties[key] = run_command(formatted_command, True) self._properties[key] = run_command(formatted_command, zero_only = return_string)
else: else:
self._properties[key] = run_command(command, zero_only = True) self._properties[key] = run_command(command, zero_only = return_string)
print(self._properties[key]) print(self._properties[key])
# build the description string # build the description string
self._description_template: str | None = descriptor.get("description") self._description_template: str | None = descriptor.get("description")
@ -114,31 +123,32 @@ class Component:
component_properties = self._properties.items() component_properties = self._properties.items()
else: else:
component_properties = self.get_property(component) component_properties = self.get_property(component)
for name, value in component_properties: for name, values in component_properties:
this_property = { for value in (values if isinstance(values, list) else [values]):
"Source": self.name, this_property = {
"Property": name, "Source": self.name,
"Value": value "Property": name,
} "Value": value
if name not in self.virt_ignore: }
result.append(this_property) if name not in self.virt_ignore:
result.append(this_property)
return result return result
def get_properties_strings(self, return_simple = False): def get_properties_strings(self, return_simple = False):
result = [] result = []
component_properties = self._properties.items() component_properties = self._properties.items()
print(component_properties) for name, values in component_properties:
for name, value in component_properties: for value in (values if isinstance(values, list) else [values]):
simple_property = f"{name}: {value}" simple_property = f"{name}: {value}"
complex_property = { complex_property = {
"Source": self.name, "Source": self.name,
"Property": simple_property "Property": simple_property
} }
if name not in self.virt_ignore: if name not in self.virt_ignore:
if return_simple: if return_simple:
result.append(simple_property) result.append(simple_property)
else: else:
result.append(complex_property) result.append(complex_property)
return result return result
def get_metrics_keys(self): def get_metrics_keys(self):
@ -318,15 +328,15 @@ class System:
multi_check = component["multi_check"] multi_check = component["multi_check"]
# if multi, note that the command in device_list creates the list of things to pipe into this_device # if multi, note that the command in device_list creates the list of things to pipe into this_device
if multi_check: if multi_check:
letters = [chr(c) for c in range(ord('A'), ord('Z')+1)]
print(f"Creating one component of type {component_name} for each one found") print(f"Creating one component of type {component_name} for each one found")
component_type_device_list = get_device_list(component_name) component_type_device_list = get_device_list(component_name)
component_id = 0
for this_device in component_type_device_list: for this_device in component_type_device_list:
this_component_letter = letters[component_type_device_list.index(this_device)] this_component_ID = component_type_device_list.index(this_device)
this_component_name = f"{component_name} {this_component_letter}" this_component_name = f"{component_name} {this_component_ID}"
print(f"{this_component_name} - {component_name} - {this_device}") print(f"{this_component_name} - {component_name} - {this_device}")
self.add_components(Component(name = this_component_name, comp_type = component_name, this_device = this_device)) new_component = Component(name = this_component_name, comp_type = component_name, this_device = this_device)
self.add_components(new_component)
else: else:
if debug_output: if debug_output:
@ -538,7 +548,13 @@ def run_command(cmd, zero_only=False, use_shell=True, req_check = True):
def get_device_list(device_type_name: str): def get_device_list(device_type_name: str):
result = [] result = []
for component in component_class_tree: for component in component_class_tree:
if component["name"] == device_type_name: precheck_value = 1
if "precheck" in component:
precheck_command = component["precheck"]
precheck_value_output = run_command(precheck_command, zero_only = True)
precheck_value = int(precheck_value_output)
print(f"Precheck found - {precheck_command} - {precheck_value}")
if component["name"] == device_type_name and precheck_value != 0:
device_list_command = component["device_list"] device_list_command = component["device_list"]
device_list_result = run_command(device_list_command) device_list_result = run_command(device_list_command)
result = device_list_result result = device_list_result

View File

@ -21,7 +21,8 @@ app_settings = {
"secure_api" : True, "secure_api" : True,
"push_redis" : False, "push_redis" : False,
"run_background" : True, "run_background" : True,
"update_frequency": 1 "update_frequency": 1,
"custom_api_port": "5000"
} }
with open('cosmostat_settings.yaml', 'r') as f: with open('cosmostat_settings.yaml', 'r') as f:
@ -60,6 +61,9 @@ def service_gateway_ip():
else: else:
return "0.0.0.0" return "0.0.0.0"
def service_api_port() -> int:
    """Return the port the Flask API should listen on, as an integer.

    The value is read from ``cosmostat_settings["custom_api_port"]``.
    The built-in default in ``app_settings`` is the string ``"5000"``,
    whereas a value coming from a parsed settings file may already be an
    integer, so the result is normalised with ``int()`` to give callers
    one consistent type.

    Raises:
        KeyError: if ``custom_api_port`` is absent from the settings.
        ValueError: if the configured value is not a valid integer.
    """
    # NOTE(review): assumes cosmostat_settings is the merged settings
    # mapping built at module load - confirm against the loader code.
    return int(cosmostat_settings["custom_api_port"])
####################################################################### #######################################################################
### Redis Functions ### Redis Functions
####################################################################### #######################################################################
@ -295,7 +299,7 @@ if __name__ == '__main__':
print("Skipping flask background task") print("Skipping flask background task")
# Flask API # Flask API
app.run(debug=False, host=service_gateway_ip(), port=5000) app.run(debug=False, host=service_gateway_ip(), port=service_api_port())

View File

@ -51,5 +51,48 @@
"metrics": { "metrics": {
"placeholder": "" "placeholder": ""
} }
},
{
"name": "LAN",
"description": "{Device ID} - {Device Name} - {MAC Address}",
"multi_check": "True",
"device_list": "ip link | grep default | grep -v -e docker -e 127.0.0.1 -e br- -e veth -e lo -e tun | cut -d ':' -f 2 | awk '{{print $1}}' ",
"properties": {
"MAC Address": "ip link | grep -A1 ' {this_device}' | grep ether | awk '{{print $2}}'",
"Device Name": "echo {this_device}",
"Device ID": "udevadm info -q property -p $(ls -l /sys/class/net/ | grep {this_device} | cut -d '>' -f2 | cut -b 8- ) | grep ID_MODEL_FROM_DATABASE | cut -d '=' -f2 "
},
"metrics": {
"IP Address": "ip -o -4 ad | grep -v -e docker -e 127.0.0.1 -e br- | grep {this_device} | awk '{{print $4}}'",
"Data Transmitted": "ifconfig {this_device} | grep RX | grep bytes | cut -d '(' -f2 | tr -d ')'",
"Data Received": "ifconfig {this_device} | grep TX | grep bytes | cut -d '(' -f2 | tr -d ')'",
"Link State": "cat /sys/class/net/{this_device}/operstate",
"Link Speed": "cat /sys/class/net/{this_device}/speed"
},
"multi_metrics": [
"IP Address"
]
},
{
"name": "NVGPU",
"description": "NVGPU{Device ID} - {Device Model} with {Memory Size}, Max Power {Maximum Power}",
"multi_check": "True",
"device_list": "nvidia-smi --query-gpu=index --format=csv,noheader,nounits",
"properties": {
"Device Model": "nvidia-smi --id={this_device} --query-gpu=name --format=csv,noheader,nounits",
"Device ID": "echo NVGPU{this_device}",
"Driver Version": "nvidia-smi --id={this_device} --query-gpu=driver_version --format=csv,noheader,nounits",
"Maximum Power": "nvidia-smi --id={this_device} --query-gpu=power.draw --format=csv,noheader,nounits",
"Memory Size": "nvidia-smi --id={this_device} --query-gpu=memory.total --format=csv,noheader,nounits"
},
"metrics": {
"Power Draw": "nvidia-smi --id={this_device} --query-gpu=power.draw --format=csv,noheader,nounits",
"Used Memory": "nvidia-smi --id={this_device} --query-gpu=memory.used --format=csv,noheader,nounits",
"Temperature": "nvidia-smi --id={this_device} --query-gpu=temperature.gpu --format=csv,noheader,nounits",
"GPU Load": "nvidia-smi --id={this_device} --query-gpu=utilization.gpu --format=csv,noheader,nounits"
},
"precheck": "lspci | grep NV | wc -l"
} }
] ]

View File

@ -1,24 +1,22 @@
[ [
{ {
"name": "LAN", "name": "",
"description": "", "description": "",
"multi_check": "True", "multi_check": "True",
"device_list": "", "device_list": " ",
"properties": { "properties": {
"MAC Address": "",
"Device Name": "",
"Device ID": ""
}, },
"metrics": { "metrics": {
"IP Address": "", },
"MB Transmitted": "", "multi_metrics": [
"MB Received": "", ],
"Link State": "", "virt_ignore": [
"Link Speed": "" ]
}
}, },
{ {
"SATA GBW": "sudo /usr/sbin/smartctl -x --json /dev/{this_device} | jq -r '.physical_block_size as $block |.ata_device_statistics.pages[] | select(.name == \"General Statistics\") | .table[] | select(.name == \"Logical Sectors Written\") | .value as $sectors | ($sectors * $block) / 1073741824 ' | awk '{{printf \"%.2f GiB Written\\n\", $0}}' || true", "SATA GBW": "sudo /usr/sbin/smartctl -x --json /dev/{this_device} | jq -r '.physical_block_size as $block |.ata_device_statistics.pages[] | select(.name == \"General Statistics\") | .table[] | select(.name == \"Logical Sectors Written\") | .value as $sectors | ($sectors * $block) / 1073741824 ' | awk '{{printf \"%.2f GiB Written\\n\", $0}}' || true",
"NVMe GBW": "sudo /usr/sbin/smartctl -x --json /dev/{this_device} | jq -r ' .nvme_smart_health_information_log.data_units_written as $dw | .logical_block_size as $ls | ($dw * $ls) / 1073741824 ' | awk '{{printf \"%.2f GiB Written\\n\", $0}}' || true" "NVMe GBW": "sudo /usr/sbin/smartctl -x --json /dev/{this_device} | jq -r ' .nvme_smart_health_information_log.data_units_written as $dw | .logical_block_size as $ls | ($dw * $ls) / 1073741824 ' | awk '{{printf \"%.2f GiB Written\\n\", $0}}' || true"
} }
] ]

View File

@ -2,7 +2,7 @@
<html lang="en"> <html lang="en">
<head> <head>
<meta charset="utf-8"> <meta charset="utf-8">
<title>Matt-Cloud Cosmostat</title> <title>Cosmostat - <?php echo $_SERVER['SERVER_NAME'] ?></title>
<style> <style>
.components {display:grid; grid-template-columns:repeat(auto-fill, minmax(280px, 1fr)); gap:1rem;} .components {display:grid; grid-template-columns:repeat(auto-fill, minmax(280px, 1fr)); gap:1rem;}
@ -27,8 +27,28 @@
<!-- PHP to render static components --> <!-- PHP to render static components -->
<?php <?php
# load API settings, this requires a simple yaml file
$raw_api_settings = file('/opt/api_settings/cosmostat_settings.yaml', FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
$api_settings = [];
foreach ($raw_api_settings as $line) {
if ($line[0] === '#') {
continue;
}
$pos = strpos($line, ':');
if ($pos === false) {
continue;
}
$key = trim(substr($line, 0, $pos));
$value = trim(substr($line, $pos + 1));
if ($value === '') {
$value = null;
}
$api_settings[$key] = $value;
}
$dockerGateway = trim($api_settings['docker_gateway'], "\"'") ?? null;
$customApiPort = trim($api_settings['custom_api_port'], "\"'") ?? null;
# load API data # load API data
$apiUrl = 'http://192.168.37.1:5000/php_summary'; $apiUrl = 'http://'.$dockerGateway.':'.$customApiPort.'/php_summary';
$context = stream_context_create([ $context = stream_context_create([
'http' => [ 'http' => [
'timeout' => 5, // seconds 'timeout' => 5, // seconds

View File

@ -9,6 +9,7 @@
"express": "^4.18.2", "express": "^4.18.2",
"socket.io": "^4.7.2", "socket.io": "^4.7.2",
"redis": "^4.6.7", "redis": "^4.6.7",
"node-fetch": "^2.6.7" "node-fetch": "^2.6.7",
"js-yaml": "^4.1.0"
} }
} }

View File

@ -4,21 +4,39 @@ const express = require('express');
const { createClient } = require('redis'); const { createClient } = require('redis');
const { Server } = require('socket.io'); const { Server } = require('socket.io');
const fetch = require('node-fetch'); // npm i node-fetch@2 const fetch = require('node-fetch'); // npm i node-fetch@2
const fs = require('fs');
const yaml = require('js-yaml'); // npm i js-yaml
const path = require('path');
const app = express(); const app = express();
const server = http.createServer(app); const server = http.createServer(app);
const io = new Server(server); const io = new Server(server);
// ---------- Socket.io ---------- /* --------------------------------------------------------------------- */
/* ---------- 1. Load the YAML configuration file ---------------------- */
/* --------------------------------------------------------------------- */
let config = {};
try {
const file = fs.readFileSync(path.resolve(__dirname, 'cosmostat_settings.yaml'), 'utf8');
config = yaml.load(file);
} catch (e) {
console.error('Failed to load config.yaml:', e);
process.exit(1);
}
const API_PORT = config.custom_api_port || 5000; // fallback to 5000
const API_HOST = config.docker_gateway || '192.168.37.1'; // fallback IP
const API_BASE = `http://${API_HOST}:${API_PORT}`;
// ---------------------------------------------------------------------
// ---------- 2. Socket.io ------------------------------------------------
// ---------------------------------------------------------------------
io.on('connection', async socket => { io.on('connection', async socket => {
console.log('client connected:', socket.id); console.log('client connected:', socket.id);
// Call the external API every time a client connects // Call the external API every time a client connects
try { try {
const resp = await fetch('http://192.168.37.1:5000/start_timer', { const resp = await fetch(`${API_BASE}/start_timer`, { method: 'GET' });
method: 'GET'
});
const data = await resp.json(); const data = await resp.json();
console.log('API responded to connect:', data); console.log('API responded to connect:', data);
} catch (err) { } catch (err) {
@ -29,9 +47,7 @@ io.on('connection', async socket => {
socket.on('tableRendered', async () => { socket.on('tableRendered', async () => {
console.log('Client reported table rendered - starting timer'); console.log('Client reported table rendered - starting timer');
try { try {
const resp = await fetch('http://192.168.37.1:5000/start_timer', { const resp = await fetch(`${API_BASE}/start_timer`, { method: 'GET' });
method: 'GET'
});
const text = await resp.text(); const text = await resp.text();
console.log('Timer endpoint responded:', text); console.log('Timer endpoint responded:', text);
} catch (err) { } catch (err) {
@ -40,17 +56,19 @@ io.on('connection', async socket => {
}); });
}); });
// Serve static files (index.html, etc.) /* --------------------------------------------------------------------- */
/* ---------- 3. Serve static files ----------------------------------- */
/* --------------------------------------------------------------------- */
app.use(express.static('public')); app.use(express.static('public'));
// ---------- Redis subscriber ---------- /* --------------------------------------------------------------------- */
/* ---------- 4. Redis subscriber ------------------------------------- */
/* --------------------------------------------------------------------- */
const redisClient = createClient({ url: 'redis://192.168.37.1:6379' }); const redisClient = createClient({ url: 'redis://192.168.37.1:6379' });
redisClient.on('error', err => console.error('Redis error', err)); redisClient.on('error', err => console.error('Redis error', err));
(async () => { (async () => {
await redisClient.connect(); await redisClient.connect();
const sub = redisClient.duplicate(); // duplicate to keep separate pub/sub const sub = redisClient.duplicate(); // duplicate to keep separate pub/sub
await sub.connect(); await sub.connect();
@ -72,7 +90,9 @@ redisClient.on('error', err => console.error('Redis error', err));
sub.on('error', err => console.error('Subscriber error', err)); sub.on('error', err => console.error('Subscriber error', err));
})(); })();
// ---------- Start ---------- /* --------------------------------------------------------------------- */
/* ---------- 5. Start the HTTP server --------------------------------- */
/* --------------------------------------------------------------------- */
const PORT = process.env.PORT || 3000; const PORT = process.env.PORT || 3000;
server.listen(PORT, () => { server.listen(PORT, () => {
console.log(`Server listening on http://localhost:${PORT}`); console.log(`Server listening on http://localhost:${PORT}`);

View File

@ -29,4 +29,5 @@ push_redis: {{ push_redis }}
run_background : {{ run_background }} run_background : {{ run_background }}
log_output: {{ log_output }} log_output: {{ log_output }}
update_frequency: {{ update_frequency }} update_frequency: {{ update_frequency }}
custom_api_port: {{ custom_api_port }}
... ...

View File

@ -17,6 +17,7 @@ services:
volumes: volumes:
- "{{ service_control_web_folder }}/html:/usr/src/app/public" - "{{ service_control_web_folder }}/html:/usr/src/app/public"
- "{{ service_control_web_folder }}/node_server:/app" - "{{ service_control_web_folder }}/node_server:/app"
- "{{ api_service_folder }}/cosmostat_settings.yaml:/app/cosmostat_settings.yaml:ro"
- /app/node_modules - /app/node_modules
ports: ports:
- "{{ docker_gateway }}:3000:3000" - "{{ docker_gateway }}:3000:3000"
@ -33,6 +34,7 @@ services:
- "{{ docker_gateway }}:8080:80" - "{{ docker_gateway }}:8080:80"
volumes: volumes:
- ./html:/var/www/html/ - ./html:/var/www/html/
- "{{ api_service_folder }}/cosmostat_settings.yaml:/opt/api_settings/cosmostat_settings.yaml:ro"
networks: networks:
- cosmostat_net - cosmostat_net
restart: always restart: always
@ -42,7 +44,7 @@ services:
container_name: cosmostat_nginx_proxy container_name: cosmostat_nginx_proxy
image: nginx:latest image: nginx:latest
ports: ports:
- "{{ (docker_gateway + ':') if not public_dashboard | bool else '' }}80:80" - "{{ (docker_gateway + ':') if not public_dashboard | bool else '' }}{{ custom_port }}:80"
volumes: volumes:
- ./proxy/nginx.conf:/etc/nginx/conf.d/default.conf - ./proxy/nginx.conf:/etc/nginx/conf.d/default.conf
networks: networks: