nvidia tested and working, added options for web and api port in jenkinsfile

This commit is contained in:
2026-03-19 00:30:12 -07:00
parent d7d2507d43
commit cf269b83af
11 changed files with 172 additions and 60 deletions

View File

@ -6,3 +6,7 @@ The web dashboard is built from a stack of Node.js, PHP, and nginx. The Node.js
The docker stack also uses a network on the 192.168.37.0/24 subnet, and secures all traffic to be within this subnet. The dashboard can only be accessed locally at port 80 on 192.168.37.1 when the secure_api variable is set to true.
Working on the network descriptor; to make it easier on myself, I think I will use the JSON output from `ip`.
The problem is that if an interface has multiple IPs, then I get multiple lines of IP data for that one interface.
I think I will update my code to be aware of properties that may be arrays.
I do think this makes more sense than having multiple cards for a single interface.

View File

@ -13,6 +13,7 @@ cosmostat_packages:
- lm-sensors
- jc
- smartmontools
- inxi
# python venv packages
cosmostat_venv_packages: |
@ -47,10 +48,12 @@ api_service_name: "cosmostat_api"
api_service_folder: "{{ service_folder }}/api"
venv_folder: "{{ service_folder }}/venv"
api_service_exe: "{{ venv_folder }}/bin/python -u {{ api_service_folder }}/app.py"
custom_api_port: "5000"
# dashboard vars
service_control_web_folder: "{{ service_folder }}/web"
public_dashboard: true
custom_port: "80"
# will skip init when true
quick_refresh: false

View File

@ -49,16 +49,25 @@ class Component:
# store static properties
self.multi_check = self.is_multi()
self.virt_ignore = self._descriptor.get('virt_ignore', [])
self.multi_metrics = self._descriptor.get('multi_metrics', [])
#if 'precheck' in self._descriptor:
# precheck_command = self._descriptor.get('precheck', [])
# precheck_value = int(run_command(precheck_command, zero_only = True))
# if precheck_value == 0:
# raise ValueError(f"No devices of type {self.type}")
if self.is_virtual:
self.virt_ignore = []
self._properties: Dict[str, str] = {}
self._properties: Dict[str, str | list[str]] = {}
for key, command in descriptor.get('properties', {}).items():
return_string = True
if key in self.multi_metrics:
return_string = False
if self.this_device != "None":
# this means this component type is a multi and the commands need templating for each device
formatted_command = command.format(this_device=self.this_device)
self._properties[key] = run_command(formatted_command, True)
self._properties[key] = run_command(formatted_command, zero_only = return_string)
else:
self._properties[key] = run_command(command, zero_only = True)
self._properties[key] = run_command(command, zero_only = return_string)
print(self._properties[key])
# build the description string
self._description_template: str | None = descriptor.get("description")
@ -114,31 +123,32 @@ class Component:
component_properties = self._properties.items()
else:
component_properties = self.get_property(component)
for name, value in component_properties:
this_property = {
"Source": self.name,
"Property": name,
"Value": value
}
if name not in self.virt_ignore:
result.append(this_property)
for name, values in component_properties:
for value in (values if isinstance(values, list) else [values]):
this_property = {
"Source": self.name,
"Property": name,
"Value": value
}
if name not in self.virt_ignore:
result.append(this_property)
return result
def get_properties_strings(self, return_simple = False):
result = []
component_properties = self._properties.items()
print(component_properties)
for name, value in component_properties:
simple_property = f"{name}: {value}"
complex_property = {
"Source": self.name,
"Property": simple_property
}
if name not in self.virt_ignore:
if return_simple:
result.append(simple_property)
else:
result.append(complex_property)
for name, values in component_properties:
for value in (values if isinstance(values, list) else [values]):
simple_property = f"{name}: {value}"
complex_property = {
"Source": self.name,
"Property": simple_property
}
if name not in self.virt_ignore:
if return_simple:
result.append(simple_property)
else:
result.append(complex_property)
return result
def get_metrics_keys(self):
@ -318,15 +328,15 @@ class System:
multi_check = component["multi_check"]
# if multi, note that the command in device_list creates the list of things to pipe into this_device
if multi_check:
letters = [chr(c) for c in range(ord('A'), ord('Z')+1)]
print(f"Creating one component of type {component_name} for each one found")
component_type_device_list = get_device_list(component_name)
component_id = 0
for this_device in component_type_device_list:
this_component_letter = letters[component_type_device_list.index(this_device)]
this_component_name = f"{component_name} {this_component_letter}"
this_component_ID = component_type_device_list.index(this_device)
this_component_name = f"{component_name} {this_component_ID}"
print(f"{this_component_name} - {component_name} - {this_device}")
self.add_components(Component(name = this_component_name, comp_type = component_name, this_device = this_device))
new_component = Component(name = this_component_name, comp_type = component_name, this_device = this_device)
self.add_components(new_component)
else:
if debug_output:
@ -538,7 +548,13 @@ def run_command(cmd, zero_only=False, use_shell=True, req_check = True):
def get_device_list(device_type_name: str):
result = []
for component in component_class_tree:
if component["name"] == device_type_name:
precheck_value = 1
if "precheck" in component:
precheck_command = component["precheck"]
precheck_value_output = run_command(precheck_command, zero_only = True)
precheck_value = int(precheck_value_output)
print(f"Precheck found - {precheck_command} - {precheck_value}")
if component["name"] == device_type_name and precheck_value != 0:
device_list_command = component["device_list"]
device_list_result = run_command(device_list_command)
result = device_list_result

View File

@ -21,7 +21,8 @@ app_settings = {
"secure_api" : True,
"push_redis" : False,
"run_background" : True,
"update_frequency": 1
"update_frequency": 1,
"custom_api_port": "5000"
}
with open('cosmostat_settings.yaml', 'r') as f:
@ -60,6 +61,9 @@ def service_gateway_ip():
else:
return "0.0.0.0"
def service_api_port():
    """Return the TCP port the Flask API should listen on.

    The value is read from ``cosmostat_settings["custom_api_port"]``. The
    built-in default is the string ``"5000"``, while a value loaded from the
    YAML settings file may already be an integer, so the result is normalised
    to ``int`` to give callers one predictable type.

    Raises:
        KeyError: if the setting is missing.
        ValueError: if the configured value is not a valid integer.
    """
    return int(cosmostat_settings["custom_api_port"])
#######################################################################
### Redis Functions
#######################################################################
@ -295,7 +299,7 @@ if __name__ == '__main__':
print("Skipping flask background task")
# Flask API
app.run(debug=False, host=service_gateway_ip(), port=5000)
app.run(debug=False, host=service_gateway_ip(), port=service_api_port())

View File

@ -51,5 +51,48 @@
"metrics": {
"placeholder": ""
}
},
{
  "name": "LAN",
  "description": "{Device ID} - {Device Name} - {MAC Address}",
  "multi_check": "True",
  "device_list": "ip link | grep default | grep -v -e docker -e 127.0.0.1 -e br- -e veth -e lo -e tun | cut -d ':' -f 2 | awk '{{print $1}}' ",
  "properties": {
    "MAC Address": "ip link | grep -A1 ' {this_device}' | grep ether | awk '{{print $2}}'",
    "Device Name": "echo {this_device}",
    "Device ID": "udevadm info -q property -p $(ls -l /sys/class/net/ | grep {this_device} | cut -d '>' -f2 | cut -b 8- ) | grep ID_MODEL_FROM_DATABASE | cut -d '=' -f2 "
  },
  "metrics": {
    "IP Address": "ip -o -4 ad | grep -v -e docker -e 127.0.0.1 -e br- | grep {this_device} | awk '{{print $4}}'",
    "Data Transmitted": "ifconfig {this_device} | grep TX | grep bytes | cut -d '(' -f2 | tr -d ')'",
    "Data Received": "ifconfig {this_device} | grep RX | grep bytes | cut -d '(' -f2 | tr -d ')'",
    "Link State": "cat /sys/class/net/{this_device}/operstate",
    "Link Speed": "cat /sys/class/net/{this_device}/speed"
  },
  "multi_metrics": [
    "IP Address"
  ]
},
{
  "name": "NVGPU",
  "description": "NVGPU{Device ID} - {Device Model} with {Memory Size}, Max Power {Maximum Power}",
  "multi_check": "True",
  "device_list": "nvidia-smi --query-gpu=index --format=csv,noheader,nounits",
  "properties": {
    "Device Model": "nvidia-smi --id={this_device} --query-gpu=name --format=csv,noheader,nounits",
    "Device ID": "echo NVGPU{this_device}",
    "Driver Version": "nvidia-smi --id={this_device} --query-gpu=driver_version --format=csv,noheader,nounits",
    "Maximum Power": "nvidia-smi --id={this_device} --query-gpu=power.max_limit --format=csv,noheader,nounits",
    "Memory Size": "nvidia-smi --id={this_device} --query-gpu=memory.total --format=csv,noheader,nounits"
  },
  "metrics": {
    "Power Draw": "nvidia-smi --id={this_device} --query-gpu=power.draw --format=csv,noheader,nounits",
    "Used Memory": "nvidia-smi --id={this_device} --query-gpu=memory.used --format=csv,noheader,nounits",
    "Temperature": "nvidia-smi --id={this_device} --query-gpu=temperature.gpu --format=csv,noheader,nounits",
    "GPU Load": "nvidia-smi --id={this_device} --query-gpu=utilization.gpu --format=csv,noheader,nounits"
  },
  "precheck": "lspci | grep NV | wc -l"
}
]

View File

@ -1,24 +1,22 @@
[
{
"name": "LAN",
"name": "",
"description": "",
"multi_check": "True",
"device_list": "",
"device_list": " ",
"properties": {
"MAC Address": "",
"Device Name": "",
"Device ID": ""
},
"metrics": {
"IP Address": "",
"MB Transmitted": "",
"MB Received": "",
"Link State": "",
"Link Speed": ""
}
},
"multi_metrics": [
],
"virt_ignore": [
]
},
{
"SATA GBW": "sudo /usr/sbin/smartctl -x --json /dev/{this_device} | jq -r '.physical_block_size as $block |.ata_device_statistics.pages[] | select(.name == \"General Statistics\") | .table[] | select(.name == \"Logical Sectors Written\") | .value as $sectors | ($sectors * $block) / 1073741824 ' | awk '{{printf \"%.2f GiB Written\\n\", $0}}' || true",
"NVMe GBW": "sudo /usr/sbin/smartctl -x --json /dev/{this_device} | jq -r ' .nvme_smart_health_information_log.data_units_written as $dw | .logical_block_size as $ls | ($dw * $ls) / 1073741824 ' | awk '{{printf \"%.2f GiB Written\\n\", $0}}' || true"
}
]
]

View File

@ -2,7 +2,7 @@
<html lang="en">
<head>
<meta charset="utf-8">
<title>Matt-Cloud Cosmostat</title>
<title>Cosmostat - <?php echo $_SERVER['SERVER_NAME'] ?></title>
<style>
.components {display:grid; grid-template-columns:repeat(auto-fill, minmax(280px, 1fr)); gap:1rem;}
@ -27,8 +27,28 @@
<!-- PHP to render static components -->
<?php
# load API settings, this requires a simple yaml file (flat "key: value" lines only)
$raw_api_settings = file('/opt/api_settings/cosmostat_settings.yaml', FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
if ($raw_api_settings === false) {
    # file() returns false when the file cannot be read; continue with the defaults below
    $raw_api_settings = [];
}
$api_settings = [];
foreach ($raw_api_settings as $line) {
    $line = trim($line);
    # skip whitespace-only lines and comments
    if ($line === '' || $line[0] === '#') {
        continue;
    }
    # lines without a colon (e.g. the YAML document markers) carry no setting
    $pos = strpos($line, ':');
    if ($pos === false) {
        continue;
    }
    $key = trim(substr($line, 0, $pos));
    $value = trim(substr($line, $pos + 1));
    if ($value === '') {
        $value = null;
    }
    $api_settings[$key] = $value;
}
# The ?? fallback must be applied before trim(): trim() never returns null, so a
# trailing "?? null" can never take effect. A missing or empty setting falls back
# to the address and port that were previously hard-coded in the API URL.
$dockerGateway = trim($api_settings['docker_gateway'] ?? '192.168.37.1', "\"'");
$customApiPort = trim($api_settings['custom_api_port'] ?? '5000', "\"'");
# load API data
$apiUrl = 'http://192.168.37.1:5000/php_summary';
$apiUrl = 'http://'.$dockerGateway.':'.$customApiPort.'/php_summary';
$context = stream_context_create([
'http' => [
'timeout' => 5, // seconds

View File

@ -9,6 +9,7 @@
"express": "^4.18.2",
"socket.io": "^4.7.2",
"redis": "^4.6.7",
"node-fetch": "^2.6.7"
"node-fetch": "^2.6.7",
"js-yaml": "^4.1.0"
}
}

View File

@ -4,21 +4,39 @@ const express = require('express');
const { createClient } = require('redis');
const { Server } = require('socket.io');
const fetch = require('node-fetch'); // npm i node-fetch@2
const fs = require('fs');
const yaml = require('js-yaml'); // npm i js-yaml
const path = require('path');
const app = express();
const server = http.createServer(app);
const io = new Server(server);
// ---------- Socket.io ----------
/* --------------------------------------------------------------------- */
/* ---------- 1. Load the YAML configuration file ---------------------- */
/* --------------------------------------------------------------------- */
// Settings file expected alongside this script.
const CONFIG_PATH = path.resolve(__dirname, 'cosmostat_settings.yaml');

let config = {};
try {
  const file = fs.readFileSync(CONFIG_PATH, 'utf8');
  const loaded = yaml.load(file);
  // yaml.load() yields undefined for an empty document and a scalar for a
  // non-mapping one; only accept an object so the property lookups below
  // fall through to their defaults instead of throwing.
  if (loaded && typeof loaded === 'object') {
    config = loaded;
  }
} catch (e) {
  // Name the file that was actually read so the log points at the real path.
  console.error(`Failed to load ${CONFIG_PATH}:`, e);
  process.exit(1);
}

const API_PORT = config.custom_api_port || 5000; // fallback to 5000
const API_HOST = config.docker_gateway || '192.168.37.1'; // fallback IP
const API_BASE = `http://${API_HOST}:${API_PORT}`;
// ---------------------------------------------------------------------
// ---------- 2. Socket.io ------------------------------------------------
// ---------------------------------------------------------------------
io.on('connection', async socket => {
console.log('client connected:', socket.id);
// Call the external API every time a client connects
try {
const resp = await fetch('http://192.168.37.1:5000/start_timer', {
method: 'GET'
});
const resp = await fetch(`${API_BASE}/start_timer`, { method: 'GET' });
const data = await resp.json();
console.log('API responded to connect:', data);
} catch (err) {
@ -29,9 +47,7 @@ io.on('connection', async socket => {
socket.on('tableRendered', async () => {
console.log('Client reported table rendered - starting timer');
try {
const resp = await fetch('http://192.168.37.1:5000/start_timer', {
method: 'GET'
});
const resp = await fetch(`${API_BASE}/start_timer`, { method: 'GET' });
const text = await resp.text();
console.log('Timer endpoint responded:', text);
} catch (err) {
@ -40,17 +56,19 @@ io.on('connection', async socket => {
});
});
// Serve static files (index.html, etc.)
/* --------------------------------------------------------------------- */
/* ---------- 3. Serve static files ----------------------------------- */
/* --------------------------------------------------------------------- */
app.use(express.static('public'));
// ---------- Redis subscriber ----------
/* --------------------------------------------------------------------- */
/* ---------- 4. Redis subscriber ------------------------------------- */
/* --------------------------------------------------------------------- */
const redisClient = createClient({ url: 'redis://192.168.37.1:6379' });
redisClient.on('error', err => console.error('Redis error', err));
(async () => {
await redisClient.connect();
const sub = redisClient.duplicate(); // duplicate to keep separate pub/sub
await sub.connect();
@ -72,7 +90,9 @@ redisClient.on('error', err => console.error('Redis error', err));
sub.on('error', err => console.error('Subscriber error', err));
})();
// ---------- Start ----------
/* --------------------------------------------------------------------- */
/* ---------- 5. Start the HTTP server --------------------------------- */
/* --------------------------------------------------------------------- */
const PORT = process.env.PORT || 3000;
server.listen(PORT, () => {
console.log(`Server listening on http://localhost:${PORT}`);

View File

@ -29,4 +29,5 @@ push_redis: {{ push_redis }}
run_background : {{ run_background }}
log_output: {{ log_output }}
update_frequency: {{ update_frequency }}
custom_api_port: {{ custom_api_port }}
...

View File

@ -17,6 +17,7 @@ services:
volumes:
- "{{ service_control_web_folder }}/html:/usr/src/app/public"
- "{{ service_control_web_folder }}/node_server:/app"
- "{{ api_service_folder }}/cosmostat_settings.yaml:/app/cosmostat_settings.yaml:ro"
- /app/node_modules
ports:
- "{{ docker_gateway }}:3000:3000"
@ -33,6 +34,7 @@ services:
- "{{ docker_gateway }}:8080:80"
volumes:
- ./html:/var/www/html/
- "{{ api_service_folder }}/cosmostat_settings.yaml:/opt/api_settings/cosmostat_settings.yaml:ro"
networks:
- cosmostat_net
restart: always
@ -42,7 +44,7 @@ services:
container_name: cosmostat_nginx_proxy
image: nginx:latest
ports:
- "{{ (docker_gateway + ':') if not public_dashboard | bool else '' }}80:80"
- "{{ (docker_gateway + ':') if not public_dashboard | bool else '' }}{{ custom_port }}:80"
volumes:
- ./proxy/nginx.conf:/etc/nginx/conf.d/default.conf
networks: