nvidia tested and working, added options for web and api port in jenkinsfile
This commit is contained in:
@ -6,3 +6,7 @@ The web dashboard is built from a stack of Node.js, PHP, and nginx. The Node.js
|
||||
|
||||
The Docker stack also uses a network on the 192.168.37.0/24 subnet and keeps all traffic within this subnet. The dashboard can only be accessed locally at port 80 on 192.168.37.1 when the secure_api variable is set to true.
|
||||
|
||||
I am working on the network descriptor, and to make it easier on myself I think I will use the JSON output from the `ip` command.
|
||||
The problem is that if an interface has multiple IPs, I get multiple lines of IP data for that one interface.
|
||||
I think perhaps I will update my code to be aware of properties that may be arrays.
|
||||
I do think this makes more sense than having multiple cards for a single interface...
|
||||
@ -13,6 +13,7 @@ cosmostat_packages:
|
||||
- lm-sensors
|
||||
- jc
|
||||
- smartmontools
|
||||
- inxi
|
||||
|
||||
# python venv packages
|
||||
cosmostat_venv_packages: |
|
||||
@ -47,10 +48,12 @@ api_service_name: "cosmostat_api"
|
||||
api_service_folder: "{{ service_folder }}/api"
|
||||
venv_folder: "{{ service_folder }}/venv"
|
||||
api_service_exe: "{{ venv_folder }}/bin/python -u {{ api_service_folder }}/app.py"
|
||||
custom_api_port: "5000"
|
||||
|
||||
# dashboard vars
|
||||
service_control_web_folder: "{{ service_folder }}/web"
|
||||
public_dashboard: true
|
||||
custom_port: "80"
|
||||
|
||||
# will skip init when true
|
||||
quick_refresh: false
|
||||
|
||||
@ -49,16 +49,25 @@ class Component:
|
||||
# store static properties
|
||||
self.multi_check = self.is_multi()
|
||||
self.virt_ignore = self._descriptor.get('virt_ignore', [])
|
||||
self.multi_metrics = self._descriptor.get('multi_metrics', [])
|
||||
#if 'precheck' in self._descriptor:
|
||||
# precheck_command = self._descriptor.get('precheck', [])
|
||||
# precheck_value = int(run_command(precheck_command, zero_only = True))
|
||||
# if precheck_value == 0:
|
||||
# raise ValueError(f"No devices of type {self.type}")
|
||||
if self.is_virtual:
|
||||
self.virt_ignore = []
|
||||
self._properties: Dict[str, str] = {}
|
||||
self._properties: Dict[str, str | list[str]] = {}
|
||||
for key, command in descriptor.get('properties', {}).items():
|
||||
return_string = True
|
||||
if key in self.multi_metrics:
|
||||
return_string = False
|
||||
if self.this_device != "None":
|
||||
# this means this component type is a multi and the commands need templating for each device
|
||||
formatted_command = command.format(this_device=self.this_device)
|
||||
self._properties[key] = run_command(formatted_command, True)
|
||||
self._properties[key] = run_command(formatted_command, zero_only = return_string)
|
||||
else:
|
||||
self._properties[key] = run_command(command, zero_only = True)
|
||||
self._properties[key] = run_command(command, zero_only = return_string)
|
||||
print(self._properties[key])
|
||||
# build the description string
|
||||
self._description_template: str | None = descriptor.get("description")
|
||||
@ -114,31 +123,32 @@ class Component:
|
||||
component_properties = self._properties.items()
|
||||
else:
|
||||
component_properties = self.get_property(component)
|
||||
for name, value in component_properties:
|
||||
this_property = {
|
||||
"Source": self.name,
|
||||
"Property": name,
|
||||
"Value": value
|
||||
}
|
||||
if name not in self.virt_ignore:
|
||||
result.append(this_property)
|
||||
for name, values in component_properties:
|
||||
for value in (values if isinstance(values, list) else [values]):
|
||||
this_property = {
|
||||
"Source": self.name,
|
||||
"Property": name,
|
||||
"Value": value
|
||||
}
|
||||
if name not in self.virt_ignore:
|
||||
result.append(this_property)
|
||||
return result
|
||||
|
||||
def get_properties_strings(self, return_simple = False):
|
||||
result = []
|
||||
component_properties = self._properties.items()
|
||||
print(component_properties)
|
||||
for name, value in component_properties:
|
||||
simple_property = f"{name}: {value}"
|
||||
complex_property = {
|
||||
"Source": self.name,
|
||||
"Property": simple_property
|
||||
}
|
||||
if name not in self.virt_ignore:
|
||||
if return_simple:
|
||||
result.append(simple_property)
|
||||
else:
|
||||
result.append(complex_property)
|
||||
for name, values in component_properties:
|
||||
for value in (values if isinstance(values, list) else [values]):
|
||||
simple_property = f"{name}: {value}"
|
||||
complex_property = {
|
||||
"Source": self.name,
|
||||
"Property": simple_property
|
||||
}
|
||||
if name not in self.virt_ignore:
|
||||
if return_simple:
|
||||
result.append(simple_property)
|
||||
else:
|
||||
result.append(complex_property)
|
||||
return result
|
||||
|
||||
def get_metrics_keys(self):
|
||||
@ -318,15 +328,15 @@ class System:
|
||||
multi_check = component["multi_check"]
|
||||
# if multi, note that the command in device_list creates the list of things to pipe into this_device
|
||||
if multi_check:
|
||||
letters = [chr(c) for c in range(ord('A'), ord('Z')+1)]
|
||||
print(f"Creating one component of type {component_name} for each one found")
|
||||
component_type_device_list = get_device_list(component_name)
|
||||
|
||||
component_id = 0
|
||||
for this_device in component_type_device_list:
|
||||
this_component_letter = letters[component_type_device_list.index(this_device)]
|
||||
this_component_name = f"{component_name} {this_component_letter}"
|
||||
this_component_ID = component_type_device_list.index(this_device)
|
||||
this_component_name = f"{component_name} {this_component_ID}"
|
||||
print(f"{this_component_name} - {component_name} - {this_device}")
|
||||
self.add_components(Component(name = this_component_name, comp_type = component_name, this_device = this_device))
|
||||
new_component = Component(name = this_component_name, comp_type = component_name, this_device = this_device)
|
||||
self.add_components(new_component)
|
||||
|
||||
else:
|
||||
if debug_output:
|
||||
@ -538,7 +548,13 @@ def run_command(cmd, zero_only=False, use_shell=True, req_check = True):
|
||||
def get_device_list(device_type_name: str):
|
||||
result = []
|
||||
for component in component_class_tree:
|
||||
if component["name"] == device_type_name:
|
||||
precheck_value = 1
|
||||
if "precheck" in component:
|
||||
precheck_command = component["precheck"]
|
||||
precheck_value_output = run_command(precheck_command, zero_only = True)
|
||||
precheck_value = int(precheck_value_output)
|
||||
print(f"Precheck found - {precheck_command} - {precheck_value}")
|
||||
if component["name"] == device_type_name and precheck_value != 0:
|
||||
device_list_command = component["device_list"]
|
||||
device_list_result = run_command(device_list_command)
|
||||
result = device_list_result
|
||||
|
||||
@ -21,7 +21,8 @@ app_settings = {
|
||||
"secure_api" : True,
|
||||
"push_redis" : False,
|
||||
"run_background" : True,
|
||||
"update_frequency": 1
|
||||
"update_frequency": 1,
|
||||
"custom_api_port": "5000"
|
||||
}
|
||||
|
||||
with open('cosmostat_settings.yaml', 'r') as f:
|
||||
@ -60,6 +61,9 @@ def service_gateway_ip():
|
||||
else:
|
||||
return "0.0.0.0"
|
||||
|
||||
def service_api_port():
|
||||
return cosmostat_settings["custom_api_port"]
|
||||
|
||||
#######################################################################
|
||||
### Redis Functions
|
||||
#######################################################################
|
||||
@ -295,7 +299,7 @@ if __name__ == '__main__':
|
||||
print("Skipping flask background task")
|
||||
|
||||
# Flask API
|
||||
app.run(debug=False, host=service_gateway_ip(), port=5000)
|
||||
app.run(debug=False, host=service_gateway_ip(), port=service_api_port())
|
||||
|
||||
|
||||
|
||||
|
||||
@ -51,5 +51,48 @@
|
||||
"metrics": {
|
||||
"placeholder": ""
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "LAN",
|
||||
"description": "{Device ID} - {Device Name} - {MAC Address}",
|
||||
"multi_check": "True",
|
||||
"device_list": "ip link | grep default | grep -v -e docker -e 127.0.0.1 -e br- -e veth -e lo -e tun | cut -d ':' -f 2 | awk '{{print $1}}' ",
|
||||
"properties": {
|
||||
"MAC Address": "ip link | grep -A1 ' {this_device}' | grep ether | awk '{{print $2}}'",
|
||||
"Device Name": "echo {this_device}",
|
||||
"Device ID": "udevadm info -q property -p $(ls -l /sys/class/net/ | grep {this_device} | cut -d '>' -f2 | cut -b 8- ) | grep ID_MODEL_FROM_DATABASE | cut -d '=' -f2 "
|
||||
},
|
||||
"metrics": {
|
||||
"IP Address": "ip -o -4 ad | grep -v -e docker -e 127.0.0.1 -e br- | grep {this_device} | awk '{{print $4}}'",
|
||||
"Data Transmitted": "ifconfig {this_device} | grep RX | grep bytes | cut -d '(' -f2 | tr -d ')'",
|
||||
"Data Received": "ifconfig {this_device} | grep TX | grep bytes | cut -d '(' -f2 | tr -d ')'",
|
||||
"Link State": "cat /sys/class/net/{this_device}/operstate",
|
||||
"Link Speed": "cat /sys/class/net/{this_device}/speed"
|
||||
},
|
||||
"multi_metrics": [
|
||||
"IP Address"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "NVGPU",
|
||||
"description": "NVGPU{Device ID} - {Device Model} with {Memory Size}, Max Power {Maximum Power}",
|
||||
"multi_check": "True",
|
||||
"device_list": "nvidia-smi --query-gpu=index --format=csv,noheader,nounits",
|
||||
"properties": {
|
||||
"Device Model": "nvidia-smi --id={this_device} --query-gpu=name --format=csv,noheader,nounits",
|
||||
"Device ID": "echo NVGPU{this_device}",
|
||||
"Driver Version": "nvidia-smi --id={this_device} --query-gpu=driver_version --format=csv,noheader,nounits",
|
||||
"Maximum Power": "nvidia-smi --id={this_device} --query-gpu=power.draw --format=csv,noheader,nounits",
|
||||
"Memory Size": "nvidia-smi --id={this_device} --query-gpu=memory.total --format=csv,noheader,nounits"
|
||||
|
||||
},
|
||||
"metrics": {
|
||||
"Power Draw": "nvidia-smi --id={this_device} --query-gpu=power.draw --format=csv,noheader,nounits",
|
||||
"Used Memory": "nvidia-smi --id={this_device} --query-gpu=memory.used --format=csv,noheader,nounits",
|
||||
"Temperature": "nvidia-smi --id={this_device} --query-gpu=temperature.gpu --format=csv,noheader,nounits",
|
||||
"GPU Load": "nvidia-smi --id={this_device} --query-gpu=utilization.gpu --format=csv,noheader,nounits"
|
||||
|
||||
},
|
||||
"precheck": "lspci | grep NV | wc -l"
|
||||
}
|
||||
]
|
||||
@ -1,24 +1,22 @@
|
||||
[
|
||||
{
|
||||
"name": "LAN",
|
||||
"name": "",
|
||||
"description": "",
|
||||
"multi_check": "True",
|
||||
"device_list": "",
|
||||
"device_list": " ",
|
||||
"properties": {
|
||||
"MAC Address": "",
|
||||
"Device Name": "",
|
||||
"Device ID": ""
|
||||
|
||||
},
|
||||
"metrics": {
|
||||
"IP Address": "",
|
||||
"MB Transmitted": "",
|
||||
"MB Received": "",
|
||||
"Link State": "",
|
||||
"Link Speed": ""
|
||||
}
|
||||
},
|
||||
"multi_metrics": [
|
||||
],
|
||||
"virt_ignore": [
|
||||
]
|
||||
},
|
||||
{
|
||||
"SATA GBW": "sudo /usr/sbin/smartctl -x --json /dev/{this_device} | jq -r '.physical_block_size as $block |.ata_device_statistics.pages[] | select(.name == \"General Statistics\") | .table[] | select(.name == \"Logical Sectors Written\") | .value as $sectors | ($sectors * $block) / 1073741824 ' | awk '{{printf \"%.2f GiB Written\\n\", $0}}' || true",
|
||||
"NVMe GBW": "sudo /usr/sbin/smartctl -x --json /dev/{this_device} | jq -r ' .nvme_smart_health_information_log.data_units_written as $dw | .logical_block_size as $ls | ($dw * $ls) / 1073741824 ' | awk '{{printf \"%.2f GiB Written\\n\", $0}}' || true"
|
||||
}
|
||||
]
|
||||
]
|
||||
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<title>Matt-Cloud Cosmostat</title>
|
||||
<title>Cosmostat - <?php echo $_SERVER['SERVER_NAME'] ?></title>
|
||||
|
||||
<style>
|
||||
.components {display:grid; grid-template-columns:repeat(auto-fill, minmax(280px, 1fr)); gap:1rem;}
|
||||
@ -27,8 +27,28 @@
|
||||
<!-- PHP to render static components -->
|
||||
|
||||
<?php
|
||||
# load API settings, this requires a simple yaml file
|
||||
$raw_api_settings = file('/opt/api_settings/cosmostat_settings.yaml', FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
|
||||
$api_settings = [];
|
||||
foreach ($raw_api_settings as $line) {
|
||||
if ($line[0] === '#') {
|
||||
continue;
|
||||
}
|
||||
$pos = strpos($line, ':');
|
||||
if ($pos === false) {
|
||||
continue;
|
||||
}
|
||||
$key = trim(substr($line, 0, $pos));
|
||||
$value = trim(substr($line, $pos + 1));
|
||||
if ($value === '') {
|
||||
$value = null;
|
||||
}
|
||||
$api_settings[$key] = $value;
|
||||
}
|
||||
$dockerGateway = trim($api_settings['docker_gateway'], "\"'") ?? null;
|
||||
$customApiPort = trim($api_settings['custom_api_port'], "\"'") ?? null;
|
||||
# load API data
|
||||
$apiUrl = 'http://192.168.37.1:5000/php_summary';
|
||||
$apiUrl = 'http://'.$dockerGateway.':'.$customApiPort.'/php_summary';
|
||||
$context = stream_context_create([
|
||||
'http' => [
|
||||
'timeout' => 5, // seconds
|
||||
|
||||
@ -9,6 +9,7 @@
|
||||
"express": "^4.18.2",
|
||||
"socket.io": "^4.7.2",
|
||||
"redis": "^4.6.7",
|
||||
"node-fetch": "^2.6.7"
|
||||
"node-fetch": "^2.6.7",
|
||||
"js-yaml": "^4.1.0"
|
||||
}
|
||||
}
|
||||
@ -4,21 +4,39 @@ const express = require('express');
|
||||
const { createClient } = require('redis');
|
||||
const { Server } = require('socket.io');
|
||||
const fetch = require('node-fetch'); // npm i node-fetch@2
|
||||
const fs = require('fs');
|
||||
const yaml = require('js-yaml'); // npm i js-yaml
|
||||
const path = require('path');
|
||||
|
||||
const app = express();
|
||||
const server = http.createServer(app);
|
||||
const io = new Server(server);
|
||||
|
||||
// ---------- Socket.io ----------
|
||||
/* --------------------------------------------------------------------- */
|
||||
/* ---------- 1. Load the YAML configuration file ---------------------- */
|
||||
/* --------------------------------------------------------------------- */
|
||||
let config = {};
|
||||
try {
|
||||
const file = fs.readFileSync(path.resolve(__dirname, 'cosmostat_settings.yaml'), 'utf8');
|
||||
config = yaml.load(file);
|
||||
} catch (e) {
|
||||
console.error('Failed to load config.yaml:', e);
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
const API_PORT = config.custom_api_port || 5000; // fallback to 5000
|
||||
const API_HOST = config.docker_gateway || '192.168.37.1'; // fallback IP
|
||||
const API_BASE = `http://${API_HOST}:${API_PORT}`;
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
// ---------- 2. Socket.io ------------------------------------------------
|
||||
// ---------------------------------------------------------------------
|
||||
io.on('connection', async socket => {
|
||||
console.log('client connected:', socket.id);
|
||||
|
||||
// Call the external API every time a client connects
|
||||
try {
|
||||
const resp = await fetch('http://192.168.37.1:5000/start_timer', {
|
||||
method: 'GET'
|
||||
});
|
||||
|
||||
const resp = await fetch(`${API_BASE}/start_timer`, { method: 'GET' });
|
||||
const data = await resp.json();
|
||||
console.log('API responded to connect:', data);
|
||||
} catch (err) {
|
||||
@ -29,9 +47,7 @@ io.on('connection', async socket => {
|
||||
socket.on('tableRendered', async () => {
|
||||
console.log('Client reported table rendered - starting timer');
|
||||
try {
|
||||
const resp = await fetch('http://192.168.37.1:5000/start_timer', {
|
||||
method: 'GET'
|
||||
});
|
||||
const resp = await fetch(`${API_BASE}/start_timer`, { method: 'GET' });
|
||||
const text = await resp.text();
|
||||
console.log('Timer endpoint responded:', text);
|
||||
} catch (err) {
|
||||
@ -40,17 +56,19 @@ io.on('connection', async socket => {
|
||||
});
|
||||
});
|
||||
|
||||
// Serve static files (index.html, etc.)
|
||||
/* --------------------------------------------------------------------- */
|
||||
/* ---------- 3. Serve static files ----------------------------------- */
|
||||
/* --------------------------------------------------------------------- */
|
||||
app.use(express.static('public'));
|
||||
|
||||
// ---------- Redis subscriber ----------
|
||||
/* --------------------------------------------------------------------- */
|
||||
/* ---------- 4. Redis subscriber ------------------------------------- */
|
||||
/* --------------------------------------------------------------------- */
|
||||
const redisClient = createClient({ url: 'redis://192.168.37.1:6379' });
|
||||
|
||||
redisClient.on('error', err => console.error('Redis error', err));
|
||||
|
||||
(async () => {
|
||||
await redisClient.connect();
|
||||
|
||||
const sub = redisClient.duplicate(); // duplicate to keep separate pub/sub
|
||||
await sub.connect();
|
||||
|
||||
@ -72,7 +90,9 @@ redisClient.on('error', err => console.error('Redis error', err));
|
||||
sub.on('error', err => console.error('Subscriber error', err));
|
||||
})();
|
||||
|
||||
// ---------- Start ----------
|
||||
/* --------------------------------------------------------------------- */
|
||||
/* ---------- 5. Start the HTTP server --------------------------------- */
|
||||
/* --------------------------------------------------------------------- */
|
||||
const PORT = process.env.PORT || 3000;
|
||||
server.listen(PORT, () => {
|
||||
console.log(`Server listening on http://localhost:${PORT}`);
|
||||
|
||||
@ -29,4 +29,5 @@ push_redis: {{ push_redis }}
|
||||
run_background : {{ run_background }}
|
||||
log_output: {{ log_output }}
|
||||
update_frequency: {{ update_frequency }}
|
||||
custom_api_port: {{ custom_api_port }}
|
||||
...
|
||||
@ -17,6 +17,7 @@ services:
|
||||
volumes:
|
||||
- "{{ service_control_web_folder }}/html:/usr/src/app/public"
|
||||
- "{{ service_control_web_folder }}/node_server:/app"
|
||||
- "{{ api_service_folder }}/cosmostat_settings.yaml:/app/cosmostat_settings.yaml:ro"
|
||||
- /app/node_modules
|
||||
ports:
|
||||
- "{{ docker_gateway }}:3000:3000"
|
||||
@ -33,6 +34,7 @@ services:
|
||||
- "{{ docker_gateway }}:8080:80"
|
||||
volumes:
|
||||
- ./html:/var/www/html/
|
||||
- "{{ api_service_folder }}/cosmostat_settings.yaml:/opt/api_settings/cosmostat_settings.yaml:ro"
|
||||
networks:
|
||||
- cosmostat_net
|
||||
restart: always
|
||||
@ -42,7 +44,7 @@ services:
|
||||
container_name: cosmostat_nginx_proxy
|
||||
image: nginx:latest
|
||||
ports:
|
||||
- "{{ (docker_gateway + ':') if not public_dashboard | bool else '' }}80:80"
|
||||
- "{{ (docker_gateway + ':') if not public_dashboard | bool else '' }}{{ custom_port }}:80"
|
||||
volumes:
|
||||
- ./proxy/nginx.conf:/etc/nginx/conf.d/default.conf
|
||||
networks:
|
||||
|
||||
Reference in New Issue
Block a user