add string returns for metrics and properties

This commit is contained in:
2026-03-16 09:17:44 -07:00
parent adb1387693
commit 61421305ed
16 changed files with 524 additions and 390 deletions

View File

@ -1,6 +1,7 @@
# this class file is for the cosmostat service
import subprocess
import json
import time
from typing import Dict, Any, List
# Global Class Vars
@ -8,8 +9,6 @@ global_max_length = 500
debug_output = False
# import the component descriptor
# this outlines how the component class works
# each type of component has a "type"
try:
with open("component_descriptors.json", encoding="utf-8") as f:
component_class_tree: List[Dict] = json.load(f)
@ -18,18 +17,25 @@ except FileNotFoundError as exc:
component_types = [{"name": entry["name"], "multi_check": entry["multi_check"] == "True"} for entry in component_class_tree]
#################################################################
# Component Class
#################################################################
class Component:
############################################################
# instantiate new component
############################################################
def __init__(self, name: str, comp_type: str, this_device="None"):
self.name = name
self.type = comp_type
self.this_device = this_device
print(f"This device - {self.this_device}")
# build the component descriptor dictionary
for component in component_class_tree:
if component["name"] == self.type:
COMPONENT_DESCRIPTORS = component
# Load component type descriptor from class tree
# COMPONENT_DESCRIPTORS = {d['type']: d for d in component_class_tree}
descriptor = COMPONENT_DESCRIPTORS
self._descriptor = descriptor
if descriptor is None:
@ -39,10 +45,10 @@ class Component:
)
# store static properties
self.multi_check = self.is_multi()
self.virt_ignore = self._descriptor.get('virt_ignore', [])
self._properties: Dict[str, str] = {}
for key, command in descriptor.get('properties', {}).items():
if self.this_device != "None":
print(f"command - {command}; this_device - {self.this_device}")
formatted_command = command.format(this_device=self.this_device)
self._properties[key] = run_command(formatted_command, True)
else:
@ -64,6 +70,10 @@ class Component:
f"{self.description}")
return self_string
############################################################
# Class Functions
############################################################
def update_metrics(self):
for key, command in self._descriptor.get('metrics', {}).items():
if self.this_device != "None":
@ -74,20 +84,85 @@ class Component:
else:
self._metrics[key] = run_command(command, True)
def get_property(self, type):
    """Return the stored value of the property named ``type``.

    Raises KeyError when no property with that name was stored.
    """
    stored = self._properties
    return stored[type]
def is_multi(self):
    """Return the ``multi_check`` flag listed for this component's type.

    Scans the module-level ``component_types`` list for an entry whose
    "name" equals ``self.type``; returns False when none is found.
    """
    return next(
        (entry["multi_check"] for entry in component_types if self.type == entry["name"]),
        False,
    )
########################################################
# redis data functions
########################################################
def get_properties_keys(self):
    """Build one keyed record per stored property.

    Each record carries "Source" (this component's name), "Property" and
    "Value". Properties whose name appears in ``self.virt_ignore`` are
    left out.
    """
    return [
        {"Source": self.name, "Property": prop_name, "Value": prop_value}
        for prop_name, prop_value in self._properties.items()
        if prop_name not in self.virt_ignore
    ]
def get_properties_strings(self):
    """Build one display record per stored property.

    Each record carries "Source" (this component's name) and a single
    "Property" string of the form "<name>: <value>". Properties whose name
    appears in ``self.virt_ignore`` are left out.
    """
    return [
        {"Source": self.name, "Property": f"{prop_name}: {prop_value}"}
        for prop_name, prop_value in self._properties.items()
        if prop_name not in self.virt_ignore
    ]
def get_metrics_keys(self):
    """Build one keyed record per usable metric.

    Each record carries "Source" (this component's name), "Metric" and
    "Data". A metric is dropped when its value is one of the blank markers
    below or when its name appears in ``self.virt_ignore``.
    """
    blank_markers = ["", "null", None, []]
    records = []
    for metric_name, metric_value in self._metrics.items():
        if metric_value in blank_markers or metric_name in self.virt_ignore:
            continue
        records.append(
            {"Source": self.name, "Metric": metric_name, "Data": metric_value}
        )
    return records
def get_metrics_strings(self):
    """Build one display record per usable metric.

    Each record carries "Source" (this component's name) and a single
    "Metric" string of the form "<name>: <value>". A metric is dropped when
    its value is one of the blank markers below or when its name appears in
    ``self.virt_ignore``.
    """
    blank_markers = ["", "null", None, []]
    return [
        # "name: value" (with a space) matches the format used by
        # get_properties_strings and the system-level metric strings.
        {"Source": self.name, "Metric": f"{metric_name}: {metric_value}"}
        for metric_name, metric_value in self._metrics.items()
        if metric_value not in blank_markers and metric_name not in self.virt_ignore
    ]
########################################################
# random data functions
########################################################
# complex data type return
def get_metrics(self, type=None):
    """Return this component's metrics wrapped in a summary dict.

    Args:
        type: Optional metric name. When None every stored metric is
            included; otherwise only the metric with that exact name.

    Returns:
        A dict with "Source" (component name), "Component Type" and
        "Metrics", the latter a list of {"Metric": name, "Data": value}
        records (empty when nothing matches).
    """
    # Iterate .items(): looping over the dict itself yields only key strings,
    # and unpacking a key into (name, value) raises ValueError.
    these_metrics = [
        {"Metric": name, "Data": value}
        for name, value in self._metrics.items()
        if type is None or name == type
    ]
    return {
        "Source": self.name,
        "Component Type": self.type,
        "Metrics": these_metrics,
    }
@ -96,89 +171,69 @@ class Component:
these_properties = []
if type == None:
for name, value in self._properties.items():
these_properties.append({"name": name, "value": value})
these_properties.append({"Property": name, "Value": value})
else:
for name, value in self._properties.items():
if name == type:
these_properties.append({"name": name, "value": value})
these_properties.append({"Property": name, "Value": value})
result = {
"name": self.name,
"type": self.type,
"properties": these_properties
"Source": self.name,
"Component Type": self.type,
"Properties": these_properties
}
return result
# this gets the value of a specified property, type required
def get_property(self, type):
return self._properties[type]
# returns array of dicts for redis
def get_metrics_keys(self):
result = []
empty_value = ["", "null", None, []]
for name, value in self._metrics.items():
this_metric = {
"name": self.name,
"type": name,
"metric": value
}
if value not in empty_value:
result.append(this_metric)
return result
def get_properties_keys(self):
result = []
for name, value in self._properties.items():
this_property = {
"name": self.name,
"property": name,
"value": value
}
result.append(this_property)
return result
# full data return
def get_description(self):
these_properties = []
for name, value in self._metrics.items():
these_properties.append({"name": name, "value": value})
for name, value in self._properties.items():
these_properties.append({"Property": name, "Value": value})
these_metrics = []
for name, value in self._metrics.items():
these_metrics.append({"name": name, "value": value})
these_metrics.append({"Metric": name, "Data": value})
result = {
"name": self.name,
"type": self.type,
"properties": these_properties,
"metrics": these_metrics
"Source": self.name,
"Type": self.type,
"Properties": these_properties,
"Metrics": these_metrics
}
return result
def is_multi(self):
for component_type in component_types:
if self.type == component_type["name"]:
return component_type["multi_check"]
return False
############################################################
# System Class
# this is a big one...
############################################################
class System:
########################################################
# system variable declarations
# keys to add: model and serial number
########################################################
static_key_variables = [
{"name": "hostname", "command": "hostname"},
{"name": "virt_string", "command": "systemd-detect-virt"}
{"name": "Hostname", "command": "hostname"},
{"name": "Virtual Machine", "command": "echo $([[ \"$(systemd-detect-virt)\" == none ]] && echo False || echo True)"},
{"name": "CPU Architecture:", "command": "lscpu --json | jq -r '.lscpu[] | select(.field==\"Architecture:\") | .data'"},
{"name": "OS Kernel", "command": "uname -r"},
{"name": "OS Name", "command": "cat /etc/os-release | grep PRETTY | cut -d\\\" -f2"},
{"name": "Manufacturer", "command": "sudo dmidecode --type 1 | grep Manufacturer: | cut -d: -f2 | sed -e 's/^[ \\t]*//'"},
{"name": "Product Name", "command": "sudo dmidecode --type 2 | grep 'Product Name:' | cut -d: -f2 | sed -e 's/^[ \\t]*//'"},
{"name": "Serial Number", "command": "sudo dmidecode --type 2 | grep 'Serial Number: '| cut -d: -f2 | sed -e 's/^[ \\t]*//'"},
]
dynamic_key_variables = [
{"name": "uptime", "command": "uptime -p"},
{"name": "timestamp", "command": "date '+%D %r'"},
{"name": "System Uptime", "command": "uptime -p"},
{"name": "Current Date", "command": "date '+%D %r'"},
]
# add components based on the class tree
# component_types = [{"name": entry["name"], "multi_check": entry["multi_check"] == "True"} for entry in component_class_tree]
virt_ignore = [
"Product Name",
"Serial Number"
]
########################################################
# instantiate new system
########################################################
def __init__(self, name: str):
# the system needs a name
self.name = name
@ -189,18 +244,29 @@ class System:
# initialize system properties and metrics dicts
self._properties: Dict[str, str] = {}
self._metrics: Dict[str, str] = {}
# timekeeping for websocket
self.recent_check = int(time.time())
# load static keys
for static_key in self.static_key_variables:
command = static_key["command"]
result = run_command(command, True)
if debug_output:
print(f"Static key [{static_key["name"]}] - command [{command}] - output [{result}]")
self._properties[static_key["name"]] = result
if static_key["name"] not in self.virt_ignore:
command = static_key["command"]
result = run_command(command, True)
if debug_output:
print(f'Static key [{static_key["name"]}] - command [{command}] - output [{result}]')
self._properties[static_key["name"]] = result
# initialize live keys
self.update_live_keys()
# initialize components
self.load_components()
def __str__(self):
components_str = "\n".join(f" - {c}" for c in self.components)
return f"System hostname: {self.name}\nComponent Count: {self.get_component_count()}\n{components_str}"
########################################################
# critical class functions
########################################################
# update only system dynamic keys
def update_live_keys(self):
for live_key in self.dynamic_key_variables:
@ -209,7 +275,7 @@ class System:
result = run_command(command, True)
self._metrics[live_key['name']] = result
if debug_output:
print(f"Command {live_key["name"]} - [{command}] Result - [{result}]")
print(f'Command {live_key["name"]} - [{command}] Result - [{result}]')
# update all dynamic keys, including components
def update_system_state(self):
@ -236,7 +302,7 @@ class System:
else:
if debug_output:
print(f"Creating component {component["name"]}")
print(f'Creating component {component["name"]}')
self.add_components(Component(component_name, component_name))
# Add a component to the system
@ -244,6 +310,10 @@ class System:
if debug_output:
print(f"Component description: {component.description}")
self.components.append(component)
########################################################
# helper class functions
########################################################
# Get all components, optionally filtered by type
def get_components(self, component_type: type = None):
@ -259,31 +329,123 @@ class System:
else:
return result[0]
def get_component_strings(self, component_type: type = None):
    """Return component description strings, optionally filtered by type.

    Args:
        component_type: Value compared against each component's ``type``
            attribute. None selects every component.

    Returns:
        With no filter: a list of all descriptions.
        With a filter: a list of matching descriptions when the matched
        component reports ``is_multi()``, otherwise the single matching
        description. An empty list when nothing matches.
    """
    if component_type is None:
        return [component.description for component in self.components]

    matches = [
        component for component in self.components
        if component.type == component_type
    ]
    if not matches:
        # Previously this path raised NameError (no components at all) or
        # IndexError (no component of the requested type).
        return []

    descriptions = [component.description for component in matches]
    # Ask a component of the requested type whether it is multi-instance;
    # the leftover loop variable pointed at the last component of any type.
    if matches[0].is_multi():
        return descriptions
    return descriptions[0]
# get component count
def get_component_count(self):
    """Return how many components are currently held in ``self.components``."""
    return len(self.components)
def __str__(self):
components_str = "\n".join(f" - {c}" for c in self.components)
return f"System hostname: {self.name}\nComponent Count: {self.get_component_count()}\n{components_str}"
def is_virtual(self):
    """Return True when the stored "Virtual Machine" property reads as true.

    The previous body looked up 'virt_ignore' in ``self._properties``,
    bound the result to an unused local and fell through, so it always
    returned None. 'virt_ignore' is not one of the static key names
    visible in this class; "Virtual Machine" is.

    Returns False when the property is missing.
    """
    # NOTE(review): assumes run_command stores the command's text output
    # ("True"/"False") for this key — confirm against run_command.
    flag = self._properties.get("Virtual Machine", "")
    return str(flag).strip().lower() == "true"
def check_system_timer(self, window_seconds: float = 30.0):
    """Report whether ``self.recent_check`` is still within the active window.

    Args:
        window_seconds: Length of the window in seconds. Defaults to 30.0,
            the value that was previously hard-coded.

    Returns:
        True when fewer than ``window_seconds`` seconds separate the current
        time from ``self.recent_check``; False otherwise.
    """
    elapsed = time.time() - float(self.recent_check)
    return elapsed < window_seconds
########################################################
# static metrics redis data functions
########################################################
# return list of all static metrics from system and properties
def get_static_metrics(self, human_readable=False):
    """Return component property records followed by system property records.

    ``human_readable`` is forwarded unchanged to both underlying getters.
    """
    combined = list(self.get_component_properties(human_readable))
    combined.extend(self.get_system_properties(human_readable))
    return combined
def get_component_properties(self, human_readable=False):
    """Collect property records from every component, in component order.

    Uses each component's ``get_properties_strings()`` when
    ``human_readable`` is truthy, otherwise ``get_properties_keys()``.
    """
    collected = []
    for component in self.components:
        if human_readable:
            collected.extend(component.get_properties_strings())
        else:
            collected.extend(component.get_properties_keys())
    return collected
def get_system_properties(self, human_readable=False):
    """Return the system's own properties as records with Source "System".

    With ``human_readable`` truthy each record holds one "Property" string
    of the form "<name>: <value>"; otherwise it holds separate "Property"
    and "Value" entries.
    """
    if human_readable:
        return [
            {"Source": "System", "Property": f"{key}: {val}"}
            for key, val in self._properties.items()
        ]
    return [
        {"Source": "System", "Property": key, "Value": val}
        for key, val in self._properties.items()
    ]
########################################################
# live metrics redis data functions
########################################################
# return list of all live metrics from system and properties
def get_live_metrics(self, human_readable=False):
    """Return component metric records followed by system metric records.

    ``human_readable`` is forwarded unchanged to both underlying getters.
    """
    combined = list(self.get_component_metrics(human_readable))
    combined.extend(self.get_system_metrics(human_readable))
    return combined
def get_component_metrics(self, human_readable=False):
    """Collect metric records from every component, in component order.

    Uses each component's ``get_metrics_strings()`` when ``human_readable``
    is truthy, otherwise ``get_metrics_keys()``.
    """
    collected = []
    for component in self.components:
        fetch = (
            component.get_metrics_strings
            if human_readable
            else component.get_metrics_keys
        )
        collected.extend(fetch())
    return collected
def get_system_metrics(self, human_readable=False):
    """Return system metric records in string or keyed form.

    Delegates to ``get_system_metric_strings()`` when ``human_readable`` is
    truthy, otherwise to ``get_system_metric_keys()``.
    """
    builder = (
        self.get_system_metric_strings
        if human_readable
        else self.get_system_metric_keys
    )
    return builder()
def get_system_metric_keys(self):
    """Return keyed records for every system metric plus the component count.

    Each record carries "Source" ("System"), "Metric" and "Data". The final
    record is always the internally computed "component_count".
    """
    records = [
        {"Source": "System", "Metric": metric_name, "Data": metric_value}
        for metric_name, metric_value in self._metrics.items()
    ]
    # add internal dynamic metrics
    count_record = {
        "Source": "System",
        "Metric": "component_count",
        "Data": self.get_component_count(),
    }
    records.append(count_record)
    return records
def get_system_metric_strings(self):
    """Return display records for every system metric plus the component count.

    Each record carries "Source" ("System") and one "Metric" string of the
    form "<name>: <value>". The final record is always the internally
    computed component count.
    """
    records = [
        {"Source": "System", "Metric": f"{metric_name}: {metric_value}"}
        for metric_name, metric_value in self._metrics.items()
    ]
    # add internal dynamic metrics
    count_record = {
        "Source": "System",
        "Metric": f"component_count: {self.get_component_count()}",
    }
    records.append(count_record)
    return records
# straggler functions, might cut them
# return both static and dynamic data
def get_sysvars_summary_keys(self):
result = []
@ -303,70 +465,24 @@ class System:
result.append(thisvar)
return result
# return list of all live metrics from system and properties
def get_live_metrics(self):
result = []
for component_metric in self.get_component_metrics():
result.append(component_metric)
for system_metric in self.get_system_metrics():
result.append(system_metric)
return result
# return array of all component metrics
def get_component_metrics(self):
result = []
for component in self.components:
for metric in component.get_metrics_keys():
result.append(metric)
return result
# return array of all component metrics
def get_component_properties(self):
result = []
for component in self.components:
for metric in component.get_properties_keys():
result.append(metric)
return result
# return array of all system metrics
def get_system_metrics(self):
result = []
for name, value in self._metrics.items():
thisvar = {
"name": "System",
"type": name,
"metric": value
}
result.append(thisvar)
# add component count
result.append({
"name": "System",
"type": "component_count",
"metric": self.get_component_count()
})
return result
def get_system_properties(self):
result = []
for name, value in self._properties.items():
if name == "virt_string":
thisvar = {
"name": "System",
"property": name,
"value": value == "none"
}
def get_component_strings(self, component_type: type = None):
if component_type is None:
result = []
for component in self.components:
result.append(component.description)
return result
else:
result = []
for component in self.components:
if component.type == component_type:
result.append(component.description)
if component.is_multi():
return result
else:
thisvar = {
"name": "System",
"property": name,
"value": value
}
result.append(thisvar)
return result
return result[0]
############################################################
# Helper Functions
# Non-class Helper Functions
############################################################
# subroutine to run a command, return stdout as array unless zero_only then return [0]

View File

@ -76,7 +76,8 @@ def update_redis_channel(redis_channel, data):
def update_redis_server():
# Update Stats Redis Channel
update_redis_channel("host_metrics", get_redis_data())
if cosmostat_system.check_system_timer():
update_redis_channel("host_metrics", get_redis_data(human_readable = False))
# Update history_stats Redis Channel
# update_redis_channel("history_stats", get_component_list())
@ -98,7 +99,12 @@ def static_data():
# redis data
@app.route('/redis_data', methods=['GET'])
def redis_data():
return jsonify(get_redis_data())
return jsonify(get_redis_data(human_readable = False))
# redis strings
@app.route('/redis_strings', methods=['GET'])
def redis_strings():
    """Serve the redis data set in its human-readable string form as JSON."""
    readable_rows = get_redis_data(human_readable=True)
    return jsonify(readable_rows)
# full summary
@app.route('/full_summary', methods=['GET'])
@ -110,15 +116,43 @@ def full_summary():
def info():
return jsonify(get_info())
# socket timer
@app.route('/start_timer', methods=['GET'])
def start_timer():
    """Reset the system's ``recent_check`` timestamp to the current time.

    Responds with a JSON object containing a confirmation message and the
    new timestamp. Prints the new value when the "noisy_test" setting is on.
    """
    cosmostat_system.recent_check = int(time.time())
    if app_settings["noisy_test"]:
        print(f"Timestamp updated to {cosmostat_system.recent_check}")
    payload = {
        "message": "websocket timer reset",
        "new_timestamp": cosmostat_system.recent_check,
    }
    return jsonify(payload)
# socket timer data
@app.route('/timer_data', methods=['GET'])
def timer_data():
    """Report the state of the system timer as JSON.

    The response holds the seconds elapsed since ``recent_check``, the
    current time, the last update time, and the result of
    ``check_system_timer()``.
    """
    now = time.time()
    last_update = float(cosmostat_system.recent_check)
    return jsonify(
        {
            "Time Lapsed": now - last_update,
            "Current Time Value": now,
            "Last Update Value": last_update,
            "System Updating": cosmostat_system.check_system_timer(),
        }
    )
# test route
@app.route('/test', methods=['GET'])
def test():
this_cpu = cosmostat_system.get_components(component_type="CPU")
return jsonify(
{
"component_count:": len(cosmostat_system.components),
"user": jenkins_user_settings(),
"hostname": jenkins_hostname_settings(),
"cpu_model": cosmostat_system.get_components(component_type="CPU").description
"cpu_model": this_cpu[0].description
}
)
@ -128,22 +162,18 @@ def test():
# needs to return array of {name: name, type: type, metrics: metrics}
# for redis table generation, includes system and component metrics
def get_dynamic_data():
return cosmostat_system.get_live_metrics()
def get_dynamic_data(human_readable = False):
return cosmostat_system.get_live_metrics(human_readable)
def get_static_data():
def get_static_data(human_readable = False):
result = []
for metric in cosmostat_system.get_system_properties():
result.append(metric)
for metric in cosmostat_system.get_component_properties():
result.append(metric)
return result
return cosmostat_system.get_static_metrics(human_readable)
def get_redis_data():
def get_redis_data(human_readable = False):
result = []
for metric in get_dynamic_data():
for metric in get_dynamic_data(human_readable):
result.append(metric)
for metric in get_static_data():
for metric in get_static_data(human_readable):
result.append(metric)
return result
@ -171,11 +201,11 @@ def get_info():
component_strings = []
for component in cosmostat_system.get_components():
component_strings.append({"name": component.name, "description": component.description})
system_strings = []
result = {
"hostname": jenkins_hostname_settings(),
"component_strings": component_strings
"component_strings": component_strings,
"system_strings": cosmostat_system.get_sysvars_summary_keys()
}
#for component_string in component_strings:
# for name, description in component_string.items():
@ -199,7 +229,8 @@ def new_cosmos_system():
# Background Loop Function
def background_loop():
# Update all data on the System object
cosmostat_system.update_system_state()
if cosmostat_system.check_system_timer():
cosmostat_system.update_system_state()
if app_settings["push_redis"]:
update_redis_server()
@ -241,7 +272,7 @@ if __name__ == '__main__':
print("Skipping flask background task")
# Flask API
app.run(debug=True, host=service_gateway_ip(), port=5000)
app.run(debug=False, host=service_gateway_ip(), port=5000)

View File

@ -1,46 +1,56 @@
[
{
"name": "CPU",
"description": "{model_name} with {core_count} cores.",
"description": "{CPU Model} with {Core Count} cores.",
"multi_check": "False",
"properties": {
"core_count": "lscpu --json | jq -r '.lscpu[] | select(.field==\"CPU(s):\") | .data'",
"model_name": "lscpu --json | jq -r '.lscpu[] | select(.field==\"Model name:\") | .data'"
"Core Count": "lscpu --json | jq -r '.lscpu[] | select(.field==\"CPU(s):\") | .data'",
"CPU Model": "lscpu --json | jq -r '.lscpu[] | select(.field==\"Model name:\") | .data'",
"Clock Speed": "sudo dmesg | grep MHz | grep tsc | cut -d: -f2 | awk '{print $2 \" \" $3}'"
},
"metrics": {
"1m_load": "cat /proc/loadavg | awk '{print $1}'",
"5m_load": "cat /proc/loadavg | awk '{print $2}'",
"15m_load": "cat /proc/loadavg | awk '{print $3}'"
"15m_load": "cat /proc/loadavg | awk '{print $3}'",
"current_mhz": "less /proc/cpuinfo | grep MHz | cut -d: -f2 | awk '{sum += $1} END {print sum/NR}'"
}
},
{
"name": "RAM",
"description": "Total {bytes_total}GB in {module_count} modules.",
"description": "Total {Total GB}GB in {RAM Module Count} modules.",
"multi_check": "False",
"properties": {
"bytes_total": "sudo lshw -json -c memory | jq -r '.[] | select(.description==\"System Memory\").size' | awk '{printf \"%.2f\\n\", $1/1073741824}'",
"module_count": "sudo lshw -json -c memory | jq -r '.[] | select(.id | contains(\"bank\")) | .id ' | wc -l"
"Total GB": "sudo /usr/bin/lshw -json -c memory | jq -r '.[] | select(.description==\"System Memory\").size' | awk '{printf \"%.2f\\n\", $1/1073741824}'",
"RAM Module Count": "sudo /usr/bin/lshw -json -c memory | jq -r '.[] | select(.id | contains(\"bank\")) | .id ' | wc -l",
"RAM Type": "/usr/sbin/dmidecode --type 17 | grep Type: | sort -u | cut -d: -f2 | xargs",
"RAM Speed": "/usr/sbin/dmidecode --type 17 | grep Speed: | grep -v Configured | sort -u | cut -d: -f2 | xargs",
"RAM Voltage": "/usr/sbin/dmidecode --type 17 | grep 'Configured Voltage' | sort -u | cut -d: -f2 | xargs"
},
"metrics": {
"used_capacity_mb": "free -m | grep Mem | awk '{print $3}'",
"free_capacity_mb": "free -m | grep Mem | awk '{print $4}'"
}
"MB Used": "free -m | grep Mem | awk '{print $3}'",
"MB Free": "free -m | grep Mem | awk '{print $4}'"
},
"virt_ignore": [
"RAM Type",
"RAM Speed",
"RAM Voltage"
]
},
{
"name": "Block Storage",
"description": "{device_id} is of type {drive_type} with capacity of {drive_capacity}.",
"name": "STOR",
"description": "{Device Path} is of type {Drive Type} with capacity of {Total Capacity}.",
"multi_check": "True",
"device_list": "lsblk -d -o NAME,SIZE | grep -v -e 0B -e NAME | awk '{print $1}'",
"properties": {
"device_name": "lsblk -d -o NAME,SIZE | grep -v -e 0B -e NAME | awk '{{print $1}}' | grep {this_device}",
"device_id": "lsblk -d -o NAME,SIZE | grep -v -e 0B -e NAME | awk '{{print \"/dev/\"$1}}' | grep {this_device}",
"drive_type": "lsblk -d -o NAME,TRAN | grep {this_device} | awk '{{print $2}}'",
"drive_capacity": "lsblk -d -o NAME,SIZE | grep {this_device} | awk '{{print $2}}'"
"Device Name": "lsblk -d -o NAME,SIZE | grep -v -e 0B -e NAME | awk '{{print $1}}' | grep {this_device}",
"Device Path": "lsblk -d -o NAME,SIZE | grep -v -e 0B -e NAME | awk '{{print \"/dev/\"$1}}' | grep {this_device}",
"Drive Type": "lsblk -d -o NAME,TRAN | grep {this_device} | awk '{{print $2}}'",
"Total Capacity": "lsblk -d -o NAME,SIZE | grep {this_device} | awk '{{print $2}}'"
},
"metrics": {
"smart_status": "sudo smartctl -x --json /dev/{this_device} | jq -r .smart_status.passed",
"ssd_endurance_string": "sudo smartctl -x --json /dev/{this_device} | jq -r '.physical_block_size as $block |.ata_device_statistics.pages[] | select(.name == \"General Statistics\") | .table[] | select(.name == \"Logical Sectors Written\") | .value as $sectors | ($sectors * $block) / 1073741824 ' | awk '{{printf \"%.2f GiB Written\\n\", $0}}' || true",
"nvme_endurance_string": "sudo smartctl -x --json /dev/{this_device} | jq -r ' .nvme_smart_health_information_log.data_units_written as $dw | .logical_block_size as $ls | ($dw * $ls) / 1073741824 ' | awk '{{printf \"%.2f GiB Written\\n\", $0}}' || true"
"SMART Check": "/usr/sbin/smartctl -x --json /dev/{this_device} | jq -r .smart_status.passed",
"SATA GBW": "/usr/sbin/smartctl -x --json /dev/{this_device} | jq -r '.physical_block_size as $block |.ata_device_statistics.pages[] | select(.name == \"General Statistics\") | .table[] | select(.name == \"Logical Sectors Written\") | .value as $sectors | ($sectors * $block) / 1073741824 ' | awk '{{printf \"%.2f GiB Written\\n\", $0}}' || true",
"NVMe GBW": "/usr/sbin/smartctl -x --json /dev/{this_device} | jq -r ' .nvme_smart_health_information_log.data_units_written as $dw | .logical_block_size as $ls | ($dw * $ls) / 1073741824 ' | awk '{{printf \"%.2f GiB Written\\n\", $0}}' || true"
}
}
]