first cosmoserver commit

This commit is contained in:
2026-03-21 21:20:00 -07:00
parent 7c29cbdab5
commit 324eaff135
14 changed files with 1019 additions and 423 deletions

View File

@ -2,7 +2,7 @@
# required system packages # required system packages
cosmostat_packages: cosmostat_packages:
- docker - "{{ 'docker' if x64_arch else 'wmdocker' }}"
- docker.io - docker.io
- docker-compose - docker-compose
- python3 - python3
@ -55,8 +55,9 @@ service_control_web_folder: "{{ service_folder }}/web"
public_dashboard: true public_dashboard: true
custom_port: "80" custom_port: "80"
# will skip init when true # other vars
quick_refresh: false quick_refresh: false
x64_arch: true
# cosmostat_settings # cosmostat_settings
noisy_test: false noisy_test: false
@ -66,4 +67,7 @@ push_redis: true
run_background : true run_background : true
log_output: true log_output: true
update_frequency: "1" update_frequency: "1"
cosmostat_server: false
cosmostat_server_api: "http://10.200.27.20/"
cosmostat_server_reporter: false
... ...

View File

@ -2,80 +2,89 @@
import subprocess import subprocess
import json import json
import time import time
import weakref
import base64, hashlib
from typing import Dict, Any, List from typing import Dict, Any, List
from Cosmos_Settings import *
# Global Class Vars # Global Class Vars
global_max_length = 500 global_max_length = 500
debug_output = False
null_result = [
"",
"null",
None,
[],
"Unknown",
"To Be Filled By O.E.M."
]
# import the component descriptor # import the component descriptor
try: try:
with open("component_descriptors.json", encoding="utf-8") as f: with open("descriptors.json", encoding="utf-8") as f:
component_class_tree: List[Dict] = json.load(f) component_class_tree: List[Dict] = json.load(f)
except FileNotFoundError as exc: except FileNotFoundError as exc:
raise RuntimeError("Descriptor file not found") from exc raise RuntimeError("Descriptor file not found") from exc
component_types = [{"name": entry["name"], "multi_check": entry["multi_check"] == "True"} for entry in component_class_tree] component_types = []
for entry in component_class_tree:
if entry["name"] != "System":
component_types.append({"name": entry["name"], "multi_check": entry["multi_check"] == "True"})
#################################################################
################################################################# #################################################################
# Component Class # Component Class
################################################################# #################################################################
#################################################################
class Component: class Component:
############################################################ ############################################################
# instantiate new component # instantiate new component
# this_device is set when the component has multiple instances
############################################################ ############################################################
def __init__(self, name: str, comp_type: str, this_device="None", is_virtual = "True"):
def __init__(self, name: str, comp_type: str, parent_system, this_device=None):
# begin init
self.name = name self.name = name
self.type = comp_type self.type = comp_type
self.parent_system = weakref.ref(parent_system)
# this variable is set when the device can have multiples # this variable is set when the device can have multiples
# it indicates that the commands in the descriptor might need templating # it indicates that the commands in the descriptor might need templating
self.this_device = this_device self.this_device = this_device
self.is_virtual = is_virtual self.is_virtual = parent_system.is_virtual()
print(f"This device - {self.this_device}") self.cpu_arch = parent_system.get_system_arch()
if self.this_device is None:
log_data(log_output = f"This device - {self.name}", log_level = "log_output")
else:
log_data(log_output = f"This device - {self.this_device}", log_level = "log_output")
# build the component descriptor dictionary # build the component descriptor dictionary
for component in component_class_tree: self._descriptor = self._parse_descriptor()
if component["name"] == self.type:
COMPONENT_DESCRIPTORS = component
descriptor = COMPONENT_DESCRIPTORS
self._descriptor = descriptor
if descriptor is None:
raise ValueError(
f"Component type '{comp_type}' is not defined in the "
f"component descriptor tree."
)
# store static properties # store static properties
self.multi_check = self.is_multi() self.multi_check = self.is_multi()
self.virt_ignore = self._descriptor.get('virt_ignore', []) self.virt_ignore = self._descriptor.get('virt_ignore', [])
self.multi_metrics = self._descriptor.get('multi_metrics', []) self.multi_metrics = self._descriptor.get('multi_metrics', [])
#if 'precheck' in self._descriptor: self.arch_check = self._descriptor.get('arch_check', [])
# precheck_command = self._descriptor.get('precheck', [])
# precheck_value = int(run_command(precheck_command, zero_only = True))
# if precheck_value == 0:
# raise ValueError(f"No devices of type {self.type}")
if self.is_virtual: if self.is_virtual:
self.virt_ignore = [] self.virt_ignore = []
# initialize properties
self._properties: Dict[str, str | list[str]] = {} self._properties: Dict[str, str | list[str]] = {}
for key, command in descriptor.get('properties', {}).items(): self._process_properties()
return_string = True
if key in self.multi_metrics: # build the description string, requires the properties first
return_string = False self._description_template: str | None = self._descriptor.get("description")
if self.this_device != "None":
# this means this component type is a multi and the commands need templating for each device
formatted_command = command.format(this_device=self.this_device)
self._properties[key] = run_command(formatted_command, zero_only = return_string)
else:
self._properties[key] = run_command(command, zero_only = return_string)
print(self._properties[key])
# build the description string
self._description_template: str | None = descriptor.get("description")
self.description = self._description_template.format(**self._properties) self.description = self._description_template.format(**self._properties)
# initialize metrics # initialize metrics
self._metrics: Dict[str, str] = {} self._metrics: Dict[str, str] = {}
self.update_metrics() self.update_metrics()
def __str__(self): def __str__(self):
self_string = (f"Component name: {self.name}, type: {self.type} - " self_string = (f"Component name: {self.name}, type: {self.type} - "
f"{self.description}") f"{self.description}")
@ -92,13 +101,21 @@ class Component:
def update_metrics(self): def update_metrics(self):
for key, command in self._descriptor.get('metrics', {}).items(): for key, command in self._descriptor.get('metrics', {}).items():
if self.this_device != "None": log_data(log_output = f"Key: {key} - Command: {command}", log_level = "noisy_test")
formatted_command = command.format(this_device=self.this_device) formatted_command = command
this_metric = run_command(formatted_command, True) if self.arch_check is not None:
if this_metric is not None: arch_variance = self._descriptor.get('arch_variance', {})
self._metrics[key] = this_metric if key in arch_variance:
else: if self.cpu_arch in formatted_command:
self._metrics[key] = run_command(command, zero_only = True) formatted_command = command[self.cpu_arch]
else:
formatted_command = f"echo Missing {self.cpu_arch} command"
if self.this_device is not None:
formatted_command = formatted_command.format(this_device=self.this_device)
if formatted_command is not None:
result = run_command(formatted_command, zero_only = True)
if result not in null_result:
self._metrics[key] = result
def get_property(self, type = None): def get_property(self, type = None):
if type == None: if type == None:
@ -112,8 +129,52 @@ class Component:
return component_type["multi_check"] return component_type["multi_check"]
return False return False
# return descriptor for this device type
def _parse_descriptor(self):
    """Look up the descriptor entry whose ``name`` equals ``self.type``.

    Returns:
        The matching dictionary from ``component_class_tree``. When several
        entries share the name, the last one wins.

    Raises:
        ValueError: no entry in ``component_class_tree`` matches ``self.type``.
    """
    # start from None so a missing type reaches the ValueError below instead
    # of failing on an unbound local
    descriptor = None
    for component in component_class_tree:
        if component["name"] == self.type:
            descriptor = component
    if descriptor is None:
        # report self.type; there is no comp_type name in this method's scope
        raise ValueError(
            f"Component type '{self.type}' is not defined in the "
            f"component descriptor tree."
        )
    return descriptor
# run every property command from the descriptor and keep usable results
def _process_properties(self):
    """Fill ``self._properties`` from the descriptor's ``properties`` mapping.

    Each command is first passed through ``_parse_command`` and then executed
    with ``run_command``. ``zero_only`` is False for keys listed in
    ``self.multi_metrics`` and True otherwise. Results that appear in
    ``null_result`` are not stored.
    """
    property_commands = self._descriptor.get('properties', {})
    for prop_name in property_commands:
        raw_command = property_commands[prop_name]
        single_value = prop_name not in self.multi_metrics
        prepared = self._parse_command(prop_name, raw_command, single_value)
        log_data(log_output = f"Property {prop_name} - command: {prepared}", log_level = "debug_output")
        outcome = run_command(prepared, zero_only = single_value)
        if outcome in null_result:
            # placeholder or empty output: leave the property unset
            continue
        self._properties[prop_name] = outcome
# helper function to parse command key
def _parse_command(self, key: str, command: str | list[str], return_string = True):
    """Resolve the command to execute for descriptor entry ``key``.

    Args:
        key: name of the property being processed.
        command: the descriptor value for ``key``. When ``key`` is listed in
            the descriptor's ``arch_variance``, this value is indexed by
            ``self.cpu_arch``.
        return_string: accepted for call compatibility; not used here.

    Returns:
        The command with the architecture variant selected (when applicable)
        and ``{this_device}`` filled in (when ``self.this_device`` is set).
    """
    result_command = command
    log_data(log_output = f"_parse_command - {command}", log_level = "debug_output")
    if self.arch_check:
        # since the keys are stored with the arch variable this can be concise
        arch_variance = self._descriptor.get('arch_variance', {})
        if key in arch_variance:
            if self.cpu_arch in result_command:
                log_data(log_output = f"arch_variance - {key} - {result_command}", log_level = "debug_output")
                result_command = result_command[self.cpu_arch]
            else:
                result_command = f"echo Missing {self.cpu_arch} command"
    if self.this_device is not None:
        # template the command resolved above; formatting the raw ``command``
        # would throw away the architecture selection made in the block above
        result_command = result_command.format(this_device=self.this_device)
    log_data(log_output = f"result - {result_command}", log_level = "debug_output")
    return result_command
######################################################## ########################################################
# redis data functions # keyed data functions
######################################################## ########################################################
def get_properties_keys(self, component = None): def get_properties_keys(self, component = None):
@ -230,9 +291,10 @@ class Component:
} }
return result return result
############################################################
############################################################ ############################################################
# System Class # System Class
# this is a big one... ############################################################
############################################################ ############################################################
class System: class System:
@ -241,25 +303,18 @@ class System:
# system variable declarations # system variable declarations
######################################################## ########################################################
static_key_variables = [ for component in component_class_tree:
{"name": "Hostname", "command": "hostname"}, if component["name"] == "System":
{"name": "Virtual Machine", "command": 'echo $( [ "$(systemd-detect-virt)" = none ] && echo False || echo True )', "req_check": "False"}, SYSTEM_DESCRIPTOR = component
{"name": "CPU Architecture:", "command": "lscpu --json | jq -r '.lscpu[] | select(.field==\"Architecture:\") | .data'"}, descriptor = SYSTEM_DESCRIPTOR
{"name": "OS Kernel", "command": "uname -r"}, if descriptor is None:
{"name": "OS Name", "command": "cat /etc/os-release | grep PRETTY | cut -d\\\" -f2"}, raise ValueError(
{"name": "Manufacturer", "command": "sudo dmidecode --type 1 | grep Manufacturer: | cut -d: -f2 | sed -e 's/^[ \\t]*//'"}, f"Component type 'System' is not defined in the "
{"name": "Product Name", "command": "sudo dmidecode --type 2 | grep 'Product Name:' | cut -d: -f2 | sed -e 's/^[ \\t]*//'"}, f"component descriptor tree."
{"name": "Serial Number", "command": "sudo dmidecode --type 2 | grep 'Serial Number: '| cut -d: -f2 | sed -e 's/^[ \\t]*//'"}, )
] static_key_variables = descriptor["static_key_variables"]
dynamic_key_variables = [ dynamic_key_variables = descriptor["dynamic_key_variables"]
{"name": "System Uptime", "command": "uptime -p"}, virt_ignore = descriptor["virt_ignore"]
{"name": "Current Date", "command": "date '+%D %r'"},
]
virt_ignore = [
"Product Name",
"Serial Number"
]
######################################################## ########################################################
# instantiate new system # instantiate new system
@ -268,8 +323,9 @@ class System:
def __init__(self, name: str): def __init__(self, name: str):
# the system needs a name # the system needs a name
self.name = name self.name = name
if debug_output: log_data(log_output = f"System initializing, name {self.name}", log_level = "debug_output")
print(f"System initializing, name {self.name}") self.uuid = run_command(cmd = "cat /etc/machine-id", zero_only = True)
self.short_id = self.short_uuid(self.uuid)
# system contains an array of component objects # system contains an array of component objects
self.components = [] self.components = []
self.component_class_tree = component_class_tree self.component_class_tree = component_class_tree
@ -277,6 +333,7 @@ class System:
self._properties: Dict[str, str] = {} self._properties: Dict[str, str] = {}
self._metrics: Dict[str, str] = {} self._metrics: Dict[str, str] = {}
self._virt_string = run_command('systemd-detect-virt', zero_only = True, req_check = False) self._virt_string = run_command('systemd-detect-virt', zero_only = True, req_check = False)
self._virt_ignore = self.virt_ignore self._virt_ignore = self.virt_ignore
if self._virt_string == "none": if self._virt_string == "none":
self._virt_ignore = [] self._virt_ignore = []
@ -285,36 +342,56 @@ class System:
# load static keys # load static keys
for static_key in self.static_key_variables: for static_key in self.static_key_variables:
if static_key["name"] not in self._virt_ignore: if static_key["name"] not in self._virt_ignore:
command = static_key["command"] self.process_property(static_key = static_key)
if "req_check" in static_key:
result = run_command(command, zero_only = True, req_check = static_key["req_check"])
else:
result = run_command(command, zero_only = True)
if debug_output:
print(f'Static key [{static_key["name"]}] - command [{command}] - output [{result}]')
self._properties[static_key["name"]] = result
# initialize live keys # initialize live keys
self.update_live_keys() self.update_live_keys()
# initialize components # initialize components
self.load_components() for component in component_types:
self.create_component(component)
def __str__(self): def __str__(self):
components_str = "\n".join(f" - {c}" for c in self.components) components_str = "\n".join(f" - {c}" for c in self.components)
return f"System hostname: {self.name}\nComponent Count: {self.get_component_count()}\n{components_str}" return f"System hostname: {self.name}\nComponent Count: {self.get_component_count()}\n{components_str}"
def __repr__(self):
    """Return the short identifying string for this client system."""
    # repr() requires a str; without the return the method yields None and
    # repr(system) raises TypeError
    return f"Cosmostat Client {self.short_id}"
def short_uuid(self, value: str, length=8):
    """Return the first ``length`` hex characters of the MD5 digest of ``value``.

    ``value`` is encoded as UTF-8 before hashing.
    """
    digest = hashlib.md5(value.encode('utf-8')).hexdigest()
    return digest[:length]
######################################################## ########################################################
# critical class functions # critical class functions
######################################################## ########################################################
# evaluate one static key and record its value
def process_property(self, static_key):
    """Run the command of ``static_key`` and store the result as a property.

    When the key carries ``arch_check``, its ``command`` is indexed by the
    architecture string reported by ``lscpu``; a missing variant is replaced
    by an ``echo`` placeholder command. ``req_check`` is forwarded to
    ``run_command`` only when the key defines it. Results that appear in
    ``null_result`` are not stored.
    """
    command = static_key["command"]
    if "arch_check" in static_key:
        arch_string = run_command("lscpu --json | jq -r '.lscpu[] | select(.field==\"Architecture:\") | .data'", zero_only = True)
        command = command[arch_string] if arch_string in command else f"echo Missing {arch_string} command"
    # build the keyword arguments once so there is a single run_command call
    run_options = {"zero_only": True}
    if "req_check" in static_key:
        run_options["req_check"] = static_key["req_check"]
    result = run_command(command, **run_options)
    log_data(log_output = f'Static key [{static_key["name"]}] - command [{command}] - output [{result}]', log_level = "debug_output")
    if result in null_result:
        return
    self._properties[static_key["name"]] = result
# update only system dynamic keys # update only system dynamic keys
def update_live_keys(self): def update_live_keys(self):
for live_key in self.dynamic_key_variables: for live_key in self.dynamic_key_variables:
if live_key['command'] is not None: if live_key['command'] is not None:
command = live_key['command'] command = live_key['command']
result = run_command(command, zero_only = True) result = run_command(command, zero_only = True)
self._metrics[live_key['name']] = result if result not in null_result:
if debug_output: self._metrics[live_key['name']] = result
print(f'Command {live_key["name"]} - [{command}] Result - [{result}]') log_data(log_output = f'Command {live_key["name"]} - [{command}] Result - [{result}]', log_level = "noisy_test")
# update all dynamic keys, including components # update all dynamic keys, including components
def update_system_state(self): def update_system_state(self):
@ -322,33 +399,25 @@ class System:
for component in self.components: for component in self.components:
component.update_metrics() component.update_metrics()
# check for components # component creation helper
def load_components(self): def create_component(self, component):
for component in component_types: component_name = component["name"]
component_name = component["name"] multi_check = component["multi_check"]
multi_check = component["multi_check"] # if multi, note that the command in device_list creates the list of things to pipe into this_device
# if multi, note that the command in device_list creates the list of things to pipe into this_device if multi_check:
if multi_check: log_data(log_output = f"Creating one component of type {component_name} for each one found", log_level = "log_output")
print(f"Creating one component of type {component_name} for each one found") component_type_device_list = get_device_list(component_name)
component_type_device_list = get_device_list(component_name) component_id = 0
component_id = 0 for this_device in component_type_device_list:
for this_device in component_type_device_list: this_component_ID = component_type_device_list.index(this_device)
this_component_ID = component_type_device_list.index(this_device) this_component_name = f"{component_name} {this_component_ID}"
this_component_name = f"{component_name} {this_component_ID}" log_data(log_output = f"{this_component_name} - {component_name} - {this_device}", log_level = "debug_output")
print(f"{this_component_name} - {component_name} - {this_device}") new_component = Component(name = this_component_name, comp_type = component_name, this_device = this_device, parent_system = self)
new_component = Component(name = this_component_name, comp_type = component_name, this_device = this_device) self.components.append(new_component)
self.add_components(new_component) else:
log_data(log_output = f'Creating component {component["name"]}', log_level = "debug_output")
else: new_component = Component(name = component_name, comp_type = component_name, parent_system = self)
if debug_output: self.components.append(new_component)
print(f'Creating component {component["name"]}')
self.add_components(Component(name = component_name, comp_type = component_name, is_virtual = self.is_virtual()))
# Add a component to the system
def add_components(self, component: Component,):
if debug_output:
print(f"Component description: {component.description}")
self.components.append(component)
######################################################## ########################################################
# helper class functions # helper class functions
@ -380,7 +449,7 @@ class System:
def is_virtual(self): def is_virtual(self):
vm_check = self.get_property('Virtual Machine') vm_check = self.get_property('Virtual Machine')
print(f'vm_check: {vm_check}') log_data(log_output = f'vm_check: {vm_check}', log_level = "debug_output")
return vm_check return vm_check
def check_system_timer(self): def check_system_timer(self):
@ -390,6 +459,9 @@ class System:
def get_component_class_tree(self): def get_component_class_tree(self):
return self.component_class_tree return self.component_class_tree
def get_system_arch(self):
    """Return the value of the system property named "CPU Architecture"."""
    architecture = self.get_property("CPU Architecture")
    return architecture
######################################################## ########################################################
# static metrics redis data functions # static metrics redis data functions
######################################################## ########################################################
@ -493,43 +565,6 @@ class System:
"Metric": f"component_count: {self.get_component_count()}" "Metric": f"component_count: {self.get_component_count()}"
}) })
return result return result
# straggler functions, might cut them
# return both static and dynamic data
def get_sysvars_summary_keys(self):
result = []
for name, value in self._properties.items():
thisvar = {
"name": "System Class Property",
"type": name,
"value": value
}
result.append(thisvar)
for name, value in self._metrics.items():
thisvar = {
"name": "System Class Metric",
"type": name,
"value": value
}
result.append(thisvar)
return result
def get_component_strings(self, component_type: type = None):
if component_type is None:
result = []
for component in self.components:
result.append(component.description)
return result
else:
result = []
for component in self.components:
if component.type == component_type:
result.append(component.description)
if component.is_multi():
return result
else:
return result[0]
############################################################ ############################################################
# Non-class Helper Functions # Non-class Helper Functions
@ -557,10 +592,12 @@ def get_device_list(device_type_name: str):
precheck_command = component["precheck"] precheck_command = component["precheck"]
precheck_value_output = run_command(precheck_command, zero_only = True) precheck_value_output = run_command(precheck_command, zero_only = True)
precheck_value = int(precheck_value_output) precheck_value = int(precheck_value_output)
print(f"Precheck found - {precheck_command} - {precheck_value}") log_data(log_output = f"Precheck found - {precheck_command} - {precheck_value}", log_level = "log_output")
if component["name"] == device_type_name and precheck_value != 0: if component["name"] == device_type_name and precheck_value != 0:
device_list_command = component["device_list"] device_list_command = component["device_list"]
device_list_result = run_command(device_list_command) device_list_result = run_command(device_list_command)
result = device_list_result result = device_list_result
return result return result

View File

@ -0,0 +1,85 @@
import yaml
#######################################################################
### Settings Handler Functions
#######################################################################
# default application setting variables
# any key that also appears in cosmostat_settings.yaml is overridden below
app_settings = {
    "noisy_test" : False,
    "debug_output" : True,
    "log_output" : True,
    "secure_api" : True,
    "push_redis" : False,
    "run_background" : True,
    "cosmostat_server": False,
    "cosmostat_server_reporter": False,
    "update_frequency": 1,
    "custom_api_port": "5000",
    "cosmostat_server_api": "http://10.200.27.20:5000/"
}

# load the settings file
with open('cosmostat_settings.yaml', 'r', encoding="utf-8") as f:
    print("Loading cosmostat_settings file")
    # safe_load returns None for an empty document; fall back to an empty
    # mapping so the override loop and the accessor functions get a dict
    cosmostat_settings = yaml.safe_load(f) or {}
print("...Done")

# initialize system variables from settings file
print("Checking for system var overrides")
for setting in app_settings:
    if setting in cosmostat_settings:
        cosmos_setting = cosmostat_settings[setting]
        if app_settings["debug_output"]:
            print(f"{setting}: {cosmos_setting}")
        app_settings[setting] = cosmos_setting
print("...Done")
# accessor for the docker gateway recorded in the settings file
def docker_gateway_settings() -> str:
    """Return the ``docker_gateway`` entry of ``cosmostat_settings``."""
    gateway = cosmostat_settings["docker_gateway"]
    return gateway
# accessor for the jenkins user that ran the pipeline
def jenkins_user_settings() -> str:
    """Return the ``jenkins_user`` entry of ``cosmostat_settings``."""
    pipeline_user = cosmostat_settings["jenkins_user"]
    return pipeline_user
# accessor for the ansible_hostname captured during setup
def jenkins_hostname_settings() -> str:
    """Return the ``ansible_hostname`` entry of ``cosmostat_settings``."""
    hostname = cosmostat_settings["ansible_hostname"]
    return hostname
# accessor for the inventory_generation_timestamp
def jenkins_inventory_generation_timestamp_settings() -> str:
    """Return the ``inventory_generation_timestamp`` entry of ``cosmostat_settings``."""
    generated_at = cosmostat_settings["inventory_generation_timestamp"]
    return generated_at
def run_cosmostat_server():
    """Return the effective ``cosmostat_server`` setting.

    Reads ``app_settings``, which holds the built-in default with any value
    from the settings file already applied, so a file that omits the key
    yields the default instead of raising KeyError.
    """
    return app_settings["cosmostat_server"]
def service_gateway_ip():
    """Return the address the service should use.

    When the effective ``secure_api`` setting is truthy this is the value of
    ``docker_gateway_settings()``; otherwise it is ``"0.0.0.0"``.
    """
    # use app_settings so the declared default applies when the settings file
    # does not mention secure_api
    if app_settings["secure_api"]:
        return docker_gateway_settings()
    return "0.0.0.0"
def cosmostat_server_api():
    """Return the effective ``cosmostat_server_api`` setting.

    Reads ``app_settings`` so the built-in default is used when the settings
    file does not define the key.
    """
    return app_settings["cosmostat_server_api"]
def service_api_port():
    """Return the effective ``custom_api_port`` setting.

    Reads ``app_settings`` so the built-in default is used when the settings
    file does not define the key.
    """
    return app_settings["custom_api_port"]
def log_data(log_output: str, log_level = "noisy_test"):
    """Print ``log_output`` when the named log level is enabled.

    Args:
        log_output: the text to print.
        log_level: name of the setting that gates this message; one of
            ``"noisy_test"``, ``"debug_output"`` or ``"log_output"``.
            Defaults to ``"noisy_test"``.

    Any other ``log_level`` prints a warning instead of the message.
    """
    # the default is the level NAME; using the setting's boolean value as the
    # default made every call without log_level land in the warning branch
    log_levels = [
        "noisy_test",
        "debug_output",
        "log_output",
    ]
    if log_level in log_levels:
        # app_settings carries the defaults plus the settings-file overrides
        if app_settings[log_level]:
            print(log_output)
    else:
        print(f"Warning - {log_level} not valid log level")

75
files/api/Cosmostat.py Normal file
View File

@ -0,0 +1,75 @@
# This will be a class definition for the cosmostat server
# On the server, there will be a Cosmostat Class Object
# This will have an array of System Class Objects
# These will be created based on API input from remote systems
# The remote systems will submit a json of their state to a private API
# this will define the System Class
import subprocess
import json
import time
import weakref
import base64, hashlib
from typing import Dict, Any, List
from Cosmos_Settings import *
#################################################################
#################################################################
# Cosmostat Class
#################################################################
#################################################################
class Cosmostat:
    """Server-side registry of client systems.

    Every registered client is stored in ``self.systems`` as a dictionary
    with the keys ``data_timestamp``, ``uuid``, ``short_id``,
    ``client_properties`` and ``redis_data``.
    """

    ############################################################
    # instantiate new Cosmostat server
    ############################################################

    def __init__(self, name: str):
        """Create an empty registry; ``short_id`` is derived from ``name``."""
        # the system needs a name, should be equal to the uuid of the client
        self.name = name
        self.short_id = self.short_uuid(self.name)
        log_data(log_output = f"Cosmostat Server {self.short_id} initializing", log_level = "log_output")
        # one dictionary per registered client system
        self.systems = []

    def __str__(self):
        return f"Cosmostat Server {self.short_id}"

    def __repr__(self):
        # repr() requires a str; without the return repr(server) raises TypeError
        return f"Cosmostat Server {self.short_id}"

    def add_system(self, system_dictionary: dict):
        """Register a client from its ``uuid``, ``short_id`` and ``client_properties``.

        Raises:
            KeyError: ``system_dictionary`` lacks one of those keys.
        """
        new_system_key = {
            "data_timestamp": time.time(),
            "uuid": system_dictionary["uuid"],
            "short_id": system_dictionary["short_id"],
            "client_properties": system_dictionary["client_properties"],
            "redis_data": {}
        }
        # single quotes inside the f-string keep this valid before Python 3.12
        log_data(log_output = f"Client system {new_system_key['short_id']} added", log_level = "log_output")
        self.systems.append(new_system_key)

    def update_system(self, system_state: dict, system_uuid: str):
        """Store ``system_state`` as the latest data of a registered client.

        Returns:
            The timestamp recorded for this update.

        Raises:
            KeyError: no client with ``system_uuid`` has been registered.
        """
        this_system = self.get_system(system_uuid)
        if not this_system:
            # get_system hands back a fresh empty dict for unknown uuids;
            # fail before writing into that throwaway object
            raise KeyError(f"No client system registered with uuid {system_uuid}")
        this_system["redis_data"] = system_state
        this_system["data_timestamp"] = time.time()
        log_data(log_output = f"Client system {this_system['short_id']} updated", log_level = "log_output")
        return this_system["data_timestamp"]

    def get_system(self, system_uuid: str) -> dict:
        """Return the stored entry for ``system_uuid``, or ``{}`` when unknown."""
        for system in self.systems:
            if system["uuid"] == system_uuid:
                return system
        return {}

    def short_uuid(self, value: str, length=8):
        """Return the first ``length`` hex characters of the MD5 digest of ``value``."""
        hasher = hashlib.md5()
        hasher.update(value.encode('utf-8'))
        full_hex = hasher.hexdigest()
        return full_hex[:length]

View File

@ -3,67 +3,14 @@ from flask_apscheduler import APScheduler
from typing import Dict, Union from typing import Dict, Union
import json, time, redis, yaml import json, time, redis, yaml
import base64, hashlib
from Components import * from Components import *
from Cosmos_Settings import *
# declare flask apps # declare flask apps
app = Flask(__name__) app = Flask(__name__)
scheduler = APScheduler() scheduler = APScheduler()
#######################################################################
### Settings Handler Functions
#######################################################################
# default application setting variables
app_settings = {
"noisy_test" : False,
"debug_output" : True,
"log_output" : True,
"secure_api" : True,
"push_redis" : False,
"run_background" : True,
"update_frequency": 1,
"custom_api_port": "5000"
}
with open('cosmostat_settings.yaml', 'r') as f:
print("Loading cosmostat_settings file")
cosmostat_settings = yaml.safe_load(f)
print("...Done")
# initialize system variables from settings file
print("Checking for system var overrides")
for setting in app_settings:
if setting in cosmostat_settings:
cosmos_setting = cosmostat_settings[setting]
if app_settings["debug_output"]:
print(f"{setting}: {cosmos_setting}")
app_settings[setting] = cosmos_setting
print("...Done")
# this returns the docker gateway from the settings
def docker_gateway_settings() -> str:
return cosmostat_settings["docker_gateway"]
# this returns the jenkins user that ran the pipeline
def jenkins_user_settings() -> str:
return cosmostat_settings["jenkins_user"]
# this returns the ansible_hostname from setup
def jenkins_hostname_settings() -> str:
return cosmostat_settings["ansible_hostname"]
# this returns the inventory_generation_timestamp
def jenkins_inventory_generation_timestamp_settings() -> str:
return cosmostat_settings["inventory_generation_timestamp"]
def service_gateway_ip():
if cosmostat_settings["secure_api"]:
return docker_gateway_settings()
else:
return "0.0.0.0"
def service_api_port():
return cosmostat_settings["custom_api_port"]
####################################################################### #######################################################################
### Redis Functions ### Redis Functions
####################################################################### #######################################################################
@ -74,19 +21,42 @@ r = redis.Redis(host=service_gateway_ip(), port=6379)
def update_redis_channel(redis_channel, data): def update_redis_channel(redis_channel, data):
# Publish to the specified Redis channel # Publish to the specified Redis channel
r.publish(redis_channel, json.dumps(data)) r.publish(redis_channel, json.dumps(data))
if app_settings["noisy_test"]: log_data(log_output = data, log_level = "noisy_test")
print(f"{redis_channel} Redis Update")
print(data)
def update_redis_server(): def update_redis_server():
# Update Stats Redis Channel # Client Redis Tree
if cosmostat_system.check_system_timer(): if not run_cosmostat_server():
update_redis_channel("host_metrics", get_redis_data(human_readable = False)) if cosmostat_client.check_system_timer():
update_redis_channel("host_metrics", get_client_redis_data(human_readable = False))
if run_cosmostat_server():
update_redis_channel("client_summary", get_server_redis_data())
# Server Redis Tree
# Update history_stats Redis Channel # Update history_stats Redis Channel
# update_redis_channel("history_stats", get_component_list()) # update_redis_channel("history_stats", get_component_list())
def get_client_redis_data(human_readable = False):
    """Return the entries produced by ``get_dynamic_data`` as a new list.

    ``human_readable`` is forwarded unchanged to ``get_dynamic_data``.
    """
    # only dynamic data is included; the loop over get_static_data is
    # disabled in this version of the function
    return list(get_dynamic_data(human_readable))
def get_server_redis_data():
    """Return one summary dictionary per client held by ``cosmostat_server``.

    Each summary carries only the ``uuid``, ``short_id`` and ``redis_data``
    of the stored client entry.
    """
    exported_fields = ("uuid", "short_id", "redis_data")
    return [
        {field: client[field] for field in exported_fields}
        for client in cosmostat_server.systems
    ]
####################################################################### #######################################################################
### Flask Routes ### Client Flask Routes
####################################################################### #######################################################################
# dynamic data # dynamic data
@ -103,44 +73,33 @@ def static_data():
# redis data # redis data
@app.route('/redis_data', methods=['GET'])
def redis_data():
    """Serve the client's Redis payload with human_readable disabled."""
    payload = get_client_redis_data(human_readable = False)
    return jsonify(payload)
# redis strings # redis strings
@app.route('/redis_strings', methods=['GET'])
def redis_strings():
    """Serve the client's Redis payload with human_readable enabled."""
    payload = get_client_redis_data(human_readable = True)
    return jsonify(payload)
# full summary
@app.route('/full_summary', methods=['GET'])
def full_summary():
    """Serve the dictionary built by get_full_summary() as JSON."""
    summary = get_full_summary()
    return jsonify(summary)
# php summary # php summary
@app.route('/php_summary', methods=['GET'])
def php_summary():
    """Serve the dictionary built by get_php_summary() as JSON."""
    summary = get_php_summary()
    return jsonify(summary)
# system info # return full descriptor
@app.route('/info', methods=['GET'])
def info():
    """Serve the dictionary built by get_info() as JSON."""
    system_info = get_info()
    return jsonify(system_info)
# system info
@app.route('/descriptor', methods=['GET'])
def descriptor():
    """Serve the component class tree returned by get_descriptor() as JSON."""
    class_tree = get_descriptor()
    return jsonify(class_tree)
# socket timer # socket timer handler
@app.route('/start_timer', methods=['GET'])
def start_timer():
    """Reset the client's recent_check timestamp to now and report the new value."""
    cosmostat_client.recent_check = int(time.time())
    log_data(log_output = f"Timestamp updated to {cosmostat_client.recent_check}", log_level = "noisy_test")
    reply = {
        "message": "websocket timer reset",
        "new_timestamp": cosmostat_client.recent_check
    }
    return jsonify(reply)
@ -148,22 +107,22 @@ def start_timer():
@app.route('/timer_data', methods=['GET'])
def timer_data():
    """Report the time elapsed since recent_check and the system timer state."""
    time_now = time.time()
    # keys are inserted in the same order the response has always used
    report = {"Time Lapsed": time_now - float(cosmostat_client.recent_check)}
    report["Current Time Value"] = time_now
    report["Last Update Value"] = float(cosmostat_client.recent_check)
    report["System Updating"] = cosmostat_client.check_system_timer()
    return jsonify(report)
# test route # test route
@app.route('/test', methods=['GET']) @app.route('/test', methods=['GET'])
def test(): def test():
this_cpu = cosmostat_system.get_components(component_type="CPU") this_cpu = cosmostat_client.get_components(component_type="CPU")
return jsonify( return jsonify(
{ {
"component_count:": len(cosmostat_system.components), "component_count:": len(cosmostat_client.components),
"user": jenkins_user_settings(), "user": jenkins_user_settings(),
"hostname": jenkins_hostname_settings(), "hostname": jenkins_hostname_settings(),
"cpu_model": this_cpu[0].description "cpu_model": this_cpu[0].description
@ -171,66 +130,22 @@ def test():
) )
####################################################################### #######################################################################
### Flask Helpers ### Client Flask Helpers
####################################################################### #######################################################################
# needs to return array of {name: name, type: type, metrics: metrics} # needs to return array of {name: name, type: type, metrics: metrics}
# for redis table generation, includes system and component metrics # for redis table generation, includes system and component metrics
def get_dynamic_data(human_readable = False):
    """Return the client's live metrics; human_readable is forwarded unchanged."""
    live_metrics = cosmostat_client.get_live_metrics(human_readable)
    return live_metrics
def get_static_data(human_readable = False):
    """Return the client's static metrics; human_readable is forwarded unchanged."""
    # the former `result = []` local was never read and has been dropped
    return cosmostat_client.get_static_metrics(human_readable)
# php is about to start rendering static data
def get_redis_data(human_readable = False):
    """Return the dynamic metrics as a new list.

    Static metrics are intentionally not included at the moment.
    """
    return list(get_dynamic_data(human_readable))
def get_full_summary():
    """Assemble settings, live metrics, component strings and system info in one dict."""
    # gather the data in the same order as before: metrics, components, info, settings
    metrics = cosmostat_system.get_live_metrics()
    components = cosmostat_system.get_component_strings()
    info_block = get_info()
    settings = {
        "user": jenkins_user_settings(),
        "hostname": jenkins_hostname_settings(),
        "timestamp": jenkins_inventory_generation_timestamp_settings(),
        "component_count:": len(cosmostat_system.components),
        "object_name": cosmostat_system.name,
        "docker_gateway": docker_gateway_settings()
    }
    return {
        "system_settings": settings,
        "live_metrics": metrics,
        "system_components": components,
        "system_info": info_block
    }
def get_info():
    """Return the hostname, each component's name/description, and the system summary keys."""
    component_strings = [
        {"name": component.name, "description": component.description}
        for component in cosmostat_system.get_components()
    ]
    return {
        "hostname": jenkins_hostname_settings(),
        "component_strings": component_strings,
        "system_strings": cosmostat_system.get_sysvars_summary_keys()
    }
def get_php_summary(): def get_php_summary():
system_properties = cosmostat_system.get_system_properties(human_readable = True) system_properties = cosmostat_client.get_system_properties(human_readable = True)
system_components = [] system_components = []
for component in cosmostat_system.get_components(): for component in cosmostat_client.get_components():
this_component = { this_component = {
"component_name": component.name, "component_name": component.name,
"info_strings": component.get_properties_strings(return_simple = True) "info_strings": component.get_properties_strings(return_simple = True)
@ -245,53 +160,244 @@ def get_php_summary():
return result return result
def get_descriptor():
    """Return the component class tree held by the client System object."""
    class_tree = cosmostat_client.get_component_class_tree()
    return class_tree
def generate_state_definition():
    """Bundle the client's uuid with the dictionary produced by get_php_summary()."""
    state = {"uuid": cosmostat_client.uuid}
    state["state_definition"] = get_php_summary()
    return state
####################################################################### #######################################################################
### Other Functions ### Server Flask Routes
####################################################################### #######################################################################
# instantiate and return the System object # update client on server
@app.route('/update_client', methods=['GET', 'POST'])
def update_client():
    """Apply a client's "redis_data" report to its record on the server.

    POST is accepted because the client reporter submits its JSON body with
    requests.post(); GET stays registered for backward compatibility.
    Validation failures are answered with HTTP 400.
    """
    payload, error = _checked_payload("redis_data")
    if error is not None:
        return error
    this_client = cosmostat_server.get_system(uuid = payload["uuid"])
    result = run_update_client(this_client, payload)
    return jsonify(result), 200

# create client on server
@app.route('/create_client', methods=['GET', 'POST'])
def create_client():
    """Register a client from its "client_properties" submission.

    Accepts the same methods and reports validation failures the same way
    as update_client().
    """
    payload, error = _checked_payload("client_properties")
    if error is not None:
        return error
    this_client = cosmostat_server.get_system(uuid = payload["uuid"])
    result = run_create_client(this_client, payload)
    return jsonify(result), 200

# api to validate Cosmostat Class
@app.route('/client_summary', methods=['GET'])
def client_summary():
    """Serve the summary of every client known to the server."""
    # the summary used to be built and then dropped by an empty jsonify()
    summary = get_client_summary()
    return jsonify(summary)

#######################################################################
### Server Flask Helpers
#######################################################################

# turn every validation failure into a ready-to-return error response
def _checked_payload(dict_name):
    """Run client_submit_check() on the current request.

    Returns (payload, None) on success and (None, (response, status)) on
    failure, so a route can simply return the second element.
    """
    try:
        outcome = client_submit_check(request = request, dict_name = dict_name)
    except ValueError as exc:
        # missing required keys
        return None, (jsonify({"error": str(exc)}), 400)
    if isinstance(outcome, dict):
        return outcome, None
    # client_submit_check already built the (response, status) pair
    return None, outcome

# update client on server
def run_update_client(this_client, payload = None):
    """Store a validated update for an existing client.

    this_client is the result of cosmostat_server.get_system(); an empty
    dict is treated as "client not found".  payload is the validated
    submission.  Raises ValueError when a client was found but no payload
    was supplied.
    """
    if this_client == {}:
        return { "message": "client not found" }
    if payload is None:
        raise ValueError("payload is required to update a client")
    update_status = f"updated client {payload['short_id']}"
    timestamp_update = cosmostat_server.update_system(system_state = payload, system_uuid = payload["uuid"])
    return {
        "status": update_status,
        "uuid": payload["uuid"],
        "timestamp": timestamp_update
    }

# create client on server
def run_create_client(this_client, payload = None):
    """Create a client record from a validated submission.

    this_client is kept for interface compatibility and is not read: for a
    client that does not exist yet the lookup has nothing to offer, so the
    short id is taken from the payload.  Raises ValueError when no payload
    was supplied.
    """
    if payload is None:
        raise ValueError("payload is required to create a client")
    update_status = f"created client {payload['short_id']}"
    timestamp_update = cosmostat_server.create_system(system_state = payload, system_uuid = payload["uuid"])
    return {
        "status": update_status,
        "uuid": payload["uuid"],
        "timestamp": timestamp_update
    }

# flask submission check function
def client_submit_check(request, dict_name: str):
    """Validate a client submission and return its JSON payload.

    Returns the payload dict on success.  For a non-JSON request, a
    malformed body, or a body that is not a JSON object, returns a
    (response, 400) pair instead.  Raises ValueError when any of "uuid",
    "short_id", "data_timestamp" or dict_name is missing.
    """
    required_keys = {"uuid", "short_id", "data_timestamp", dict_name}
    if not request.is_json:
        logging.warning("Received non-JSON request")
        return jsonify({"error": "Content-type must be application/json"}), 400
    payload = request.get_json(silent=True)
    if payload is None:
        logging.warning("Malformed JSON body")
        return jsonify({"error": "Malformed JSON"}), 400
    if not isinstance(payload, dict):
        # a JSON array or scalar has no keys to validate
        logging.warning("JSON body is not an object")
        return jsonify({"error": "JSON body must be an object"}), 400
    missing = required_keys - payload.keys()
    if missing:
        raise ValueError(f"Missing required keys: {', '.join(sorted(missing))}")
    return payload
# generate cosmostat server summary
def get_client_summary():
    """Return, per server-side system, its readable properties and component info strings."""
    summaries = []
    for client in cosmostat_server.systems:
        properties = client.get_system_properties(human_readable = True)
        components = [
            {
                "component_name": component.name,
                "info_strings": component.get_properties_strings(return_simple = True)
            }
            for component in client.get_components()
        ]
        summaries.append({
            "client_properties": properties,
            "client_components": components
        })
    return summaries
#######################################################################
### Cosmostat Client Subroutines
#######################################################################
# Cosmostat Client Reporter
def client_update(this_client: dict = None, api_endpoint = "update_client"):
    """Report this client's current Redis data to the Cosmostat server.

    this_client is unused and kept only for interface compatibility; it is
    optional so that the no-argument call in the background loop works.
    api_endpoint is appended to the configured server API base URL.

    Returns the decoded JSON reply.  Raises RuntimeError when the request
    fails, when the reply is not JSON, or after the server answered
    "client not found" on the update endpoint (the client is initialized
    first).
    """
    api_url = f"{cosmostat_server_api()}{api_endpoint}"
    payload = {
        "uuid": cosmostat_client.uuid,
        "short_id": cosmostat_client.short_id,
        "data_timestamp": time.time(),   # Unix epoch float
        "redis_data": get_client_redis_data(human_readable = False),
    }
    result = client_submission_handler(api_url = api_url, payload = payload)
    client_missing = (
        isinstance(result, dict)
        and result.get("message", "").lower() == "client not found"
    )
    # NOTE(review): indentation was lost in the reviewed view; the raise is
    # assumed to belong to the update_client branch - confirm.
    if client_missing and api_endpoint == "update_client":
        # unknown to the server: register first, then signal the failed update
        client_initialize()
        raise RuntimeError("Client not found - initializing")
    return result

# Cosmostat Client Initializer
def client_initialize():
    """Register this client with the Cosmostat server via create_client.

    Returns the decoded JSON reply; raises RuntimeError on request or
    decoding failure.
    """
    api_url = f"{cosmostat_server_api()}create_client"
    payload = {
        "uuid": cosmostat_client.uuid,
        "short_id": cosmostat_client.short_id,
        "data_timestamp": time.time(),   # Unix epoch float
        "client_properties": get_php_summary(),
    }
    return client_submission_handler(api_url = api_url, payload = payload)

# Cosmostat Client API Reporting Handler
def client_submission_handler(api_url = None, payload = None, timeout = 10):
    """POST payload as JSON to api_url and return the decoded JSON reply.

    api_url and payload used to be read from names that were never defined
    in this function; they are now explicit parameters.  timeout is the
    request timeout in seconds.

    Raises ValueError when api_url is missing, and RuntimeError when the
    request fails (including HTTP 4xx/5xx) or the reply is not JSON.
    """
    if not api_url:
        raise ValueError("api_url is required")
    try:
        # `json=` automatically sets Content-Type to application/json
        response = requests.post(api_url, json=payload, timeout=timeout)
        response.raise_for_status()  # raise HTTPError for 4xx/5xx
    except requests.exceptions.RequestException as exc:
        # Wrap the low-level exception in a more descriptive one
        raise RuntimeError(
            f"Failed to POST to {api_url!r}: {exc}"
        ) from exc
    # process reply from API
    try:
        return response.json()
    except ValueError as exc:
        raise RuntimeError(
            f"Server responded with non-JSON payload: {response.text!r}"
        ) from exc
# Background Loop Function #######################################################################
def background_loop():
# Scheduler job body (removed side of the diff).
# NOTE(review): indentation is not visible in this view, so which of the
# checks below are nested inside the timer check cannot be confirmed here.
# Update all data on the System object
if cosmostat_system.check_system_timer():
cosmostat_system.update_system_state()
# push to Redis only when the push_redis setting is enabled
if app_settings["push_redis"]:
update_redis_server()
# extra console output when the noisy_test setting is enabled
if app_settings["noisy_test"]:
print("Sorry about the mess...")
print(f"Blame {jenkins_user_settings()}")
####################################################################### #######################################################################
### Main Subroutine ### Main Subroutine
####################################################################### #######################################################################
#######################################################################
if __name__ == '__main__': if __name__ == '__main__':
# instantiate system ######################################
cosmostat_system = new_cosmos_system() ### Main Functions
######################################
# instantiate and return the Client System object
def new_cosmos_client():
    """Build the client System object for this host, log its components, and return it."""
    client_system = System(f"{jenkins_hostname_settings()}")
    summary_line = f"New System object name: {client_system.name} - {client_system.get_component_count()} components:"
    log_data(log_output = summary_line, log_level = "log_output")
    for part in client_system.components:
        log_data(log_output = part.description, log_level = "log_output")
    return client_system
# instantiate and return the Cosmoserver System object
def new_cosmos_server():
    """Create the Cosmoserver object from the local client's uuid, log its name, and return it."""
    server = Cosmoserver(cosmostat_client.uuid)
    name_line = f"New Cosmostat object name: {server.name}"
    log_data(log_output = name_line, log_level = "log_output")
    return server
# Background Loop Function
def background_loop():
# Scheduler job body: refresh the client System object, then optionally
# publish to Redis and report to the Cosmostat server.
# NOTE(review): indentation is not visible in this view, so which of the
# checks below are nested inside the timer check cannot be confirmed here.
# Update all data on the System object
if cosmostat_client.check_system_timer():
cosmostat_client.update_system_state()
# push to Redis only when the push_redis setting is enabled
if app_settings["push_redis"]:
update_redis_server()
# report to the server only when cosmostat_server_reporter is enabled
if app_settings["cosmostat_server_reporter"]:
client_update()
######################################
# instantiate client
######################################
cosmostat_client = new_cosmos_client()
if app_settings["cosmostat_server_reporter"]:
client_initialize()
######################################
# instantiate server
######################################
cosmostat_server = None
if run_cosmostat_server():
cosmostat_server = new_cosmos_server()
######################################
# send initial stats update to redis # send initial stats update to redis
######################################
if app_settings["push_redis"]: if app_settings["push_redis"]:
update_redis_server() update_redis_server()
######################################
# Flask scheduler for scanner # Flask scheduler for scanner
######################################
if app_settings["run_background"]: if app_settings["run_background"]:
if app_settings["log_output"]: log_data(log_output = "Loading flask background subroutine...", log_level = "log_output")
print("Loading flask background subroutine...")
scheduler.add_job(id='background_loop', scheduler.add_job(id='background_loop',
func=background_loop, func=background_loop,
@ -300,13 +406,14 @@ if __name__ == '__main__':
scheduler.init_app(app) scheduler.init_app(app)
scheduler.start() scheduler.start()
if app_settings["log_output"]: log_data(log_output = "...Done", log_level = "log_output")
print("...Done")
else: else:
if app_settings["log_output"]: log_data(log_output = "Skipping flask background task", log_level = "log_output")
print("Skipping flask background task")
######################################
# Flask API # Flask API
######################################
app.run(debug=False, host=service_gateway_ip(), port=service_api_port()) app.run(debug=False, host=service_gateway_ip(), port=service_api_port())

View File

@ -1,98 +0,0 @@
[
{
"name": "CPU",
"description": "{CPU Model} with {Core Count} cores.",
"multi_check": "False",
"properties": {
"Core Count": "lscpu --json | jq -r '.lscpu[] | select(.field==\"CPU(s):\") | .data'",
"CPU Model": "lscpu --json | jq -r '.lscpu[] | select(.field==\"Model name:\") | .data'",
"Clock Speed": "sudo dmesg | grep MHz | grep tsc | cut -d: -f2 | awk '{print $2 \" \" $3}'"
},
"metrics": {
"1m_load": "cat /proc/loadavg | awk '{print $1}'",
"5m_load": "cat /proc/loadavg | awk '{print $2}'",
"15m_load": "cat /proc/loadavg | awk '{print $3}'",
"current_mhz": "less /proc/cpuinfo | grep MHz | cut -d: -f2 | awk '{sum += $1} END {print sum/NR}'"
}
},
{
"name": "RAM",
"description": "Total {Total GB}GB in {RAM Module Count} modules.",
"multi_check": "False",
"properties": {
"Total GB": "sudo /usr/bin/lshw -json -c memory | jq -r '.[] | select(.description==\"System Memory\").size' | awk '{printf \"%.2f\\n\", $1/1073741824}'",
"RAM Module Count": "sudo /usr/bin/lshw -json -c memory | jq -r '.[] | select(.id | contains(\"bank\")) | .id ' | wc -l",
"RAM Type": "sudo /usr/sbin/dmidecode --type 17 | grep Type: | sort -u | cut -d: -f2 | xargs",
"RAM Speed": "sudo /usr/sbin/dmidecode --type 17 | grep Speed: | grep -v Configured | sort -u | cut -d: -f2 | xargs",
"RAM Voltage": "sudo /usr/sbin/dmidecode --type 17 | grep 'Configured Voltage' | sort -u | cut -d: -f2 | xargs"
},
"metrics": {
"MB Used": "free -m | grep Mem | awk '{print $3}'",
"MB Free": "free -m | grep Mem | awk '{print $4}'"
},
"virt_ignore": [
"RAM Type",
"RAM Speed",
"RAM Voltage"
]
},
{
"name": "LAN",
"description": "{Device ID} - {Device Name} - {MAC Address}",
"multi_check": "True",
"device_list": "ip link | grep default | grep -v -e docker -e 127.0.0.1 -e br- -e veth -e lo -e tun | cut -d ':' -f 2 | awk '{{print $1}}' ",
"properties": {
"MAC Address": "ip link | grep -A1 ' {this_device}' | grep ether | awk '{{print $2}}'",
"Device Name": "echo {this_device}",
"Device ID": "udevadm info -q property -p $(ls -l /sys/class/net/ | grep {this_device} | cut -d '>' -f2 | cut -b 8- ) | grep ID_MODEL_FROM_DATABASE | cut -d '=' -f2 "
},
"metrics": {
"IP Address": "ip -o -4 ad | grep -v -e docker -e 127.0.0.1 -e br- | grep {this_device} | awk '{{print $4}}'",
"Data Transmitted": "ifconfig {this_device} | grep RX | grep bytes | cut -d '(' -f2 | tr -d ')'",
"Data Received": "ifconfig {this_device} | grep TX | grep bytes | cut -d '(' -f2 | tr -d ')'",
"Link State": "cat /sys/class/net/{this_device}/operstate",
"Link Speed": "cat /sys/class/net/{this_device}/speed || true"
},
"multi_metrics": [
"IP Address"
]
},
{
"name": "NVGPU",
"description": "NVGPU{Device ID} - {Device Model} with {Memory Size}, Max Power {Maximum Power}",
"multi_check": "True",
"device_list": "nvidia-smi --query-gpu=index --format=csv,noheader,nounits",
"properties": {
"Device Model": "nvidia-smi --id={this_device} --query-gpu=name --format=csv,noheader,nounits",
"Device ID": "echo NVGPU{this_device}",
"Driver Version": "nvidia-smi --id={this_device} --query-gpu=driver_version --format=csv,noheader,nounits",
"Maximum Power": "nvidia-smi --id={this_device} --query-gpu=power.limit --format=csv,noheader,nounits",
"Memory Size": "nvidia-smi --id={this_device} --query-gpu=memory.total --format=csv,noheader,nounits"
},
"metrics": {
"Power Draw": "nvidia-smi --id={this_device} --query-gpu=power.draw --format=csv,noheader,nounits",
"Used Memory": "nvidia-smi --id={this_device} --query-gpu=memory.used --format=csv,noheader,nounits",
"Temperature": "nvidia-smi --id={this_device} --query-gpu=temperature.gpu --format=csv,noheader,nounits",
"GPU Load": "nvidia-smi --id={this_device} --query-gpu=utilization.gpu --format=csv,noheader,nounits"
},
"precheck": "lspci | grep NV | wc -l"
},
{
"name": "STOR",
"description": "{Device Path} is of type {Drive Type} with capacity of {Total Capacity}.",
"multi_check": "True",
"device_list": "lsblk -d -o NAME,SIZE | grep -v -e 0B -e NAME | awk '{print $1}'",
"properties": {
"Device Name": "lsblk -d -o NAME,SIZE | grep -v -e 0B -e NAME | awk '{{print $1}}' | grep {this_device}",
"Device Path": "lsblk -d -o NAME,SIZE | grep -v -e 0B -e NAME | awk '{{print \"/dev/\"$1}}' | grep {this_device}",
"Drive Type": "lsblk -d -o NAME,TRAN | grep {this_device} | awk '{{print $2}}'",
"Total Capacity": "lsblk -d -o NAME,SIZE | grep {this_device} | awk '{{print $2}}'",
"SMART Check": "sudo /usr/sbin/smartctl -x --json /dev/{this_device} | jq -r .smart_status.passed"
},
"metrics": {
"placeholder": ""
}
}
]

196
files/api/descriptors.json Normal file
View File

@ -0,0 +1,196 @@
[
{
"name": "System",
"static_key_variables": [
{
"name": "Hostname",
"command": "hostname"
},
{
"name": "Virtual Machine",
"command": "echo $( [ \"$(systemd-detect-virt)\" = none ] && echo False || echo True )",
"req_check": "False"
},
{
"name": "CPU Architecture",
"command": "lscpu --json | jq -r '.lscpu[] | select(.field==\"Architecture:\") | .data'"
},
{
"name": "OS Kernel",
"command": "uname -r"
},
{
"name": "OS Name",
"command": "cat /etc/os-release | grep PRETTY | cut -d\\\" -f2"
},
{
"name": "Manufacturer",
"command":{
"x86_64": "sudo dmidecode --type 1 | grep Manufacturer: | cut -d: -f2 | sed -e 's/^[ \\t]*//'",
"aarch64": ""
},
"arch_check": "true"
},
{
"name": "Product Name",
"command": {
"x86_64": "sudo dmidecode --type 2 | grep 'Product Name:' | cut -d: -f2 | sed -e 's/^[ \\t]*//'",
"aarch64": "lshw -C system -json -disable NVMe -disable usb -disable 'PCI (Legacy)' -disable PCI -disable pci| jq -r '.[] | .product'"
},
"arch_check": "true"
},
{
"name": "Serial Number",
"command": {
"x86_64": "sudo dmidecode --type 2 | grep 'Serial Number: '| cut -d: -f2 | sed -e 's/^[ \\t]*//'",
"aarch64": "lshw -C system -json -disable NVMe -disable usb -disable 'PCI (Legacy)' -disable PCI -disable pci| jq -r '.[] | .serial'"
},
"arch_check": "true"
}
],
"dynamic_key_variables": [
{
"name": "System Uptime",
"command": "uptime -p"
},
{
"name": "Current Date",
"command": "date '+%D %r'"
}
],
"virt_ignore": [
"Product Name",
"Serial Number"
]
},
{
"name": "CPU",
"description": "{CPU Model} with {Core Count} cores.",
"multi_check": "False",
"properties": {
"Core Count": "lscpu --json | jq -r '.lscpu[] | select(.field==\"CPU(s):\") | .data'",
"CPU Model": "lscpu --json | jq -r '.lscpu[] | select(.field==\"Model name:\") | .data' | xargs",
"Clock Speed": {
"x86_64": "sudo dmesg | grep MHz | grep tsc | cut -d: -f2 | awk '{print $2 \" \" $3}'",
"aarch64": "lscpu --json | jq -r '.lscpu[] | select(.field==\"CPU max MHz:\") | .data ' | xargs "
}
},
"notes": "clock speed doesn't work on ARM",
"metrics": {
"1m_load": "cat /proc/loadavg | awk '{{print $1}}'",
"5m_load": "cat /proc/loadavg | awk '{{print $2}}'",
"15m_load": "cat /proc/loadavg | awk '{{print $3}}'",
"current_mhz": {
"x86_64": "cat /proc/cpuinfo | grep MHz | cut -d: -f2 | awk '{{sum += $1}} END {{print sum/NR}}'",
"aarch64": "echo unknown"
}
},
"arch_check": "True",
"arch_variance": [
"current_mhz",
"Clock Speed"
]
},
{
"name": "RAM",
"description": "Total {Total GB}GB in {RAM Module Count} modules.",
"multi_check": "False",
"properties": {
"Total GB": {
"x86_64": "sudo /usr/bin/lshw -json -c memory -disable NVMe -disable usb | jq -r '.[] | select(.description==\"System Memory\").size' | awk '{{printf \"%.2f\\n\", $1/1073741824}}'",
"aarch64": "sudo /usr/bin/lshw -json -c memory -disable NVMe -disable usb | jq -r '.[] | select(.description==\"System memory\").size' | awk '{{printf \"%.2f\\n\", $1/1073741824}}'"
},
"RAM Module Count": {
"x86_64": "sudo /usr/bin/lshw -json -c memory -disable NVMe -disable usb | jq -r '.[] | select(.id | contains(\"bank\")) | .id ' | wc -l"
},
"RAM Type": {
"x86_64": "sudo /usr/sbin/dmidecode --type 17 | grep Type: | sort -u | cut -d: -f2 | xargs",
"aarch64": "echo none"
},
"RAM Speed": {
"x86_64": "sudo /usr/sbin/dmidecode --type 17 | grep Speed: | grep -v Configured | sort -u | cut -d: -f2 | xargs",
"aarch64": "echo none"
},
"RAM Voltage": {
"x86_64": "sudo /usr/sbin/dmidecode --type 17 | grep 'Configured Voltage' | sort -u | cut -d: -f2 | xargs",
"aarch64": "echo none"
}
},
"metrics": {
"MB Used": "free -m | grep Mem | awk '{print $3}'",
"MB Free": "free -m | grep Mem | awk '{print $4}'"
},
"virt_ignore": [
"RAM Type",
"RAM Speed",
"RAM Voltage"
],
"arch_check": "True",
"arch_variance": [
"Total GB",
"RAM Module Count",
"RAM Type",
"RAM Speed",
"RAM Voltage"
]
},
{
"name": "LAN",
"description": "{Device ID} - {Device Name} - {MAC Address}",
"multi_check": "True",
"device_list": "ip link | grep default | grep -v -e docker -e 127.0.0.1 -e br- -e veth -e lo -e tun | cut -d ':' -f 2 | awk '{{print $1}}' ",
"properties": {
"MAC Address": "ip link | grep -A1 ' {this_device}' | grep ether | awk '{{print $2}}'",
"Device Name": "echo {this_device}",
"Device ID": "udevadm info -q property -p $(ls -l /sys/class/net/ | grep {this_device} | cut -d '>' -f2 | cut -b 8- ) | grep ID_MODEL_FROM_DATABASE | cut -d '=' -f2 "
},
"metrics": {
"IP Address": "ip -o -4 ad | grep -v -e docker -e 127.0.0.1 -e br- | grep {this_device} | awk '{{print $4}}'",
"Data Transmitted": "ifconfig {this_device} | grep TX | grep bytes | cut -d '(' -f2 | tr -d ')'",
"Data Received": "ifconfig {this_device} | grep RX | grep bytes | cut -d '(' -f2 | tr -d ')'",
"Link State": "cat /sys/class/net/{this_device}/operstate",
"Link Speed": "cat /sys/class/net/{this_device}/speed || true"
},
"multi_metrics": [
"IP Address"
]
},
{
"name": "NVGPU",
"description": "NVGPU{Device ID} - {Device Model} with {Memory Size}, Max Power {Maximum Power}",
"multi_check": "True",
"device_list": "nvidia-smi --query-gpu=index --format=csv,noheader,nounits",
"properties": {
"Device Model": "nvidia-smi --id={this_device} --query-gpu=name --format=csv,noheader,nounits",
"Device ID": "echo NVGPU{this_device}",
"Driver Version": "nvidia-smi --id={this_device} --query-gpu=driver_version --format=csv,noheader,nounits",
"Maximum Power": "nvidia-smi --id={this_device} --query-gpu=power.limit --format=csv,noheader,nounits",
"Memory Size": "nvidia-smi --id={this_device} --query-gpu=memory.total --format=csv,noheader,nounits"
},
"metrics": {
"Power Draw": "nvidia-smi --id={this_device} --query-gpu=power.draw --format=csv,noheader,nounits",
"Used Memory": "nvidia-smi --id={this_device} --query-gpu=memory.used --format=csv,noheader,nounits",
"Temperature": "nvidia-smi --id={this_device} --query-gpu=temperature.gpu --format=csv,noheader,nounits",
"GPU Load": "nvidia-smi --id={this_device} --query-gpu=utilization.gpu --format=csv,noheader,nounits"
},
"precheck": "lspci | grep NVIDIA | wc -l"
},
{
"name": "STOR",
"description": "{Device Path} is of type {Drive Type} with capacity of {Total Capacity}.",
"multi_check": "True",
"device_list": "lsblk -d -o NAME,SIZE | grep -v -e 0B -e NAME | awk '{print $1}'",
"properties": {
"Device Name": "lsblk -d -o NAME,SIZE | grep -v -e 0B -e NAME | awk '{{print $1}}' | grep {this_device}",
"Device Path": "lsblk -d -o NAME,SIZE | grep -v -e 0B -e NAME | awk '{{print \"/dev/\"$1}}' | grep {this_device}",
"Drive Type": "lsblk -d -o NAME,TRAN | grep {this_device} | awk '{{print $2}}'",
"Total Capacity": "lsblk -d -o NAME,SIZE | grep {this_device} | awk '{{print $2}}'",
"SMART Check": "sudo /usr/sbin/smartctl -x --json /dev/{this_device} | jq -r .smart_status.passed"
},
"metrics": {
"placeholder": ""
}
}
]

View File

@ -3,17 +3,123 @@
"name": "", "name": "",
"description": "", "description": "",
"multi_check": "True", "multi_check": "True",
"device_list": " ", "device_list": "command to list all devices of this type for iteration",
"properties": { "properties": {
"property_name": "shell command to display said property",
"property_with_variance":{
"x86_64": "this structure works with metrics also",
"aarch64": "the code calls based on the key name"
}
}, },
"metrics": { "metrics": {
"metric_name": "shell command to display said metric"
}, },
"multi_metrics": [ "multi_metrics": [
"array",
"of metric names",
"for devices with",
"multiple instances"
], ],
"virt_ignore": [ "virt_ignore": [
"array",
"of metrics",
"or properties",
"to skip when",
"running on a VM"
],
"precheck": "if there is a chance this device might be absent, this is the shell command to check for its presence",
"arch_check": "If there have to be different commands for different architectures, this is the solution",
"arch_variance": [
"array",
"of metrics",
"or properties",
"which have variance"
] ]
}, },
{
"static_key_variables": [
{"name": "Hostname", "command": "hostname"},
{"name": "Virtual Machine", "command": "echo $( [ \"$(systemd-detect-virt)\" = none ] && echo False || echo True )", "req_check": "False"},
{"name": "CPU Architecture", "command": "lscpu --json | jq -r '.lscpu[] | select(.field==\"Architecture:\") | .data'"},
{"name": "OS Kernel", "command": "uname -r"},
{"name": "OS Name", "command": "cat /etc/os-release | grep PRETTY | cut -d\\\" -f2"},
{"name": "Manufacturer", "command": "sudo dmidecode --type 1 | grep Manufacturer: | cut -d: -f2 | sed -e 's/^[ \\t]*//'"},
{"name": "Product Name", "command": "sudo dmidecode --type 2 | grep 'Product Name:' | cut -d: -f2 | sed -e 's/^[ \\t]*//'"},
{"name": "Serial Number", "command": "sudo dmidecode --type 2 | grep 'Serial Number: '| cut -d: -f2 | sed -e 's/^[ \\t]*//'"}
],
"dynamic_key_variables": [
{"name": "System Uptime", "command": "uptime -p"},
{"name": "Current Date", "command": "date '+%D %r'"}
],
"virt_ignore": [
"Product Name",
"Serial Number"
]
},
{
"name": "System",
"static_key_variables": [
{
"name": "Hostname",
"command": "hostname"
},
{
"name": "Virtual Machine",
"command": "echo $( [ \"$(systemd-detect-virt)\" = none ] && echo False || echo True )",
"req_check": "False"
},
{
"name": "CPU Architecture",
"command": "lscpu --json | jq -r '.lscpu[] | select(.field==\"Architecture:\") | .data'"
},
{
"name": "OS Kernel",
"command": "uname -r"
},
{
"name": "OS Name",
"command": "cat /etc/os-release | grep PRETTY | cut -d\\\" -f2"
},
{
"name": "Manufacturer",
"command":{
"x86_64": "sudo dmidecode --type 1 | grep Manufacturer: | cut -d: -f2 | sed -e 's/^[ \\t]*//'"
},
"arch_check": "true"
},
{
"name": "Product Name",
"command": {
"x86_64": "sudo dmidecode --type 2 | grep 'Product Name:' | cut -d: -f2 | sed -e 's/^[ \\t]*//'"
},
"arch_check": "true"
},
{
"name": "Serial Number",
"command": {
"x86_64": "sudo dmidecode --type 2 | grep 'Serial Number: '| cut -d: -f2 | sed -e 's/^[ \\t]*//'"
},
"arch_check": "true"
}
],
"dynamic_key_variables": [
{
"name": "System Uptime",
"command": "uptime -p"
},
{
"name": "Current Date",
"command": "date '+%D %r'"
}
],
"virt_ignore": [
"Product Name",
"Serial Number"
]
},
{ {
"SATA GBW": "sudo /usr/sbin/smartctl -x --json /dev/{this_device} | jq -r '.physical_block_size as $block |.ata_device_statistics.pages[] | select(.name == \"General Statistics\") | .table[] | select(.name == \"Logical Sectors Written\") | .value as $sectors | ($sectors * $block) / 1073741824 ' | awk '{{printf \"%.2f GiB Written\\n\", $0}}' || true", "SATA GBW": "sudo /usr/sbin/smartctl -x --json /dev/{this_device} | jq -r '.physical_block_size as $block |.ata_device_statistics.pages[] | select(.name == \"General Statistics\") | .table[] | select(.name == \"Logical Sectors Written\") | .value as $sectors | ($sectors * $block) / 1073741824 ' | awk '{{printf \"%.2f GiB Written\\n\", $0}}' || true",
"NVMe GBW": "sudo /usr/sbin/smartctl -x --json /dev/{this_device} | jq -r ' .nvme_smart_health_information_log.data_units_written as $dw | .logical_block_size as $ls | ($dw * $ls) / 1073741824 ' | awk '{{printf \"%.2f GiB Written\\n\", $0}}' || true" "NVMe GBW": "sudo /usr/sbin/smartctl -x --json /dev/{this_device} | jq -r ' .nvme_smart_health_information_log.data_units_written as $dw | .logical_block_size as $ls | ($dw * $ls) / 1073741824 ' | awk '{{printf \"%.2f GiB Written\\n\", $0}}' || true"

46
files/api/shrink.py Normal file
View File

@ -0,0 +1,46 @@
def get_properties_keys(self, component = None):
    """Return property entries in "key" form via process_key_list.

    component: optional selector. When omitted (None) every entry of
        self._properties is used; otherwise the value returned by
        self.get_property(component) is passed through as the key items.
        NOTE(review): get_property is not visible here - it is assumed to
        return (name, values) pairs; confirm against its definition.

    Returns whatever process_key_list returns for key_name "Property",
    key_value "Value" and return_type "key".
    """
    # identity test is the correct way to detect the "no component" default
    if component is None:
        component_properties = self._properties.items()
    else:
        component_properties = self.get_property(component)
    # the original call was missing the comma between return_type and
    # key_value, which made the whole module fail to parse
    return self.process_key_list(
        key_items = component_properties,
        key_name = "Property",
        return_type = "key",
        key_value = "Value",
    )
def get_metrics_keys(self):
    """Return every entry of self._metrics in "key" form.

    Delegates to process_key_list with key_name "Metric", key_value "Data"
    and return_type "key", and returns its result unchanged.
    """
    metric_items = self._metrics.items()
    return self.process_key_list(
        key_items = metric_items,
        key_name = "Metric",
        return_type = "key",
        key_value = "Data",
    )
def get_properties_strings(self, return_simple = True):
    """Return every entry of self._properties in "string" form.

    return_simple: forwarded as-is to process_key_list, which decides how
        each entry is rendered.

    Delegates to process_key_list with key_name "Property" and
    return_type "string", and returns its result unchanged.
    """
    property_items = self._properties.items()
    return self.process_key_list(
        key_items = property_items,
        key_name = "Property",
        return_type = "string",
        return_simple = return_simple,
    )
def get_metrics_strings(self, return_simple = True):
    """Return every entry of self._metrics in "string" form.

    return_simple: forwarded as-is to process_key_list, which decides how
        each entry is rendered.

    Delegates to process_key_list with key_name "Metric" and
    return_type "string", and returns its result unchanged.
    """
    metric_items = self._metrics.items()
    return self.process_key_list(
        key_items = metric_items,
        key_name = "Metric",
        return_type = "string",
        return_simple = return_simple,
    )
def process_key_list(self, key_items, key_name: str, return_type: str, key_value: str = "none", return_simple: bool = True):
    """Flatten (name, values) pairs into a list of display entries.

    key_items: iterable of (name, values) pairs, e.g. dict.items(); a plain
        dict is also accepted. ``values`` may be a single value or a list
        of values - each list element produces its own entry.
    key_name: dictionary key under which the entry name (or the formatted
        string) is stored, e.g. "Property" or "Metric".
    return_type: "key" produces one dict per value holding the raw name and
        value; any other value (callers use "string") produces
        "name: value" text.
    key_value: dictionary key under which the raw value is stored; only
        used when return_type is "key".
    return_simple: only used for string output. True returns bare
        "name: value" strings, False wraps each string in a dict together
        with the "Source" name.

    Values equal to "", "null", None or [] are skipped, as are names listed
    in self.virt_ignore. Reads self.name for the "Source" field.
    """
    empty_value = ["", "null", None, []]
    # the original body tested undefined names (return_simple / return_keys);
    # key mode is now derived from the return_type argument callers pass
    return_keys = return_type == "key"
    if isinstance(key_items, dict):
        key_items = key_items.items()
    result = []
    for name, values in key_items:
        # the ignore check depends only on the name, so do it once per pair
        if name in self.virt_ignore:
            continue
        for value in (values if isinstance(values, list) else [values]):
            if value in empty_value:
                continue
            if return_keys:
                result.append({
                    "Source": self.name,
                    key_name: name,
                    key_value: value
                })
                continue
            this_key_string = f"{name}: {value}"
            if return_simple:
                result.append(this_key_string)
            else:
                result.append({
                    "Source": self.name,
                    key_name: this_key_string
                })
    return result

View File

@ -18,7 +18,7 @@ server_name localhost;
proxy_set_header Host $host; proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $sceme; proxy_set_header X-Forwarded-Proto $scheme;
} }
# --------------------------------------- # ---------------------------------------

View File

@ -1,12 +1,21 @@
--- ---
# get arch
- name: Cosmostat - Init - Check CPU Arch
shell: "dpkg --print-architecture"
register: cpu_architecture_output
- name: Cosmostat - Init - Set x64_arch variable
when: "'arm64' in cpu_architecture_output.stdout"
set_fact:
x64_arch: false
# package handler # package handler
- name: Cosmostat - Init - Get installed package list - name: Cosmostat - Init - Get installed package list
when: dpkg_output is undefined
shell: "dpkg --list | grep ii | awk '{print $2}'" shell: "dpkg --list | grep ii | awk '{print $2}'"
register: dpkg_output register: dpkg_output
- name: Cosmostat - Init - Install Prereq Packages - name: Cosmostat - Init - Install Prereq Packages
when: cosmostat_packages_item not in dpkg_output.stdout when: cosmostat_packages_item not in dpkg_output.stdout_lines
apt: apt:
name: name:
- "{{ cosmostat_packages_item }}" - "{{ cosmostat_packages_item }}"
@ -16,12 +25,25 @@
loop_var: cosmostat_packages_item loop_var: cosmostat_packages_item
# docker network for cosmostat service # docker network for cosmostat service
- name: Cosmostat - Init - Set Up docker network - name: Cosmostat - Init - Check for docker network
community.docker.docker_network: shell: "ip -o -4 ad | grep {{ docker_gateway }} | wc -l"
name: "cosmostat_net" register: docker_network_register
driver: bridge
ipam_config: - name: Cosmostat - Init - Run Network Handlers
- subnet: "{{ docker_subnet }}" when: docker_network_register.stdout | int == 0
block:
- name: Cosmostat - Init - Set Up docker network x64
when: x64_arch | bool
community.docker.docker_network:
name: "cosmostat_net"
driver: bridge
ipam_config:
- subnet: "{{ docker_subnet }}"
- name: Cosmostat - Init - Set Up docker network arm64
when: not x64_arch | bool
shell: "docker network create --driver bridge --subnet {{ docker_subnet }} cosmostat_net"
# allow service_user to sudo lshw without a password # allow service_user to sudo lshw without a password
- name: Cosmostat - Init - cosmos user sudoers file creation - name: Cosmostat - Init - cosmos user sudoers file creation

8
tasks/server.yaml Normal file
View File

@ -0,0 +1,8 @@
---
# this will be run to install the full cosmostat server dashboard
...

View File

@ -3,6 +3,11 @@
# This part sets up cosmostat web dashboard # This part sets up cosmostat web dashboard
############################################### ###############################################
- name: Cosmostat - Web - stop containers
when: not quick_refresh | bool
shell: "docker-compose -f {{ service_control_web_folder }}/docker-compose.yaml down"
ignore_errors: yes
# Create web Folder # Create web Folder
- name: "Cosmostat - Web - create {{ service_control_web_folder }}" - name: "Cosmostat - Web - create {{ service_control_web_folder }}"
file: file:
@ -31,6 +36,6 @@
register: docker_output register: docker_output
- debug: | - debug: |
msg="{{ docker_output.stdout_lines }}" msg="{{ docker_output.stdout_lines }}"
msg="{{ docker_output.stderr_lines }}" msg="{{ docker_output.stderr_lines }}"
... ...

View File

@ -21,7 +21,7 @@ ansible_hostname: "{{ ansible_hostname }}"
docker_subnet: "{{ docker_subnet }}" docker_subnet: "{{ docker_subnet }}"
docker_gateway: "{{ docker_gateway }}" docker_gateway: "{{ docker_gateway }}"
# python system variables # python system variables, no quotes for bool or int
secure_api: {{ secure_api }} secure_api: {{ secure_api }}
noisy_test: {{ noisy_test }} noisy_test: {{ noisy_test }}
debug_output: {{ debug_output }} debug_output: {{ debug_output }}
@ -30,4 +30,7 @@ run_background : {{ run_background }}
log_output: {{ log_output }} log_output: {{ log_output }}
update_frequency: {{ update_frequency }} update_frequency: {{ update_frequency }}
custom_api_port: {{ custom_api_port }} custom_api_port: {{ custom_api_port }}
cosmostat_server: {{ cosmostat_server }}
cosmostat_server_api: "{{ cosmostat_server_api }}"
cosmostat_server_reporter: {{ cosmostat_server_reporter }}
... ...