From 324eaff13548882a9fb3be2a03c44b1d0f454fe9 Mon Sep 17 00:00:00 2001 From: Matt Date: Sat, 21 Mar 2026 21:20:00 -0700 Subject: [PATCH] first cosmoserver commit --- defaults/main.yaml | 8 +- files/api/Components.py | 325 +++++++++++--------- files/api/Cosmos_Settings.py | 85 ++++++ files/api/Cosmostat.py | 75 +++++ files/api/app.py | 437 +++++++++++++++++---------- files/api/component_descriptors.json | 98 ------ files/api/descriptors.json | 196 ++++++++++++ files/api/new_descriptors.json | 112 ++++++- files/api/shrink.py | 46 +++ files/web/proxy/nginx.conf | 2 +- tasks/init.yaml | 38 ++- tasks/server.yaml | 8 + tasks/web.yaml | 7 +- templates/cosmostat_settings.yaml | 5 +- 14 files changed, 1019 insertions(+), 423 deletions(-) create mode 100644 files/api/Cosmos_Settings.py create mode 100644 files/api/Cosmostat.py delete mode 100644 files/api/component_descriptors.json create mode 100644 files/api/descriptors.json create mode 100644 files/api/shrink.py create mode 100644 tasks/server.yaml diff --git a/defaults/main.yaml b/defaults/main.yaml index 48fd72b..56114ad 100644 --- a/defaults/main.yaml +++ b/defaults/main.yaml @@ -2,7 +2,7 @@ # required system packages cosmostat_packages: - - docker + - "{{ 'docker' if x64_arch else 'wmdocker' }}" - docker.io - docker-compose - python3 @@ -55,8 +55,9 @@ service_control_web_folder: "{{ service_folder }}/web" public_dashboard: true custom_port: "80" -# will skip init when true +# other vars quick_refresh: false +x64_arch: true # cosmostat_settings noisy_test: false @@ -66,4 +67,7 @@ push_redis: true run_background : true log_output: true update_frequency: "1" +cosmostat_server: false +cosmostat_server_api: "http://10.200.27.20/" +cosmostat_server_reporter: false ... 
\ No newline at end of file diff --git a/files/api/Components.py b/files/api/Components.py index b0793ef..d6eb14e 100644 --- a/files/api/Components.py +++ b/files/api/Components.py @@ -2,80 +2,89 @@ import subprocess import json import time +import weakref +import base64, hashlib from typing import Dict, Any, List +from Cosmos_Settings import * # Global Class Vars global_max_length = 500 -debug_output = False + +null_result = [ + "", + "null", + None, + [], + "Unknown", + "To Be Filled By O.E.M." + ] # import the component descriptor try: - with open("component_descriptors.json", encoding="utf-8") as f: + with open("descriptors.json", encoding="utf-8") as f: component_class_tree: List[Dict] = json.load(f) except FileNotFoundError as exc: raise RuntimeError("Descriptor file not found") from exc -component_types = [{"name": entry["name"], "multi_check": entry["multi_check"] == "True"} for entry in component_class_tree] - +component_types = [] +for entry in component_class_tree: + if entry["name"] != "System": + component_types.append({"name": entry["name"], "multi_check": entry["multi_check"] == "True"}) + +################################################################# ################################################################# # Component Class ################################################################# +################################################################# class Component: ############################################################ # instantiate new component + # this_device is set when the component has multiple instances ############################################################ - def __init__(self, name: str, comp_type: str, this_device="None", is_virtual = "True"): + + def __init__(self, name: str, comp_type: str, parent_system, this_device=None): + # begin init self.name = name self.type = comp_type + self.parent_system = weakref.ref(parent_system) + # this variable is set when the device can have multiples # it indicates that the 
commands in the descriptor might need templating self.this_device = this_device - self.is_virtual = is_virtual - print(f"This device - {self.this_device}") + self.is_virtual = parent_system.is_virtual() + self.cpu_arch = parent_system.get_system_arch() + if self.this_device is None: + log_data(log_output = f"This device - {self.name}", log_level = "log_output") + else: + log_data(log_output = f"This device - {self.this_device}", log_level = "log_output") + # build the component descriptor dictionary - for component in component_class_tree: - if component["name"] == self.type: - COMPONENT_DESCRIPTORS = component - descriptor = COMPONENT_DESCRIPTORS - self._descriptor = descriptor - if descriptor is None: - raise ValueError( - f"Component type '{comp_type}' is not defined in the " - f"component descriptor tree." - ) + self._descriptor = self._parse_descriptor() + # store static properties self.multi_check = self.is_multi() self.virt_ignore = self._descriptor.get('virt_ignore', []) self.multi_metrics = self._descriptor.get('multi_metrics', []) - #if 'precheck' in self._descriptor: - # precheck_command = self._descriptor.get('precheck', []) - # precheck_value = int(run_command(precheck_command, zero_only = True)) - # if precheck_value == 0: - # raise ValueError(f"No devices of type {self.type}") + self.arch_check = self._descriptor.get('arch_check', []) if self.is_virtual: self.virt_ignore = [] + + # initialize properties self._properties: Dict[str, str | list[str]] = {} - for key, command in descriptor.get('properties', {}).items(): - return_string = True - if key in self.multi_metrics: - return_string = False - if self.this_device != "None": - # this means this component type is a multi and the commands need templating for each device - formatted_command = command.format(this_device=self.this_device) - self._properties[key] = run_command(formatted_command, zero_only = return_string) - else: - self._properties[key] = run_command(command, zero_only = return_string) - 
print(self._properties[key]) - # build the description string - self._description_template: str | None = descriptor.get("description") + self._process_properties() + + # build the description string, requires the properties first + self._description_template: str | None = self._descriptor.get("description") self.description = self._description_template.format(**self._properties) + # initialize metrics self._metrics: Dict[str, str] = {} self.update_metrics() - + + def __str__(self): self_string = (f"Component name: {self.name}, type: {self.type} - " f"{self.description}") @@ -92,13 +101,21 @@ class Component: def update_metrics(self): for key, command in self._descriptor.get('metrics', {}).items(): - if self.this_device != "None": - formatted_command = command.format(this_device=self.this_device) - this_metric = run_command(formatted_command, True) - if this_metric is not None: - self._metrics[key] = this_metric - else: - self._metrics[key] = run_command(command, zero_only = True) + log_data(log_output = f"Key: {key} - Command: {command}", log_level = "noisy_test") + formatted_command = command + if self.arch_check is not None: + arch_variance = self._descriptor.get('arch_variance', {}) + if key in arch_variance: + if self.cpu_arch in formatted_command: + formatted_command = command[self.cpu_arch] + else: + formatted_command = f"echo Missing {self.cpu_arch} command" + if self.this_device is not None: + formatted_command = formatted_command.format(this_device=self.this_device) + if formatted_command is not None: + result = run_command(formatted_command, zero_only = True) + if result not in null_result: + self._metrics[key] = result def get_property(self, type = None): if type == None: @@ -112,8 +129,52 @@ class Component: return component_type["multi_check"] return False + # return descriptor for this device type + def _parse_descriptor(self): + for component in component_class_tree: + if component["name"] == self.type: + COMPONENT_DESCRIPTORS = component + 
descriptor = COMPONENT_DESCRIPTORS + if descriptor is None: + raise ValueError( + f"Component type '{comp_type}' is not defined in the " + f"component descriptor tree." + ) + return descriptor + + # iterate over all properties to process descriptor + def _process_properties(self): + for key, command in self._descriptor.get('properties', {}).items(): + return_string = True + if key in self.multi_metrics: + return_string = False + formatted_command = self._parse_command(key, command, return_string) + log_data(log_output = f"Property {key} - command: {formatted_command}", log_level = "debug_output") + result = run_command(formatted_command, zero_only = return_string) + if result not in null_result: + self._properties[key] = result + + # helper function to parse command key + def _parse_command(self, key: str, command: str | list[str], return_string = True): + result_command = command + log_data(log_output = f"_parse_command - {command}", log_level = "debug_output") + if self.arch_check: + # since the keys are stored with the arch variable this can be concise + arch_variance = self._descriptor.get('arch_variance', {}) + if key in arch_variance: + if self.cpu_arch in result_command: + log_data(log_output = f"arch_variance - {key} - {result_command}", log_level = "debug_output") + result_command = result_command[self.cpu_arch] + else: + result_command = f"echo Missing {self.cpu_arch} command" + if self.this_device is not None: + # template the key if the component type can have multiples + result_command = command.format(this_device=self.this_device) + log_data(log_output = f"result - {result_command}", log_level = "debug_output") + return result_command + ######################################################## - # redis data functions + # keyed data functions ######################################################## def get_properties_keys(self, component = None): @@ -230,9 +291,10 @@ class Component: } return result 
+############################################################ ############################################################ # System Class -# this is a big one... +############################################################ ############################################################ class System: @@ -241,25 +303,18 @@ class System: # system variable declarations ######################################################## - static_key_variables = [ - {"name": "Hostname", "command": "hostname"}, - {"name": "Virtual Machine", "command": 'echo $( [ "$(systemd-detect-virt)" = none ] && echo False || echo True )', "req_check": "False"}, - {"name": "CPU Architecture:", "command": "lscpu --json | jq -r '.lscpu[] | select(.field==\"Architecture:\") | .data'"}, - {"name": "OS Kernel", "command": "uname -r"}, - {"name": "OS Name", "command": "cat /etc/os-release | grep PRETTY | cut -d\\\" -f2"}, - {"name": "Manufacturer", "command": "sudo dmidecode --type 1 | grep Manufacturer: | cut -d: -f2 | sed -e 's/^[ \\t]*//'"}, - {"name": "Product Name", "command": "sudo dmidecode --type 2 | grep 'Product Name:' | cut -d: -f2 | sed -e 's/^[ \\t]*//'"}, - {"name": "Serial Number", "command": "sudo dmidecode --type 2 | grep 'Serial Number: '| cut -d: -f2 | sed -e 's/^[ \\t]*//'"}, - ] - dynamic_key_variables = [ - {"name": "System Uptime", "command": "uptime -p"}, - {"name": "Current Date", "command": "date '+%D %r'"}, - ] - - virt_ignore = [ - "Product Name", - "Serial Number" - ] + for component in component_class_tree: + if component["name"] == "System": + SYSTEM_DESCRIPTOR = component + descriptor = SYSTEM_DESCRIPTOR + if descriptor is None: + raise ValueError( + f"Component type 'System' is not defined in the " + f"component descriptor tree." 
+ ) + static_key_variables = descriptor["static_key_variables"] + dynamic_key_variables = descriptor["dynamic_key_variables"] + virt_ignore = descriptor["virt_ignore"] ######################################################## # instantiate new system @@ -268,8 +323,9 @@ class System: def __init__(self, name: str): # the system needs a name self.name = name - if debug_output: - print(f"System initializing, name {self.name}") + log_data(log_output = f"System initializing, name {self.name}", log_level = "debug_output") + self.uuid = run_command(cmd = "cat /etc/machine-id", zero_only = True) + self.short_id = self.short_uuid(self.uuid) # system contains an array of component objects self.components = [] self.component_class_tree = component_class_tree @@ -277,6 +333,7 @@ class System: self._properties: Dict[str, str] = {} self._metrics: Dict[str, str] = {} self._virt_string = run_command('systemd-detect-virt', zero_only = True, req_check = False) + self._virt_ignore = self.virt_ignore if self._virt_string == "none": self._virt_ignore = [] @@ -285,36 +342,56 @@ class System: # load static keys for static_key in self.static_key_variables: if static_key["name"] not in self._virt_ignore: - command = static_key["command"] - if "req_check" in static_key: - result = run_command(command, zero_only = True, req_check = static_key["req_check"]) - else: - result = run_command(command, zero_only = True) - if debug_output: - print(f'Static key [{static_key["name"]}] - command [{command}] - output [{result}]') - self._properties[static_key["name"]] = result + self.process_property(static_key = static_key) # initialize live keys self.update_live_keys() # initialze components - self.load_components() + for component in component_types: + self.create_component(component) def __str__(self): components_str = "\n".join(f" - {c}" for c in self.components) return f"System hostname: {self.name}\nComponent Count: {self.get_component_count()}\n{components_str}" + + def __repr__(self): + 
self_string = f"Cosmostat Client {self.short_id}" + + def short_uuid(self, value: str, length=8): + hasher = hashlib.md5() + hasher.update(value.encode('utf-8')) + full_hex = hasher.hexdigest() + return full_hex[:length] ######################################################## # critical class functions ######################################################## + # process static keys + def process_property(self, static_key): + command = static_key["command"] + if "arch_check" in static_key: + arch_string = run_command("lscpu --json | jq -r '.lscpu[] | select(.field==\"Architecture:\") | .data'", zero_only = True) + if arch_string in command: + command = command[arch_string] + else: + command = f"echo Missing {arch_string} command" + if "req_check" in static_key: + result = run_command(command, zero_only = True, req_check = static_key["req_check"]) + else: + result = run_command(command, zero_only = True) + log_data(log_output = f'Static key [{static_key["name"]}] - command [{command}] - output [{result}]', log_level = "debug_output") + if result not in null_result: + self._properties[static_key["name"]] = result + # update only system dynamic keys def update_live_keys(self): for live_key in self.dynamic_key_variables: if live_key['command'] is not None: command = live_key['command'] result = run_command(command, zero_only = True) - self._metrics[live_key['name']] = result - if debug_output: - print(f'Command {live_key["name"]} - [{command}] Result - [{result}]') + if result not in null_result: + self._metrics[live_key['name']] = result + log_data(log_output = f'Command {live_key["name"]} - [{command}] Result - [{result}]', log_level = "noisy_test") # update all dynamic keys, including components def update_system_state(self): @@ -322,33 +399,25 @@ class System: for component in self.components: component.update_metrics() - # check for components - def load_components(self): - for component in component_types: - component_name = component["name"] - multi_check = 
component["multi_check"] - # if multi, note that the command in device_list creates the list of things to pipe into this_device - if multi_check: - print(f"Creating one component of type {component_name} for each one found") - component_type_device_list = get_device_list(component_name) - component_id = 0 - for this_device in component_type_device_list: - this_component_ID = component_type_device_list.index(this_device) - this_component_name = f"{component_name} {this_component_ID}" - print(f"{this_component_name} - {component_name} - {this_device}") - new_component = Component(name = this_component_name, comp_type = component_name, this_device = this_device) - self.add_components(new_component) - - else: - if debug_output: - print(f'Creating component {component["name"]}') - self.add_components(Component(name = component_name, comp_type = component_name, is_virtual = self.is_virtual())) - - # Add a component to the system - def add_components(self, component: Component,): - if debug_output: - print(f"Component description: {component.description}") - self.components.append(component) + # component creation helper + def create_component(self, component): + component_name = component["name"] + multi_check = component["multi_check"] + # if multi, note that the command in device_list creates the list of things to pipe into this_device + if multi_check: + log_data(log_output = f"Creating one component of type {component_name} for each one found", log_level = "log_output") + component_type_device_list = get_device_list(component_name) + component_id = 0 + for this_device in component_type_device_list: + this_component_ID = component_type_device_list.index(this_device) + this_component_name = f"{component_name} {this_component_ID}" + log_data(log_output = f"{this_component_name} - {component_name} - {this_device}", log_level = "debug_output") + new_component = Component(name = this_component_name, comp_type = component_name, this_device = this_device, parent_system = 
self) + self.components.append(new_component) + else: + log_data(log_output = f'Creating component {component["name"]}', log_level = "debug_output") + new_component = Component(name = component_name, comp_type = component_name, parent_system = self) + self.components.append(new_component) ######################################################## # helper class functions @@ -380,7 +449,7 @@ class System: def is_virtual(self): vm_check = self.get_property('Virtual Machine') - print(f'vm_check: {vm_check}') + log_data(log_output = f'vm_check: {vm_check}', log_level = "debug_output") return vm_check def check_system_timer(self): @@ -390,6 +459,9 @@ class System: def get_component_class_tree(self): return self.component_class_tree + def get_system_arch(self): + return self.get_property("CPU Architecture") + ######################################################## # static metrics redis data functions ######################################################## @@ -493,43 +565,6 @@ class System: "Metric": f"component_count: {self.get_component_count()}" }) return result - - # straggler functions, might cut them - - # return both static and dynamic data - def get_sysvars_summary_keys(self): - result = [] - for name, value in self._properties.items(): - thisvar = { - "name": "System Class Property", - "type": name, - "value": value - } - result.append(thisvar) - for name, value in self._metrics.items(): - thisvar = { - "name": "System Class Metric", - "type": name, - "value": value - } - result.append(thisvar) - return result - - def get_component_strings(self, component_type: type = None): - if component_type is None: - result = [] - for component in self.components: - result.append(component.description) - return result - else: - result = [] - for component in self.components: - if component.type == component_type: - result.append(component.description) - if component.is_multi(): - return result - else: - return result[0] 
############################################################ # Non-class Helper Functions @@ -557,10 +592,12 @@ def get_device_list(device_type_name: str): precheck_command = component["precheck"] precheck_value_output = run_command(precheck_command, zero_only = True) precheck_value = int(precheck_value_output) - print(f"Precheck found - {precheck_command} - {precheck_value}") + log_data(log_output = f"Precheck found - {precheck_command} - {precheck_value}", log_level = "log_output") if component["name"] == device_type_name and precheck_value != 0: device_list_command = component["device_list"] device_list_result = run_command(device_list_command) result = device_list_result return result + + diff --git a/files/api/Cosmos_Settings.py b/files/api/Cosmos_Settings.py new file mode 100644 index 0000000..785c50c --- /dev/null +++ b/files/api/Cosmos_Settings.py @@ -0,0 +1,85 @@ +import yaml +####################################################################### +### Settings Handler Functions +####################################################################### + +# default application setting variables +app_settings = { + "noisy_test" : False, + "debug_output" : True, + "log_output" : True, + "secure_api" : True, + "push_redis" : False, + "run_background" : True, + "cosmostat_server": False, + "cosmostat_server_reporter": False, + "update_frequency": 1, + "custom_api_port": "5000", + "cosmostat_server_api": "http://10.200.27.20:5000/" + } + +with open('cosmostat_settings.yaml', 'r') as f: + print("Loading cosmostat_settings file") + cosmostat_settings = yaml.safe_load(f) + print("...Done") + # initialize system variables from settings file + print("Checking for system var overrides") + for setting in app_settings: + if setting in cosmostat_settings: + cosmos_setting = cosmostat_settings[setting] + if app_settings["debug_output"]: + print(f"{setting}: {cosmos_setting}") + app_settings[setting] = cosmos_setting + print("...Done") + +# this returns the docker gateway 
from the settings +def docker_gateway_settings() -> str: + return cosmostat_settings["docker_gateway"] + +# this returns the jenkins user that ran the pipeline +def jenkins_user_settings() -> str: + return cosmostat_settings["jenkins_user"] + +# this returns the ansible_hostname from setup +def jenkins_hostname_settings() -> str: + return cosmostat_settings["ansible_hostname"] + +# this returns the inventory_generation_timestamp +def jenkins_inventory_generation_timestamp_settings() -> str: + return cosmostat_settings["inventory_generation_timestamp"] + +def run_cosmostat_server(): + return cosmostat_settings["cosmostat_server"] + +def service_gateway_ip(): + if cosmostat_settings["secure_api"]: + return docker_gateway_settings() + else: + return "0.0.0.0" + +def cosmostat_server_api(): + return cosmostat_settings["cosmostat_server_api"] + +def service_api_port(): + return cosmostat_settings["custom_api_port"] + +def log_data(log_output:str, log_level = cosmostat_settings["noisy_test"]): + log_levels = [ + "noisy_test", + "debug_output", + "log_output", + ] + + if log_level in log_levels: + if cosmostat_settings[log_level]: + print(log_output) + else: + print(f"Warning - {log_level} not valid log level") + + + + + #for level_check in log_levels: + # if log_level == level_check: + # print(f"log_level: {log_level} - level_check - {level_check}") + # print(log_output) \ No newline at end of file diff --git a/files/api/Cosmostat.py b/files/api/Cosmostat.py new file mode 100644 index 0000000..5b7771a --- /dev/null +++ b/files/api/Cosmostat.py @@ -0,0 +1,75 @@ +# This will be a class definitation for the cosmostat server +# On the server, there will be a Cosmostat Class Object +# This will have an array of System Class Objects +# These will be created based on API input from remote systems +# The remote systems will submit a json of their state to a private API +# this will define the System Class + +import subprocess +import json +import time +import weakref +import 
base64, hashlib +from typing import Dict, Any, List +from Cosmos_Settings import * + + + +################################################################# +################################################################# +# Cosmostat Class +################################################################# +################################################################# + +class Cosmostat: + + ############################################################ + # instantiate new Cosmostat server + ############################################################ + + def __init__(self, name: str): + # the system needs a name, should be equal to the uuid of the client + self.name = name + self.short_id = self.short_uuid(self.name) + log_data(log_output = f"Cosmostat Server {self.short_id} initializing", log_level = "log_output") + # system contains an array of keys with component objects + self.systems = [] + + def __str__(self): + self_string = f"Cosmostat Server {self.short_id}" + return self_string + + def __repr__(self): + self_string = f"Cosmostat Server {self.short_id}" + + def add_system(self, system_dictionary: dict): + new_system_key = { + "data_timestamp": time.time(), + "uuid": system_dictionary["uuid"], + "short_id": system_dictionary["short_id"], + "client_properties": system_dictionary["client_properties"], + "redis_data": {} + } + log_data(log_output = f"Client system {system_dictionary["short_id"]} added", log_level = "log_output") + self.systems.append(new_system_key) + + def update_system(self, system_state: {}, system_uuid: str): + this_system = self.get_system(system_uuid) + this_system["redis_data"] = system_state + this_system["data_timestamp"] = time.time() + log_data(log_output = f"Client system {this_system["short_id"]} addupdateded", log_level = "log_output") + return this_system["data_timestamp"] + + + def get_system(self, system_uuid: str) -> dict: + result = {} + for system in self.systems: + if system["uuid"] == system_uuid: + return system + 
return result + + def short_uuid(self, value: str, length=8): + hasher = hashlib.md5() + hasher.update(value.encode('utf-8')) + full_hex = hasher.hexdigest() + return full_hex[:length] diff --git a/files/api/app.py b/files/api/app.py index 6084d56..d10bb9d 100644 --- a/files/api/app.py +++ b/files/api/app.py @@ -3,67 +3,14 @@ from flask_apscheduler import APScheduler from typing import Dict, Union import json, time, redis, yaml +import base64, hashlib from Components import * +from Cosmos_Settings import * # declare flask apps app = Flask(__name__) scheduler = APScheduler() -####################################################################### -### Settings Handler Functions -####################################################################### - -# default application setting variables -app_settings = { - "noisy_test" : False, - "debug_output" : True, - "log_output" : True, - "secure_api" : True, - "push_redis" : False, - "run_background" : True, - "update_frequency": 1, - "custom_api_port": "5000" - } - -with open('cosmostat_settings.yaml', 'r') as f: - print("Loading cosmostat_settings file") - cosmostat_settings = yaml.safe_load(f) - print("...Done") - # initialize system variables from settings file - print("Checking for system var overrides") - for setting in app_settings: - if setting in cosmostat_settings: - cosmos_setting = cosmostat_settings[setting] - if app_settings["debug_output"]: - print(f"{setting}: {cosmos_setting}") - app_settings[setting] = cosmos_setting - print("...Done") - -# this returns the docker gateway from the settings -def docker_gateway_settings() -> str: - return cosmostat_settings["docker_gateway"] - -# this returns the jenkins user that ran the pipeline -def jenkins_user_settings() -> str: - return cosmostat_settings["jenkins_user"] - -# this returns the ansible_hostname from setup -def jenkins_hostname_settings() -> str: - return cosmostat_settings["ansible_hostname"] - -# this returns the inventory_generation_timestamp -def 
jenkins_inventory_generation_timestamp_settings() -> str: - return cosmostat_settings["inventory_generation_timestamp"] - -def service_gateway_ip(): - if cosmostat_settings["secure_api"]: - return docker_gateway_settings() - else: - return "0.0.0.0" - -def service_api_port(): - return cosmostat_settings["custom_api_port"] - ####################################################################### ### Redis Functions ####################################################################### @@ -74,19 +21,42 @@ r = redis.Redis(host=service_gateway_ip(), port=6379) def update_redis_channel(redis_channel, data): # Publish to the specified Redis channel r.publish(redis_channel, json.dumps(data)) - if app_settings["noisy_test"]: - print(f"{redis_channel} Redis Update") - print(data) + log_data(log_output = data, log_level = "noisy_test") def update_redis_server(): - # Update Stats Redis Channel - if cosmostat_system.check_system_timer(): - update_redis_channel("host_metrics", get_redis_data(human_readable = False)) + # Client Redis Tree + if not run_cosmostat_server(): + if cosmostat_client.check_system_timer(): + update_redis_channel("host_metrics", get_client_redis_data(human_readable = False)) + + if run_cosmostat_server(): + update_redis_channel("client_summary", get_server_redis_data()) + + # Server Redis Tree # Update history_stats Redis Channel # update_redis_channel("history_stats", get_component_list()) +def get_client_redis_data(human_readable = False): + result = [] + for metric in get_dynamic_data(human_readable): + result.append(metric) + #for metric in get_static_data(human_readable): + # result.append(metric) + return result + +def get_server_redis_data(): + result = [] + for client in cosmostat_server.systems: + this_client_key = { + "uuid": client["uuid"], + "short_id": client["short_id"], + "redis_data": client["redis_data"] + } + result.append(this_client_key) + return result + ####################################################################### -### 
Flask Routes +### Client Flask Routes ####################################################################### # dynamic data @@ -103,44 +73,33 @@ def static_data(): # redis data @app.route('/redis_data', methods=['GET']) def redis_data(): - return jsonify(get_redis_data(human_readable = False)) + return jsonify(get_client_redis_data(human_readable = False)) # redis strings @app.route('/redis_strings', methods=['GET']) def redis_strings(): - return jsonify(get_redis_data(human_readable = True)) - -# full summary -@app.route('/full_summary', methods=['GET']) -def full_summary(): - return jsonify(get_full_summary()) + return jsonify(get_client_redis_data(human_readable = True)) # php summary @app.route('/php_summary', methods=['GET']) def php_summary(): return jsonify(get_php_summary()) -# system info -@app.route('/info', methods=['GET']) -def info(): - return jsonify(get_info()) - -# system info +# return full descriptor @app.route('/descriptor', methods=['GET']) def descriptor(): return jsonify(get_descriptor()) -# socket timer +# socket timer handler @app.route('/start_timer', methods=['GET']) def start_timer(): current_timestamp = int(time.time()) - cosmostat_system.recent_check = current_timestamp - if app_settings["noisy_test"]: - print(f"Timestamp updated to {cosmostat_system.recent_check}") + cosmostat_client.recent_check = current_timestamp + log_data(log_output = f"Timestamp updated to {cosmostat_client.recent_check}", log_level = "noisy_test") return jsonify( { "message": "websocket timer reset", - "new_timestamp": cosmostat_system.recent_check + "new_timestamp": cosmostat_client.recent_check } ) @@ -148,22 +107,22 @@ def start_timer(): @app.route('/timer_data', methods=['GET']) def timer_data(): time_now = time.time() - time_lapsed = time_now - float(cosmostat_system.recent_check) + time_lapsed = time_now - float(cosmostat_client.recent_check) result = { "Time Lapsed": time_lapsed, "Current Time Value": time_now, - "Last Update Value": 
float(cosmostat_system.recent_check), - "System Updating": cosmostat_system.check_system_timer() + "Last Update Value": float(cosmostat_client.recent_check), + "System Updating": cosmostat_client.check_system_timer() } return jsonify(result) # test route @app.route('/test', methods=['GET']) def test(): - this_cpu = cosmostat_system.get_components(component_type="CPU") + this_cpu = cosmostat_client.get_components(component_type="CPU") return jsonify( { - "component_count:": len(cosmostat_system.components), + "component_count:": len(cosmostat_client.components), "user": jenkins_user_settings(), "hostname": jenkins_hostname_settings(), "cpu_model": this_cpu[0].description @@ -171,66 +130,22 @@ def test(): ) ####################################################################### -### Flask Helpers +### Client Flask Helpers ####################################################################### # needs to return array of {name: name, type: type, metrics: metrics} # for redis table generation, includes system and component metrics def get_dynamic_data(human_readable = False): - return cosmostat_system.get_live_metrics(human_readable) + return cosmostat_client.get_live_metrics(human_readable) def get_static_data(human_readable = False): result = [] - return cosmostat_system.get_static_metrics(human_readable) - -# php is about to start rendering static data -def get_redis_data(human_readable = False): - result = [] - for metric in get_dynamic_data(human_readable): - result.append(metric) - #for metric in get_static_data(human_readable): - # result.append(metric) - return result - -def get_full_summary(): - live_metrics = cosmostat_system.get_live_metrics() - system_components = cosmostat_system.get_component_strings() - system_info = get_info() - result = { - "system_settings": - { - "user": jenkins_user_settings(), - "hostname": jenkins_hostname_settings(), - "timestamp": jenkins_inventory_generation_timestamp_settings(), - "component_count:": 
len(cosmostat_system.components), - "object_name": cosmostat_system.name, - "docker_gateway": docker_gateway_settings() - }, - "live_metrics": live_metrics, - "system_components": system_components, - "system_info": system_info - } - return result - -def get_info(): - component_strings = [] - for component in cosmostat_system.get_components(): - component_strings.append({"name": component.name, "description": component.description}) - - result = { - "hostname": jenkins_hostname_settings(), - "component_strings": component_strings, - "system_strings": cosmostat_system.get_sysvars_summary_keys() - } - #for component_string in component_strings: - # for name, description in component_string.items(): - # result[name] = description - return result + return cosmostat_client.get_static_metrics(human_readable) def get_php_summary(): - system_properties = cosmostat_system.get_system_properties(human_readable = True) + system_properties = cosmostat_client.get_system_properties(human_readable = True) system_components = [] - for component in cosmostat_system.get_components(): + for component in cosmostat_client.get_components(): this_component = { "component_name": component.name, "info_strings": component.get_properties_strings(return_simple = True) @@ -245,53 +160,244 @@ def get_php_summary(): return result def get_descriptor(): - return cosmostat_system.get_component_class_tree() + return cosmostat_client.get_component_class_tree() + +def generate_state_definition(): + result = { + "uuid": cosmostat_client.uuid, + "state_definition": get_php_summary() + } + return result + ####################################################################### -### Other Functions +### Server Flask Routes ####################################################################### -# instantiate and return the System object -def new_cosmos_system(): - new_system = System(f"{jenkins_hostname_settings()}") - if app_settings["log_output"]: - print(f"New system object name: {new_system.name} - 
{new_system.get_component_count()} components:") - for component in new_system.components: - print(component.description) - return new_system +# update client on server +@app.route('/update_client', methods=['GET']) +def update_client(): + result = {} + # check the request and return payload if all good + payload = client_submit_check(request = request, dict_name = "redis_data") + this_client = cosmostat_server.get_system(uuid = payload["uuid"]) + result = run_update_client(this_client) + return jsonify(result), 200 + +# create client on server +@app.route('/create_client', methods=['GET']) +def create_client(): + result = {} + # check the request and return payload if all good + payload = client_submit_check(request = request, dict_name = "client_properties") + this_client = cosmostat_server.get_system(uuid = payload["uuid"]) + result = run_create_client(this_client) + return jsonify(result), 200 + +# api to validate Cosmostat Class +@app.route('/client_summary', methods=['GET']) +def client_summary(): + client_summary = get_client_summary() + return jsonify() + +####################################################################### +### Server Flask Helpers +####################################################################### + +# update client on server +def run_update_client(this_client): + if this_client == {}: + return { "message": "client not found" } + update_status = f"updated client {this_client.short_id}" + timestamp_update = cosmostat_server.update_system(system_state = payload, system_uuid = payload["uuid"]) + return { + "status": update_status, + "uuid": payload["uuid"], + "timestamp": timestamp_update + } + +# create client on server +def run_create_client(this_client): + update_status = f"created client {this_client.short_id}" + timestamp_update = cosmostat_server.create_system(system_state = payload, system_uuid = payload["uuid"]) + return { + "status": update_status, + "uuid": payload["uuid"], + "timestamp": timestamp_update + } + +# flask 
submission check function +def client_submit_check(request, dict_name: str): + required_keys = {"uuid", "short_id", "data_timestamp", dict_name} + if not request.is_json: + logging.warning("Received non-JSON request") + return jsonify({"error": "Content-type must be application/json"}), 400 + payload = request.get_json(silent=True) + if payload is None: + logging.warning("Malformed JSON body") + return jsonify({"error": "Malformed JSON"}), 400 + missing = required_keys - payload.keys() + if missing: + raise ValueError(f"Missing required keys: {', '.join(sorted(missing))}") + + return payload + +# generate cosmostat server summary +def get_client_summary(): + result = [] + for client in cosmostat_server.systems: + this_client_properties = client.get_system_properties(human_readable = True) + this_client_components = [] + for component in client.get_components(): + this_component = { + "component_name": component.name, + "info_strings": component.get_properties_strings(return_simple = True) + } + this_client_components.append(this_component) + this_client = { + "client_properties": this_client_properties, + "client_components": this_client_components + } + result.append(this_client) + return result + +####################################################################### +### Cosmostat Client Subroutines +####################################################################### + +# Cosmostat Client Reporter +def client_update(this_client: dict, api_endpoint = "update_client"): + # set variables for API call + this_uuid = cosmostat_client.uuid + this_short_id = cosmostat_client.short_id + this_timestamp = time.time() + api_url = f"{cosmostat_server_api()}{api_endpoint}" + # generate payload + payload = { + "uuid": this_uuid, + "short_id": this_short_id, + "data_timestamp": this_timestamp, # Unix epoch float + "redis_data": get_client_redis_data(human_readable = False), + } + # execute API call + result = client_submission_handler() + if ( + isinstance(result, dict) + 
and result.get("message", "").lower() == "client not found" + ): + # if client not found, create client + if api_endpoint == "update_client": + client_initialize() + raise RuntimeError("Client not found - initializing") + return result + +# Cosmostat Client Initializer +def client_initialize(): + # set variables for API call + this_uuid = cosmostat_client.uuid + this_short_id = cosmostat_client.short_id + this_timestamp = time.time() + api_url = f"{cosmostat_server_api()}create_client" + # generate payload + payload = { + "uuid": this_uuid, + "short_id": this_short_id, + "data_timestamp": this_timestamp, # Unix epoch float + "client_properties": get_php_summary(), + } + # execute API call + result = client_submission_handler() + return result + +# Cosmostat Client API Reporting Handler +def client_submission_handler(): + result = None + try: + # `json=` automatically sets Content-Type to application/json + response: Response = requests.post(api_url, json=payload, timeout=timeout) + response.raise_for_status() # raise HTTPError for 4xx/5xx + except RequestException as exc: + # Wrap the low-level exception in a more descriptive one + raise RuntimeError( + f"Failed to POST to {url!r}: {exc}" + ) from exc + # process reply from API + try: + result = response.json() + except ValueError as exc: + raise RuntimeError( + f"Server responded with non-JSON payload: {response.text!r}" + ) from exc + return result -# Background Loop Function -def background_loop(): - # Update all data on the System object - if cosmostat_system.check_system_timer(): - cosmostat_system.update_system_state() - - if app_settings["push_redis"]: - update_redis_server() - - if app_settings["noisy_test"]: - print("Sorry about the mess...") - print(f"Blame {jenkins_user_settings()}") - +####################################################################### ####################################################################### ### Main Subroutine 
####################################################################### +####################################################################### if __name__ == '__main__': - # instantiate system - cosmostat_system = new_cosmos_system() + ###################################### + ### Main Functions + ###################################### + # instantiate and return the Client System object + def new_cosmos_client(): + new_client = System(f"{jenkins_hostname_settings()}") + log_data(log_output = f"New System object name: {new_client.name} - {new_client.get_component_count()} components:", log_level = "log_output") + for component in new_client.components: + log_data(log_output = component.description, log_level = "log_output") + return new_client + + # instantiate and return the Cosmoserver System object + def new_cosmos_server(): + new_server = Cosmoserver(cosmostat_client.uuid) + log_data(log_output = f"New Cosmostat object name: {new_server.name}", log_level = "log_output") + return new_server + + # Background Loop Function + def background_loop(): + # Update all data on the System object + if cosmostat_client.check_system_timer(): + cosmostat_client.update_system_state() + + if app_settings["push_redis"]: + update_redis_server() + + if app_settings["cosmostat_server_reporter"]: + client_update() + + ###################################### + # instantiate client + ###################################### + cosmostat_client = new_cosmos_client() + if app_settings["cosmostat_server_reporter"]: + client_initialize() + + ###################################### + # instantiate server + ###################################### + + cosmostat_server = None + if run_cosmostat_server(): + cosmostat_server = new_cosmos_server() + + ###################################### # send initial stats update to redis + ###################################### + if app_settings["push_redis"]: update_redis_server() + ###################################### # Flask scheduler for scanner + 
###################################### + if app_settings["run_background"]: - if app_settings["log_output"]: - print("Loading flask background subroutine...") + log_data(log_output = "Loading flask background subroutine...", log_level = "log_output") scheduler.add_job(id='background_loop', func=background_loop, @@ -300,13 +406,14 @@ if __name__ == '__main__': scheduler.init_app(app) scheduler.start() - if app_settings["log_output"]: - print("...Done") + log_data(log_output = "...Done", log_level = "log_output") else: - if app_settings["log_output"]: - print("Skipping flask background task") + log_data(log_output = "Skipping flask background task", log_level = "log_output") + ###################################### # Flask API + ###################################### + app.run(debug=False, host=service_gateway_ip(), port=service_api_port()) diff --git a/files/api/component_descriptors.json b/files/api/component_descriptors.json deleted file mode 100644 index 269a099..0000000 --- a/files/api/component_descriptors.json +++ /dev/null @@ -1,98 +0,0 @@ -[ - { - "name": "CPU", - "description": "{CPU Model} with {Core Count} cores.", - "multi_check": "False", - "properties": { - "Core Count": "lscpu --json | jq -r '.lscpu[] | select(.field==\"CPU(s):\") | .data'", - "CPU Model": "lscpu --json | jq -r '.lscpu[] | select(.field==\"Model name:\") | .data'", - "Clock Speed": "sudo dmesg | grep MHz | grep tsc | cut -d: -f2 | awk '{print $2 \" \" $3}'" - }, - "metrics": { - "1m_load": "cat /proc/loadavg | awk '{print $1}'", - "5m_load": "cat /proc/loadavg | awk '{print $2}'", - "15m_load": "cat /proc/loadavg | awk '{print $3}'", - "current_mhz": "less /proc/cpuinfo | grep MHz | cut -d: -f2 | awk '{sum += $1} END {print sum/NR}'" - } - }, - { - "name": "RAM", - "description": "Total {Total GB}GB in {RAM Module Count} modules.", - "multi_check": "False", - "properties": { - "Total GB": "sudo /usr/bin/lshw -json -c memory | jq -r '.[] | select(.description==\"System Memory\").size' 
| awk '{printf \"%.2f\\n\", $1/1073741824}'", - "RAM Module Count": "sudo /usr/bin/lshw -json -c memory | jq -r '.[] | select(.id | contains(\"bank\")) | .id ' | wc -l", - "RAM Type": "sudo /usr/sbin/dmidecode --type 17 | grep Type: | sort -u | cut -d: -f2 | xargs", - "RAM Speed": "sudo /usr/sbin/dmidecode --type 17 | grep Speed: | grep -v Configured | sort -u | cut -d: -f2 | xargs", - "RAM Voltage": "sudo /usr/sbin/dmidecode --type 17 | grep 'Configured Voltage' | sort -u | cut -d: -f2 | xargs" - }, - "metrics": { - "MB Used": "free -m | grep Mem | awk '{print $3}'", - "MB Free": "free -m | grep Mem | awk '{print $4}'" - }, - "virt_ignore": [ - "RAM Type", - "RAM Speed", - "RAM Voltage" - ] - }, - { - "name": "LAN", - "description": "{Device ID} - {Device Name} - {MAC Address}", - "multi_check": "True", - "device_list": "ip link | grep default | grep -v -e docker -e 127.0.0.1 -e br- -e veth -e lo -e tun | cut -d ':' -f 2 | awk '{{print $1}}' ", - "properties": { - "MAC Address": "ip link | grep -A1 ' {this_device}' | grep ether | awk '{{print $2}}'", - "Device Name": "echo {this_device}", - "Device ID": "udevadm info -q property -p $(ls -l /sys/class/net/ | grep {this_device} | cut -d '>' -f2 | cut -b 8- ) | grep ID_MODEL_FROM_DATABASE | cut -d '=' -f2 " - }, - "metrics": { - "IP Address": "ip -o -4 ad | grep -v -e docker -e 127.0.0.1 -e br- | grep {this_device} | awk '{{print $4}}'", - "Data Transmitted": "ifconfig {this_device} | grep RX | grep bytes | cut -d '(' -f2 | tr -d ')'", - "Data Received": "ifconfig {this_device} | grep TX | grep bytes | cut -d '(' -f2 | tr -d ')'", - "Link State": "cat /sys/class/net/{this_device}/operstate", - "Link Speed": "cat /sys/class/net/{this_device}/speed || true" - }, - "multi_metrics": [ - "IP Address" - ] - }, - { - "name": "NVGPU", - "description": "NVGPU{Device ID} - {Device Model} with {Memory Size}, Max Power {Maximum Power}", - "multi_check": "True", - "device_list": "nvidia-smi --query-gpu=index 
--format=csv,noheader,nounits", - "properties": { - "Device Model": "nvidia-smi --id={this_device} --query-gpu=name --format=csv,noheader,nounits", - "Device ID": "echo NVGPU{this_device}", - "Driver Version": "nvidia-smi --id={this_device} --query-gpu=driver_version --format=csv,noheader,nounits", - "Maximum Power": "nvidia-smi --id={this_device} --query-gpu=power.limit --format=csv,noheader,nounits", - "Memory Size": "nvidia-smi --id={this_device} --query-gpu=memory.total --format=csv,noheader,nounits" - - }, - "metrics": { - "Power Draw": "nvidia-smi --id={this_device} --query-gpu=power.draw --format=csv,noheader,nounits", - "Used Memory": "nvidia-smi --id={this_device} --query-gpu=memory.used --format=csv,noheader,nounits", - "Temperature": "nvidia-smi --id={this_device} --query-gpu=temperature.gpu --format=csv,noheader,nounits", - "GPU Load": "nvidia-smi --id={this_device} --query-gpu=utilization.gpu --format=csv,noheader,nounits" - - }, - "precheck": "lspci | grep NV | wc -l" - }, - { - "name": "STOR", - "description": "{Device Path} is of type {Drive Type} with capacity of {Total Capacity}.", - "multi_check": "True", - "device_list": "lsblk -d -o NAME,SIZE | grep -v -e 0B -e NAME | awk '{print $1}'", - "properties": { - "Device Name": "lsblk -d -o NAME,SIZE | grep -v -e 0B -e NAME | awk '{{print $1}}' | grep {this_device}", - "Device Path": "lsblk -d -o NAME,SIZE | grep -v -e 0B -e NAME | awk '{{print \"/dev/\"$1}}' | grep {this_device}", - "Drive Type": "lsblk -d -o NAME,TRAN | grep {this_device} | awk '{{print $2}}'", - "Total Capacity": "lsblk -d -o NAME,SIZE | grep {this_device} | awk '{{print $2}}'", - "SMART Check": "sudo /usr/sbin/smartctl -x --json /dev/{this_device} | jq -r .smart_status.passed" - }, - "metrics": { - "placeholder": "" - } - } -] \ No newline at end of file diff --git a/files/api/descriptors.json b/files/api/descriptors.json new file mode 100644 index 0000000..0659abf --- /dev/null +++ b/files/api/descriptors.json @@ -0,0 +1,196 @@ 
+[ + { + "name": "System", + "static_key_variables": [ + { + "name": "Hostname", + "command": "hostname" + }, + { + "name": "Virtual Machine", + "command": "echo $( [ \"$(systemd-detect-virt)\" = none ] && echo False || echo True )", + "req_check": "False" + }, + { + "name": "CPU Architecture", + "command": "lscpu --json | jq -r '.lscpu[] | select(.field==\"Architecture:\") | .data'" + }, + { + "name": "OS Kernel", + "command": "uname -r" + }, + { + "name": "OS Name", + "command": "cat /etc/os-release | grep PRETTY | cut -d\\\" -f2" + }, + { + "name": "Manufacturer", + "command":{ + "x86_64": "sudo dmidecode --type 1 | grep Manufacturer: | cut -d: -f2 | sed -e 's/^[ \\t]*//'", + "aarch64": "" + }, + "arch_check": "true" + }, + { + "name": "Product Name", + "command": { + "x86_64": "sudo dmidecode --type 2 | grep 'Product Name:' | cut -d: -f2 | sed -e 's/^[ \\t]*//'", + "aarch64": "lshw -C system -json -disable NVMe -disable usb -disable 'PCI (Legacy)' -disable PCI -disable pci| jq -r '.[] | .product'" + }, + "arch_check": "true" + }, + { + "name": "Serial Number", + "command": { + "x86_64": "sudo dmidecode --type 2 | grep 'Serial Number: '| cut -d: -f2 | sed -e 's/^[ \\t]*//'", + "aarch64": "lshw -C system -json -disable NVMe -disable usb -disable 'PCI (Legacy)' -disable PCI -disable pci| jq -r '.[] | .serial'" + }, + "arch_check": "true" + } + ], + "dynamic_key_variables": [ + { + "name": "System Uptime", + "command": "uptime -p" + }, + { + "name": "Current Date", + "command": "date '+%D %r'" + } + ], + "virt_ignore": [ + "Product Name", + "Serial Number" + ] + }, + { + "name": "CPU", + "description": "{CPU Model} with {Core Count} cores.", + "multi_check": "False", + "properties": { + "Core Count": "lscpu --json | jq -r '.lscpu[] | select(.field==\"CPU(s):\") | .data'", + "CPU Model": "lscpu --json | jq -r '.lscpu[] | select(.field==\"Model name:\") | .data' | xargs", + "Clock Speed": { + "x86_64": "sudo dmesg | grep MHz | grep tsc | cut -d: -f2 | awk '{print $2 
\" \" $3}'", + "aarch64": "lscpu --json | jq -r '.lscpu[] | select(.field==\"CPU max MHz:\") | .data ' | xargs " + } + }, + "notes": "clock speed doesn't work on ARM", + "metrics": { + "1m_load": "cat /proc/loadavg | awk '{{print $1}}'", + "5m_load": "cat /proc/loadavg | awk '{{print $2}}'", + "15m_load": "cat /proc/loadavg | awk '{{print $3}}'", + "current_mhz": { + "x86_64": "cat /proc/cpuinfo | grep MHz | cut -d: -f2 | awk '{{sum += $1}} END {{print sum/NR}}'", + "aarch64": "echo unknown" + } + }, + "arch_check": "True", + "arch_variance": [ + "current_mhz", + "Clock Speed" + ] + }, + { + "name": "RAM", + "description": "Total {Total GB}GB in {RAM Module Count} modules.", + "multi_check": "False", + "properties": { + "Total GB": { + "x86_64": "sudo /usr/bin/lshw -json -c memory -disable NVMe -disable usb | jq -r '.[] | select(.description==\"System Memory\").size' | awk '{{printf \"%.2f\\n\", $1/1073741824}}'", + "aarch64": "sudo /usr/bin/lshw -json -c memory -disable NVMe -disable usb | jq -r '.[] | select(.description==\"System memory\").size' | awk '{{printf \"%.2f\\n\", $1/1073741824}}'" + }, + "RAM Module Count": { + "x86_64": "sudo /usr/bin/lshw -json -c memory -disable NVMe -disable usb | jq -r '.[] | select(.id | contains(\"bank\")) | .id ' | wc -l" + }, + "RAM Type": { + "x86_64": "sudo /usr/sbin/dmidecode --type 17 | grep Type: | sort -u | cut -d: -f2 | xargs", + "aarch64": "echo none" + }, + "RAM Speed": { + "x86_64": "sudo /usr/sbin/dmidecode --type 17 | grep Speed: | grep -v Configured | sort -u | cut -d: -f2 | xargs", + "aarch64": "echo none" + }, + "RAM Voltage": { + "x86_64": "sudo /usr/sbin/dmidecode --type 17 | grep 'Configured Voltage' | sort -u | cut -d: -f2 | xargs", + "aarch64": "echo none" + } + }, + "metrics": { + "MB Used": "free -m | grep Mem | awk '{print $3}'", + "MB Free": "free -m | grep Mem | awk '{print $4}'" + }, + "virt_ignore": [ + "RAM Type", + "RAM Speed", + "RAM Voltage" + ], + "arch_check": "True", + "arch_variance": [ + 
"Total GB", + "RAM Module Count", + "RAM Type", + "RAM Speed", + "RAM Voltage" + ] + }, + { + "name": "LAN", + "description": "{Device ID} - {Device Name} - {MAC Address}", + "multi_check": "True", + "device_list": "ip link | grep default | grep -v -e docker -e 127.0.0.1 -e br- -e veth -e lo -e tun | cut -d ':' -f 2 | awk '{{print $1}}' ", + "properties": { + "MAC Address": "ip link | grep -A1 ' {this_device}' | grep ether | awk '{{print $2}}'", + "Device Name": "echo {this_device}", + "Device ID": "udevadm info -q property -p $(ls -l /sys/class/net/ | grep {this_device} | cut -d '>' -f2 | cut -b 8- ) | grep ID_MODEL_FROM_DATABASE | cut -d '=' -f2 " + }, + "metrics": { + "IP Address": "ip -o -4 ad | grep -v -e docker -e 127.0.0.1 -e br- | grep {this_device} | awk '{{print $4}}'", + "Data Transmitted": "ifconfig {this_device} | grep RX | grep bytes | cut -d '(' -f2 | tr -d ')'", + "Data Received": "ifconfig {this_device} | grep TX | grep bytes | cut -d '(' -f2 | tr -d ')'", + "Link State": "cat /sys/class/net/{this_device}/operstate", + "Link Speed": "cat /sys/class/net/{this_device}/speed || true" + }, + "multi_metrics": [ + "IP Address" + ] + }, + { + "name": "NVGPU", + "description": "NVGPU{Device ID} - {Device Model} with {Memory Size}, Max Power {Maximum Power}", + "multi_check": "True", + "device_list": "nvidia-smi --query-gpu=index --format=csv,noheader,nounits", + "properties": { + "Device Model": "nvidia-smi --id={this_device} --query-gpu=name --format=csv,noheader,nounits", + "Device ID": "echo NVGPU{this_device}", + "Driver Version": "nvidia-smi --id={this_device} --query-gpu=driver_version --format=csv,noheader,nounits", + "Maximum Power": "nvidia-smi --id={this_device} --query-gpu=power.limit --format=csv,noheader,nounits", + "Memory Size": "nvidia-smi --id={this_device} --query-gpu=memory.total --format=csv,noheader,nounits" + + }, + "metrics": { + "Power Draw": "nvidia-smi --id={this_device} --query-gpu=power.draw --format=csv,noheader,nounits", + 
"Used Memory": "nvidia-smi --id={this_device} --query-gpu=memory.used --format=csv,noheader,nounits", + "Temperature": "nvidia-smi --id={this_device} --query-gpu=temperature.gpu --format=csv,noheader,nounits", + "GPU Load": "nvidia-smi --id={this_device} --query-gpu=utilization.gpu --format=csv,noheader,nounits" + + }, + "precheck": "lspci | grep NVIDIA | wc -l" + }, + { + "name": "STOR", + "description": "{Device Path} is of type {Drive Type} with capacity of {Total Capacity}.", + "multi_check": "True", + "device_list": "lsblk -d -o NAME,SIZE | grep -v -e 0B -e NAME | awk '{print $1}'", + "properties": { + "Device Name": "lsblk -d -o NAME,SIZE | grep -v -e 0B -e NAME | awk '{{print $1}}' | grep {this_device}", + "Device Path": "lsblk -d -o NAME,SIZE | grep -v -e 0B -e NAME | awk '{{print \"/dev/\"$1}}' | grep {this_device}", + "Drive Type": "lsblk -d -o NAME,TRAN | grep {this_device} | awk '{{print $2}}'", + "Total Capacity": "lsblk -d -o NAME,SIZE | grep {this_device} | awk '{{print $2}}'", + "SMART Check": "sudo /usr/sbin/smartctl -x --json /dev/{this_device} | jq -r .smart_status.passed" + }, + "metrics": { + "placeholder": "" + } + } +] \ No newline at end of file diff --git a/files/api/new_descriptors.json b/files/api/new_descriptors.json index 4eb452a..4bbcd7d 100644 --- a/files/api/new_descriptors.json +++ b/files/api/new_descriptors.json @@ -3,17 +3,123 @@ "name": "", "description": "", "multi_check": "True", - "device_list": " ", + "device_list": "command to list all devices of this type for iteration", "properties": { - + "property_name": "shell command to display said property", + "property_with_variance":{ + "x86_64": "this structure works with metrics also", + "aarch64": "the code calls based on the key name" + } }, "metrics": { + "metric_name": "shell command to display said metric" }, "multi_metrics": [ + "array", + "of metric names", + "for devices with", + "multiple instances" ], - "virt_ignore": [ + "virt_ignore": [ + "array", + "of metrics", + 
"or properties", + "to skip when", + "running on a VM" + ], + "precheck": "if there is a chance this device might be absent, this is the shell command to check for its presence", + + "arch_check": "If there have to be different commands for different architechures, this is the solution", + "arch_variance": [ + "array", + "of metrics", + "or properties", + "which have variance" ] }, + { + "static_key_variables": [ + {"name": "Hostname", "command": "hostname"}, + {"name": "Virtual Machine", "command": "echo $( [ \"$(systemd-detect-virt)\" = none ] && echo False || echo True )", "req_check": "False"}, + {"name": "CPU Architecture", "command": "lscpu --json | jq -r '.lscpu[] | select(.field==\"Architecture:\") | .data'"}, + {"name": "OS Kernel", "command": "uname -r"}, + {"name": "OS Name", "command": "cat /etc/os-release | grep PRETTY | cut -d\\\" -f2"}, + {"name": "Manufacturer", "command": "sudo dmidecode --type 1 | grep Manufacturer: | cut -d: -f2 | sed -e 's/^[ \\t]*//'"}, + {"name": "Product Name", "command": "sudo dmidecode --type 2 | grep 'Product Name:' | cut -d: -f2 | sed -e 's/^[ \\t]*//'"}, + {"name": "Serial Number", "command": "sudo dmidecode --type 2 | grep 'Serial Number: '| cut -d: -f2 | sed -e 's/^[ \\t]*//'"} + ], + "dynamic_key_variables": [ + {"name": "System Uptime", "command": "uptime -p"}, + {"name": "Current Date", "command": "date '+%D %r'"} + ], + "virt_ignore": [ + "Product Name", + "Serial Number" + ] + }, +{ + "name:": "System", + "static_key_variables": [ + { + "name": "Hostname", + "command": "hostname" + }, + { + "name": "Virtual Machine", + "command": "echo $( [ \"$(systemd-detect-virt)\" = none ] && echo False || echo True )", + "req_check": "False" + }, + { + "name": "CPU Architecture", + "command": "lscpu --json | jq -r '.lscpu[] | select(.field==\"Architecture:\") | .data'" + }, + { + "name": "OS Kernel", + "command": "uname -r" + }, + { + "name": "OS Name", + "command": "cat /etc/os-release | grep PRETTY | cut -d\\\" -f2" + }, + { 
+ "name": "Manufacturer", + "command":{ + "x86_64": "sudo dmidecode --type 1 | grep Manufacturer: | cut -d: -f2 | sed -e 's/^[ \\t]*//'" + }, + "arch_check": "true" + }, + { + "name": "Product Name", + "command": { + "x86_64": "sudo dmidecode --type 2 | grep 'Product Name:' | cut -d: -f2 | sed -e 's/^[ \\t]*//'" + }, + "arch_check": "true" + }, + { + "name": "Serial Number", + "command": { + "x86_64": "sudo dmidecode --type 2 | grep 'Serial Number: '| cut -d: -f2 | sed -e 's/^[ \\t]*//'" + }, + "arch_check": "true" + } + ], + "dynamic_key_variables": [ + { + "name": "System Uptime", + "command": "uptime -p" + }, + { + "name": "Current Date", + "command": "date '+%D %r'" + } + ], + "virt_ignore": [ + "Product Name", + "Serial Number" + ] + }, + + { "SATA GBW": "sudo /usr/sbin/smartctl -x --json /dev/{this_device} | jq -r '.physical_block_size as $block |.ata_device_statistics.pages[] | select(.name == \"General Statistics\") | .table[] | select(.name == \"Logical Sectors Written\") | .value as $sectors | ($sectors * $block) / 1073741824 ' | awk '{{printf \"%.2f GiB Written\\n\", $0}}' || true", "NVMe GBW": "sudo /usr/sbin/smartctl -x --json /dev/{this_device} | jq -r ' .nvme_smart_health_information_log.data_units_written as $dw | .logical_block_size as $ls | ($dw * $ls) / 1073741824 ' | awk '{{printf \"%.2f GiB Written\\n\", $0}}' || true" diff --git a/files/api/shrink.py b/files/api/shrink.py new file mode 100644 index 0000000..4f7313e --- /dev/null +++ b/files/api/shrink.py @@ -0,0 +1,46 @@ + + def get_properties_keys(self, component = None): + component_properties = [] + if component == None: + component_properties = self._properties.items() + else: + component_properties = self.get_property(component) + result = self.process_key_list(key_items = component_properties, key_name = "Property", return_type = "key" key_value = "Value") + return result + + def get_metrics_keys(self): + result = self.process_key_list(key_items = self._metrics.items(), key_name = 
"Metric", key_value = "Data", return_type = "key") + return result + + def get_properties_strings(self, return_simple = True): + result = self.process_key_list(key_items = self._properties.items(), key_name = "Property", return_type = "string", return_simple = return_simple) + return result + + def get_metrics_strings(self, return_simple = True): + result = self.process_key_list(key_items = self._metrics.items(), key_name = "Metric", return_type = "string", return_simple = return_simple) + return result + + def process_key_list(self, key_items: str, key_name: str, return_type: str, key_value = "none"): + result = [] + empty_value = ["", "null", None, []] + for name, values in key_items: + for value in (values if isinstance(values, list) else [values]): + if value not in empty_value and name not in self.virt_ignore: + this_key_string = f"{name}: {value}" + if return_simple: + result.append(this_key_string) + elif return_keys: + this_key_value = { + "Source": self.name, + key_name: name, + key_value: value + } + result.append(this_key_value) + else: + complex_key_string = { + "Source": self.name, + key_name: this_key_string + } + result.append(complex_key_string) + + return result \ No newline at end of file diff --git a/files/web/proxy/nginx.conf b/files/web/proxy/nginx.conf index b08b797..bd1c200 100644 --- a/files/web/proxy/nginx.conf +++ b/files/web/proxy/nginx.conf @@ -18,7 +18,7 @@ server_name localhost; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $sceme; + proxy_set_header X-Forwarded-Proto $scheme; } # --------------------------------------- diff --git a/tasks/init.yaml b/tasks/init.yaml index 1543a41..8c1130c 100644 --- a/tasks/init.yaml +++ b/tasks/init.yaml @@ -1,12 +1,21 @@ --- +# get arch +- name: Cosmostat - Init - Check CPU Arch + shell: "dpkg --print-architecture" + register: cpu_architecture_output + +- name: Cosmostat - Init - 
Set x64_arch variable + when: "'arm64' in cpu_architecture_output.stdout" + set_fact: + x64_arch: false + # package handler - name: Cosmostat - Init - Get installed package list - when: dpkg_output is undefined shell: "dpkg --list | grep ii | awk '{print $2}'" register: dpkg_output - name: Cosmostat - Init - Install Prereq Packages - when: cosmostat_packages_item not in dpkg_output.stdout + when: cosmostat_packages_item not in dpkg_output.stdout_lines apt: name: - "{{ cosmostat_packages_item }}" @@ -16,12 +25,25 @@ loop_var: cosmostat_packages_item # docker network for cosmostat service -- name: Cosmostat - Init - Set Up docker network - community.docker.docker_network: - name: "cosmostat_net" - driver: bridge - ipam_config: - - subnet: "{{ docker_subnet }}" +- name: Cosmostat - Init - Check for docker network + shell: "ip -o -4 ad | grep {{ docker_gateway }} | wc -l" + register: docker_network_register + +- name: Cosmostat - Init - Run Network Handlers + when: docker_network_register.stdout | int == 0 + block: + + - name: Cosmostat - Init - Set Up docker network x64 + when: x64_arch | bool + community.docker.docker_network: + name: "cosmostat_net" + driver: bridge + ipam_config: + - subnet: "{{ docker_subnet }}" + + - name: Cosmostat - Init - Set Up docker network arm64 + when: not x64_arch | bool + shell: "docker network create --driver bridge --subnet {{ docker_subnet }} cosmostat_net" # allow service_user to sudo lshw without a password - name: Cosmostat - Init - cosmos user sudoers file creation diff --git a/tasks/server.yaml b/tasks/server.yaml new file mode 100644 index 0000000..6f639b6 --- /dev/null +++ b/tasks/server.yaml @@ -0,0 +1,8 @@ +--- +# this will be run to install the full cosmostat server dashboard + + + + + +... 
\ No newline at end of file diff --git a/tasks/web.yaml b/tasks/web.yaml index d8067ee..e4f7594 100644 --- a/tasks/web.yaml +++ b/tasks/web.yaml @@ -3,6 +3,11 @@ # This part sets up cosmostat web dashboard ############################################### +- name: Cosmostat - Web - stop containers + when: not quick_refresh | bool + shell: "docker-compose -f {{ service_control_web_folder }}/docker-compose.yaml down" + ignore_errors: yes + # Create web Folder - name: "Cosmostat - Web - create {{ service_control_web_folder }}" file: @@ -31,6 +36,6 @@ register: docker_output - debug: | msg="{{ docker_output.stdout_lines }}" - msg="{{ docker_output.stderr_lines }}" + msg="{{ docker_output.stderr_lines }}" ... \ No newline at end of file diff --git a/templates/cosmostat_settings.yaml b/templates/cosmostat_settings.yaml index 5916660..171a1b2 100644 --- a/templates/cosmostat_settings.yaml +++ b/templates/cosmostat_settings.yaml @@ -21,7 +21,7 @@ ansible_hostname: "{{ ansible_hostname }}" docker_subnet: "{{ docker_subnet }}" docker_gateway: "{{ docker_gateway }}" -# python system variables +# python system variables, no quotes for bool or int secure_api: {{ secure_api }} noisy_test: {{ noisy_test }} debug_output: {{ debug_output }} @@ -30,4 +30,7 @@ run_background : {{ run_background }} log_output: {{ log_output }} update_frequency: {{ update_frequency }} custom_api_port: {{ custom_api_port }} +cosmostat_server: {{ cosmostat_server }} +cosmostat_server_api: "{{ cosmostat_server_api }}" +cosmostat_server_reporter: {{ cosmostat_server_reporter }} ... \ No newline at end of file