Cosmostat Init Commit

This commit is contained in:
2026-03-09 16:32:43 -07:00
commit 298d7432a7
23 changed files with 1707 additions and 0 deletions

298
files/api/Components.py Normal file
View File

@ -0,0 +1,298 @@
# this class file is for the cosmostat service
import subprocess
from LinkedList import *
global_max_length = 500
class Component:
##########################################################################################
# Base class for all system components. All instantiated objects need a child class
# Class data:
### name - name of the type of component, declared in the parent class
### status
### model_string - string with device info, declared in parent class
### metric_name - name of the value being measured
### current_value
### historical_data - This will be a linked list used to generate a json when calling get_historical_data
### for this to work, the function using these classes needs to update the values periodically
#### historical_data = [
#### {
#### "timestamp": timestamp, # seconds since epoch
#### "value": value
#### },
#### {
#### "timestamp": timestamp,
#### "value": value
#### }
#### ]
def __init__(self, name: str, model_string: str = None):
# fail instantiation if critical data is missing
if self.model_string is None:
raise TypeError("Error - missing component model_string")
if self.metric_name is None:
raise TypeError("Error - missing component metric_name")
if self.metric_value_command is None:
raise TypeError("Error - missing component metric_value_command")
if self.type is None:
raise TypeError("Error - missing component type")
if self.has_temp is None:
raise TypeError("Error - missing temp data check")
# set up history list
self.history_max_length = global_max_length
self.historical_data = ValueHistory(self.history_max_length)
self.history_start = self.historical_data.get_first_timestamp()
self.update_value()
if self.current_value is None:
raise TypeError("Error - failed to read value")
# if temp data exists, handle it
if self.has_temp:
self.temp_history_data = ValueHistory(self.history_max_length)
self.temp_history_start = self.temp_history_data.get_first_timestamp()
self.current_temp = self.temp_history_data.get_current_value()
else:
self.temp_history_data = None
# instantiate other shared class variables
self.name = name
self.current_value = self.historical_data.get_current_value()
if self.has_temp:
self.current_temp = self.temp_history_data.get_current_value()
else:
self.current_temp = None
self.comment = f"This is a {self.type}, so we are measuring {self.metric_name}, currently at {self.current_value}"
# if nothing failed, the object is ready
self.status = "ready"
def __str__(self):
return (f"{self.__class__.__name__}: {self.name} "
f"{self.model_string}")
def __del__(self):
print(f"Deleting {self.type} component - {self.model_string}")
def get_info_key(self):
result = {
"name": self.name,
"type": self.type,
"model_string": self.model_string,
"metric_name": self.metric_name
}
return result
def get_summary_key(self):
result = {
"type": self.type,
"current_value": self.current_value,
"metric_name": self.metric_name,
"model_string": self.model_string
}
return result
def update_value(self):
#try:
self.current_value = run_command(self.metric_value_command, True)
self.historical_data.add(self.current_value)
#except:
def update_temp_value(self):
if has_temp:
#try:
self.current_temp = run_command(self.temp_value_command, True)
self.temp_history_data.add(self.current_value)
#except:
else:
return None
def get_history(self, count: int = global_max_length):
if self.has_temp:
result = {
"value_metric": self.metric_name,
"history_count": count,
"history_data": self.historical_data.get_history(count), # reminder this is a LinkedList get_history
"history_temp_data": self.temp_history_data.get_history(count)
}
else:
result = {
"value_metric": self.metric_name,
"history_count": count,
"history_data": self.historical_data.get_history(count) # same reminder here
}
return result
############################################################
# Component Class Types
# There needs to be one of these for each monitored thing
############################################################
# Need to add:
### temperatures
### network + VPN
### storage + ZFS
### video cards
### virtual machines
# CPU component class.
class CPU(Component):
def __init__(self, name: str, is_virtual: bool = False):
# Declare component type
self.type = "CPU"
# deal with temp later
self.has_temp = False
# no temp if VM
#self.has_temp = not is_virtual
#self.temp_value_command = "acpi -V | jc --acpi -p | jq '.[] | select(.type==\"Thermal\") | .temperature '"
self.model_string = self.get_model_string()
# Initialize value
self.metric_name = "1m_load"
self.metric_value_command = "cat /proc/loadavg | awk '{print $1}'"
self.current_value = run_command(self.metric_value_command, True)
# Complete instantiation
super().__init__(name, self.model_string)
def get_model_string(self):
# Get CPU Info
model_string_command = "lscpu --json | jq -r '.lscpu[] | select(.field==\"Model name:\") | .data'"
return run_command(model_string_command, True)
# RAM component class.
class RAM(Component):
def __init__(self, name: str):
# Declare component type
self.type = "RAM"
self.has_temp = False
self.model_string = self.get_model_string()
# Initialize Value
self.metric_name = "used_capacity_mb"
self.metric_value_command = "free -m | grep Mem | awk '{print $3}'"
self.current_value = run_command(self.metric_value_command, True)
# Complete instantiation
super().__init__(name, self.model_string)
def get_model_string(self):
# Check total system RAM
bytes_total_command = "sudo lshw -json -c memory | jq -r '.[] | select(.description==\"System Memory\").size' "
bytes_total = float(run_command(bytes_total_command, True))
gb_total = round(bytes_total / 1073741824, 2)
return f"Total Capacity: {gb_total}GB"
############################################################
# System Class
# A system is build from components
############################################################
class System:
# instantiate new system
def __init__(self, name: str):
# the system needs a name
self.name = name
# system is built of other component objects
self.components = []
# other system properties
self.sysvars = {}
# either i do it here or i do it twice
self.sysvars["is_virtual"] = self.check_for_virtual()
# Let's build a system
self.add_component(CPU("CPU", self.sysvars["is_virtual"]))
self.add_component(RAM("RAM"))
# let's build system values
self.check_values()
# Add a component to the system
def add_component(self, component: Component):
self.components.append(component)
# Get all components, optionally filtered by type
def get_components(self, component_type: type = None):
if component_type is None:
return self.components
return [c for c in self.components if isinstance(c, component_type)]
# get component count
def get_component_count(self):
result = int(len(self.components))
return result
def __str__(self):
components_str = "\n".join(f" - {c}" for c in self.components)
return f"System: {self.name}\n{components_str}"
# update metrics for all components
def update_values(self):
self.check_values()
for component in self.components:
component.update_value()
def check_for_virtual(self):
check_if_vm_command = "systemd-detect-virt"
check_if_vm = run_command(check_if_vm_command, True)
if check_if_vm != "none":
return True
else:
return False
def check_uptime(self):
check_uptime_command = "uptime -p"
system_uptime = run_command(check_uptime_command, True)
return system_uptime
def check_timestamp(self):
check_timestamp_command = "date '+%D %r'"
system_timestamp = run_command(check_timestamp_command, True)
return system_timestamp
def check_values(self):
self.sysvars["uptime"] = self.check_uptime()
self.sysvars["name"] = self.name
self.sysvars["component_count"] = self.get_component_count()
self.sysvars["timestamp"] = self.check_timestamp()
def get_sysvars(self):
result = {}
for sysvar in self.sysvars:
result[f"{sysvar}"] = self.sysvars[f"{sysvar}"]
return result
def get_sysvars_summary_keys(self):
result = []
for sysvar in self.sysvars:
system_type_string = f"sysvar['{sysvar}']"
thisvar = {
"type": "System Class Variable",
"current_value": sysvar,
"metric_name": system_type_string,
"model_string": self.sysvars[sysvar]
}
result.append(thisvar)
return result
############################################################
# Helper Functions
############################################################
# subroutine to run a command, return stdout as array unless zero_only then return [0]
def run_command(cmd, zero_only=False):
# Run the command and capture the output
result = subprocess.run(cmd, shell=True, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
# Decode the byte output to a string
output = result.stdout.decode('utf-8')
# Split the output into lines and store it in an array
output_lines = [line for line in output.split('\n') if line]
# Return result
try:
return output_lines[0] if zero_only else output_lines
except:
return output_lines

98
files/api/LinkedList.py Normal file
View File

@ -0,0 +1,98 @@
##############################
# linked list classes
# written by the intern
##############################
import time
# single node in a singly linked list
class Node:
__slots__ = ("value", "next", "timestamp")
def __init__(self, value):
self.value = value
self.timestamp = time.time()
self.next = None
# small, bounded history implemented with a singly linked list
class ValueHistory:
def __init__(self, maxlen: int):
if maxlen <= 0:
raise ValueError("maxlen must be a positive integer")
self.maxlen = maxlen
self.head: Node | None = None # oldest entry
self.tail: Node | None = None # newest entry
self.size = 0
# Append a new value to the history, dropping the oldest if needed
def add(self, value):
new_node = Node(value)
# link it after the current tail
if self.tail is None: # empty list
self.head = self.tail = new_node
else:
self.tail.next = new_node
self.tail = new_node
self.size += 1
# 2. enforce the size bound
if self.size > self.maxlen:
# drop the head (oldest item)
assert self.head is not None # for the type checker
self.head = self.head.next
self.size -= 1
# If the list became empty, also reset tail
if self.head is None:
self.tail = None
# Return the history as a Python dict list (oldest → newest)
def get_history(self, count: int | None = None):
if count is None:
count = self.maxlen
out = []
cur = self.head
counter = 0
while cur is not None and counter < count:
counter += 1
out.append(
{
"timestamp": cur.timestamp,
"value": cur.value
}
)
cur = cur.next
return out
# Return oldest timestamp
def get_first_timestamp(self):
if self.head is not None:
return self.head.timestamp
else:
return time.time()
# Return current data
def get_current_value(self):
if self.tail is not None:
return self.tail.value
else:
return 0
# ------------------------------------------------------------------
# Convenience methods
# ------------------------------------------------------------------
def __len__(self):
return self.size
def __iter__(self):
"""Iterate over values from oldest to newest."""
cur = self.head
while cur is not None:
yield cur.value
cur = cur.next
def __repr__(self):
return f"BoundedHistory(maxlen={self.maxlen}, data={self.get()!r})"

237
files/api/app.py Normal file
View File

@ -0,0 +1,237 @@
from flask import Flask, jsonify, request
from flask_apscheduler import APScheduler
from typing import Dict, Union
import json, time, redis, yaml
from Components import *
# declare flask apps
app = Flask(__name__)
scheduler = APScheduler()
#######################################################################
### Settings Handler Functions
#######################################################################
# default application setting variables
app_settings = {
"noisy_test" : False,
"debug_output" : False,
"log_output" : False,
"secure_api" : True,
"push_redis" : False,
"run_background" : True
}
with open('cosmostat_settings.yaml', 'r') as f:
print("Loading cosmostat_settings file")
cosmostat_settings = yaml.safe_load(f)
print("...Done")
# initialize system variables from settings file
print("Checking for system var overrides")
for setting in app_settings:
if setting in cosmostat_settings:
cosmos_setting = cosmostat_settings[setting]
if app_settings["debug_output"]:
print(f"{setting}: {cosmos_setting}")
app_settings[setting] = cosmos_setting
print("...Done")
# this returns the docker gateway from the settings
def docker_gateway_settings() -> str:
return cosmostat_settings["docker_gateway"]
# this returns the jenkins user that ran the pipeline
def jenkins_user_settings() -> str:
return cosmostat_settings["jenkins_user"]
# this returns the ansible_hostname from setup
def jenkins_hostname_settings() -> str:
return cosmostat_settings["ansible_hostname"]
# this returns the inventory_generation_timestamp
def jenkins_inventory_generation_timestamp_settings() -> str:
return cosmostat_settings["inventory_generation_timestamp"]
def service_gateway_ip():
if cosmostat_settings["secure_api"]:
return docker_gateway_settings()
else:
return "0.0.0.0"
#######################################################################
### Redis Functions
#######################################################################
# Redis client will publish updates
r = redis.Redis(host=service_gateway_ip(), port=6379)
def update_redis_channel(redis_channel, data):
# Publish to the specified Redis channel
r.publish(redis_channel, json.dumps(data))
if app_settings["noisy_test"]:
print(f"{redis_channel} Redis Update")
print(data)
def update_redis_server():
# Update Stats Redis Channel
update_redis_channel("host_stats", get_full_summary())
# Update history_stats Redis Channel
update_redis_channel("history_stats", get_component_list())
#######################################################################
### Other Functions
#######################################################################
def get_component_summary():
result = []
for component in cosmostat_system.components:
result.append(component.get_summary_key())
return result
def get_full_summary():
result = []
for component in cosmostat_system.components:
result.append(component.get_summary_key())
for sysvar in cosmostat_system.get_sysvars_summary_keys():
result.append(sysvar)
return result
# This will instantiate a System object
def new_cosmos_system():
new_system = System(f"{jenkins_hostname_settings()}")
if app_settings["log_output"]:
print(f"New system object name: {new_system.name}")
for component in new_system.components:
print(component)
return new_system
def get_component_list(history_count = None):
result = []
for component in cosmostat_system.components:
if history_count is not None:
history = component.get_history(history_count)
else:
history = component.get_history()
result.append(
{
"info": component.get_info_key(),
"history": history
}
)
return result
def get_info():
device_summary = []
for component in cosmostat_system.components:
device_summary.append(
{
"info": component.get_info_key(),
}
)
result = {
"system_info":
{
"user": jenkins_user_settings(),
"hostname": jenkins_hostname_settings(),
"timestamp": jenkins_inventory_generation_timestamp_settings(),
"component_count:": len(cosmostat_system.components),
"object_name": cosmostat_system.name,
"docker_gateway": docker_gateway_settings()
},
"device_summary": device_summary
}
return result
#def get_history_summary():
#######################################################################
### Flask Routes
#######################################################################
# full component list
@app.route('/component_list', methods=['GET'])
def component_list():
count = request.args.get('count', type=int)
return jsonify(get_component_list(count))
# component summary
@app.route('/component_summary', methods=['GET'])
def component_summary():
return jsonify(get_component_summary())
# full summary
@app.route('/full_summary', methods=['GET'])
def full_summary():
return jsonify(get_full_summary())
# system info
@app.route('/info', methods=['GET'])
def info():
return jsonify(get_info())
# test route
@app.route('/test', methods=['GET'])
def test():
return jsonify(
{
"component_count:": len(cosmostat_system.components),
"user": jenkins_user_settings(),
"hostname": jenkins_hostname_settings()
}
)
#######################################################################
### Main Subroutine
#######################################################################
if __name__ == '__main__':
# Background Loop Function
def background_loop():
# Update all data on the System object
cosmostat_system.update_values()
if app_settings["push_redis"]:
update_redis_server()
if app_settings["noisy_test"]:
print("Sorry about the mess...")
print(f"Blame {jenkins_user_settings()}")
# instantiate system
cosmostat_system = new_cosmos_system()
# send initial stats update to redis
if app_settings["push_redis"]:
update_redis_server()
# Flask scheduler for scanner
if app_settings["run_background"]:
if app_settings["log_output"]:
print("Loading flask background subroutine...")
scheduler.add_job(id='background_loop',
func=background_loop,
trigger='interval',
seconds=1)
scheduler.init_app(app)
scheduler.start()
if app_settings["log_output"]:
print("...Done")
else:
if app_settings["log_output"]:
print("Skipping flask background task")
# Flask API
app.run(debug=True, host=service_gateway_ip(), port=5000)