{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": { "vscode": { "languageId": "plaintext" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Scenario 1 (Homogenous) Results:\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ScenarioModeScaling Speed (mins)CPU Utilization (%)Memory Utilization (%)Cost EfficiencyResource Provisioning
0Scenario 1 (Homogenous): IBMCas30.06.06.0$0.00 per hourStatic
1Scenario 1 (Homogenous): IBMKarpenter30.03.03.0$0.00 per hourDynamic
\n", "
" ], "text/plain": [ " Scenario Mode Scaling Speed (mins) \\\n", "0 Scenario 1 (Homogenous): IBM Cas 30.0 \n", "1 Scenario 1 (Homogenous): IBM Karpenter 30.0 \n", "\n", " CPU Utilization (%) Memory Utilization (%) Cost Efficiency \\\n", "0 6.0 6.0 $0.00 per hour \n", "1 3.0 3.0 $0.00 per hour \n", "\n", " Resource Provisioning \n", "0 Static \n", "1 Dynamic " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Scenario 2 (Heterogenous) Results:\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ScenarioModeScaling Speed (mins)CPU Utilization (%)Memory Utilization (%)Cost EfficiencyResource Provisioning
\n", "
" ], "text/plain": [ "Empty DataFrame\n", "Columns: [Scenario, Mode, Scaling Speed (mins), CPU Utilization (%), Memory Utilization (%), Cost Efficiency, Resource Provisioning]\n", "Index: []" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [
"# Import required libraries\n",
"import json\n",
"import os\n",
"\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import pandas as pd\n",
"from IPython.display import display\n",
"\n",
"# Directories for analysis\n",
"providers = [\"aws\", \"ibm\"]\n",
"setups = [\"heterogenous\", \"homogenous\"]\n",
"modes = [\"cas\", \"karpenter\"]\n",
"\n",
"# Initialize results dictionary\n",
"results = {}\n",
"\n",
"\n",
"def calculate_scaling_speed(values):\n",
"    \"\"\"Return the mean gap between consecutive samples, in minutes.\n",
"\n",
"    `values` is a sequence of [timestamp, value] pairs. Timestamps are treated\n",
"    as seconds (the plotting cell divides the same timestamps by 60 to obtain\n",
"    minutes), so the mean delta is converted to minutes to match the\n",
"    \"Scaling Speed (mins)\" column. Returns NaN when fewer than two samples exist.\n",
"\n",
"    NOTE(review): this is the mean sampling interval, used as a proxy for\n",
"    scaling speed - confirm that this is the intended metric.\n",
"    \"\"\"\n",
"    timestamps = [val[0] for val in values]\n",
"    if len(timestamps) < 2:\n",
"        return float(\"nan\")\n",
"    return float(np.mean(np.diff(timestamps))) / 60\n",
"\n",
"\n",
"def calculate_average_utilization(values):\n",
"    \"\"\"Return the mean of the sample values (second item of each pair); NaN if empty.\"\"\"\n",
"    utilizations = [float(val[1]) for val in values]\n",
"    if not utilizations:\n",
"        return float(\"nan\")\n",
"    return float(np.mean(utilizations))\n",
"\n",
"\n",
"def get_cost_from_billing(provider, setup, mode):\n",
"    \"\"\"Return the latest billing period's unblended cost as a display string.\n",
"\n",
"    Reads <provider>/<setup>/<mode>/<provider>_billing_metrics_<mode>.json and\n",
"    formats the last entry of \"ResultsByTime\". Falls back to the \"$X per hour\"\n",
"    placeholder when the file is missing or contains no periods.\n",
"\n",
"    NOTE(review): the amount is labelled \"per hour\" whatever the period\n",
"    granularity is - confirm against the billing export.\n",
"    \"\"\"\n",
"    placeholder = \"$X per hour\"  # Keep original placeholder if no data is available\n",
"    billing_file = os.path.join(provider, setup, mode, f\"{provider}_billing_metrics_{mode}.json\")\n",
"    if not os.path.exists(billing_file):\n",
"        return placeholder\n",
"    with open(billing_file, \"r\") as file:\n",
"        billing = json.load(file)\n",
"    periods = billing.get(\"ResultsByTime\") or []\n",
"    if not periods:\n",
"        return placeholder\n",
"    # Get the most recent time period's cost\n",
"    cost = float(periods[-1][\"Total\"][\"UnblendedCost\"][\"Amount\"])\n",
"    return f\"${cost:.2f} per hour\"\n",
"\n",
"\n",
"# Traverse directories and process files\n",
"for provider in providers:\n",
"    results[provider] = {}\n",
"    for setup in setups:\n",
"        results[provider][setup] = {}\n",
"        for mode in modes:\n",
"            # Define the path to the JSON file\n",
"            json_path = os.path.join(provider, setup, mode, \"NodeCount.json\")\n",
"            if not os.path.exists(json_path):\n",
"                continue\n",
"            with open(json_path, \"r\") as file:\n",
"                node_count = json.load(file)\n",
"            series = node_count[\"data\"][\"result\"]\n",
"            if not series:\n",
"                # The query returned no series; nothing to summarise here\n",
"                continue\n",
"            values = series[0][\"values\"]\n",
"            # Calculate metrics\n",
"            scaling_speed = calculate_scaling_speed(values)\n",
"            # NOTE(review): both utilization figures are derived from the\n",
"            # NodeCount.json values - confirm whether real CPU/memory series\n",
"            # should be loaded instead.\n",
"            average_utilization = calculate_average_utilization(values)\n",
"            # Store results\n",
"            results[provider][setup][mode] = {\n",
"                \"Scaling Speed\": scaling_speed,\n",
"                \"CPU Utilization\": average_utilization,\n",
"                \"Memory Utilization\": average_utilization,  # Placeholder for memory\n",
"                \"Cost Efficiency\": get_cost_from_billing(provider, setup, mode),\n",
"                \"Resource Provisioning\": \"Dynamic\" if mode == \"karpenter\" else \"Static\"\n",
"            }\n",
"\n",
"\n",
"# Convert results to DataFrame for display\n",
"def format_results(results, setup_type):\n",
"    \"\"\"Build one table row per (provider, mode) for the given setup type.\n",
"\n",
"    `results` is the nested dict results[provider][setup][mode] -> metrics;\n",
"    `setup_type` is \"homogenous\" (Scenario 1) or anything else (Scenario 2).\n",
"    Returns a DataFrame, which is empty when no data was collected for the setup.\n",
"    \"\"\"\n",
"    rows = []\n",
"    scenario = \"1 (Homogenous)\" if setup_type == \"homogenous\" else \"2 (Heterogenous)\"\n",
"\n",
"    # Loop variable is not called `setups`, to avoid shadowing the module-level list\n",
"    for provider, provider_setups in results.items():\n",
"        if setup_type not in provider_setups:\n",
"            continue\n",
"        for mode, metrics in provider_setups[setup_type].items():\n",
"            rows.append([\n",
"                f\"Scenario {scenario}: {provider.upper()}\",\n",
"                mode.capitalize(),\n",
"                metrics[\"Scaling Speed\"],\n",
"                metrics[\"CPU Utilization\"],\n",
"                metrics[\"Memory Utilization\"],\n",
"                metrics[\"Cost Efficiency\"],\n",
"                metrics[\"Resource Provisioning\"]\n",
"            ])\n",
"\n",
"    return pd.DataFrame(rows, columns=[\n",
"        \"Scenario\", \"Mode\", \"Scaling Speed (mins)\",\n",
"        \"CPU Utilization (%)\", \"Memory Utilization (%)\",\n",
"        \"Cost Efficiency\", \"Resource Provisioning\"\n",
"    ])\n",
"\n",
"\n",
"# Format results for homogenous (Scenario 1) and heterogenous (Scenario 2)\n",
"homogenous_results = format_results(results, \"homogenous\")\n",
"heterogenous_results = format_results(results, \"heterogenous\")\n",
"\n",
"# Display results as tables\n",
"print(\"Scenario 1 (Homogenous) Results:\")\n",
"display(homogenous_results)\n",
"print(\"\\nScenario 2 (Heterogenous) Results:\")\n",
"display(heterogenous_results)"
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"plt.rcParams['figure.dpi'] = 1200\n",
"\n",
"\n",
"def load_node_counts(path):\n",
"    \"\"\"Read a NodeCount.json file and return (minutes_from_start, node_counts).\n",
"\n",
"    Uses the first series in data.result; timestamps are shifted to start at\n",
"    zero and divided by 60 to convert them to minutes.\n",
"    \"\"\"\n",
"    with open(path, 'r') as f:\n",
"        samples = json.load(f)['data']['result'][0]['values']\n",
"    timestamps = np.array([sample[0] for sample in samples], dtype=float)\n",
"    nodes = [float(sample[1]) for sample in samples]\n",
"    return (timestamps - timestamps[0]) / 60, nodes\n",
"\n",
"\n",
"# Load IBM data and convert timestamps to minutes from start\n",
"karpenter_times, karpenter_nodes = load_node_counts('ibm/homogenous/karpenter/NodeCount.json')\n",
"cas_times, cas_nodes = load_node_counts('ibm/homogenous/cas/NodeCount.json')\n",
"\n",
"# Plot the data\n",
"fig, ax = plt.subplots(figsize=(5, 3))\n",
"ax.plot(cas_times, cas_nodes, label='CAS', linestyle='-', marker='o', markersize=3)\n",
"ax.plot(karpenter_times, karpenter_nodes, label='Karpenter', linestyle='--', marker='x', markersize=3)\n",
"\n",
"ax.set_xlabel('Time (minutes)')\n",
"ax.set_ylabel('Number of Nodes')\n",
"ax.set_title('IBM Node Count Over Time')\n",
"ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.5), ncol=2)\n",
"ax.grid(True)\n",
"\n",
"# Ensure y-axis shows integer values\n",
"ax.yaxis.set_major_locator(plt.MaxNLocator(integer=True))\n",
"\n",
"plt.tight_layout()\n",
"\n",
"# Create the output directory first so savefig does not fail on a fresh checkout\n",
"os.makedirs('../plots', exist_ok=True)\n",
"# 'tight' is the valid string value for bbox_inches; it also keeps the legend,\n",
"# which is anchored below the axes, inside the saved image\n",
"fig.savefig('../plots/ibm_node_count.png', bbox_inches='tight')\n",
"plt.close(fig)"
] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python",
"nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.20" } }, "nbformat": 4, "nbformat_minor": 4 }