{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "vscode": {
     "languageId": "plaintext"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Scenario 1 (Homogenous) Results:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Scenario</th>\n",
       "      <th>Mode</th>\n",
       "      <th>Scaling Speed (mins)</th>\n",
       "      <th>CPU Utilization (%)</th>\n",
       "      <th>Memory Utilization (%)</th>\n",
       "      <th>Cost Efficiency</th>\n",
       "      <th>Resource Provisioning</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Scenario 1 (Homogenous): IBM</td>\n",
       "      <td>Cas</td>\n",
       "      <td>30.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>$0.00 per hour</td>\n",
       "      <td>Static</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Scenario 1 (Homogenous): IBM</td>\n",
       "      <td>Karpenter</td>\n",
       "      <td>30.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>$0.00 per hour</td>\n",
       "      <td>Dynamic</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                       Scenario       Mode  Scaling Speed (mins)  \\\n",
       "0  Scenario 1 (Homogenous): IBM        Cas                  30.0   \n",
       "1  Scenario 1 (Homogenous): IBM  Karpenter                  30.0   \n",
       "\n",
       "   CPU Utilization (%)  Memory Utilization (%) Cost Efficiency  \\\n",
       "0                  6.0                     6.0  $0.00 per hour   \n",
       "1                  3.0                     3.0  $0.00 per hour   \n",
       "\n",
       "  Resource Provisioning  \n",
       "0                Static  \n",
       "1               Dynamic  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Scenario 2 (Heterogenous) Results:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Scenario</th>\n",
       "      <th>Mode</th>\n",
       "      <th>Scaling Speed (mins)</th>\n",
       "      <th>CPU Utilization (%)</th>\n",
       "      <th>Memory Utilization (%)</th>\n",
       "      <th>Cost Efficiency</th>\n",
       "      <th>Resource Provisioning</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Empty DataFrame\n",
       "Columns: [Scenario, Mode, Scaling Speed (mins), CPU Utilization (%), Memory Utilization (%), Cost Efficiency, Resource Provisioning]\n",
       "Index: []"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Import required libraries\n",
    "import json\n",
    "import os\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "# Experiment matrix: metrics are read from <provider>/<setup>/<mode>/ directories\n",
    "providers = [\n",
    "    \"aws\",\n",
    "    \"ibm\",\n",
    "]\n",
    "setups = [\n",
    "    \"heterogenous\",\n",
    "    \"homogenous\",\n",
    "]\n",
    "modes = [\n",
    "    \"cas\",\n",
    "    \"karpenter\",\n",
    "]\n",
    "\n",
    "# Nested mapping filled in below: results[provider][setup][mode] -> metrics dict\n",
    "results = dict()\n",
    "\n",
    "def calculate_scaling_speed(values):\n",
    "    \"\"\"Return the mean gap between consecutive samples, in minutes.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    values : sequence of [timestamp, value] pairs\n",
    "        Only the first element of each pair is used. Timestamps are assumed\n",
    "        to be in seconds, matching the plotting cell, which divides the same\n",
    "        timestamps by 60 to obtain minutes.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    float\n",
    "        Mean interval between successive timestamps in minutes (the unit of\n",
    "        the \"Scaling Speed (mins)\" column), or NaN when fewer than two\n",
    "        samples are available.\n",
    "    \"\"\"\n",
    "    timestamps = np.asarray([sample[0] for sample in values], dtype=float)\n",
    "    if timestamps.size < 2:\n",
    "        # No interval exists; avoid the RuntimeWarning np.mean raises on an empty diff\n",
    "        return np.nan\n",
    "    mean_gap_seconds = np.mean(np.diff(timestamps))\n",
    "    # The results table labels this metric in minutes, so convert from seconds\n",
    "    return mean_gap_seconds / 60.0\n",
    "\n",
    "def calculate_average_utilization(values):\n",
    "    \"\"\"Return the mean of the readings stored as the second element of each sample.\n",
    "\n",
    "    Each reading is converted with float() before averaging, so numeric\n",
    "    strings are accepted.\n",
    "    \"\"\"\n",
    "    readings = []\n",
    "    for sample in values:\n",
    "        readings.append(float(sample[1]))\n",
    "    return np.mean(readings)\n",
    "\n",
    "def get_cost_from_billing(provider, setup, mode):\n",
    "    \"\"\"Format the cost of the last billing period recorded for one run.\n",
    "\n",
    "    Looks for <provider>/<setup>/<mode>/<provider>_billing_metrics_<mode>.json.\n",
    "    When the file exists, the unblended cost amount of the final entry in\n",
    "    \"ResultsByTime\" is returned as \"$<amount> per hour\" with two decimals.\n",
    "    When the file is missing, the placeholder \"$X per hour\" is returned.\n",
    "    \"\"\"\n",
    "    file_name = f\"{provider}_billing_metrics_{mode}.json\"\n",
    "    billing_file = os.path.join(provider, setup, mode, file_name)\n",
    "    if not os.path.exists(billing_file):\n",
    "        return \"$X per hour\"  # Keep original placeholder if file not found\n",
    "\n",
    "    with open(billing_file, \"r\") as file:\n",
    "        billing = json.load(file)\n",
    "\n",
    "    # The last list entry is treated as the most recent time period\n",
    "    amount = billing[\"ResultsByTime\"][-1][\"Total\"][\"UnblendedCost\"][\"Amount\"]\n",
    "    return f\"${float(amount):.2f} per hour\"\n",
    "\n",
    "# Traverse directories and collect metrics for every recorded run\n",
    "for provider in providers:\n",
    "    results[provider] = {}\n",
    "    for setup in setups:\n",
    "        results[provider][setup] = {}\n",
    "        for mode in modes:\n",
    "            json_path = os.path.join(provider, setup, mode, \"NodeCount.json\")\n",
    "            if not os.path.exists(json_path):\n",
    "                continue  # no export for this provider/setup/mode combination\n",
    "\n",
    "            # Close the file before computing anything from its contents\n",
    "            with open(json_path, \"r\") as file:\n",
    "                data = json.load(file)\n",
    "\n",
    "            series = data[\"data\"][\"result\"]\n",
    "            if not series:\n",
    "                # An empty result list would raise IndexError on [0]; skip this run\n",
    "                continue\n",
    "            values = series[0][\"values\"]\n",
    "\n",
    "            scaling_speed = calculate_scaling_speed(values)\n",
    "            # NOTE(review): this averages the NodeCount sample values and is reused for\n",
    "            # both utilization fields below - confirm real CPU/memory data is not expected\n",
    "            average_utilization = calculate_average_utilization(values)\n",
    "\n",
    "            results[provider][setup][mode] = {\n",
    "                \"Scaling Speed\": scaling_speed,\n",
    "                \"CPU Utilization\": average_utilization,\n",
    "                \"Memory Utilization\": average_utilization,  # Placeholder for memory\n",
    "                \"Cost Efficiency\": get_cost_from_billing(provider, setup, mode),\n",
    "                \"Resource Provisioning\": \"Dynamic\" if mode == \"karpenter\" else \"Static\"\n",
    "            }\n",
    "\n",
    "def format_results(results, setup_type):\n",
    "    \"\"\"Build the summary table for one cluster setup.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    results : dict\n",
    "        Nested mapping results[provider][setup][mode] -> metrics dict.\n",
    "    setup_type : str\n",
    "        Setup to tabulate; \"homogenous\" is labelled Scenario 1 and any other\n",
    "        value is labelled Scenario 2.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pandas.DataFrame\n",
    "        One row per (provider, mode) that has metrics for setup_type; the\n",
    "        frame is empty (columns only) when nothing matches.\n",
    "    \"\"\"\n",
    "    if setup_type == \"homogenous\":\n",
    "        scenario = \"1 (Homogenous)\"\n",
    "    else:\n",
    "        scenario = \"2 (Heterogenous)\"\n",
    "\n",
    "    # Metric keys in the same order as the value columns of the table\n",
    "    metric_keys = (\n",
    "        \"Scaling Speed\",\n",
    "        \"CPU Utilization\",\n",
    "        \"Memory Utilization\",\n",
    "        \"Cost Efficiency\",\n",
    "        \"Resource Provisioning\",\n",
    "    )\n",
    "    rows = [\n",
    "        [f\"Scenario {scenario}: {provider.upper()}\", mode.capitalize()]\n",
    "        + [metrics[key] for key in metric_keys]\n",
    "        for provider, by_setup in results.items()\n",
    "        if setup_type in by_setup\n",
    "        for mode, metrics in by_setup[setup_type].items()\n",
    "    ]\n",
    "\n",
    "    column_names = [\n",
    "        \"Scenario\", \"Mode\", \"Scaling Speed (mins)\",\n",
    "        \"CPU Utilization (%)\", \"Memory Utilization (%)\",\n",
    "        \"Cost Efficiency\", \"Resource Provisioning\"\n",
    "    ]\n",
    "    return pd.DataFrame(rows, columns=column_names)\n",
    "\n",
    "from IPython.display import display\n",
    "\n",
    "# Build one summary table per scenario: homogenous is Scenario 1, heterogenous is Scenario 2\n",
    "homogenous_results = format_results(results, \"homogenous\")\n",
    "heterogenous_results = format_results(results, \"heterogenous\")\n",
    "\n",
    "# Print each heading, then render its table with rich display\n",
    "for heading, table in (\n",
    "    (\"Scenario 1 (Homogenous) Results:\", homogenous_results),\n",
    "    (\"\\nScenario 2 (Heterogenous) Results:\", heterogenous_results),\n",
    "):\n",
    "    print(heading)\n",
    "    display(table)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Render figures at high resolution for the saved image\n",
    "plt.rcParams['figure.dpi'] = 1200\n",
    "\n",
    "# Load the IBM homogenous NodeCount exports for both autoscalers\n",
    "with open('ibm/homogenous/karpenter/NodeCount.json', 'r') as f:\n",
    "    karpenter_data = json.load(f)\n",
    "with open('ibm/homogenous/cas/NodeCount.json', 'r') as f:\n",
    "    cas_data = json.load(f)\n",
    "\n",
    "# Each sample is a [timestamp, value] pair taken from the first result series\n",
    "karpenter_times = [val[0] for val in karpenter_data['data']['result'][0]['values']]\n",
    "karpenter_nodes = [float(val[1]) for val in karpenter_data['data']['result'][0]['values']]\n",
    "\n",
    "cas_times = [val[0] for val in cas_data['data']['result'][0]['values']]\n",
    "cas_nodes = [float(val[1]) for val in cas_data['data']['result'][0]['values']]\n",
    "\n",
    "# Convert timestamps to minutes elapsed since each run's first sample\n",
    "karpenter_times = np.array([(t - karpenter_times[0])/60 for t in karpenter_times])\n",
    "cas_times = np.array([(t - cas_times[0])/60 for t in cas_times])\n",
    "\n",
    "# Plot both node-count series on one axis\n",
    "fig, ax = plt.subplots(figsize=(5, 3))\n",
    "ax.plot(cas_times, cas_nodes, label='CAS', linestyle='-', marker='o', markersize=3)\n",
    "ax.plot(karpenter_times, karpenter_nodes, label='Karpenter', linestyle='--', marker='x', markersize=3)\n",
    "\n",
    "ax.set_xlabel('Time (minutes)')\n",
    "ax.set_ylabel('Number of Nodes')\n",
    "ax.set_title('IBM Node Count Over Time')\n",
    "# The legend is anchored below the axes, outside the plotting area\n",
    "ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.5), ncol=2)\n",
    "ax.grid(True)\n",
    "\n",
    "# Ensure y-axis shows integer values\n",
    "ax.yaxis.set_major_locator(plt.MaxNLocator(integer=True))\n",
    "\n",
    "plt.tight_layout()\n",
    "\n",
    "# Create the output directory if needed so savefig does not fail on a fresh checkout\n",
    "os.makedirs('../plots', exist_ok=True)\n",
    "# bbox_inches accepts 'tight' or a Bbox; 'tight' also keeps the legend placed\n",
    "# below the axes inside the saved image\n",
    "plt.savefig('../plots/ibm_node_count.png', bbox_inches='tight')\n",
    "plt.close(fig)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.20"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}