# Import required libraries
import os
import json
import pandas as pd
import numpy as np

# Experiment layout: each NodeCount.json export is looked up under
# <base_dir>/<provider>/<setup>/<mode>/ by the traversal further down.
# base_dir is a relative path, so it resolves against the current working
# directory of the process running this script.
base_dir = "artefact-directory/experiment-data"
providers = ["aws", "ibm"]
# NOTE(review): these strings are used verbatim as directory names; confirm
# they match the on-disk folders before "correcting" the spelling.
setups = ["heterogenous", "homogenous"]
modes = ["cas", "karpenter"]

# Nested mapping populated by the traversal below:
# results[provider][setup][mode] -> dict of metric name -> value.
results = {}

# Function to calculate scaling speed
def calculate_scaling_speed(values):
    """Return the mean gap between consecutive sample timestamps.

    Args:
        values: Sequence of ``[timestamp, value]`` pairs. Only the first
            element of each pair is read.

    Returns:
        The arithmetic mean of the differences between successive
        timestamps, expressed in the same unit as the timestamps
        themselves (no unit conversion is applied here). ``nan`` is
        returned when fewer than two samples are supplied, because no
        gap can be measured.
    """
    timestamps = np.asarray([sample[0] for sample in values], dtype=float)
    # A gap needs two samples. Answer NaN directly instead of letting
    # np.mean emit a "Mean of empty slice" RuntimeWarning.
    if timestamps.size < 2:
        return float("nan")
    return np.diff(timestamps).mean()

# Function to calculate average utilization
def calculate_average_utilization(values):
    """Return the mean of the sample values in a series.

    Args:
        values: Iterable of ``[timestamp, value]`` pairs. The second
            element of each pair is converted with ``float()``; the
            first element is ignored.

    Returns:
        The arithmetic mean of the converted values.
    """
    readings = np.fromiter((float(sample[1]) for sample in values), dtype=float)
    return readings.mean()

# Traverse directories and process files
for provider in providers:
    results[provider] = {}
    for setup in setups:
        results[provider][setup] = {}
        for mode in modes:
            # Expected layout: <base_dir>/<provider>/<setup>/<mode>/NodeCount.json
            json_path = os.path.join(base_dir, provider, setup, mode, "NodeCount.json")
            # Combinations without an export are simply left out of results.
            if not os.path.exists(json_path):
                continue

            # JSON is UTF-8 by specification; do not depend on the locale's
            # default encoding. The file is only held open while parsing.
            with open(json_path, "r", encoding="utf-8") as file:
                data = json.load(file)

            series = data["data"]["result"]
            # An empty result list (presumably a query that matched no
            # series) would raise IndexError on [0]; skip it the same way a
            # missing file is skipped.
            if not series:
                continue
            # Only the first series in the export is analysed.
            values = series[0]["values"]

            # Calculate metrics
            scaling_speed = calculate_scaling_speed(values)
            # NOTE(review): this average is computed from the NodeCount.json
            # samples, yet it is stored under the CPU and Memory utilization
            # keys below; confirm whether dedicated exports should be used.
            average_utilization = calculate_average_utilization(values)

            # Store results
            results[provider][setup][mode] = {
                "Scaling Speed": scaling_speed,
                "CPU Utilization": average_utilization,
                "Memory Utilization": average_utilization,  # Placeholder for memory
                "Cost Efficiency": "$X per hour",  # Placeholder for cost
                "Resource Provisioning": "Dynamic" if mode == "karpenter" else "Static",
            }

# Convert results to DataFrame for display
def format_results(results, scenario):
    """Flatten the nested results mapping into a display table.

    Args:
        results: Mapping of provider -> setup -> mode -> metrics dict.
            Each metrics dict must contain the keys "Scaling Speed",
            "CPU Utilization", "Memory Utilization", "Cost Efficiency"
            and "Resource Provisioning".
        scenario: Label interpolated into the "Scenario" column of every
            row.

    Returns:
        A ``pandas.DataFrame`` with one row per (provider, setup, mode)
        entry, in the iteration order of the nested mappings.

    Raises:
        KeyError: If a metrics dict lacks one of the expected keys.
    """
    # NOTE(review): the "Scaling Speed (mins)" header labels the value as
    # minutes, but no unit conversion happens here; confirm the stored
    # metric is really in minutes.
    header = (
        "Scenario", "Setup", "Mode", "Scaling Speed (mins)",
        "CPU Utilization (%)", "Memory Utilization (%)",
        "Cost Efficiency", "Resource Provisioning",
    )
    metric_keys = (
        "Scaling Speed",
        "CPU Utilization",
        "Memory Utilization",
        "Cost Efficiency",
        "Resource Provisioning",
    )
    table = [
        [
            f"Scenario {scenario}: {provider.upper()}",
            setup_name.capitalize(),
            mode_name.capitalize(),
            *(metrics[key] for key in metric_keys),
        ]
        for provider, per_setup in results.items()
        for setup_name, per_mode in per_setup.items()
        for mode_name, metrics in per_mode.items()
    ]
    return pd.DataFrame(table, columns=list(header))

# Format results for Scenario 1 and Scenario 2
# Both tables are built from the same results mapping; only the scenario
# label in the first column differs.
scenario_1_results = format_results(results, 1)
scenario_2_results = format_results(results, 2)

# Display results as tables. IPython is an optional third-party package:
# use its rich display when available, otherwise fall back to plain print so
# the script still completes where IPython is not installed.
try:
    from IPython.display import display
except ImportError:
    display = print
print("Scenario 1 Results:")
display(scenario_1_results)
print("Scenario 2 Results:")
display(scenario_2_results)