#!/bin/bash

# Validate the command line: exactly one argument is required.
# Diagnostics go to stderr so they never mix with the report on stdout.
if [ "$#" -ne 1 ]; then
    echo "Usage: $0 <markdown-file>" >&2
    exit 1
fi

# Fail fast when the argument is a directory or cannot be read. Without this
# check the input redirection of the read loop fails and the script still
# finishes with `exit 0`.
if [ ! -r "$1" ] || [ -d "$1" ]; then
    echo "Error: cannot read file '$1'" >&2
    exit 1
fi

# Count the words in one line of Markdown, ignoring non-prose markup.
# Arguments: $1 - the line of text
# Outputs:   the word count, as reported by `wc -w`, on stdout
# Removed before counting:
#   - bracketed citations such as [@smith2020]
#   - backslash commands such as \textbf (only the command name itself)
#   - inline images of the form ![alt](target)
count_words() {
    local cleaned_line
    # printf rather than echo: a line consisting of "-n" or "-e" would be
    # consumed by echo as an option. The image pattern uses negated character
    # classes so it stops at the first "]" / ")" instead of greedily deleting
    # the rest of the line.
    cleaned_line=$(printf '%s\n' "$1" |
        sed -E 's/\[@[^]]*\]//g; s/\\[a-zA-Z]+//g; s/!\[[^]]*\]\([^)]*\)//g')
    printf '%s\n' "$cleaned_line" | wc -w
}

# --- Parser state ------------------------------------------------------------
# Input file: the script's single positional argument.
file="$1"

# Flags, stored as the literal strings "true" / "false".
in_code_block=false
in_subsection=false
in_major_section=false

# Running word totals for the currently open subsection / section.
subsection_word_count=0
section_word_count=0

# Titles of the open subsection / section; "None" means none seen so far.
current_subsection="None"
current_section="None"

# Map for the current section: sanitized subsection title -> word count.
declare -A subsection_counts

# Print the report for one finished section.
# Globals:   subsection_counts (read) - sanitized subsection title -> count
# Arguments: $1 - section title
#            $2 - total word count of the section
# Outputs:   "<title> total: <count>", then one indented line per entry of
#            subsection_counts with underscores in the key shown as spaces.
#            Entries follow the associative array's iteration order, which is
#            not necessarily the order of appearance in the document.
print_section_info() {
    # Locals keep the loop variables from leaking into the caller's scope.
    local key display_key
    printf '%s total: %s\n' "$1" "$2"
    for key in "${!subsection_counts[@]}"; do
        # Parameter expansion instead of spawning echo | sed for every key.
        display_key=${key//_/ }
        printf '  %s: %s\n' "$display_key" "${subsection_counts[$key]}"
    done
}

# Turn a subsection title into a key made only of letters, digits, "-" and "_".
# Arguments: $1 - raw title
# Outputs:   the title on stdout, with every character outside a-z, A-Z, 0-9
#            and "-" (spaces included) replaced by "_"
sanitize_key() {
    # The hyphen sits last in the bracket expression: a "-" placed right after
    # a range (as in "0-9- ") is undefined by POSIX and may be rejected as an
    # invalid range by the regex engine. printf avoids echo treating a title
    # such as "-n" as an option.
    printf '%s\n' "$1" | sed -E 's/[^a-zA-Z0-9-]/_/g'
}

# Heading patterns, kept in variables so "#" needs no escaping inside
# [[ =~ ]]. Each one requires that the marker is NOT followed by another "#",
# so "## x" is not taken for a section and "### x" not for a subsection.
section_heading_re='^#([^#]|$)'
subsection_heading_re='^##([^#]|$)'

# Store the open subsection's running total in subsection_counts, if a
# subsection is open. Reads current_subsection and subsection_word_count.
record_open_subsection() {
    local key
    if [ "$current_subsection" != "None" ]; then
        key=$(sanitize_key "$current_subsection")
        subsection_counts["$key"]=$subsection_word_count
    fi
}

# Read the Markdown file line by line. The `|| [ -n "$line" ]` part keeps a
# final line that has no trailing newline.
while IFS= read -r line || [ -n "$line" ]; do
    # A line starting with ``` opens or closes a fenced code block. The flag
    # must stay exactly "true" or "false" because it is compared below.
    if [[ "$line" == \`\`\`* ]]; then
        if [ "$in_code_block" = true ]; then
            in_code_block=false
        else
            in_code_block=true
        fi
        continue
    fi

    # Lines inside a fenced code block are not counted.
    if [ "$in_code_block" = true ]; then
        continue
    fi

    if [[ "$line" =~ $section_heading_re ]]; then
        # Close the subsection that was open so its words are reported under
        # the section they belong to, then report the finished section.
        record_open_subsection
        if [ "$current_section" != "None" ]; then
            print_section_info "$current_section" "$section_word_count"
        fi

        # Start the new section with a clean slate.
        section_word_count=0
        unset subsection_counts
        declare -A subsection_counts
        current_subsection="None"
        subsection_word_count=0
        in_subsection=false
        current_section=${line#"# "}
        in_major_section=true
    elif [[ "$line" =~ $subsection_heading_re ]]; then
        record_open_subsection
        subsection_word_count=0
        current_subsection=${line#"## "}
        in_subsection=true
    fi

    # Count the line towards the open section and subsection. Heading lines
    # reach this point too, so their text (marker included) is counted.
    if [ "$in_major_section" = true ]; then
        line_word_count=$(count_words "$line")
        section_word_count=$((section_word_count + line_word_count))
        if [ "$in_subsection" = true ]; then
            subsection_word_count=$((subsection_word_count + line_word_count))
        fi
    fi
done < "$file"

# Flush whatever was still open when the input ended.
record_open_subsection
if [ "$current_section" != "None" ]; then
    print_section_info "$current_section" "$section_word_count"
fi

exit 0