#!/bin/bash

# Require exactly one argument: the Markdown file to analyse.
if [[ $# -ne 1 ]]; then
    # The usage text is a diagnostic, so it goes to stderr; that keeps it out
    # of a report that is being piped or captured from stdout.
    printf 'Usage: %s <markdown-file>\n' "$0" >&2
    exit 1
fi

# Count the words in one line of Markdown text.
#
# Before counting, the following are removed from the line:
#   - bracketed citations that start with "@", e.g. [@smith2020]
#   - backslash commands made of letters, e.g. \cite
#   - inline images of the form ![alt](target)
#
# Arguments: $1 - the line to count
# Outputs:   the number of whitespace-separated words, on stdout
count_words() {
    local count
    # The argument is passed through printf, quoted, so that characters such
    # as "*" (Markdown bullets, emphasis) are never glob-expanded into file
    # names and whitespace is left for wc to interpret.
    # The image pattern uses negated bracket classes instead of ".*" so that
    # each image is removed on its own; text between two images on the same
    # line is kept.
    count=$(printf '%s\n' "$1" \
        | sed -E \
            -e 's/\[@[^]]*\]//g' \
            -e 's/\\[a-zA-Z]+//g' \
            -e 's/!\[[^]]*\]\([^)]*\)//g' \
        | wc -w)
    # Arithmetic expansion drops any padding that wc puts around the number.
    printf '%s\n' "$((count))"
}

# Report state shared by the functions below.
current_section="None"      # title of the major ("# ") section being read
current_subsection="None"   # title of the subsection ("## ") being read
section_word_count=0
subsection_word_count=0
in_major_section=false
in_subsection=false
in_code_block=false
file=$1

# Finished subsections of the current major section, kept as two parallel
# indexed arrays (same index = same subsection). Indexed arrays keep the
# subsections in document order and let two subsections share a title; the
# titles are stored verbatim, so they need no escaping to act as keys.
subsection_names=()
subsection_counts=()

# Print the summary of one major section.
#
# Arguments: $1 - section title
#            $2 - total word count of the section
# Globals:   subsection_names, subsection_counts (read)
# Outputs:   "<title> total: <count>" followed by one indented
#            "  <subsection>: <count>" line per recorded subsection
print_section_info() {
    local i
    printf '%s total: %s\n' "$1" "$2"
    for i in "${!subsection_names[@]}"; do
        printf '  %s: %s\n' "${subsection_names[$i]}" "${subsection_counts[$i]}"
    done
}

# Print the text of a heading line: leading "#" characters are removed and
# surrounding whitespace is trimmed. Only parameter expansion is used, so the
# text is never word-split or glob-expanded.
#
# Arguments: $1 - the heading line
heading_text() {
    local text=$1
    text=${text#"${text%%[!#]*}"}            # drop the leading run of '#'
    text=${text#"${text%%[![:space:]]*}"}    # trim leading whitespace
    text=${text%"${text##*[![:space:]]}"}    # trim trailing whitespace
    printf '%s\n' "$text"
}

# Record the subsection that is currently open (if any) and reset the
# subsection state, so its count cannot leak into whatever follows.
#
# Globals: in_subsection, current_subsection, subsection_word_count,
#          subsection_names, subsection_counts (all written)
save_subsection() {
    if [[ $in_subsection == true ]]; then
        subsection_names+=("$current_subsection")
        subsection_counts+=("$subsection_word_count")
    fi
    current_subsection="None"
    subsection_word_count=0
    in_subsection=false
}

# Read a Markdown file and print one summary per major section.
#
# Lines inside ``` fenced blocks are ignored. Text before the first major
# heading is not counted. Heading lines contribute their text, without the
# "#" marker, to the section (and subsection) they open. Subsections are only
# tracked inside a major section, because words outside one are not counted.
# If the file has no major heading, a single "None total: 0" line is printed.
#
# Arguments: $1 - path of the Markdown file
# Globals:   the report state declared above (read and written);
#            count_words, defined earlier in this file, is called per line
# Returns:   1 if the file cannot be read, 0 otherwise
process_markdown() {
    local path=$1
    local line text words

    if [[ ! -r $path || -d $path ]]; then
        printf 'Error: cannot read file: %s\n' "$path" >&2
        return 1
    fi

    # "|| [[ -n $line ]]" keeps a final line that has no trailing newline.
    while IFS= read -r line || [[ -n $line ]]; do
        # A line starting with ``` opens or closes a fenced code block.
        if [[ $line == '```'* ]]; then
            if [[ $in_code_block == true ]]; then
                in_code_block=false
            else
                in_code_block=true
            fi
            continue
        fi
        if [[ $in_code_block == true ]]; then
            continue
        fi

        text=$line
        if [[ $line == '#'[^#]* ]]; then
            # New major section: close the open subsection, report the
            # section that just ended, then start from a clean slate.
            save_subsection
            if [[ $in_major_section == true ]]; then
                print_section_info "$current_section" "$section_word_count"
            fi
            subsection_names=()
            subsection_counts=()
            section_word_count=0
            current_section=$(heading_text "$line")
            in_major_section=true
            text=$current_section
        elif [[ $line == '##'[^#]* ]]; then
            # New subsection: store the previous one before replacing it.
            save_subsection
            if [[ $in_major_section == true ]]; then
                current_subsection=$(heading_text "$line")
                in_subsection=true
                text=$current_subsection
            fi
        fi

        if [[ $in_major_section == true ]]; then
            words=$(count_words "$text")
            section_word_count=$((section_word_count + words))
            if [[ $in_subsection == true ]]; then
                subsection_word_count=$((subsection_word_count + words))
            fi
        fi
    done < "$path"

    # Report whatever was still open when the file ended.
    save_subsection
    print_section_info "$current_section" "$section_word_count"
}

process_markdown "$file"