#!/bin/bash

# Validate the command line: exactly one argument that names a readable,
# non-directory path. Diagnostics go to stderr so they never mix with the
# report written to stdout.
if [[ $# -ne 1 ]]; then
    printf 'Usage: %s <markdown-file>\n' "$0" >&2
    exit 1
fi
# Fail early: without this check a bad path only makes the read loop's
# redirection fail, after which an empty report would still be printed.
if [[ -d $1 || ! -r $1 ]]; then
    printf '%s: cannot read file: %s\n' "$0" "$1" >&2
    exit 1
fi

#######################################
# Count the words in one line of Markdown text.
# Before counting, the following are removed from the line:
#   - citation references of the form [@...]
#   - LaTeX command names (a backslash followed by letters)
#   - inline images of the form ![alt](target)
# Arguments: $1 - the line of text
# Outputs:   the word count, as printed by `wc -w`, on stdout
#######################################
count_words() {
    # printf with a quoted argument passes the text through verbatim; an
    # unquoted echo would let glob characters in the text (e.g. '*') expand
    # to filenames and inflate the count.
    # The image pattern uses negated bracket expressions so that each image
    # is removed on its own and text between two images on one line survives.
    printf '%s\n' "$1" \
        | sed -E \
            -e 's/\[@[^]]*\]//g' \
            -e 's/\\[a-zA-Z]+//g' \
            -e 's/!\[[^]]*\]\([^)]*\)//g' \
        | wc -w
}

# Parser state shared by the functions and the main loop.

# Path of the Markdown file to analyse (first positional argument).
file="$1"

# Word counts of the subsections of the major section currently being read,
# keyed by subsection title.
declare -A subsection_counts

# Titles of the section / subsection being read; "None" means none seen yet.
current_subsection="None"
current_section="None"

# Running word totals for the current subsection and major section.
subsection_word_count=0
section_word_count=0

# Boolean flags. They hold the command names `true` / `false`, which the
# main loop executes directly (`if $flag; then ...`).
in_code_block=false
in_subsection=false
in_major_section=false

#######################################
# Print the word-count report for one major section.
# Globals:   subsection_counts (read) - associative array mapping subsection
#            keys to word counts; underscores in a key are shown as spaces
# Arguments: $1 - section title
#            $2 - total word count of the section
# Outputs:   "<title> total: <count>" followed by one indented
#            "<subsection>: <count>" line per array entry, on stdout.
#            Entries appear in whatever order bash yields the array keys,
#            which is not necessarily document order.
#######################################
print_section_info() {
    local key
    printf '%s total: %s\n' "$1" "$2"
    for key in "${!subsection_counts[@]}"; do
        # Map underscores back to spaces with parameter expansion: no sed
        # process per key, and no glob expansion or re-splitting of the key.
        printf '  %s: %s\n' "${key//_/ }" "${subsection_counts[$key]}"
    done
}

# Print the title of a Markdown heading line: the leading run of '#'
# markers and any surrounding whitespace are stripped.
# Arguments: $1 - the heading line
heading_title() {
    local title=$1
    title=${title#"${title%%[!#]*}"}          # drop the leading '#' markers
    title=${title#"${title%%[![:space:]]*}"}  # trim leading whitespace
    title=${title%"${title##*[![:space:]]}"}  # trim trailing whitespace
    printf '%s\n' "$title"
}

# Store the running word count of the current subsection (if there is one)
# in subsection_counts. Spaces in the title become underscores in the key;
# print_section_info maps them back for display.
flush_subsection() {
    local key
    if [[ $current_subsection != "None" ]]; then
        key=${current_subsection// /_}
        # bash rejects an empty associative-array subscript.
        [[ -n $key ]] || key="(untitled)"
        subsection_counts["$key"]=$subsection_word_count
    fi
}

# Read the Markdown file line by line. The `|| [[ -n $line ]]` clause also
# processes a final line that lacks a trailing newline.
while IFS= read -r line || [[ -n $line ]]; do
    # A line starting with ``` opens or closes a fenced code block; neither
    # the fence lines nor the lines inside the block are counted.
    if [[ $line == \`\`\`* ]]; then
        if $in_code_block; then
            in_code_block=false
        else
            in_code_block=true
        fi
        continue
    fi
    if $in_code_block; then
        continue
    fi

    if [[ $line == '#'[^#]* ]]; then
        # New major section: finish and report the previous one first,
        # including its last subsection, which has not been stored yet.
        if $in_major_section; then
            flush_subsection
            print_section_info "$current_section" "$section_word_count"
        fi
        # Start the new section with a clean slate. `unset` also drops the
        # associative attribute, so the array must be re-declared with -A;
        # otherwise string keys would be evaluated as arithmetic indices.
        unset subsection_counts
        declare -A subsection_counts
        current_subsection="None"
        subsection_word_count=0
        in_subsection=false
        section_word_count=0
        current_section=$(heading_title "$line")
        in_major_section=true
    elif [[ $line == '##'[^#]* ]]; then
        # New subsection: store the previous one, then start counting afresh.
        flush_subsection
        subsection_word_count=0
        current_subsection=$(heading_title "$line")
        in_subsection=true
    fi

    # Add this line's words to the running totals. Lines before the first
    # major heading are ignored. Heading lines are counted like any other
    # text, marker included.
    if $in_major_section; then
        line_word_count=$(count_words "$line")
        section_word_count=$((section_word_count + line_word_count))
        if $in_subsection; then
            subsection_word_count=$((subsection_word_count + line_word_count))
        fi
    fi
done < "$file"

# Report the last major section together with its final subsection.
flush_subsection
print_section_info "$current_section" "$section_word_count"