#!/bin/bash

# Require exactly one argument: the path of the Markdown file to analyse.
# The usage message is a diagnostic, so it is written to stderr rather than
# stdout; the exit status stays 1.
if [[ $# -ne 1 ]]; then
    printf 'Usage: %s <markdown-file>\n' "$0" >&2
    exit 1
fi

# Count the words on one line of Markdown, ignoring markup that is not prose.
# Arguments: $1 - the line of text
# Outputs:   the word count, exactly as printed by `wc -w`, on stdout
#
# Removed before counting, in this order:
#   [@...]       citation references
#   \name        LaTeX command names (a backslash followed by letters);
#                any {argument} that follows is kept and counted
#   ![...](...)  images; the match is greedy, so it runs from the first "!["
#                to the last ")" on the line
#                NOTE(review): text between two images on one line is dropped
#                too - confirm that is intended.
count_words() {
    # printf with a quoted argument passes the line through verbatim. An
    # unquoted `echo $1` glob-expands characters such as '*' (list bullets,
    # emphasis) against the current directory and treats a line like "-n" as
    # an echo option; both corrupt the count.
    printf '%s\n' "$1" \
        | sed -E \
            -e 's/\[@[^]]*\]//g' \
            -e 's/\\[a-zA-Z]+//g' \
            -e 's/!\[.*\]\(.*\)//g' \
        | wc -w
}

# Walk the Markdown file and report word counts per heading.
#
# For every top-level section ("# Title") one line is printed:
#     <title> total: <words>
# followed by one indented line per subsection ("## Title") of that section,
# in document order:
#       <subtitle>: <words>
#
# Lines between ``` fences are skipped. Nothing is counted until the first
# top-level heading has been seen. Deeper headings ("###" and below) are not
# treated as headings; they are counted as ordinary text.

# State
current_section="None"       # title of the section being counted
current_subsection="None"    # title of the subsection being counted
section_word_count=0
subsection_word_count=0
in_major_section=false       # these three flags hold "true"/"false" and are
in_subsection=false          # executed as commands by `if $flag`
in_code_block=false
subsection_report=""         # finished subsection lines, held back until the
                             # section's own total has been printed
file=$1

# Fail early instead of reporting "None total: 0" for a file that cannot be read.
if [[ ! -r $file || -d $file ]]; then
    printf '%s: cannot read file: %s\n' "$0" "$file" >&2
    exit 1
fi

# Print the title of heading line $1: drop the leading run of '#', then trim
# surrounding whitespace. Pure parameter expansion, so characters such as '*'
# in a title are never glob-expanded.
heading_title() {
    local title=$1
    title=${title#"${title%%[!#]*}"}
    title=${title#"${title%%[![:space:]]*}"}
    title=${title%"${title##*[![:space:]]}"}
    printf '%s\n' "$title"
}

# Append the subsection that is currently open (if any) to the pending report.
close_subsection() {
    if $in_subsection; then
        subsection_report+="  $current_subsection: $subsection_word_count"$'\n'
    fi
}

# Print the current section's total, then all of its subsections.
print_section_report() {
    close_subsection
    printf '%s total: %s\n' "$current_section" "$section_word_count"
    printf '%s' "$subsection_report"
}

# Read the Markdown file line by line. The `|| [[ -n $line ]]` keeps a final
# line that has no trailing newline, which `read` alone would drop.
while IFS= read -r line || [[ -n $line ]]; do
    # A fence line opens or closes a code block and is never counted itself.
    # The flag is flipped explicitly: `flag=!$flag` would store the literal
    # string "!false", which is not a runnable command.
    if [[ $line == '```'* ]]; then
        if $in_code_block; then
            in_code_block=false
        else
            in_code_block=true
        fi
        continue
    fi

    # Skip everything inside a code block
    if $in_code_block; then
        continue
    fi

    if [[ $line == '#'[^#]* ]]; then
        # New top-level section: report the one that just ended, then start
        # over. Subsections seen before the very first section are discarded
        # here; they can only ever have a count of 0.
        if $in_major_section; then
            print_section_report
        fi
        section_word_count=0
        subsection_word_count=0
        subsection_report=""
        current_section=$(heading_title "$line")
        current_subsection="None"
        in_major_section=true
        in_subsection=false
    elif [[ $line == '##'[^#]* ]]; then
        # New subsection: remember the previous one and start a fresh count
        close_subsection
        subsection_word_count=0
        current_subsection=$(heading_title "$line")
        in_subsection=true
    fi

    # Add this line to the open section and, if there is one, the open
    # subsection.
    # NOTE(review): heading lines fall through to here as well, so the '#'
    # marker and the title words are part of the totals - confirm that is
    # intended.
    if $in_major_section; then
        line_word_count=$(count_words "$line")
        section_word_count=$((section_word_count + line_word_count))
        if $in_subsection; then
            subsection_word_count=$((subsection_word_count + line_word_count))
        fi
    fi
done < "$file"

# Report the last section. For a file without any top-level heading this
# prints "None total: 0".
print_section_report