#!/bin/bash

# Require exactly one argument: the Markdown file to analyse.
if [ "$#" -ne 1 ]; then
    # The usage text is a diagnostic, so it goes to stderr; stdout stays
    # reserved for the per-section report.
    echo "Usage: $0 <markdown-file>" >&2
    exit 1
fi

# Count the whitespace-separated words in a single line of text.
# Reference markers of the form [@...] are removed before counting so they
# do not contribute to the total.
# Arguments: $1 - the line to measure
# Outputs:   the word count, as printed by `wc -w`, on stdout
count_words() {
    # The argument is passed quoted through printf so the text reaches sed
    # untouched: an unquoted `echo $1` would glob-expand characters such as
    # `*` and would swallow a leading `-n` or `-e` as an echo option.
    printf '%s\n' "$1" | sed -E 's/\[@[^]]*\]//g' | wc -w
}

# Variables
current_section="None"     # label printed by the final report if no major heading is seen
seen_major_heading=false   # becomes true at the first major heading
word_count=0
in_major_section=false
in_code_block=false
file=$1

# Stop early when the input cannot be read. Without this check the failed
# redirect skips the loop and the script would still print "None: 0" and
# exit with status 0.
if [[ ! -r "$file" || -d "$file" ]]; then
    echo "Error: cannot read '$file'" >&2
    exit 1
fi

# Read the Markdown file line by line. The `|| [[ -n "$line" ]]` clause keeps
# a final line that is not terminated by a newline, which `read` alone would
# report as end-of-file and drop.
while IFS= read -r line || [[ -n "$line" ]]; do
    # A line starting with ``` toggles code-block state; the fence line
    # itself is never counted.
    if [[ $line == \`\`\`* ]]; then
        if $in_code_block; then
            in_code_block=false
        else
            in_code_block=true
        fi
        continue
    fi

    # Skip lines inside a code block and lines that start with an image (![).
    if $in_code_block || [[ $line == \!\[* ]]; then
        continue
    fi

    # Major section heading: a '#' followed by any character other than '#'.
    if [[ $line == \#[^#]* ]]; then
        # Report the previous major section. A dedicated flag is used instead
        # of comparing the title with "None", so a heading that is literally
        # titled "None" is still reported.
        if $seen_major_heading; then
            printf '%s: %s\n' "$current_section" "$word_count"
        fi
        # Reset the word count and record the new section title: drop the
        # leading '#' and surrounding whitespace using parameter expansion,
        # so the title is never word-split or glob-expanded.
        word_count=0
        current_section=${line#\#}
        current_section=${current_section#"${current_section%%[![:space:]]*}"}
        current_section=${current_section%"${current_section##*[![:space:]]}"}
        seen_major_heading=true
        in_major_section=true
    elif [[ $line == \##* ]]; then
        # Subsection headings (two or more '#') keep counting into the
        # current total.
        in_major_section=true
    fi

    if $in_major_section; then
        # Add this line's words to the running total. Heading lines reach
        # this point too, so their text is included in the count.
        line_word_count=$(count_words "$line")
        word_count=$((word_count + line_word_count))
    fi
done < "$file"

# Print the word count of the last major section. This runs unconditionally,
# so a file without any major heading is reported under the label "None".
printf '%s: %s\n' "$current_section" "$word_count"