#!/bin/bash
set -e

# --- Blog settings ------------------------------------------------------
# All blog paths hang off BLOG_DIR; the feed and sitemap paths are relative
# to the directory the script is run from.
BLOG_DIR="blog"
TEMPLATE_FILE="${BLOG_DIR}/template.html"
INDEX_FILE="${BLOG_DIR}/index.html"
RSS_FILE="rss.xml"
SITEMAP_FILE="sitemap.xml"
SITE_URL="https://josie.lol"

# --- Talks settings -----------------------------------------------------
TALKS_DIR="talks"
TALKS_TEMPLATE_FILE="${TALKS_DIR}/template.html"
TALKS_INDEX_FILE="${TALKS_DIR}/index.html"

# Ensure required commands are available.
# pandoc converts the Markdown sources to HTML, so stop before doing any work
# when it is missing. The diagnostic goes to stderr so it is not mixed into
# captured or piped build output.
if ! command -v pandoc > /dev/null 2>&1; then
    {
        echo "Error: pandoc is not installed. Please install it with:"
        echo "  sudo pacman -S pandoc  # For Arch Linux"
    } >&2
    exit 1
fi

printf '%s\n' "🔄 Building blog posts from Markdown..."

# First run only: seed the feed with an empty skeleton. The POSTS_START /
# POSTS_END comment pair marks the region that is regenerated on every build.
if [[ ! -f "${RSS_FILE}" ]]; then
    cat << EOF > "${RSS_FILE}"
<?xml version="1.0" encoding="UTF-8" ?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
<channel>
  <title>josie.lol Blog</title>
  <link>${SITE_URL}</link>
  <description>Thoughts on cloud-native tech, infrastructure, security, and developer experience</description>
  <atom:link href="${SITE_URL}/${RSS_FILE}" rel="self" type="application/rss+xml" />
  <language>en-us</language>
  <lastBuildDate>$(date -R)</lastBuildDate>
  
<!-- POSTS_START -->
<!-- POSTS_END -->

</channel>
</rss>
EOF
    printf '%s\n' "📝 Created new RSS feed file"
fi

# Post registry filled by the loop below:
#   posts[<html file>] = "title|date|description|tags_file|guest_post|author"
#   post_files         = the same html file names, in processing order
# The record is pipe-delimited, so a "|" inside a metadata value shifts the
# fields for every consumer of the record.
declare -A posts
post_files=()

# Print the value of front-matter key $1 from Markdown file $2: the first line
# starting with "<key>:", with the key and any following spaces removed.
# Prints nothing when the key is absent.
front_matter_value() {
    grep -m 1 "^$1:" "$2" | sed "s/^$1: *//"
}

# Escape $1 for literal use as the replacement half of a sed "s|a|b|" command.
# Backslash, "&" (sed's whole-match reference) and the "|" delimiter each get
# a leading backslash; without this a title such as "Q&A" would have its "&"
# expanded to the placeholder name.
escape_sed_replacement() {
    printf '%s' "$1" | sed -e 's/[\\&|]/\\&/g'
}

# Print line $1 with the first occurrence of marker $2 replaced by value $3.
# The value is inserted verbatim. "${line/marker/$value}" is avoided on
# purpose: bash 5.2+ expands an unquoted "&" in the replacement to the matched
# text, which corrupts HTML entities such as "&amp;".
fill_placeholder() {
    local line=$1 marker=$2 value=$3
    printf '%s%s%s\n' "${line%%"$marker"*}" "$value" "${line#*"$marker"}"
}

# Print one badge <span> per tag found in a "[tag1, tag2, ...]" value ($1).
# Tags are split on commas only, so multi-word tags stay intact; surrounding
# whitespace and one layer of double/single quotes are stripped, and empty
# entries are skipped. Prints nothing when $1 has no bracketed list.
render_tag_spans() {
    local tags_line=$1 tag
    local -a tag_array=()

    [[ "$tags_line" =~ \[(.*)\] ]] || return 0
    IFS=',' read -r -a tag_array <<< "${BASH_REMATCH[1]}"

    for tag in "${tag_array[@]}"; do
        # Trim leading, then trailing, whitespace.
        tag="${tag#"${tag%%[![:space:]]*}"}"
        tag="${tag%"${tag##*[![:space:]]}"}"
        # Remove any quotes around the tag.
        tag="${tag#\"}"
        tag="${tag%\"}"
        tag="${tag#\'}"
        tag="${tag%\'}"
        [[ -n "$tag" ]] || continue
        printf '%s' "<span class=\"text-xs mr-2 mb-2 px-2 py-1 bg-purple-900 bg-opacity-30 rounded-md\">$tag</span>"
    done
}

# Collect the Markdown sources one path per line (reverse name order) so that
# paths containing spaces or glob characters survive intact.
mapfile -t md_files < <(find "$BLOG_DIR" -name "*.md" | sort -r)

# Process each markdown file in the blog directory
for md_file in "${md_files[@]}"; do
    filename=${md_file##*/}
    html_filename="${filename%.md}.html"

    # Skip processing template and index files
    if [[ "$filename" == "template.md" || "$filename" == "index.md" ]]; then
        continue
    fi

    echo "🔨 Processing: $filename"

    # Extract metadata from markdown file
    title=$(front_matter_value title "$md_file")
    date=$(front_matter_value date "$md_file")
    author=$(front_matter_value author "$md_file")
    description=$(front_matter_value description "$md_file")
    tags_line=$(front_matter_value tags "$md_file")

    # Extract guest post metadata
    guest_post=$(front_matter_value guest_post "$md_file")
    guest_bio=$(front_matter_value guest_bio "$md_file")
    guest_link=$(front_matter_value guest_link "$md_file")

    # Tags without the surrounding brackets, for <meta> tags
    tags_meta=${tags_line//\[/}
    tags_meta=${tags_meta//\]/}

    # Create an ISO date for meta tags
    date_iso="${date}T12:00:00Z"

    # Badge markup for the tags shown on the post page
    tags_html=$(render_tag_spans "$tags_line")

    # Convert markdown to HTML using pandoc, with syntax highlighting enabled
    content_file=$(mktemp)
    pandoc -f markdown -t html --highlight-style=pygments "$md_file" > "$content_file"

    # Fix code blocks - adjust data attributes while preserving Pandoc's highlighting
    final_content_file=$(mktemp)
    sed -E '
    # For div.sourceCode with pre tags on same line
    s|<div class="sourceCode" id="[^"]+"><pre class="sourceCode ([^"]+)">|<div class="sourceCode"><pre data-prompt="\1" class="sourceCode \1">|g

    # For div.sourceCode with pre tags on next line - join lines first
    /<div class="sourceCode"/ {
        N
        s|<div class="sourceCode" id="[^"]+">[\n[:space:]]*<pre[\n[:space:]]+class="sourceCode ([^"]+)">|<div class="sourceCode"><pre data-prompt="\1" class="sourceCode \1">|g
    }

    # For inline code blocks
    s|<pre><code class="sourceCode ([^"]+)">|<pre data-prompt="\1"><code class="sourceCode language-\1">|g

    # For code blocks without language
    s|<pre><code>|<pre data-prompt="bash"><code class="no-language">|g
    ' "$content_file" > "$final_content_file"

    # Turn <p><img src="..." alt="..." /></p> into a <figure> whose caption is
    # the alt text; the result replaces the pandoc output in content_file.
    sed -E 's|<p><img src="([^"]+)" alt="([^"]+)" /></p>|<figure>\n  <img src="\1" alt="\2" class="w-full rounded-md shadow-lg" />\n  <figcaption>\2</figcaption>\n</figure>|g' "$final_content_file" > "$content_file"

    # Fill the single-line placeholders of the template in one pass. The
    # expressions run in the listed order on every line, and every value is
    # escaped so that it is inserted literally.
    working_file=$(mktemp)
    sed \
        -e "s|TITLE_PLACEHOLDER|$(escape_sed_replacement "$title")|g" \
        -e "s|DESCRIPTION_PLACEHOLDER|$(escape_sed_replacement "$description")|g" \
        -e "s|AUTHOR_PLACEHOLDER|$(escape_sed_replacement "$author")|g" \
        -e "s|DATE_PLACEHOLDER|$(escape_sed_replacement "$date")|g" \
        -e "s|DATE_ISO_PLACEHOLDER|$(escape_sed_replacement "$date_iso")|g" \
        -e "s|TAGS_PLACEHOLDER|$(escape_sed_replacement "$tags_meta")|g" \
        -e "s|URL_PLACEHOLDER|$(escape_sed_replacement "$html_filename")|g" \
        "$TEMPLATE_FILE" > "$working_file"

    # Generate guest post badge and bio HTML
    guest_badge_html=""
    guest_bio_html=""

    if [[ "$guest_post" == "true" ]]; then
        guest_badge_html='<span class="guest-post-badge">Guest Post</span>'

        if [[ -n "$guest_bio" ]]; then
            # Escape special characters in guest bio
            escaped_bio=$(printf '%s' "$guest_bio" | sed 's/&/\&amp;/g; s/</\&lt;/g; s/>/\&gt;/g; s/"/\&quot;/g')
            guest_bio_html="<div class=\"guest-author-bio\"><h3>About the Guest Author</h3><p>$escaped_bio</p>"
            if [[ -n "$guest_link" ]]; then
                guest_bio_html+="<p><a href=\"$guest_link\" target=\"_blank\" rel=\"noopener noreferrer\">Learn more about $author →</a></p>"
            fi
            guest_bio_html+="</div>"
        fi
    fi

    # Assemble the page line by line. The first CONTENT_PLACEHOLDER line is
    # replaced by the whole converted article and any later one is dropped.
    # "|| [[ -n $line ]]" keeps the last template line even when the file has
    # no trailing newline.
    final_html=$(mktemp)
    content_found=false
    while IFS= read -r line || [[ -n "$line" ]]; do
        if [[ "$line" == *"TAGS_HTML_PLACEHOLDER"* ]]; then
            fill_placeholder "$line" "TAGS_HTML_PLACEHOLDER" "$tags_html"
        elif [[ "$line" == *"GUEST_BADGE_PLACEHOLDER"* ]]; then
            fill_placeholder "$line" "GUEST_BADGE_PLACEHOLDER" "$guest_badge_html"
        elif [[ "$line" == *"GUEST_BIO_PLACEHOLDER"* ]]; then
            fill_placeholder "$line" "GUEST_BIO_PLACEHOLDER" "$guest_bio_html"
        elif [[ "$line" == *"CONTENT_PLACEHOLDER"* ]]; then
            if [[ "$content_found" == false ]]; then
                content_found=true
                cat "$content_file"
            fi
        else
            printf '%s\n' "$line"
        fi
    done < "$working_file" > "$final_html"

    # Copy the final HTML to the target location
    cp "$final_html" "$BLOG_DIR/$html_filename"

    # Clean up temp files
    rm "$content_file" "$final_content_file" "$working_file" "$final_html"

    echo "✅ Generated: $BLOG_DIR/$html_filename"

    # Save the tag badges as they ended up in the generated page. They go to a
    # temp file because the markup cannot live inside the "|"-delimited record.
    # grep finding nothing is not an error: the file is simply left empty.
    tags_html_file=$(mktemp)
    grep -o '<span class="text-xs mr-2 mb-2.*rounded-md">.*</span>' "$BLOG_DIR/$html_filename" > "$tags_html_file" || true

    posts["$html_filename"]="$title|$date|$description|$tags_html_file|$guest_post|$author"
    post_files+=("$html_filename")
done

# Fill the global array sorted_files with the entries of post_files ordered
# newest first, using the date stored as the second "|"-separated field of
# each posts[] record (assumes dates that sort lexically, e.g. YYYY-MM-DD).
# mapfile reads one file name per line, so names containing spaces or glob
# characters are kept intact and the global IFS is never modified.
sort_posts_by_date() {
    local file post_date

    mapfile -t sorted_files < <(
        for file in "${post_files[@]}"; do
            IFS='|' read -r _ post_date _ <<< "${posts[$file]}"
            printf '%s|%s\n' "$post_date" "$file"
        done | sort -r | cut -d'|' -f2-
    )
}

sort_posts_by_date

# Rebuild the post list inside INDEX_FILE.
# Everything between the BLOG_POST_LIST_START and BLOG_POST_LIST_END marker
# lines is replaced by one entry per file in sorted_files (metadata comes from
# the posts[] records); all other lines are copied through unchanged.
# Returns non-zero, leaving the file untouched, when the START marker is
# present but the END marker is not: in that case the rest of the page would
# otherwise be silently discarded.
update_blog_index() {
    local temp_index in_post_list=false line file
    local title date description tags_html_file is_guest author_name
    local tags_html guest_badge

    if grep -q -- "<!-- BLOG_POST_LIST_START -->" "$INDEX_FILE" \
        && ! grep -q -- "<!-- BLOG_POST_LIST_END -->" "$INDEX_FILE"; then
        echo "⚠️ Could not find BLOG_POST_LIST_END marker in $INDEX_FILE" >&2
        return 1
    fi

    temp_index=$(mktemp)

    # "|| [[ -n $line ]]" keeps the final line when the file does not end
    # with a newline.
    while IFS= read -r line || [[ -n "$line" ]]; do
        if [[ "$line" == *"<!-- BLOG_POST_LIST_START -->"* ]]; then
            printf '%s\n' "$line"
            in_post_list=true

            # Add all posts
            for file in "${sorted_files[@]}"; do
                IFS='|' read -r title date description tags_html_file is_guest author_name <<< "${posts[$file]}"

                # Tag badges were saved to a file while the post was built
                tags_html=$(< "$tags_html_file")

                guest_badge=""
                if [[ "$is_guest" == "true" ]]; then
                    guest_badge=' <span class="guest-post-badge">Guest Post</span>'
                fi

                cat << EOF
        <div class="py-4 border-b border-gray-700 dark:border-gray-600">
          <p class="text-sm text-gray-500 mb-2">$date · by $author_name$guest_badge</p>
          <h2 class="text-xl mb-2">
            <a href="$file" class="text-purple-500 dark:text-purple-400 no-underline transition-colors duration-200 hover:text-black dark:hover:text-white hover:underline">$title</a>
          </h2>
          <p class="text-gray-300 mb-2">$description</p>
          <div class="flex flex-wrap">
            $tags_html
          </div>
        </div>
EOF
            done
        elif [[ "$line" == *"<!-- BLOG_POST_LIST_END -->"* ]]; then
            printf '%s\n' "$line"
            in_post_list=false
        elif [[ "$in_post_list" == false ]]; then
            printf '%s\n' "$line"
        fi
    done < "$INDEX_FILE" > "$temp_index"

    # Replace the original index file
    cp "$temp_index" "$INDEX_FILE"
    rm "$temp_index"
}

# Update blog index.html with post list
if [[ -f "$INDEX_FILE" ]]; then
    echo "🔄 Updating blog index..."
    update_blog_index
    echo "✅ Updated blog index page"
fi

# Print $1 with the XML-significant characters &, < and > replaced by their
# entities, so the text can be placed inside an element.
xml_escape() {
    printf '%s' "$1" | sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g' -e 's/>/\&gt;/g'
}

# Rebuild RSS_FILE in place.
# The lines between the POSTS_START and POSTS_END markers are replaced by one
# <item> per file in sorted_files, the <lastBuildDate> line is refreshed, and
# every other line is copied through. Titles are XML-escaped because a bare
# "&" or "<" would make the whole feed unparseable; descriptions are wrapped
# in CDATA as before.
update_rss_feed() {
    local temp_rss in_posts=false line file
    local title date description rfc_date

    temp_rss=$(mktemp)

    # "|| [[ -n $line ]]" keeps the final line when the file does not end
    # with a newline.
    while IFS= read -r line || [[ -n "$line" ]]; do
        if [[ "$line" == *"<!-- POSTS_START -->"* ]]; then
            printf '%s\n' "$line"
            in_posts=true

            # Add all posts
            for file in "${sorted_files[@]}"; do
                # Trailing "_" absorbs the record fields not needed here
                IFS='|' read -r title date description _ <<< "${posts[$file]}"

                # Convert date to RFC822 format for RSS; fall back to the
                # current time when the post date cannot be parsed
                rfc_date=$(date -d "$date" -R 2>/dev/null || date -R)

                cat << EOF
  <item>
    <title>$(xml_escape "$title")</title>
    <link>${SITE_URL}/blog/$file</link>
    <description><![CDATA[$description]]></description>
    <pubDate>$rfc_date</pubDate>
    <guid>${SITE_URL}/blog/$file</guid>
  </item>
EOF
            done
        elif [[ "$line" == *"<!-- POSTS_END -->"* ]]; then
            printf '%s\n' "$line"
            in_posts=false
        elif [[ "$line" == *"<lastBuildDate>"* ]]; then
            # Update the lastBuildDate
            printf '%s\n' "  <lastBuildDate>$(date -R)</lastBuildDate>"
        elif [[ "$in_posts" == false ]]; then
            printf '%s\n' "$line"
        fi
    done < "$RSS_FILE" > "$temp_rss"

    # Replace the original RSS file
    cp "$temp_rss" "$RSS_FILE"
    rm "$temp_rss"
}

# Update RSS feed with posts
if [[ -f "$RSS_FILE" ]]; then
    echo "🔄 Updating RSS feed..."
    update_rss_feed
    echo "✅ Updated RSS feed"
fi

# Delete the per-post tag files whose paths are stored as the fourth field of
# each posts[] record. The record has six "|"-separated fields, so the read
# must end with a throwaway variable: without it the path variable also
# receives "|guest_post|author", the -f test never succeeds and the temp
# files are never removed.
remove_post_tag_files() {
    local file tags_html_file

    for file in "${post_files[@]}"; do
        IFS='|' read -r _ _ _ tags_html_file _ <<< "${posts[$file]}"

        # Remove temp tag file if it exists
        if [[ -f "$tags_html_file" ]]; then
            rm -f -- "$tags_html_file"
        fi
    done
}

# Clean up any remaining temp files
remove_post_tag_files

echo "🎉 Blog build completed successfully!"

# Blog is done; the talks index comes next.
echo "🔄 Building talks index..."

# The talks index is only rewritten between two marker comments, so both must
# be present before anything is touched. The first missing marker aborts.
for marker in TALKS_LIST_START TALKS_LIST_END; do
    if grep -q "<!-- ${marker} -->" "$TALKS_INDEX_FILE"; then
        continue
    fi
    echo "⚠️ Could not find ${marker} marker in $TALKS_INDEX_FILE"
    exit 1
done

# Create a temp file for the rebuilt index
temp_index=$(mktemp)

# Copy everything up to and including the TALKS_LIST_START marker
sed -n '1,/<!-- TALKS_LIST_START -->/p' "$TALKS_INDEX_FILE" > "$temp_index"

# Find the talk Markdown files (newline-separated paths). The index and
# template files are excluded by exact name inside find: a "grep -v" filter
# would also drop any path merely containing those names, and it exits
# non-zero when nothing is left, which aborts the script under "set -e".
talk_files=$(find "$TALKS_DIR" -name "*.md" ! -name "index.md" ! -name "template.md")

# Count paths, not lines of echo output: an empty list must report 0.
if [[ -n "$talk_files" ]]; then
    talk_count=$(grep -c '' <<< "$talk_files")
else
    talk_count=0
fi
echo "📊 Found $talk_count talk markdown files"

# Now manually add our talks content
echo "        <!-- Auto-generated talks list -->" >> "$temp_index"

# Print the value of front-matter key $1 from talk file $2: the first line
# starting with "<key>:", with the key and any following spaces removed.
talk_field() {
    grep -m 1 "^$1:" "$2" | sed "s/^$1: *//"
}

# Parse the "conferences:" list of talk file $1 into the global variable
# conferences_html, one <div> per conference, and print progress messages to
# stdout. Each entry is rendered as "name, location (date)" and is emitted
# when its "date:" line is read, so date is expected to be the last key of an
# entry. Parsing stops at "---", at an empty line, or at the next top-level
# front-matter key (a line that starts with neither whitespace nor "-");
# without that last rule a later top-level "date:" would be appended as a
# bogus conference.
build_conferences_html() {
    local md_file=$1
    local conf_line line in_conferences=false current_conf=""
    local conf_name conf_location conf_date
    local top_level_re='^[^[:space:]-]'

    conferences_html=""

    # Find the line number of 'conferences:' in the file
    conf_line=$(grep -n -m 1 "^conferences:" "$md_file" | cut -d: -f1)
    if [[ -z "$conf_line" ]]; then
        echo "  No conferences section found"
        return 0
    fi
    echo "  Found conferences section at line $conf_line"

    while IFS= read -r line || [[ -n "$line" ]]; do
        if [[ "$in_conferences" == false ]]; then
            if [[ "$line" =~ ^conferences: ]]; then
                in_conferences=true
            fi
            continue
        fi

        if [[ "$line" == "---" || -z "$line" ]]; then
            # End of frontmatter or empty line
            break
        fi

        if [[ "$line" =~ $top_level_re ]]; then
            # Next top-level key: the conferences list is over
            break
        fi

        if [[ "$line" =~ ^[[:space:]]*- ]]; then
            # Start of a new conference entry
            echo "    New conference entry: $line"
            current_conf=""
        fi

        if [[ "$line" =~ name: ]]; then
            conf_name=$(echo "$line" | sed 's/.*name:[[:space:]]*//')
            echo "    Name: $conf_name"
            current_conf="$conf_name"
        fi

        if [[ "$line" =~ location: ]]; then
            conf_location=$(echo "$line" | sed 's/.*location:[[:space:]]*//')
            echo "    Location: $conf_location"
            current_conf="$current_conf, $conf_location"
        fi

        if [[ "$line" =~ date: ]]; then
            conf_date=$(echo "$line" | sed 's/.*date:[[:space:]]*//')
            echo "    Date: $conf_date"
            current_conf="$current_conf ($conf_date)"

            # Add completed conference to HTML
            conferences_html+="<div class=\"mb-1\">$current_conf</div>"
            echo "    Added conference: $current_conf"
        fi
    done < "$md_file"
}

# Process each markdown file. talk_files holds one path per line; reading it
# with mapfile keeps paths containing spaces in one piece.
mapfile -t talk_paths <<< "$talk_files"

for md_file in "${talk_paths[@]}"; do
    # An empty talk_files still yields one empty element
    [[ -n "$md_file" ]] || continue

    filename=${md_file##*/}
    echo "🔨 Processing talk: $filename"

    # Extract basic metadata with debug output
    title=$(talk_field title "$md_file")
    echo "  Title: $title"

    date=$(talk_field date "$md_file")
    echo "  Date: $date"

    abstract=$(talk_field abstract "$md_file")
    echo "  Abstract: $abstract"

    # Extract and fix slides path
    slides=$(talk_field slides "$md_file")
    if [[ "$slides" == "/slides/"* ]]; then
        slides="/talks${slides}"
    fi
    echo "  Slides: $slides"

    # Extract recording link
    recording=$(talk_field recording "$md_file")
    echo "  Recording: $recording"

    # Build links HTML
    links_html=""
    if [[ -n "$slides" ]]; then
        links_html+="<a href=\"$slides\" class=\"text-purple-500 dark:text-purple-400 mr-4 no-underline transition-colors duration-200 hover:text-white hover:underline\">Slides</a>"
    fi
    if [[ -n "$recording" ]]; then
        links_html+="<a href=\"$recording\" class=\"text-purple-500 dark:text-purple-400 no-underline transition-colors duration-200 hover:text-white hover:underline\">Recording</a>"
    fi

    # Debug: show the conferences section
    echo "  Extracting conferences section:"
    sed -n '/^conferences:/,/^---/p' "$md_file"

    # Fills conferences_html
    build_conferences_html "$md_file"

    echo "  Conferences HTML: $conferences_html"

    # Write the talk entry to the index file
    cat >> "$temp_index" << EOF
        <div class="py-6 border-b border-gray-700 dark:border-gray-600">
          <h2 class="text-xl font-bold mb-2 text-purple-400">$title</h2>
          <p class="text-gray-300 mb-4">$abstract</p>
          <div class="flex mb-4">
            $links_html
          </div>
          <div class="text-sm text-gray-400">
            <p class="mb-2 font-bold">Presented at:</p>
            $conferences_html
          </div>
        </div>
EOF

    echo "✅ Added $title to index"
done

# Append the tail of the existing page, from the TALKS_LIST_END marker line
# through the last line, after the freshly generated entries.
sed -n -e '/<!-- TALKS_LIST_END -->/,$p' "${TALKS_INDEX_FILE}" >> "${temp_index}"

# Swap the rebuilt page in and discard the scratch copy.
cp "${temp_index}" "${TALKS_INDEX_FILE}"
rm "${temp_index}"

printf '%s\n' "✅ Updated talks index page at $TALKS_INDEX_FILE"

# Blog and talks are built; the sitemap is generated last.
printf '%s\n' "🔄 Generating sitemap..."

# The sitemap is assembled in a scratch file.
temp_sitemap=$(mktemp)

# XML prolog, <urlset> opening tag and the three fixed pages. Each table row
# below is "<path> <changefreq> <priority>".
{
    printf '%s\n' '<?xml version="1.0" encoding="UTF-8"?>'
    printf '%s\n' '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'
    printf '%s\n' '  <!-- Main pages -->'
    while read -r page_path page_freq page_priority; do
        printf '  <url>\n    <loc>%s%s</loc>\n    <changefreq>%s</changefreq>\n    <priority>%s</priority>\n  </url>\n' \
            "${SITE_URL}" "$page_path" "$page_freq" "$page_priority"
    done << 'PAGES'
/ weekly 1.0
/blog/ weekly 0.9
/talks/ monthly 0.9
PAGES
} > "$temp_sitemap"

# One <url> entry per blog post, in the order of sorted_files.
for file in "${sorted_files[@]}"; do
    post_data="${posts[$file]}"
    IFS='|' read -r title date description tags_html_file <<< "$post_data"

    # <lastmod> wants YYYY-MM-DD; when the date cannot be parsed the raw
    # front-matter value is used unchanged.
    w3c_date=$(date -d "$date" +%Y-%m-%d 2>/dev/null || echo "$date")

    printf '  <url>\n    <loc>%s/blog/%s</loc>\n    <lastmod>%s</lastmod>\n    <changefreq>monthly</changefreq>\n    <priority>0.8</priority>\n  </url>\n' \
        "${SITE_URL}" "$file" "${w3c_date}" >> "$temp_sitemap"
done

# Append one <url> entry to temp_sitemap for every talk that has slides.
# talk_files holds one Markdown path per line; it is read with mapfile so that
# paths containing spaces stay in one piece. A slides value starting with
# "/slides/" is rewritten to live under /talks, as in the talks index.
# Slides given as an absolute URL (scheme://...) are skipped: prefixing them
# with SITE_URL would produce an invalid <loc>.
append_talks_to_sitemap() {
    local md_file date w3c_date slides
    local -a talk_paths=()
    local scheme_re='^[A-Za-z][A-Za-z0-9+.-]*://'

    [[ -n "$talk_files" ]] || return 0
    mapfile -t talk_paths <<< "$talk_files"

    for md_file in "${talk_paths[@]}"; do
        [[ -n "$md_file" ]] || continue

        # Extract slides path for sitemap
        slides=$(grep -m 1 "^slides:" "$md_file" | sed 's/^slides: *//')
        [[ -n "$slides" ]] || continue
        if [[ "$slides" =~ $scheme_re ]]; then
            continue
        fi
        if [[ "$slides" == "/slides/"* ]]; then
            slides="/talks${slides}"
        fi

        # Convert date to W3C format for sitemap; keep the raw value when it
        # cannot be parsed
        date=$(grep -m 1 "^date:" "$md_file" | sed 's/^date: *//')
        w3c_date=$(date -d "$date" +%Y-%m-%d 2>/dev/null || echo "$date")

        cat >> "$temp_sitemap" << EOF
  <url>
    <loc>${SITE_URL}${slides}</loc>
    <lastmod>${w3c_date}</lastmod>
    <changefreq>yearly</changefreq>
    <priority>0.7</priority>
  </url>
EOF
    done
}

# Add talks to sitemap if there are any
append_talks_to_sitemap

# Terminate the document with the closing </urlset> tag.
printf '%s\n' "</urlset>" >> "${temp_sitemap}"

# Publish the finished sitemap and drop the scratch file.
cp "${temp_sitemap}" "${SITEMAP_FILE}"
rm "${temp_sitemap}"

printf '%s\n' "✅ Generated sitemap at $SITEMAP_FILE"

printf '%s\n' "🎉 All build operations completed successfully!"