#!/usr/bin/env python3
"""
20min.ch Comment Analysis Dashboard

A web-based dashboard for visualizing comment data and user analysis results.
"""

import os
import json
import logging
import datetime
import subprocess
from pathlib import Path
from flask import Flask, render_template, request, jsonify, redirect, url_for, flash
from collections import defaultdict, Counter

# Configure logging: INFO and above goes both to dashboard.log and to the
# console (StreamHandler defaults to stderr)
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[
        logging.FileHandler("dashboard.log"),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)

# Initialize Flask app
app = Flask(__name__)

# Enable sessions (required for flash() messages); override the development
# fallback via the SECRET_KEY environment variable in any real deployment
app.secret_key = os.environ.get('SECRET_KEY', 'default-secret-key-for-development')

# Directories, resolved relative to the process working directory
DATA_DIR = Path("../data")          # per-article comment JSON files
ANALYSIS_DIR = Path("../analysis")  # analysis result JSON files

# Template functions
@app.template_filter('getBotScoreClass')
def get_bot_score_class(score):
    """Map a bot score (0-100) to a Bootstrap badge CSS class.

    Thresholds: >= 80 red (high risk), >= 60 yellow (medium risk),
    >= 40 blue (low risk), anything lower green (no risk).
    """
    thresholds = (
        (80, "bg-danger"),             # High risk - red
        (60, "bg-warning text-dark"),  # Medium risk - orange/yellow
        (40, "bg-info"),               # Low risk - blue
    )
    for cutoff, css_class in thresholds:
        if score >= cutoff:
            return css_class
    return "bg-success"  # No risk - green

def get_recent_analyses():
    """Return the analysis_*.json files in ANALYSIS_DIR, newest first."""
    return sorted(
        ANALYSIS_DIR.glob("analysis_*.json"),
        key=lambda path: path.stat().st_mtime,
        reverse=True,
    )

def get_user_analyses():
    """Return the user_metrics_*.json files in ANALYSIS_DIR, newest first."""
    return sorted(
        ANALYSIS_DIR.glob("user_metrics_*.json"),
        key=lambda path: path.stat().st_mtime,
        reverse=True,
    )

def get_suspicious_users():
    """Return the suspicious_users_*.json reports in ANALYSIS_DIR, newest first."""
    return sorted(
        ANALYSIS_DIR.glob("suspicious_users_*.json"),
        key=lambda path: path.stat().st_mtime,
        reverse=True,
    )

def count_articles_with_comments():
    """Count articles with comments (one JSON file per article in DATA_DIR)."""
    return sum(1 for _ in DATA_DIR.glob("*.json"))

def load_recent_comments(limit=50):
    """Load the most recent comments (and replies) across all articles.

    Scans at most the 10 most recently modified comment files, collects
    top-level comments and their replies, then returns the `limit` newest
    entries sorted by timestamp (newest first). Entries without a parseable
    ISO-8601 `createdAt` timestamp are skipped.
    """
    recent_comments = []

    # Newest files first, so the early-stop below favors fresh activity.
    comment_files = sorted(DATA_DIR.glob("*.json"),
                           key=lambda p: p.stat().st_mtime, reverse=True)

    for file_path in comment_files[:10]:  # cap files read for performance
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)

            article_id = file_path.stem.split('_')[-1]
            # Stems look like "<title_words>_<id>"; joining on '_' already
            # yields spaces, so no further replacement is needed.
            article_title = ' '.join(file_path.stem.split('_')[:-1])

            for comment in data.get("comments", []):
                entry = _recent_comment_entry(comment, article_id, article_title)
                if entry:
                    recent_comments.append(entry)

                for reply in comment.get("replies", []):
                    reply_entry = _recent_comment_entry(reply, article_id, article_title)
                    if reply_entry:
                        reply_entry["is_reply"] = True
                        reply_entry["parent_author"] = comment.get("authorNickname", "Anonymous")
                        recent_comments.append(reply_entry)

                # Collect roughly 2x the limit so the final sort has slack.
                if len(recent_comments) >= limit * 2:
                    break

        except Exception as e:
            logger.error(f"Error loading comments from {file_path}: {e}")

        # Bug fix: the inner break previously only stopped the comment loop,
        # so remaining files were still fully parsed for nothing.
        if len(recent_comments) >= limit * 2:
            break

    # Newest first, trimmed to the requested size.
    recent_comments.sort(key=lambda x: x["datetime"], reverse=True)
    return recent_comments[:limit]


def _recent_comment_entry(comment, article_id, article_title):
    """Build a display dict for one comment or reply.

    Returns None when `createdAt` is missing or not valid ISO-8601 (a
    trailing 'Z' is normalized to '+00:00' before parsing).
    """
    timestamp = comment.get("createdAt")
    if not timestamp:
        return None
    try:
        dt = datetime.datetime.fromisoformat(timestamp.replace('Z', '+00:00'))
    except (AttributeError, ValueError):
        return None
    return {
        "text": comment.get("body", ""),
        "author": comment.get("authorNickname", "Anonymous"),
        "timestamp": timestamp,
        "datetime": dt,
        "article_id": article_id,
        "article_title": article_title,
        "reaction_count": sum(comment.get("reactions", {}).values()),
    }

def calculate_user_stats():
    """Aggregate per-user activity statistics across all comment files.

    Returns a list of dicts (username, comment/reply/total counts, number of
    distinct articles, total reactions received), sorted by total activity
    descending.
    """
    stats_by_user = defaultdict(lambda: {
        "comment_count": 0,
        "reply_count": 0,
        "articles": set(),
        "reactions_received": 0
    })

    for file_path in DATA_DIR.glob("*.json"):
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)

            article_id = file_path.stem.split('_')[-1]

            for comment in data.get("comments", []):
                author = comment.get("authorNickname")
                if not author:
                    # Matches existing behavior: when the top-level author is
                    # missing, that comment's replies are skipped as well.
                    continue

                entry = stats_by_user[author]
                entry["comment_count"] += 1
                entry["articles"].add(article_id)
                entry["reactions_received"] += sum(comment.get("reactions", {}).values())

                for reply in comment.get("replies", []):
                    reply_author = reply.get("authorNickname")
                    if not reply_author:
                        continue
                    reply_entry = stats_by_user[reply_author]
                    reply_entry["reply_count"] += 1
                    reply_entry["articles"].add(article_id)
                    reply_entry["reactions_received"] += sum(reply.get("reactions", {}).values())

        except Exception as e:
            logger.error(f"Error processing {file_path}: {e}")

    summaries = [
        {
            "username": username,
            "comment_count": s["comment_count"],
            "reply_count": s["reply_count"],
            "total_count": s["comment_count"] + s["reply_count"],
            "article_count": len(s["articles"]),
            "reactions_received": s["reactions_received"],
        }
        for username, s in stats_by_user.items()
    ]
    summaries.sort(key=lambda entry: entry["total_count"], reverse=True)
    return summaries

def calculate_reaction_stats():
    """Tally reaction counts by reaction type over every comment and reply."""
    totals = Counter()

    for file_path in DATA_DIR.glob("*.json"):
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)

            for comment in data.get("comments", []):
                # Counter.update adds each reaction type's count.
                totals.update(comment.get("reactions", {}))
                for reply in comment.get("replies", []):
                    totals.update(reply.get("reactions", {}))

        except Exception as e:
            logger.error(f"Error processing {file_path}: {e}")

    return dict(totals)

def load_single_analysis(filename):
    """Load a single analysis file by filename.

    Returns the parsed JSON, or None when the name is invalid, the file does
    not exist, or it cannot be parsed.
    """
    # Security: `filename` arrives from a URL segment — reject anything that
    # is not a plain file name (path separators, '..') so a crafted request
    # cannot read files outside ANALYSIS_DIR.
    if not filename or Path(filename).name != filename or filename in ('.', '..'):
        logger.warning(f"Rejected suspicious analysis filename: {filename!r}")
        return None

    file_path = ANALYSIS_DIR / filename
    if not file_path.exists():
        return None

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except Exception as e:
        logger.error(f"Error loading analysis {file_path}: {e}")
        return None

def find_bot_like_users():
    """Return users from the newest metrics file with a bot score >= 70.

    Results are sorted by bot score, highest first. Returns an empty list
    when no metrics file exists or it cannot be read.
    """
    metrics_files = get_user_analyses()
    if not metrics_files:
        return []

    latest = metrics_files[0]  # newest analysis
    try:
        with open(latest, 'r', encoding='utf-8') as f:
            metrics = json.load(f)

        suspicious = [
            {
                "username": username,
                "bot_score": user_metrics["bot_score"],
                "comments_count": user_metrics.get("comments_count", 0),
                "comments_per_day": user_metrics.get("comments_per_day", 0),
                "similarity": user_metrics.get("avg_similarity", 0),
            }
            for username, user_metrics in metrics.items()
            if user_metrics.get("bot_score", 0) >= 70  # suspicion threshold
        ]
        suspicious.sort(key=lambda entry: entry["bot_score"], reverse=True)
        return suspicious

    except Exception as e:
        logger.error(f"Error loading user metrics from {latest}: {e}")
        return []

@app.route('/')
def index():
    """Render the dashboard homepage with headline stats and recent activity."""
    # Most recent overall analysis, if one exists.
    latest_analysis = None
    analyses = get_recent_analyses()
    if analyses:
        try:
            with open(analyses[0], 'r', encoding='utf-8') as f:
                latest_analysis = json.load(f)
        except Exception as e:
            logger.error(f"Error loading analysis {analyses[0]}: {e}")

    return render_template(
        'index.html',
        article_count=count_articles_with_comments(),
        latest_analysis=latest_analysis,
        recent_comments=load_recent_comments(10),
        top_users=calculate_user_stats()[:10],       # ten most active users
        reaction_stats=calculate_reaction_stats(),
        suspicious_users=find_bot_like_users()[:5],  # five highest bot scores
    )

@app.route('/articles')
def articles():
    """Display the searchable, sortable list of articles with comments."""
    articles = []

    for file_path in DATA_DIR.glob("*.json"):
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)

            article_id = file_path.stem.split('_')[-1]
            article_title = ' '.join(file_path.stem.split('_')[:-1]).replace('_', ' ')
            if not article_title.strip():
                article_title = f"Article {article_id}"

            comments = data.get("comments", [])
            reply_count = sum(len(c.get("replies", [])) for c in comments)
            # Reactions on top-level comments plus reactions on their replies.
            reaction_count = sum(
                sum(c.get("reactions", {}).values())
                + sum(sum(r.get("reactions", {}).values()) for r in c.get("replies", []))
                for c in comments
            )

            articles.append({
                "id": article_id,
                "title": article_title,
                "comments_count": len(comments),
                "replies_count": reply_count,
                "reactions_count": reaction_count,
                "date": data.get("createdAt", ""),
                "modified": datetime.datetime.fromtimestamp(file_path.stat().st_mtime),
            })

        except Exception as e:
            logger.error(f"Error processing {file_path}: {e}")

    # Baseline order: newest first. Later re-sorts are stable, so this also
    # fixes the tie-breaking order for the count-based sorts.
    articles.sort(key=lambda a: a["modified"], reverse=True)

    # Query parameters: free-text search plus sort order.
    search = request.args.get('search', '').lower()
    sort = request.args.get('sort', 'date_desc')

    if search:
        articles = [a for a in articles
                    if search in a["title"].lower() or search in a["id"].lower()]

    if sort == 'date_asc':
        articles.sort(key=lambda a: a["modified"])
    elif sort in ('comments_desc', 'comments_asc'):
        articles.sort(key=lambda a: a["comments_count"],
                      reverse=(sort == 'comments_desc'))
    # Default 'date_desc' is already in place.

    now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    return render_template('articles.html', articles=articles, now=now)

@app.route('/article/<article_id>')
def article_detail(article_id):
    """Display detailed information about a specific article and its comments.

    Renders reaction totals, a comment-activity timeline, per-user engagement
    data, and (when a user-metrics analysis exists) bot-score statistics for
    the article's commenters. Redirects back to the article list on any error.
    """
    # Locate the article file: "<title>_<id>.json", falling back to the bare
    # "article_<id>.json" naming.
    article_files = list(DATA_DIR.glob(f"*_{article_id}.json"))
    if not article_files:
        article_files = list(DATA_DIR.glob(f"article_{article_id}.json"))
    if not article_files:
        flash(f"Article with ID {article_id} not found", "error")
        return redirect(url_for('articles'))

    file_path = article_files[0]

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            article_data = json.load(f)

        # Reconstruct a human-readable title from the filename stem.
        article_title = ' '.join(file_path.stem.split('_')[:-1]).replace('_', ' ')
        if not article_title:
            article_title = f"Article {article_id}"

        comments = article_data.get("comments", [])
        comment_count = len(comments)
        reply_count = sum(len(c.get("replies", [])) for c in comments)

        article_reactions = _tally_reactions_by_type(comments)
        timeline = _build_activity_timeline(comments)
        user_engagement, user_reactions = _build_user_engagement(comments)
        top_users, bot_score_distribution = _article_bot_stats(comments, user_reactions)

        article = {
            "id": article_id,
            "title": article_title,
            "date": article_data.get("createdAt", ""),
            "author": article_data.get("authorName", ""),
            "category": article_data.get("category", ""),
            "comments_count": comment_count,
            "replies_count": reply_count,
            "reactions_count": sum(article_reactions.values())
        }

        return render_template(
            'article_detail.html',
            article=article,
            comments=comments,
            article_reactions=article_reactions,
            timeline=timeline,
            user_engagement=user_engagement,
            top_users=top_users,
            bot_score_distribution=bot_score_distribution
        )

    except Exception as e:
        logger.error(f"Error processing article {article_id}: {e}")
        flash(f"Error loading article: {e}", "error")
        return redirect(url_for('articles'))


def _parse_comment_timestamp(time_str):
    """Parse an ISO-8601 timestamp ('Z' normalized to '+00:00'); None on failure."""
    if not time_str:
        return None
    try:
        return datetime.datetime.fromisoformat(time_str.replace('Z', '+00:00'))
    except (AttributeError, ValueError):
        return None


def _tally_reactions_by_type(comments):
    """Sum reaction counts per reaction type over comments and their replies."""
    totals = defaultdict(int)
    for comment in comments:
        for reaction_type, count in comment.get("reactions", {}).items():
            totals[reaction_type] += count
        for reply in comment.get("replies", []):
            for reaction_type, count in reply.get("reactions", {}).items():
                totals[reaction_type] += count
    return totals


def _build_activity_timeline(comments):
    """Bucket comment/reply timestamps into chart-ready labels and counts.

    Uses hourly buckets when activity spans more than six hours, otherwise
    per-minute buckets.
    """
    times = []
    for comment in comments:
        dt = _parse_comment_timestamp(comment.get("createdAt", ""))
        if dt:
            times.append(dt)
        for reply in comment.get("replies", []):
            reply_dt = _parse_comment_timestamp(reply.get("createdAt", ""))
            if reply_dt:
                times.append(reply_dt)

    timeline = {"labels": [], "comments": []}
    if not times:
        return timeline

    times.sort()
    span_seconds = (times[-1] - times[0]).total_seconds()
    # Hourly buckets for long discussions, minute buckets for short bursts.
    key_format = "%Y-%m-%d %H:00" if span_seconds > 6 * 3600 else "%H:%M"

    bucket_counts = defaultdict(int)
    for dt in times:
        bucket_counts[dt.strftime(key_format)] += 1

    timeline["labels"] = sorted(bucket_counts.keys())
    timeline["comments"] = [bucket_counts[label] for label in timeline["labels"]]
    return timeline


def _build_user_engagement(comments):
    """Compute top-10 commenter chart data and per-user reaction totals.

    Returns (engagement, user_reactions): engagement holds labels/comments/
    reactions lists for the ten most active authors; user_reactions maps every
    author to the total reactions they received in this article.
    """
    user_comments = defaultdict(int)
    user_reactions = defaultdict(int)

    for comment in comments:
        author = comment.get("authorNickname", "Anonymous")
        user_comments[author] += 1
        user_reactions[author] += sum(comment.get("reactions", {}).values())

        for reply in comment.get("replies", []):
            reply_author = reply.get("authorNickname", "Anonymous")
            user_comments[reply_author] += 1
            user_reactions[reply_author] += sum(reply.get("reactions", {}).values())

    most_active = sorted(user_comments.items(), key=lambda item: item[1],
                         reverse=True)[:10]
    engagement = {
        "labels": [user for user, _ in most_active],
        "comments": [count for _, count in most_active],
        "reactions": [user_reactions[user] for user, _ in most_active],
    }
    return engagement, user_reactions


def _article_bot_stats(comments, user_reactions):
    """Bot-score stats for this article's commenters from the latest metrics.

    Returns (top_users, bot_score_distribution): metric-backed users sorted by
    bot score (highest first), and counts per 20-point score bracket. Both
    degrade to empty/zeroed values when no metrics file exists or loading fails.
    """
    bot_score_distribution = [0, 0, 0, 0, 0]  # 0-20, 21-40, 41-60, 61-80, 81-100
    top_users = []

    user_metrics_files = get_user_analyses()
    if not user_metrics_files:
        return top_users, bot_score_distribution

    try:
        with open(user_metrics_files[0], 'r', encoding='utf-8') as f:
            user_metrics = json.load(f)

        # Every non-anonymous author appearing in comments or replies.
        article_users = set()
        for comment in comments:
            author = comment.get("authorNickname", "Anonymous")
            if author != "Anonymous":
                article_users.add(author)
            for reply in comment.get("replies", []):
                reply_author = reply.get("authorNickname", "Anonymous")
                if reply_author != "Anonymous":
                    article_users.add(reply_author)

        for user in article_users:
            if user not in user_metrics:
                continue
            metrics = user_metrics[user]
            bot_score = metrics.get("bot_score", 0)

            top_users.append({
                "username": user,
                "comments_count": metrics.get("comments_count", 0),
                "replies_count": 0,  # not tracked per-user yet
                "bot_score": bot_score,
                # Bug fix: this used to be sum(user_reactions.get(user, 0)),
                # which raised TypeError (sum over an int) and aborted the
                # whole page whenever metrics covered an article user.
                "reactions_received": user_reactions.get(user, 0)
            })

            # NOTE(review): brackets assume integer scores; fractional values
            # between bracket edges (e.g. 20.5) are not counted — TODO confirm
            # scores are integers.
            if 0 <= bot_score <= 20:
                bot_score_distribution[0] += 1
            elif 21 <= bot_score <= 40:
                bot_score_distribution[1] += 1
            elif 41 <= bot_score <= 60:
                bot_score_distribution[2] += 1
            elif 61 <= bot_score <= 80:
                bot_score_distribution[3] += 1
            elif 81 <= bot_score <= 100:
                bot_score_distribution[4] += 1

        # Highest bot scores first.
        top_users.sort(key=lambda entry: entry["bot_score"], reverse=True)

    except Exception as e:
        logger.error(f"Error loading user metrics: {e}")
        top_users = []

    return top_users, bot_score_distribution

@app.route('/users')
def users():
    """Display the list of users together with their activity statistics."""
    # Names flagged as bot-like, for highlighting in the template.
    suspicious_usernames = {entry["username"] for entry in find_bot_like_users()}

    return render_template(
        'users.html',
        users=calculate_user_stats(),
        suspicious_usernames=suspicious_usernames,
    )

@app.route('/user/<username>')
def user_detail(username):
    """Display detailed information about a user.

    Shows the user's metrics from the latest analysis (when available) plus
    every comment and reply they authored across all articles, newest first.
    """
    # Latest per-user metrics, if an analysis file exists.
    user_metrics = None
    metrics_files = get_user_analyses()
    if metrics_files:
        try:
            with open(metrics_files[0], 'r', encoding='utf-8') as f:
                user_metrics = json.load(f).get(username)
        except Exception as e:
            logger.error(f"Error loading user metrics: {e}")

    recent_comments = []

    for file_path in DATA_DIR.glob("*.json"):
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)

            article_id = file_path.stem.split('_')[-1]
            article_title = ' '.join(file_path.stem.split('_')[:-1]).replace('_', ' ')

            # One pass collects both the user's top-level comments and their
            # replies (previously the comment list was scanned twice).
            for comment in data.get("comments", []):
                if comment.get("authorNickname") == username:
                    timestamp = comment.get("createdAt")
                    recent_comments.append({
                        "text": comment.get("body", ""),
                        "timestamp": timestamp,
                        "datetime": _user_comment_time(timestamp),
                        "article_id": article_id,
                        "article_title": article_title,
                        "reaction_count": sum(comment.get("reactions", {}).values()),
                        "reactions": comment.get("reactions", {})
                    })

                for reply in comment.get("replies", []):
                    if reply.get("authorNickname") != username:
                        continue
                    reply_timestamp = reply.get("createdAt")
                    recent_comments.append({
                        "text": reply.get("body", ""),
                        "timestamp": reply_timestamp,
                        "datetime": _user_comment_time(reply_timestamp),
                        "article_id": article_id,
                        "article_title": article_title,
                        "is_reply": True,
                        "parent_author": comment.get("authorNickname", "Anonymous"),
                        "reaction_count": sum(reply.get("reactions", {}).values()),
                        "reactions": reply.get("reactions", {})
                    })

        except Exception as e:
            logger.error(f"Error processing {file_path} for user {username}: {e}")

    # Newest first.
    recent_comments.sort(key=lambda entry: entry["datetime"], reverse=True)

    return render_template(
        'user_detail.html',
        username=username,
        user_metrics=user_metrics,
        recent_comments=recent_comments
    )


def _user_comment_time(timestamp):
    """Parse an ISO-8601 comment timestamp, falling back to the current time.

    NOTE(review): the naive-`now()` fallback matches previous behavior, but
    mixing it with offset-aware parsed timestamps (the 'Z' substitution yields
    aware datetimes) would make the final sort raise TypeError — confirm
    whether source timestamps are consistently aware or naive.
    """
    try:
        return datetime.datetime.fromisoformat(timestamp.replace('Z', '+00:00'))
    except (AttributeError, TypeError, ValueError):
        return datetime.datetime.now()

@app.route('/bot-detection')
def bot_detection():
    """Display bot detection and user pattern analysis results.

    Aggregates the latest user-metrics file into score distributions, high-
    activity percentages, and a list of potential bot accounts, alongside the
    most recent coordinated-group report.
    """
    suspicious_users = find_bot_like_users()

    # Coordinated-group report. Bug fix: the glob result was previously used
    # unsorted, so an arbitrary file could be picked; sort by mtime so the
    # newest report is used, consistent with the other analysis loaders.
    coordinated_groups = []
    group_files = sorted(
        ANALYSIS_DIR.glob("coordinated_groups_*.json"),
        key=lambda path: path.stat().st_mtime,
        reverse=True,
    )
    if group_files:
        try:
            with open(group_files[0], 'r', encoding='utf-8') as f:
                coordinated_groups = json.load(f)
        except Exception as e:
            logger.error(f"Error loading coordinated groups: {e}")

    # Defaults used when no metrics file exists or loading fails.
    total_users = 0
    potential_bots = 0
    suspicious_groups = len(coordinated_groups)
    high_activity_users = 0
    high_activity_pct = 0
    bot_score_distribution = [0, 0, 0, 0, 0]    # 0-20, 21-40, 41-60, 61-80, 81-100
    bot_score_detailed_distribution = [0] * 10  # 0-10, 11-20, ..., 91-100
    potential_bot_accounts = []

    user_metrics_files = get_user_analyses()
    if user_metrics_files:
        try:
            with open(user_metrics_files[0], 'r', encoding='utf-8') as f:
                user_metrics = json.load(f)
            total_users = len(user_metrics)

            for username, metrics in user_metrics.items():
                bot_score = metrics.get("bot_score", 0)

                # NOTE(review): brackets assume integer scores; fractional
                # values between bracket edges (e.g. 20.5) fall through
                # uncounted — TODO confirm scores are integers.
                if 0 <= bot_score <= 20:
                    bot_score_distribution[0] += 1
                    bot_score_detailed_distribution[0 if bot_score <= 10 else 1] += 1
                elif 21 <= bot_score <= 40:
                    bot_score_distribution[1] += 1
                    bot_score_detailed_distribution[2 if bot_score <= 30 else 3] += 1
                elif 41 <= bot_score <= 60:
                    bot_score_distribution[2] += 1
                    bot_score_detailed_distribution[4 if bot_score <= 50 else 5] += 1
                elif 61 <= bot_score <= 80:
                    bot_score_distribution[3] += 1
                    bot_score_detailed_distribution[6 if bot_score <= 70 else 7] += 1
                elif 81 <= bot_score <= 100:
                    bot_score_distribution[4] += 1
                    bot_score_detailed_distribution[8 if bot_score <= 90 else 9] += 1

                # More than 50 comments/day counts as high activity.
                if metrics.get("comments_per_day", 0) > 50:
                    high_activity_users += 1

                # Accounts at or above the bot-suspicion threshold.
                if bot_score >= 70:
                    potential_bot_accounts.append({
                        "username": username,
                        "bot_score": bot_score,
                        "comments_count": metrics.get("comments_count", 0),
                        "comments_per_day": metrics.get("comments_per_day", 0),
                        "active_hours": len(metrics.get("hour_distribution", {})),
                        "similar_comments_count": metrics.get("similar_comments", 0)
                    })

            # Highest bot scores first.
            potential_bot_accounts.sort(key=lambda entry: entry["bot_score"], reverse=True)

            potential_bots = len([u for u in suspicious_users if u.get("bot_score", 0) >= 70])
            high_activity_pct = round((high_activity_users / total_users) * 100) if total_users > 0 else 0

        except Exception as e:
            logger.error(f"Error processing user metrics: {e}")

    return render_template(
        'bot_detection.html',
        suspicious_users=suspicious_users,
        coordinated_groups=coordinated_groups,
        total_users=total_users,
        potential_bots=potential_bots,
        suspicious_groups=suspicious_groups,
        high_activity_pct=high_activity_pct,
        bot_score_distribution=bot_score_distribution,
        bot_score_detailed_distribution=bot_score_detailed_distribution,
        potential_bot_accounts=potential_bot_accounts
    )

def _file_summaries(paths):
    """Summarize files as name/timestamp/size dicts for the analysis templates."""
    return [
        {
            "filename": path.name,
            "timestamp": datetime.datetime.fromtimestamp(path.stat().st_mtime),
            "size": path.stat().st_size,
        }
        for path in paths
    ]


@app.route('/analysis')
def analysis():
    """Display the lists of available analysis result files.

    The three near-identical summary loops were folded into _file_summaries.
    """
    return render_template(
        'analysis.html',
        analyses=_file_summaries(get_recent_analyses()),
        user_metrics=_file_summaries(get_user_analyses()),
        suspicious=_file_summaries(get_suspicious_users()),
    )

@app.route('/analysis/<filename>')
def analysis_detail(filename):
    """Render a detailed view of one analysis file, or bounce back to the list."""
    analysis_data = load_single_analysis(filename)
    if analysis_data:
        return render_template(
            'analysis_detail.html',
            filename=filename,
            analysis=analysis_data,
        )
    return redirect(url_for('analysis'))

@app.route('/fetch-new')
def fetch_new():
    """Run the comment fetcher script to pull new articles and comments."""
    try:
        # The fetcher script lives one directory above this dashboard file.
        fetcher = Path(__file__).resolve().parent.parent / "comment_fetcher.py"

        # Make sure the script is directly executable (it relies on its shebang).
        if not os.access(fetcher, os.X_OK):
            os.chmod(fetcher, 0o755)

        # List form (shell=False) — no shell injection surface.
        subprocess.run(
            [str(fetcher), "--all", "--output-dir", str(DATA_DIR)],
            check=True,
        )
        flash("Successfully fetched new articles and comments", "success")
    except Exception as e:
        logger.error(f"Error fetching new articles: {e}")
        flash(f"Error fetching new articles: {e}", "error")

    return redirect(url_for('articles'))

@app.route('/api/reaction-stats')
def api_reaction_stats():
    """API endpoint: reaction totals by type, as JSON for the charts."""
    stats = calculate_reaction_stats()
    return jsonify(stats)

@app.route('/api/user-stats')
def api_user_stats():
    """API endpoint: top-20 user comment/reply counts in chart dataset format."""
    top_twenty = calculate_user_stats()[:20]

    return jsonify({
        "labels": [entry["username"] for entry in top_twenty],
        "datasets": [
            {
                "label": "Comments",
                "data": [entry["comment_count"] for entry in top_twenty],
            },
            {
                "label": "Replies",
                "data": [entry["reply_count"] for entry in top_twenty],
            },
        ],
    })

if __name__ == '__main__':
    # Create every directory the app needs; parents=True so a missing parent
    # directory no longer makes startup fail.
    for directory in (DATA_DIR, ANALYSIS_DIR, Path('templates'), Path('static')):
        directory.mkdir(parents=True, exist_ok=True)

    # Debug mode stays on by default (matching previous behavior) but can now
    # be disabled with FLASK_DEBUG=0. The Werkzeug debugger allows arbitrary
    # code execution, so never run debug mode on a publicly reachable host.
    debug = os.environ.get('FLASK_DEBUG', '1') == '1'
    app.run(debug=debug, host='0.0.0.0', port=5000)