#!/usr/bin/env python3
"""
Update Dashboard Data

This script updates the dashboard data by:
1. Processing all comment data
2. Generating statistics and analysis results
3. Saving the results in a format readable by the dashboard

This is meant to be run on a schedule via cron job.
"""

import datetime
import glob
import json
import logging
import math
import os
import re
import sys
from collections import defaultdict, Counter
from pathlib import Path

# Configure logging: mirror all messages to stdout (for cron capture) and to a
# rotatingly-named? no — a single log file that lives next to this script.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(sys.stdout),
        # Log file is created beside this script, not in the working directory.
        logging.FileHandler(Path(__file__).parent / 'dashboard_update.log')
    ]
)
logger = logging.getLogger('dashboard_update')

# Path configuration — the project root is assumed to be the parent of the
# directory containing this script (i.e. the script lives one level down).
PROJECT_ROOT = Path(__file__).resolve().parent.parent
COMMENTS_DIR = PROJECT_ROOT / "data" / "comments"      # raw per-article comment JSON dumps
ANALYSIS_DIR = PROJECT_ROOT / "analysis"               # output of the separate analysis job
DASHBOARD_DATA_DIR = Path(__file__).resolve().parent / "data"  # files the dashboard reads

# Ensure the dashboard output directory exists before any writes below.
DASHBOARD_DATA_DIR.mkdir(parents=True, exist_ok=True)

def load_all_comment_data():
    """Load every article's comment payload from JSON files on disk.

    Scans COMMENTS_DIR for ``*.json`` files, parses each one, and stamps the
    numeric portion of the filename (e.g. ``12345.json``) onto the payload
    under the ``id`` key. Files that cannot be read or parsed are logged and
    skipped rather than aborting the whole run.

    Returns:
        list: Parsed article dicts; empty when no files are present.
    """
    logger.info(f"Loading comment data from {COMMENTS_DIR}")
    paths = list(COMMENTS_DIR.glob("*.json"))

    if not paths:
        logger.warning(f"No comment files found in {COMMENTS_DIR}")
        return []

    articles = []
    for path in paths:
        try:
            with open(path, 'r', encoding='utf-8') as handle:
                payload = json.load(handle)
                # Tie the payload back to its article via the filename digits.
                match = re.search(r'(\d+)\.json$', path.name)
                if match:
                    payload['id'] = match.group(1)
                articles.append(payload)
        except Exception as exc:
            logger.error(f"Error loading {path}: {exc}")

    logger.info(f"Loaded {len(articles)} articles with comments")
    return articles

def load_user_metrics():
    """Return the contents of the newest ``user_metrics_*.json`` file.

    Recency is judged by filesystem modification time (not by any timestamp
    embedded in the filename).

    Returns:
        dict: Parsed user metrics, or an empty dict when no metrics file
        exists or the newest one cannot be parsed.
    """
    logger.info("Loading user metrics data")
    candidates = list(ANALYSIS_DIR.glob("user_metrics_*.json"))

    if not candidates:
        logger.warning(f"No user metrics files found in {ANALYSIS_DIR}")
        return {}

    # Pick the most recently written file.
    newest = max(candidates, key=os.path.getmtime)
    logger.info(f"Using latest user metrics file: {newest}")

    try:
        with open(newest, 'r', encoding='utf-8') as handle:
            return json.load(handle)
    except Exception as exc:
        logger.error(f"Error loading user metrics: {exc}")
        return {}

def extract_article_metadata(article_data):
    """Summarise one article's engagement numbers for the dashboard list.

    Args:
        article_data: Parsed article dict; missing keys fall back to
            empty-string/empty-list defaults.

    Returns:
        dict: id, title, date, author, category plus comment, reply and
        reaction counts. NOTE(review): reactions_count only covers
        top-level comments — reactions on replies are not included here;
        confirm that is intentional against the dashboard's expectations.
    """
    article_id = article_data.get('id', '')
    comments = article_data.get('comments', [])

    replies_total = sum(len(c.get('replies', [])) for c in comments)

    reactions_total = 0
    for comment in comments:
        for reaction in comment.get('reactions', {}).values():
            reactions_total += reaction.get('count', 0)

    return {
        'id': article_id,
        'title': article_data.get('title', f"Article {article_id}"),
        'date': article_data.get('createdAt', ''),
        'author': article_data.get('authorName', ''),
        'category': article_data.get('category', ''),
        'comments_count': len(comments),
        'replies_count': replies_total,
        'reactions_count': reactions_total,
    }

def generate_article_list(all_articles):
    """Build and persist the dashboard's article index.

    Extracts per-article metadata, orders it newest-first by the date
    string, and writes it to ``articles.json`` in DASHBOARD_DATA_DIR.

    Args:
        all_articles: List of parsed article dicts.

    Returns:
        list: The sorted metadata records that were written out.
    """
    logger.info("Generating article list")

    records = [extract_article_metadata(article) for article in all_articles]
    # Newest first; dates are ISO-style strings so lexical order works.
    records.sort(key=lambda item: item['date'], reverse=True)

    target = DASHBOARD_DATA_DIR / "articles.json"
    with open(target, 'w', encoding='utf-8') as handle:
        json.dump(records, handle, indent=2)

    logger.info(f"Saved article list to {target}")
    return records

def extract_comments_from_articles(all_articles):
    """Flatten article comment trees into a single list of records.

    Every top-level comment and every reply becomes its own dict. For each
    comment, its replies are appended to the output list immediately
    *before* the parent record itself, and the parent also keeps the reply
    records nested under ``'replies'`` (so replies appear both flat and
    nested, by design of the original format).

    Args:
        all_articles: Parsed article dicts.

    Returns:
        list: Flat comment/reply records with normalized keys.
    """
    flat = []

    for article in all_articles:
        art_id = article.get('id', '')
        art_title = article.get('title', f"Article {art_id}")

        for comment in article.get('comments', []):
            parent_name = comment.get('authorNickname', 'Anonymous')
            parent = {
                'id': comment.get('id', ''),
                'article_id': art_id,
                'article_title': art_title,
                'author': parent_name,
                'text': comment.get('body', ''),
                'timestamp': comment.get('createdAt', ''),
                'is_reply': False,
                # Collapse reaction objects to plain type -> count pairs.
                'reactions': {
                    kind: info.get('count', 0)
                    for kind, info in comment.get('reactions', {}).items()
                },
                'replies': [],
            }

            for reply in comment.get('replies', []):
                child = {
                    'id': reply.get('id', ''),
                    'article_id': art_id,
                    'article_title': art_title,
                    'author': reply.get('authorNickname', 'Anonymous'),
                    'parent_author': parent_name,
                    'text': reply.get('body', ''),
                    'timestamp': reply.get('createdAt', ''),
                    'is_reply': True,
                    'reactions': {
                        kind: info.get('count', 0)
                        for kind, info in reply.get('reactions', {}).items()
                    },
                }
                parent['replies'].append(child)
                flat.append(child)

            # Parent goes in after its replies (preserves original ordering).
            flat.append(parent)

    return flat

def generate_comment_stats(all_articles):
    """Compute aggregate comment statistics and write them for the dashboard.

    Produces two files in DASHBOARD_DATA_DIR:
      * ``comment_stats.json`` — totals, top commenters, reaction breakdown.
      * ``recent_comments.json`` — the 20 newest comments/replies.

    Args:
        all_articles: Parsed article dicts.

    Returns:
        dict: The summary payload written to ``comment_stats.json``.
    """
    logger.info("Generating comment statistics")

    flat_comments = extract_comments_from_articles(all_articles)

    top_level = [rec for rec in flat_comments if not rec.get('is_reply', False)]
    replies = [rec for rec in flat_comments if rec.get('is_reply', False)]

    # Tally reactions across both comments and replies.
    reaction_totals = defaultdict(int)
    for record in flat_comments:
        for kind, count in record.get('reactions', {}).items():
            reaction_totals[kind] += count

    author_counts = Counter(record.get('author', 'Anonymous') for record in flat_comments)
    top_users = [
        {'username': name, 'comments_count': total}
        for name, total in author_counts.most_common(10)
    ]

    # Timestamps are strings; lexical descending sort gives newest first.
    recent_comments = sorted(
        flat_comments,
        key=lambda record: record.get('timestamp', ''),
        reverse=True,
    )[:20]

    total_reactions = sum(reaction_totals.values())
    summary = {
        'total_comments': len(top_level),
        'total_replies': len(replies),
        'total_reactions': total_reactions,
        'total_interactions': len(top_level) + len(replies) + total_reactions,
        'unique_users': len(author_counts),
        'top_users': top_users,
        'reaction_stats': dict(reaction_totals),
    }

    stats_path = DASHBOARD_DATA_DIR / "comment_stats.json"
    with open(stats_path, 'w', encoding='utf-8') as handle:
        json.dump(summary, handle, indent=2)

    recent_path = DASHBOARD_DATA_DIR / "recent_comments.json"
    with open(recent_path, 'w', encoding='utf-8') as handle:
        json.dump(recent_comments, handle, indent=2)

    logger.info(f"Saved comment statistics to {stats_path}")
    return summary

def process_user_data(user_metrics, all_comments):
    """Aggregate per-user bot-score data and persist it for the dashboard.

    Builds two bot-score histograms (5 buckets of width 20 and 10 buckets of
    width 10), collects users whose bot score is >= 70 as "suspicious"
    (together with their first/last comment timestamps), and writes it all
    to ``user_data.json`` in DASHBOARD_DATA_DIR.

    Fix over the previous if/elif binning: scores that are fractional
    (e.g. 20.5 fell between the 0-20 and 21-40 integer ranges) or outside
    0-100 were silently dropped from both histograms. Buckets are now
    computed arithmetically with clamping; integer scores in 0-100 land in
    exactly the same buckets as before.

    Args:
        user_metrics: Mapping of username -> metrics dict. ``bot_score``
            defaults to 0 when missing; other numeric fields default to 0.
        all_comments: Flat comment records (from
            extract_comments_from_articles), used for first/last-seen.

    Returns:
        dict: total_users, suspicious_users (sorted by bot score, highest
        first) and the coarse 5-bucket bot_score_distribution. The detailed
        10-bucket histogram is only written to disk, as before.
    """
    logger.info("Processing user data")

    # Group comments by author once so first/last-seen lookups are cheap.
    comments_by_user = defaultdict(list)
    for comment in all_comments:
        comments_by_user[comment.get('author', 'Anonymous')].append(comment)

    suspicious_users = []
    bot_score_distribution = [0] * 5  # 0-20, 21-40, 41-60, 61-80, 81-100
    bot_score_detailed = [0] * 10     # 0-10, 11-20, ..., 91-100

    def _bucket(score, width, buckets):
        """Map a score onto a histogram index, clamping out-of-range values.

        For integer scores this reproduces the 1-based ranges above
        (width 20: 0-20 -> 0, 21-40 -> 1, ...); fractional scores fall into
        the nearest enclosing bucket instead of being dropped.
        """
        return min(buckets - 1, max(0, math.ceil(score / width) - 1))

    for username, metrics in user_metrics.items():
        bot_score = metrics.get('bot_score', 0)

        bot_score_distribution[_bucket(bot_score, 20, 5)] += 1
        bot_score_detailed[_bucket(bot_score, 10, 10)] += 1

        # Users at or above the 70-point threshold are flagged as suspicious.
        if bot_score >= 70:
            timestamps = [c.get('timestamp', '')
                          for c in comments_by_user.get(username, [])]
            suspicious_users.append({
                'username': username,
                'bot_score': bot_score,
                'comments_count': metrics.get('comments_count', 0),
                'comments_per_day': metrics.get('comments_per_day', 0),
                'articles_count': metrics.get('articles_count', 0),
                'first_seen': min(timestamps) if timestamps else '',
                'last_seen': max(timestamps) if timestamps else ''
            })

    # Highest bot scores first so the worst offenders top the dashboard.
    suspicious_users.sort(key=lambda entry: entry['bot_score'], reverse=True)

    user_data_file = DASHBOARD_DATA_DIR / "user_data.json"
    with open(user_data_file, 'w', encoding='utf-8') as f:
        json.dump({
            'total_users': len(user_metrics),
            'suspicious_users': suspicious_users,
            'bot_score_distribution': bot_score_distribution,
            'bot_score_detailed_distribution': bot_score_detailed
        }, f, indent=2)

    logger.info(f"Saved user data to {user_data_file}")
    return {
        'total_users': len(user_metrics),
        'suspicious_users': suspicious_users,
        'bot_score_distribution': bot_score_distribution
    }

def update_dashboard_data():
    """Regenerate every dashboard data file from the raw comment dumps.

    Pipeline: load articles -> write article index -> write comment stats ->
    (optionally) write user/bot-score data -> write a ``status.json`` stamp.

    Returns:
        bool: True on success; False when no article data was found (the
        update is aborted without touching existing dashboard files).
    """
    logger.info("Starting dashboard data update")

    all_articles = load_all_comment_data()
    if not all_articles:
        logger.error("No article data found, aborting update")
        return False

    # Flat comment list is needed for the per-user first/last-seen lookups.
    all_comments = extract_comments_from_articles(all_articles)

    article_list = generate_article_list(all_articles)
    comment_stats = generate_comment_stats(all_articles)

    # User metrics come from a separate analysis job and may not exist yet;
    # the rest of the dashboard is still refreshed in that case.
    # (Fix: the previous version bound the result to an unused local.)
    user_metrics = load_user_metrics()
    if user_metrics:
        process_user_data(user_metrics, all_comments)
    else:
        logger.warning("No user metrics data found")

    # Timestamped status file lets the dashboard display data freshness.
    # NOTE(review): datetime.now() is naive local time — if readers span
    # timezones, consider datetime.now(datetime.timezone.utc) instead.
    status_file = DASHBOARD_DATA_DIR / "status.json"
    with open(status_file, 'w', encoding='utf-8') as f:
        json.dump({
            'last_updated': datetime.datetime.now().isoformat(),
            'article_count': len(article_list),
            'total_comments': comment_stats['total_comments'],
            'total_replies': comment_stats['total_replies'],
            'total_interactions': comment_stats['total_interactions'],
            'unique_users': comment_stats['unique_users'],
            'version': '1.0.0'
        }, f, indent=2)

    logger.info(f"Dashboard data update completed. Status saved to {status_file}")
    return True

if __name__ == "__main__":
    # Exit code 0 on success, 1 on failure or any unexpected exception,
    # so the cron wrapper can detect broken runs.
    try:
        sys.exit(0 if update_dashboard_data() else 1)
    except Exception as e:
        logger.error(f"Error updating dashboard data: {e}", exc_info=True)
        sys.exit(1)