#!/usr/bin/env python3
"""
Script to fetch comments from 20min.ch articles using their API endpoints.
Bypasses the need for web scraping by directly accessing the comment API.
"""

import argparse
import datetime
import json
import logging
import sys
import time
import urllib.parse
from pathlib import Path

import requests

# Configure logging: mirror every message to the console and to a log file
# created in the current working directory.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(),
        logging.FileHandler('comment_fetcher.log')
    ]
)

# API endpoints
# NOTE(review): only COMMENTS_API is used in this file; the two reaction
# endpoints appear to be kept for reference / future use.
COMMENTS_API = "https://api.20min.ch/comment/v1/comments"
COMMENT_REACTIONS_API = "https://api.20min.ch/comment/v2/reactions"
USER_REACTIONS_API = "https://api.20min.ch/comment/v1/user-reactions"

# Headers to mimic a browser request (User-Agent/Referer/Origin make the
# request look like it originated from the 20min.ch website itself)
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    "Accept": "application/json",
    "Accept-Language": "en-US,en;q=0.9",
    "Referer": "https://www.20min.ch/",
    "Origin": "https://www.20min.ch"
}

def fetch_comments(article_id, tenant_id=6, timeout=30):
    """Fetch the comments payload for one article from the 20min.ch API.

    Args:
        article_id: The article's content ID (string or int).
        tenant_id: API tenant identifier (default 6, as used by the site).
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The decoded JSON payload (dict) on success, or None on any failure
        (non-200 status, network error, or timeout).
    """
    logging.info(f"Fetching comments for article ID: {article_id}")

    params = {
        "contentId": article_id,
        "tenantId": tenant_id,
        "offset": 0,
        "limit": 500  # Adjust as needed to get all comments
    }

    logging.info(f"Requesting comments from: {COMMENTS_API}")
    try:
        # A timeout is essential here: without it a stalled connection would
        # hang the script forever. Connection/timeout errors are reported and
        # mapped to None, matching the non-200 failure path below.
        response = requests.get(COMMENTS_API, params=params, headers=HEADERS, timeout=timeout)
    except requests.RequestException as e:
        logging.error(f"Request to comments API failed: {e}")
        return None

    if response.status_code == 200:
        comments_data = response.json()
        logging.info(f"Successfully fetched {len(comments_data.get('comments', []))} comments")
        return comments_data
    else:
        logging.error(f"Failed to fetch comments: {response.status_code} - {response.text}")
        return None

def format_datetime(dt_str):
    """Convert an ISO-8601 timestamp into a 'YYYY-MM-DD HH:MM:SS' string.

    Empty/None input yields "Unknown date"; an unparseable string is logged
    and returned unchanged.
    """
    if not dt_str:
        return "Unknown date"

    try:
        # The API uses a trailing 'Z' for UTC, which fromisoformat() does not
        # accept on older Pythons — normalize it to an explicit offset first.
        parsed = datetime.datetime.fromisoformat(dt_str.replace('Z', '+00:00'))
    except Exception as exc:
        logging.error(f"Error formatting datetime: {exc}")
        return dt_str

    return parsed.strftime('%Y-%m-%d %H:%M:%S')

def format_reply(reply):
    """Format a single reply dict as an indented, human-readable string.

    Args:
        reply: Reply dict from the API; relevant keys are "authorNickname",
            "body", "createdAt", and "reactions" (a list of
            {"type": ..., "count": ...} dicts).

    Returns:
        A multi-line string with author, timestamp, body, and reaction counts.
    """
    author = reply.get("authorNickname", "Anonymous")
    text = reply.get("body", "No text")
    created_at = format_datetime(reply.get("createdAt"))

    # Default to [] — reactions are a list of dicts everywhere else in this
    # file; the previous {} default only worked by accident because iterating
    # an empty dict also yields nothing.
    reactions = reply.get("reactions", [])
    reaction_counts = {
        r.get("type", "unknown"): r.get("count", 0)
        for r in reactions
    }

    reaction_str = ", ".join([f"{count} {reaction_type}" for reaction_type, count in reaction_counts.items()])
    if not reaction_str:
        reaction_str = "No reactions"

    return f"  Reply by {author} on {created_at}:\n  {text}\n  Reactions: {reaction_str}"

def extract_article_id_from_url(url):
    """Extract the article ID (last path segment) from a 20min.ch URL.

    Unlike a naive split on '/', this tolerates trailing slashes, query
    strings, and fragments, none of which belong to the article ID.

    Args:
        url: Full article URL.

    Returns:
        The final, non-empty path segment as a string ("" only if the URL
        has no path at all).
    """
    # urlsplit cleanly separates the path from ?query and #fragment parts.
    path = urllib.parse.urlsplit(url).path
    # Strip a trailing slash so ".../123456/" still yields "123456".
    return path.rstrip('/').split('/')[-1]

def save_comments_to_file(comments_data, output_file):
    """Write the comments payload to *output_file* as pretty-printed JSON.

    Non-ASCII characters are written verbatim (UTF-8) rather than escaped.
    """
    serialized = json.dumps(comments_data, ensure_ascii=False, indent=2)
    Path(output_file).write_text(serialized, encoding='utf-8')
    logging.info(f"Comments data saved to {output_file}")

def _reaction_counts(reactions):
    """Map reaction type -> count for a list of {"type", "count"} dicts.

    Duplicate types keep the last count seen (matches the original inline
    logic this was extracted from).
    """
    counts = {}
    for reaction in reactions:
        counts[reaction.get("type", "unknown")] = reaction.get("count", 0)
    return counts


def _format_reaction_counts(counts):
    """Render a type->count mapping as e.g. '3 like, 1 angry', or 'No reactions'."""
    reaction_str = ", ".join([f"{count} {reaction_type}" for reaction_type, count in counts.items()])
    return reaction_str if reaction_str else "No reactions"


def display_comment_summary(comments_data):
    """Print a console summary of the fetched comments payload.

    Shows up to the first 5 comments (each with up to 3 of its replies),
    then aggregate reaction totals across ALL comments and replies.

    Args:
        comments_data: Decoded API payload dict (keys "comments",
            "commentingEnabled"), or a falsy value if the fetch failed.
    """
    if not comments_data:
        print("No comments data available")
        return

    comments = comments_data.get("comments", [])
    commenting_enabled = comments_data.get("commentingEnabled", True)

    print(f"\nFound {len(comments)} comments. Commenting is {'enabled' if commenting_enabled else 'disabled'}.")

    # Display first 5 comments
    for i, comment in enumerate(comments[:5], 1):
        author = comment.get("authorNickname", "Anonymous")
        text = comment.get("body", "No text")
        created_at = format_datetime(comment.get("createdAt"))
        reaction_str = _format_reaction_counts(_reaction_counts(comment.get("reactions", [])))

        replies = comment.get("replies", [])
        reply_count = len(replies)

        print("-" * 50)
        print(f"Comment #{i}:")
        print(f"Author: {author}")
        print(f"Posted: {created_at}")
        print(f"Text: {text}")
        print(f"Reactions: {reaction_str}")
        print(f"Replies: {reply_count}")

        if reply_count > 0:
            print("\nReplies:")
            for reply in replies[:3]:  # Show up to 3 replies per comment
                print(format_reply(reply))

            if reply_count > 3:
                print(f"  ... and {reply_count - 3} more replies.")

    if len(comments) > 5:
        print("-" * 50)
        print(f"... and {len(comments) - 5} more comments.")

    # Accumulate total reactions across all comments AND their replies.
    total_reactions = {}
    for comment in comments:
        reaction_lists = [comment.get("reactions", [])]
        reaction_lists += [reply.get("reactions", []) for reply in comment.get("replies", [])]
        for reactions in reaction_lists:
            for reaction in reactions:
                reaction_type = reaction.get("type", "unknown")
                count = reaction.get("count", 0)
                total_reactions[reaction_type] = total_reactions.get(reaction_type, 0) + count

    if total_reactions:
        print("\nTotal reactions across all comments and replies:")
        for reaction_type, count in total_reactions.items():
            print(f"{reaction_type}: {count}")

def main():
    """Parse CLI arguments, fetch the article's comments, save and summarize them.

    Exits with status 1 when the fetch fails.
    """
    parser = argparse.ArgumentParser(description="Fetch comments from 20min.ch articles")

    # Exactly one of --url / --id must identify the article.
    source = parser.add_mutually_exclusive_group(required=True)
    source.add_argument("--url", help="URL of the 20min.ch article")
    source.add_argument("--id", help="ID of the 20min.ch article")

    parser.add_argument("--tenant", type=int, default=6, help="Tenant ID (default: 6)")
    parser.add_argument("--output", default="comments.json", help="Output file path (default: comments.json)")

    args = parser.parse_args()

    # Prefer an explicit --id; otherwise derive the ID from the URL.
    if args.id:
        article_id = args.id
    else:
        article_id = extract_article_id_from_url(args.url)

    comments_data = fetch_comments(article_id, args.tenant)

    if not comments_data:
        logging.error("Failed to fetch comments")
        sys.exit(1)

    save_comments_to_file(comments_data, args.output)
    display_comment_summary(comments_data)

# Script entry point: run only when executed directly, not when imported.
if __name__ == "__main__":
    main()