#!/usr/bin/env python3
"""
Script to fetch comments from 20min.ch articles using their API endpoints.
Bypasses the need for web scraping by directly accessing the comment API.
"""

import requests
import json
import logging
import argparse
import time
import sys
import datetime
import re
from pathlib import Path

# Configure logging: messages go both to the console (stderr) and to a
# persistent log file next to the script.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(),
        logging.FileHandler('comment_fetcher.log')
    ]
)

# API endpoints
# Only COMMENTS_API is used in this file; the reaction endpoints are kept
# for reference / future use — confirm before removing.
COMMENTS_API = "https://api.20min.ch/comment/v1/comments"
COMMENT_REACTIONS_API = "https://api.20min.ch/comment/v2/reactions"
USER_REACTIONS_API = "https://api.20min.ch/comment/v1/user-reactions"

# Headers to mimic a browser request (the API expects a Referer/Origin
# from the main site).
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    "Accept": "application/json",
    "Accept-Language": "en-US,en;q=0.9",
    "Referer": "https://www.20min.ch/",
    "Origin": "https://www.20min.ch"
}

def fetch_comments(article_id, tenant_id=6, timeout=30):
    """Fetch the first page of comments for an article from the 20min.ch API.

    Args:
        article_id: Numeric article (content) ID, as int or str.
        tenant_id: API tenant (default 6; per the CLI help, 6 = German,
            7 = French edition).
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The parsed JSON response dict on success, or None on a network
        error or non-200 status.
    """
    logging.info(f"Fetching comments for article ID: {article_id}")

    params = {
        "contentId": article_id,
        "tenantId": tenant_id,
        "offset": 0,
        "limit": 100  # API requires limit to be 100 or less
    }

    logging.info(f"Requesting comments from: {COMMENTS_API}")
    try:
        # A timeout is essential here: without one, requests can block
        # indefinitely on a stalled connection.
        response = requests.get(COMMENTS_API, params=params, headers=HEADERS,
                                timeout=timeout)
    except requests.RequestException as e:
        # Treat transport-level failures the same as HTTP failures so the
        # caller only has to check for None.
        logging.error(f"Request to comments API failed: {e}")
        return None

    if response.status_code == 200:
        comments_data = response.json()
        logging.info(f"Successfully fetched {len(comments_data.get('comments', []))} comments")
        return comments_data
    else:
        logging.error(f"Failed to fetch comments: {response.status_code} - {response.text}")
        return None

def format_datetime(dt_str):
    """Convert an ISO-8601 timestamp into 'YYYY-MM-DD HH:MM:SS' form.

    Args:
        dt_str: ISO timestamp string (a trailing 'Z' UTC marker is
            accepted), or a falsy value.

    Returns:
        The reformatted timestamp; "Unknown date" for falsy input; the
        original value unchanged when it cannot be parsed.
    """
    if not dt_str:
        return "Unknown date"

    try:
        # fromisoformat() on older Pythons rejects the 'Z' suffix, so
        # normalize it to an explicit UTC offset first.
        dt = datetime.datetime.fromisoformat(dt_str.replace('Z', '+00:00'))
        return dt.strftime('%Y-%m-%d %H:%M:%S')
    except (ValueError, TypeError, AttributeError) as e:
        # Narrowed from a bare `except Exception`: only parse/type
        # failures are expected here; anything else should surface.
        logging.error(f"Error formatting datetime: {e}")
        return dt_str

def format_reply(reply):
    """Render a single reply dict as an indented, multi-line display string."""
    author = reply.get("authorNickname", "Anonymous")
    body = reply.get("body", "No text")
    posted = format_datetime(reply.get("createdAt"))

    # Collapse the reaction counts into "N type, M type"; an empty join
    # falls through to the placeholder text.
    counts = reply.get("reactions", {})
    reaction_str = ", ".join(f"{n} {kind}" for kind, n in counts.items()) or "No reactions"

    return f"  Reply by {author} on {posted}:\n  {body}\n  Reactions: {reaction_str}"

def extract_article_id_from_url(url):
    """Extract the numeric article ID from a 20min.ch article URL.

    Article URLs end in a slug with a trailing numeric ID
    (e.g. ".../story/some-title-103339848"). Falls back to returning the
    last path segment verbatim when no trailing digits are found.

    Args:
        url: Full article URL; query string, fragment, and a trailing
            slash are ignored.

    Returns:
        The article ID (or last path segment) as a string.
    """
    from urllib.parse import urlparse  # local import: only needed here

    # Parse the path so "?query"/"#fragment" don't stay glued to the ID,
    # and strip a trailing slash so the last segment isn't empty
    # (the old bare split('/')[-1] had both problems).
    last_segment = urlparse(url).path.rstrip('/').split('/')[-1]
    match = re.search(r'(\d+)$', last_segment)
    return match.group(1) if match else last_segment

def save_comments_to_file(comments_data, output_file):
    """Write the comments payload to *output_file* as pretty-printed UTF-8 JSON."""
    Path(output_file).write_text(
        json.dumps(comments_data, ensure_ascii=False, indent=2),
        encoding='utf-8',
    )
    logging.info(f"Comments data saved to {output_file}")

def search_comments(comments_data, search_terms, case_sensitive=False):
    """
    Search for comments containing specific keywords.

    A comment matches when any search term appears in its body or author
    nickname, or in the body/nickname of any of its replies; the whole
    top-level comment is returned in that case.

    Args:
        comments_data: The comments data dict (expects a "comments" key)
        search_terms: List of terms to search for
        case_sensitive: Whether to perform case-sensitive search

    Returns:
        List of matching top-level comment dicts
    """
    if not comments_data or not search_terms:
        return []

    # Normalize terms once up front for case-insensitive mode.
    terms = search_terms if case_sensitive else [t.lower() for t in search_terms]

    def matches(text):
        # True when any term occurs in the (possibly lowercased) text.
        if not text:
            return False
        haystack = text if case_sensitive else text.lower()
        return any(term in haystack for term in terms)

    def comment_matches(comment):
        if matches(comment.get("body", "")) or matches(comment.get("authorNickname", "")):
            return True
        # A match anywhere in the reply thread also selects the comment.
        return any(
            matches(reply.get("body", "")) or matches(reply.get("authorNickname", ""))
            for reply in comment.get("replies", [])
        )

    return [c for c in comments_data.get("comments", []) if comment_matches(c)]

def display_comment_summary(comments_data, search_terms=None, case_sensitive=False):
    """Print a human-readable summary of fetched comments to stdout.

    Shows up to 5 comments (each with up to 3 of its replies), optionally
    filtered via search_comments(), followed by aggregate reaction counts
    over the displayed (filtered) comment set.

    Args:
        comments_data: Parsed API response dict; reads the "comments" and
            "commentingEnabled" keys.
        search_terms: Optional list of terms; when given, only matching
            comments are displayed and tallied. No match -> early return.
        case_sensitive: Forwarded to search_comments().
    """
    if not comments_data:
        print("No comments data available")
        return
    
    comments = comments_data.get("comments", [])
    commenting_enabled = comments_data.get("commentingEnabled", True)
    
    # Filter comments if search terms provided; with no matches there is
    # nothing further to show, so bail out entirely.
    if search_terms and len(search_terms) > 0:
        filtered_comments = search_comments(comments_data, search_terms, case_sensitive)
        if filtered_comments:
            print(f"\nFound {len(filtered_comments)} comments matching search terms: {', '.join(search_terms)}")
            comments = filtered_comments
        else:
            print(f"\nNo comments found matching search terms: {', '.join(search_terms)}")
            return
    else:
        print(f"\nFound {len(comments)} comments. Commenting is {'enabled' if commenting_enabled else 'disabled'}.")
    
    # Display at most the first 5 comments (1-based numbering for output).
    for i, comment in enumerate(comments[:5], 1):
        author = comment.get("authorNickname", "Anonymous")
        text = comment.get("body", "No text")
        created_at = format_datetime(comment.get("createdAt"))
        
        # Get reaction counts as "N type, M type" or a placeholder.
        reactions = comment.get("reactions", {})
        reaction_str = ", ".join([f"{count} {reaction_type}" for reaction_type, count in reactions.items()])
        if not reaction_str:
            reaction_str = "No reactions"
        
        # Count replies
        replies = comment.get("replies", [])
        reply_count = len(replies)
        
        print("-" * 50)
        print(f"Comment #{i}:")
        print(f"Author: {author}")
        print(f"Posted: {created_at}")
        print(f"Text: {text}")
        print(f"Reactions: {reaction_str}")
        print(f"Replies: {reply_count}")
        
        # Display replies if any
        if reply_count > 0:
            print("\nReplies:")
            for reply in replies[:3]:  # Show up to 3 replies per comment
                print(format_reply(reply))
            
            if reply_count > 3:
                print(f"  ... and {reply_count - 3} more replies.")
    
    if len(comments) > 5:
        print("-" * 50)
        print(f"... and {len(comments) - 5} more comments.")
    
    # Tally reactions across the (possibly filtered) comment set, including
    # reactions on replies. NOTE(review): when a search filter is active
    # this totals only the matching comments, despite the "all comments"
    # wording printed below — confirm that is the intent.
    total_reactions = {}
    for comment in comments:
        reactions = comment.get("reactions", {})
        for reaction_type, count in reactions.items():
            total_reactions[reaction_type] = total_reactions.get(reaction_type, 0) + count
            
        # Include reactions from replies too
        for reply in comment.get("replies", []):
            reply_reactions = reply.get("reactions", {})
            for reaction_type, count in reply_reactions.items():
                total_reactions[reaction_type] = total_reactions.get(reaction_type, 0) + count
    
    if total_reactions:
        print("\nTotal reactions across all comments and replies:")
        for reaction_type, count in total_reactions.items():
            print(f"{reaction_type}: {count}")

def main():
    """CLI entry point: parse options, fetch comments, save and summarize them."""
    parser = argparse.ArgumentParser(
        description="""
        20min.ch Comment Fetcher
        -----------------------
        Fetch comments from 20min.ch articles using their API endpoints.
        This script bypasses the need for web scraping by directly accessing the comment API.
        
        Examples:
        - Fetch comments by article ID:
          python comment_fetcher.py --id 103339848 --output comments.json
          
        - Search for specific keywords in comments:
          python comment_fetcher.py --id 103339848 --search EU SVP
          
        - Use case-sensitive search:
          python comment_fetcher.py --id 103339848 --search EU --case-sensitive
        """,
        formatter_class=argparse.RawDescriptionHelpFormatter
    )

    # Exactly one way of identifying the article must be supplied.
    source = parser.add_mutually_exclusive_group(required=True)
    source.add_argument("--url", help="URL of the 20min.ch article")
    source.add_argument("--id", help="ID of the 20min.ch article")

    # Remaining options.
    parser.add_argument("--tenant", type=int, default=6,
                        help="Tenant ID (default: 6 for German, 7 for French)")
    parser.add_argument("--output", default="comments.json",
                        help="Output file path (default: comments.json)")
    parser.add_argument("--search", nargs="+",
                        help="Search for comments containing specific terms")
    parser.add_argument("--case-sensitive", action="store_true",
                        help="Make search case-sensitive")

    opts = parser.parse_args()

    # Resolve the article ID, deriving it from the URL when needed.
    article_id = opts.id or extract_article_id_from_url(opts.url)

    comments_data = fetch_comments(article_id, opts.tenant)
    if not comments_data:
        logging.error("Failed to fetch comments")
        sys.exit(1)

    # Persist the raw data, then print a summary (optionally filtered).
    save_comments_to_file(comments_data, opts.output)
    display_comment_summary(comments_data, opts.search, opts.case_sensitive)

# Run only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()