#!/usr/bin/env python3
"""
Script to fetch comments for 20min.ch articles using their API endpoints.

This script extracts comments from 20min.ch articles by directly accessing 
the comment API, bypassing the need to scrape the website content.
"""

import requests
import json
import logging
import argparse
import time
import sys
from pathlib import Path

# Configure logging
# NOTE: module-level side effect — importing this script configures the root
# logger to write to stderr AND to "comment_fetcher.log" in the current
# working directory.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(),
        logging.FileHandler("comment_fetcher.log")
    ]
)

# Comment API endpoints found in the HTML
# Only COMMENTS_API is called below; the two reaction endpoints are kept for
# reference / future use.
COMMENTS_API = "https://api.20min.ch/comment/v1/comments"
COMMENT_REACTIONS_API = "https://api.20min.ch/comment/v2/reactions"
USER_REACTIONS_API = "https://api.20min.ch/comment/v1/user-reactions"

# Browser-like headers
# NOTE(review): these mimic a browser's document navigation (Accept:
# text/html..., Sec-Fetch-Dest: document) even though they are sent to a
# JSON API — presumably to avoid bot filtering; confirm the API tolerates
# them rather than requiring Accept: application/json.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
    "Accept-Language": "de-CH,de;q=0.9,en-US;q=0.8,en;q=0.7",
    "Accept-Encoding": "gzip, deflate, br",
    "Referer": "https://www.20min.ch/",
    "DNT": "1",
    "Connection": "keep-alive",
    "Upgrade-Insecure-Requests": "1",
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "same-origin",
    "Sec-Fetch-User": "?1",
    "Cache-Control": "max-age=0"
}

def fetch_comments(article_id, tenant_id=6):
    """
    Fetch comments for a specific article using the 20min.ch API.

    Args:
        article_id: The ID of the article (can be extracted from the URL)
        tenant_id: The tenant ID (6 for German, 7 for French)

    Returns:
        A dictionary containing the comments data on success, or a dictionary
        of the form ``{"error": <message>, "comments": []}`` on any failure
        (network error, non-200 status, or a non-JSON response body).
    """
    logging.info(f"Fetching comments for article ID: {article_id}")

    # Query parameters understood by the comment endpoint.
    params = {
        "tenantId": tenant_id,
        "contentId": article_id,
        "sort": "newest",
        "offset": 0,
        "limit": 100  # Try to get up to 100 comments
    }

    logging.info(f"Requesting comments from: {COMMENTS_API}")
    # Catch only transport-level failures here (timeouts, DNS, connection
    # resets) — a blanket `except Exception` would hide programming errors.
    try:
        response = requests.get(
            COMMENTS_API,
            params=params,
            headers=HEADERS,
            timeout=30
        )
    except requests.RequestException as e:
        logging.error(f"Exception while fetching comments: {str(e)}")
        return {"error": str(e), "comments": []}

    if response.status_code != 200:
        logging.error(f"Failed to fetch comments: HTTP {response.status_code}")
        logging.error(f"Response: {response.text}")
        return {"error": f"HTTP {response.status_code}", "comments": []}

    # A 200 response can still carry a non-JSON body (e.g. an HTML error
    # page from an intermediary); requests raises ValueError in that case.
    try:
        comments_data = response.json()
    except ValueError as e:
        logging.error(f"Exception while fetching comments: {str(e)}")
        return {"error": str(e), "comments": []}

    # Assumes the payload is a dict with a top-level "comments" list —
    # TODO(review): confirm against the live API schema.
    comment_count = len(comments_data.get('comments', []))
    logging.info(f"Successfully fetched {comment_count} comments")
    return comments_data

def extract_article_id_from_url(url):
    """
    Extract the numeric article ID from a 20min.ch URL.

    Handles URLs like ``https://www.20min.ch/story/article-title-123456789``,
    including variants with a query string, fragment, or trailing slash.

    Args:
        url: The full URL of the article

    Returns:
        The article ID as a string of digits, or None if the URL is not a
        story URL or the trailing segment is not numeric.
    """
    if "/story/" not in url:
        return None
    # Drop query string, fragment, and any trailing slash so they don't
    # contaminate the trailing ID segment (e.g. "...-123?utm=x" -> "123").
    path = url.strip().split('?', 1)[0].split('#', 1)[0].rstrip('/')
    # The ID is the last dash-separated part of the path.
    candidate = path.rsplit('-', 1)[-1]
    # Article IDs are numeric; reject anything else instead of returning
    # garbage (the original bare `except:` was unreachable — str.split
    # never raises — and masked this missing validation).
    return candidate if candidate.isdigit() else None

def save_comments_to_file(comments_data, output_file):
    """
    Serialize the comments data to disk as pretty-printed UTF-8 JSON.

    Failures are logged rather than raised, keeping this a best-effort step.

    Args:
        comments_data: The comments data dictionary
        output_file: The path to the output file
    """
    try:
        # Serialize first, then write — same bytes as json.dump would emit.
        serialized = json.dumps(comments_data, ensure_ascii=False, indent=2)
        with open(output_file, 'w', encoding='utf-8') as handle:
            handle.write(serialized)
        logging.info(f"Comments data saved to {output_file}")
    except Exception as e:
        logging.error(f"Error saving comments to file: {str(e)}")

def display_comment_summary(comments_data):
    """
    Print a short human-readable summary of the fetched comments.

    Shows the first five comments (author, text, creation date, reactions)
    and, if there are more, a count of how many remain.

    Args:
        comments_data: The comments data dictionary
    """
    comments = comments_data.get('comments', [])
    if not comments:
        print("No comments found or error fetching comments.")
        return

    divider = "-" * 50
    print(f"\nFound {len(comments)} comments:")
    print(divider)

    # Only the first five comments are shown in full.
    for index, comment in enumerate(comments[:5], start=1):
        reactions = comment.get('reactions', {})
        author_name = comment.get('author', {}).get('nickname', 'Anonymous')
        print(f"Comment #{index}:")
        print(f"Author: {author_name}")
        print(f"Text: {comment.get('text', 'No text')}")
        print(f"Created: {comment.get('created', 'Unknown date')}")
        print(f"Reactions: {reactions.get('likes', 0)} likes, {reactions.get('dislikes', 0)} dislikes")
        print(divider)

    remaining = len(comments) - 5
    if remaining > 0:
        print(f"... and {remaining} more comments.")

def main():
    """Command-line entry point: resolve the article ID, then fetch, save,
    and summarize its comments."""
    parser = argparse.ArgumentParser(description='Fetch comments from 20min.ch articles')
    parser.add_argument('--url', type=str, help='URL of the article')
    parser.add_argument('--id', type=str, help='Article ID (extracted from URL)')
    parser.add_argument('--tenant', type=int, default=6, help='Tenant ID (6 for German, 7 for French)')
    parser.add_argument('--output', type=str, default='comments.json', help='Output file path')
    args = parser.parse_args()

    # Prefer an explicitly supplied ID; otherwise try to derive one from the URL.
    article_id = args.id
    if not article_id and args.url:
        article_id = extract_article_id_from_url(args.url)

    # No usable ID at all — show usage and bail out with a non-zero status.
    if not article_id:
        logging.error("No article ID provided or could not extract from URL")
        parser.print_help()
        sys.exit(1)

    comments_data = fetch_comments(article_id, args.tenant)
    save_comments_to_file(comments_data, args.output)
    display_comment_summary(comments_data)


if __name__ == "__main__":
    main()