#!/usr/bin/env python3
"""Scrape all tweets from @hrtfrg via Nitter instances and save as JSON."""

import json
import sys
from ntscraper import Nitter

# Nitter mirrors to try, in order of preference; each is attempted until one works.
INSTANCES = ["xcancel.com", "nitter.poast.org", "nitter.privacydev.net"]
# Twitter/X handle to scrape (no leading "@").
USERNAME = "hrtfrg"
# Destination file for the scraped tweets, written as a JSON array.
OUTPUT = "/home/josie/notes/hrtfrg_tweets.json"


def try_scrape(instance):
    """Fetch all tweets for USERNAME from a single Nitter instance.

    Args:
        instance: Hostname of the Nitter mirror to query.

    Returns:
        The list of raw tweet dicts on success, or None when the request
        fails or yields no tweets (so the caller can try the next mirror).
    """
    print(f"Trying instance: {instance}")
    # We pin an explicit instance, so ntscraper's own instance probing
    # is unnecessary overhead.
    scraper = Nitter(skip_instance_check=True)
    try:
        # number=-1 asks ntscraper for every available tweet, not one page.
        result = scraper.get_tweets(
            USERNAME, mode="user", number=-1, instance=instance
        )
    except Exception as e:
        # Broad catch is deliberate: any per-instance failure (network,
        # parsing, rate limit) should just move us on to the next mirror.
        print(f"  Failed: {e}")
        return None

    # .get() covers both "key missing" and "empty list" in one lookup.
    if not result or not result.get("tweets"):
        print("  No tweets returned")
        return None

    print(f"  Got {len(result['tweets'])} tweets")
    return result["tweets"]


def format_tweet(t, username=None):
    """Normalize one raw ntscraper tweet dict into the output schema.

    Args:
        t: Raw tweet dict as returned by ntscraper (keys: link, text,
            date, stats, pictures, videos, gifs — all optional here).
        username: Handle used to build the canonical x.com URL; defaults
            to the module-level USERNAME.

    Returns:
        Dict with id, url, text, date, engagement counts, and media list.
    """
    if username is None:
        username = USERNAME

    link = t.get("link", "")
    # Nitter tweet links conventionally end in a "#m" fragment
    # (e.g. .../status/12345#m) — strip it so the id stays purely numeric
    # and the rebuilt x.com URL is valid.
    tweet_id = link.rstrip("/").split("/")[-1].split("#")[0] if link else ""

    # Hoist the stats sub-dict: three lookups below share it.
    stats = t.get("stats", {})
    entry = {
        "id": tweet_id,
        "url": f"https://x.com/{username}/status/{tweet_id}" if tweet_id else link,
        "text": t.get("text", ""),
        "date": t.get("date", ""),
        "likes": stats.get("likes", 0),
        "retweets": stats.get("retweets", 0),
        # ntscraper labels replies as "comments".
        "replies": stats.get("comments", 0),
        "media": [],
    }

    # One loop replaces three copy-pasted extend blocks; `or []` also
    # tolerates an explicit None value, not just a missing key.
    for key, kind in (("pictures", "image"), ("videos", "video"), ("gifs", "gif")):
        for url in t.get(key) or []:
            entry["media"].append({"type": kind, "url": url})

    return entry


def main():
    """Scrape @USERNAME's tweets and write them to OUTPUT as JSON.

    Tries each pinned instance in order, then falls back to ntscraper's
    automatic instance selection; exits with status 1 if everything fails.
    """
    tweets = None
    for inst in INSTANCES:
        tweets = try_scrape(inst)
        if tweets:
            break

    if not tweets:
        # Last resort: let ntscraper probe for a working public instance.
        print("Trying with automatic instance selection...")
        scraper = Nitter(skip_instance_check=False)
        try:
            result = scraper.get_tweets(USERNAME, mode="user", number=-1)
            if result and result.get("tweets"):
                tweets = result["tweets"]
                print(f"Got {len(tweets)} tweets via auto-selected instance")
        except Exception as e:
            print(f"Auto-select also failed: {e}")

    if not tweets:
        print("All attempts failed. No tweets scraped.", file=sys.stderr)
        sys.exit(1)

    formatted = [format_tweet(t) for t in tweets]
    # NOTE(review): `date` is the human-readable string Nitter emits, so this
    # sort is lexicographic, not chronological — TODO: parse into datetimes
    # if strict newest-first ordering matters.
    formatted.sort(key=lambda t: t["date"], reverse=True)

    # Explicit UTF-8: ensure_ascii=False writes raw non-ASCII characters,
    # which would crash on platforms whose default file encoding isn't UTF-8.
    with open(OUTPUT, "w", encoding="utf-8") as f:
        json.dump(formatted, f, indent=2, ensure_ascii=False)

    print(f"Saved {len(formatted)} tweets to {OUTPUT}")


# Script entry point: run the scrape only when executed directly, not on import.
if __name__ == "__main__":
    main()
