#!/usr/bin/env python3
"""
Job Scraper - Monitor job openings from companies you're interested in.

Usage:
    python main.py              # Run once
    python main.py --schedule   # Run daily at 09:00
    python main.py --list       # List all tracked jobs
"""

import argparse
import sys
import time
from datetime import datetime
from pathlib import Path

import yaml

from db import Database
from notify import ChangeReport, Notifier
from scrapers import AshbyScraper, GreenhouseScraper, LeverScraper
from scrapers.base import BaseScraper, Job
from dashboard import generate_dashboard


def load_config(config_path: str = "config.yaml") -> dict:
    """Load configuration from YAML file."""
    with open(config_path) as f:
        return yaml.safe_load(f)


def get_scraper(company_config: dict) -> BaseScraper:
    """Create a scraper instance based on company configuration."""
    platform = company_config["platform"]
    name = company_config["name"]
    if platform == "greenhouse":
        return GreenhouseScraper(name, company_config["board_token"])
    elif platform == "lever":
        return LeverScraper(name, company_config["lever_company"])
    elif platform == "ashby":
        return AshbyScraper(name, company_config["ashby_company"])
    else:
        raise ValueError(f"Unknown platform: {platform}")


def filter_jobs_by_title(jobs: list[Job], title_filters: list[str]) -> list[Job]:
    """Filter jobs to only include those matching title keywords."""
    if not title_filters:
        return jobs
    filtered = []
    for job in jobs:
        title_lower = job.title.lower()
        if any(keyword.lower() in title_lower for keyword in title_filters):
            filtered.append(job)
    return filtered


def scrape_company(company_config: dict, db: Database, config: dict) -> ChangeReport:
    """Scrape jobs for a single company and detect changes."""
    name = company_config["name"]
    print(f"\nšŸ” Scraping {name}...", end=" ", flush=True)

    try:
        with get_scraper(company_config) as scraper:
            # Get current jobs from the career page
            all_jobs = scraper.scrape()

            # Filter by title keywords if configured
            title_filters = config.get("title_filters", [])
            current_jobs = filter_jobs_by_title(all_jobs, title_filters)

            print(f"found {len(current_jobs)} jobs (of {len(all_jobs)} total)")

            # Get or create company in database, storing whichever platform
            # identifier is configured (greenhouse, lever, or ashby)
            company_id = db.get_or_create_company(
                name,
                jobs_url=(
                    company_config.get("board_token")
                    or company_config.get("lever_company")
                    or company_config.get("ashby_company", "")
                ),
                platform_type=company_config["platform"],
            )

            # Get stored jobs
            stored_jobs = db.get_active_jobs(company_id)

            # Detect removals: jobs stored as active but absent from this
            # scrape. New jobs are detected via db.upsert_job below.
            current_ids = {job.external_id for job in current_jobs}
            stored_ids = set(stored_jobs.keys())
            removed_ids = stored_ids - current_ids

            # Process new jobs
            new_jobs = []
            for job in current_jobs:
                is_new, _ = db.upsert_job(company_id, job)
                if is_new:
                    new_jobs.append(job)

            # Mark removed jobs
            removed_jobs = db.mark_jobs_removed(company_id, removed_ids)

            # Update last scraped time
            db.update_company_scraped(company_id)

            # Apply location filters to highlight relevant jobs
            location_filters = config.get("location_filters", [])
            if location_filters and new_jobs:
                relevant_new = []
                for job in new_jobs:
                    if job.location:
                        loc_lower = job.location.lower()
                        if any(f.lower() in loc_lower for f in location_filters):
                            relevant_new.append(job)
                    elif job.remote_type == "remote":
                        relevant_new.append(job)
                if relevant_new:
                    print(f"  ⭐ {len(relevant_new)} jobs match your location filters!")

            return ChangeReport(
                company_name=name,
                new_jobs=new_jobs,
                removed_jobs=removed_jobs,
                total_active=len(current_jobs),
            )

    except Exception as e:
        print(f"ERROR: {e}")
        return ChangeReport(
            company_name=name,
            new_jobs=[],
            removed_jobs=[],
            total_active=0,
        )
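
# A minimal config.yaml sketch matching the keys this script reads (company
# entries per get_scraper above; filter, notification, and scraper keys per
# scrape_company and run_scraper). The company names and tokens below are
# illustrative placeholders, not real boards:
#
#   companies:
#     - name: Acme
#       platform: greenhouse
#       board_token: acme
#     - name: Beta Labs
#       platform: lever
#       lever_company: betalabs
#     - name: Gamma AI
#       platform: ashby
#       ashby_company: gamma-ai
#   title_filters:
#     - engineer
#   location_filters:
#     - london
#     - remote
#   notifications: {}
#   scraper:
#     request_delay: 2
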

def run_scraper(config: dict):
    """Run the scraper for all configured companies."""
    print(f"\n{'=' * 60}")
    print(f"Job Scraper - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"{'=' * 60}")

    db = Database()
    notifier = Notifier(config.get("notifications", {}))
    companies = config.get("companies", [])

    print(f"\nMonitoring {len(companies)} companies...")

    reports = []
    delay = config.get("scraper", {}).get("request_delay", 2)
    for i, company_config in enumerate(companies):
        report = scrape_company(company_config, db, config)
        reports.append(report)
        # Delay between companies (be respectful!)
        if i < len(companies) - 1:
            time.sleep(delay)

    # Send notifications
    notifier.notify(reports)

    # Summary
    total_jobs = sum(r.total_active for r in reports)
    total_new = sum(len(r.new_jobs) for r in reports)
    total_removed = sum(len(r.removed_jobs) for r in reports)
    print(f"\nšŸ“Š Total: {total_jobs} active jobs across {len(companies)} companies")
    print(f"   Changes: +{total_new} new, -{total_removed} removed")

    # Generate dashboard
    generate_dashboard()


def list_jobs(config: dict):
    """List all tracked jobs."""
    db = Database()
    jobs = db.get_all_active_jobs()

    if not jobs:
        print("No jobs tracked yet. Run the scraper first.")
        return

    print(f"\n{'=' * 60}")
    print(f"All Tracked Jobs ({len(jobs)} total)")
    print(f"{'=' * 60}")

    current_company = None
    for company_name, job in jobs:
        if company_name != current_company:
            print(f"\nšŸ“Œ {company_name}")
            print("-" * 40)
            current_company = company_name
        location = f" [{job.location}]" if job.location else ""
        remote = " šŸ " if job.remote_type == "remote" else ""
        print(f"  • {job.title}{location}{remote}")
        print(f"    {job.url}")


def run_scheduled(config: dict):
    """Run the scraper on a schedule."""
    import schedule

    print("Starting scheduled job scraper...")
    print("Will run daily at 09:00")
    print("Press Ctrl+C to stop\n")

    # Run immediately on start
    run_scraper(config)

    # Schedule daily run
    schedule.every().day.at("09:00").do(run_scraper, config)

    while True:
        schedule.run_pending()
        time.sleep(60)


def main():
    parser = argparse.ArgumentParser(description="Job Scraper - Monitor job openings")
    parser.add_argument("--config", default="config.yaml", help="Path to config file")
    parser.add_argument("--schedule", action="store_true", help="Run on a schedule")
    parser.add_argument("--list", action="store_true", help="List all tracked jobs")
    args = parser.parse_args()

    # Load config
    config_path = Path(args.config)
    if not config_path.exists():
        print(f"Error: Config file not found: {config_path}")
        sys.exit(1)
    config = load_config(args.config)

    if args.list:
        list_jobs(config)
    elif args.schedule:
        run_scheduled(config)
    else:
        run_scraper(config)


if __name__ == "__main__":
    main()
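
# Sketch: making the daily run time configurable rather than hardcoding
# "09:00" in run_scheduled. "schedule_time" is a hypothetical config key,
# not one the script currently reads:
#
#   run_time = config.get("scraper", {}).get("schedule_time", "09:00")
#   schedule.every().day.at(run_time).do(run_scraper, config)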