# Regions/locations we care about (case-insensitive substring matching)
DESIRED_REGIONS = [
    "canada",
    "toronto",
    "vancouver",
    "germany",
    "berlin",
    "munich",
    "emea",
    "americas",  # includes North/South America
    "north america",
    "worldwide",
    "global",
    "anywhere",
]

# Locations to explicitly exclude (on-site or remote restricted to these).
# NOTE: is_location_relevant() below is a pure allow-list, so this list does
# not influence its result; it is kept as a module-level constant.
EXCLUDED_LOCATIONS = [
    # US cities/states (we don't want US-only jobs)
    "san francisco", "new york", "nyc", "seattle", "austin", "boston",
    "chicago", "denver", "los angeles", "atlanta", "dallas", "houston",
    "california", "washington", "texas", "massachusetts", "colorado",
    "united states", "usa", "u.s.", "us-", "usa-",
    # UK
    "london", "united kingdom", "uk", "dublin", "ireland",
    # Australia/APAC (not EMEA)
    "sydney", "melbourne", "australia", "singapore", "tokyo", "japan",
    "india", "bangalore", "bengaluru", "hyderabad", "delhi", "korea",
    "seoul", "taiwan", "taipei", "china", "beijing", "shenzhen",
    # Other excluded
    "israel", "tel aviv", "brazil", "sao paulo", "mexico", "netherlands",
    "amsterdam", "france", "paris", "spain", "madrid", "portugal", "lisbon",
    "poland", "warsaw", "italy", "czech", "prague", "serbia", "belgrade",
    "cyprus", "limassol", "austria", "vienna", "sweden", "stockholm",
    "denmark", "copenhagen", "switzerland", "romania", "bucharest",
    "hungary", "greece", "south africa", "indonesia", "jakarta", "malaysia",
]


def is_location_relevant(location: str, remote_type: str) -> bool:
    """Return True when a job's location should appear on the dashboard.

    Strict allow-list filter: a job is kept only when its location text
    mentions one of DESIRED_REGIONS (Canada, Germany, EMEA, Americas,
    worldwide, ...) or is exactly "remote" once surrounding whitespace and
    case are ignored. Everything else is rejected, including US-only, UK,
    APAC, on-site/hybrid and unrecognised locations.

    Args:
        location: Free-text location of the job. A falsy value (None or "")
            is rejected.
        remote_type: Accepted for interface compatibility; the decision is
            made from ``location`` alone.

    Returns:
        True if the job should be shown, False if it should be filtered out.
    """
    if not location:
        return False  # No location info = probably US-based, filter out

    loc_lower = location.lower()

    # A desired region wins even when excluded places are also mentioned,
    # e.g. "Remote (United States | Canada)" is kept because of Canada.
    if any(region in loc_lower for region in DESIRED_REGIONS):
        return True

    # A bare "Remote" with nothing else is treated as truly remote.
    # Anything that reaches this point without being bare "remote" is
    # rejected: when we cannot determine the region, filtering out is safer.
    return loc_lower.strip() == "remote"
def extract_location_tags(location: str, remote_type: str) -> tuple[list[str], str]:
    """Extract region tags and a short display label from a job location.

    Matching is a case-insensitive substring search on ``location``.

    Args:
        location: Free-text location of the job. A falsy value (None or "")
            yields no tags and an empty label.
        remote_type: The job's remote classification; the exact value
            "remote" marks the job as remote even when the location text
            does not say so.

    Returns:
        A ``(tags, short_location)`` tuple. ``tags`` holds, in this order
        when present: "remote", "canada", "germany", "emea", "americas",
        "worldwide". ``short_location`` is the label of the last matching
        region in that same order (so "Worldwide" beats "EMEA", etc.),
        "Remote" when the job is remote but no region matched, or "" when
        nothing matched.
    """
    if not location:
        return [], ""

    loc_lower = location.lower()
    tags: list[str] = []
    short_loc = ""

    # Remote: either flagged by the caller or stated in the location text.
    is_remote = remote_type == "remote" or "remote" in loc_lower
    if is_remote:
        tags.append("remote")

    # Canada
    if any(x in loc_lower for x in ("canada", "toronto", "vancouver")):
        tags.append("canada")
        short_loc = "Canada"

    # Germany: prefer the country name, otherwise show the city that was
    # actually mentioned (a Munich-only location must not be labelled Berlin).
    if "germany" in loc_lower:
        tags.append("germany")
        short_loc = "Germany"
    elif "berlin" in loc_lower:
        tags.append("germany")
        short_loc = "Berlin"
    elif "munich" in loc_lower:
        tags.append("germany")
        short_loc = "Munich"

    # EMEA
    if "emea" in loc_lower:
        tags.append("emea")
        short_loc = "EMEA"

    # Americas / North America
    if "americas" in loc_lower or "north america" in loc_lower:
        tags.append("americas")
        short_loc = "Americas"

    # Worldwide
    if any(x in loc_lower for x in ("worldwide", "global", "anywhere")):
        tags.append("worldwide")
        short_loc = "Worldwide"

    # No specific region found, but the job is remote.
    if not short_loc and is_remote:
        short_loc = "Remote"

    return tags, short_loc