Refine and add more companies

2026-01-20 18:08:11 +00:00 · 2026-01-20 18:08:11 +00:00 · 185b5ce2f1
commit 185b5ce2f1
parent e8eb9d3fcf
6 changed files with 5539 additions and 27 deletions
--- a/config.yaml
+++ b/config.yaml
@ -67,10 +67,6 @@ companies:
    platform: greenhouse
    board_token: tailscale
  - name: HashiCorp
    platform: greenhouse
    board_token: hashicorp
  # Developer Tools & Platforms
  - name: Automattic
    platform: greenhouse
@ -88,6 +84,71 @@ companies:
    platform: greenhouse
    board_token: cloudflare
  - name: Fastly
    platform: greenhouse
    board_token: fastly
  # Database & Data Infrastructure
  - name: Materialize
    platform: greenhouse
    board_token: materialize
  - name: PingCAP
    platform: greenhouse
    board_token: pingcap
  - name: CockroachLabs
    platform: greenhouse
    board_token: cockroachlabs
  - name: TigerData
    platform: ashby
    ashby_company: tigerdata
  # Observability & Monitoring
  - name: Honeycomb
    platform: greenhouse
    board_token: honeycomb
  - name: Datadog
    platform: greenhouse
    board_token: datadog
  - name: Sentry
    platform: ashby
    ashby_company: sentry
  # Cloud & Developer Platforms
  - name: Render
    platform: ashby
    ashby_company: render
  - name: Railway
    platform: ashby
    ashby_company: Railway
  - name: Stripe
    platform: greenhouse
    board_token: stripe
  - name: JetBrains
    platform: greenhouse
    board_token: jetbrains
  # Rust-heavy / Visualization
  - name: Rerun
    platform: ashby
    ashby_company: rerun
  # Big Tech (Selective)
  - name: Discord
    platform: greenhouse
    board_token: discord
  - name: Dropbox
    platform: greenhouse
    board_token: dropbox
 # Notification settings (optional - configure as needed)
 notifications:
  # Console output is always enabled
--- a/dashboard.py
+++ b/dashboard.py
@ -3,24 +3,149 @@
 Generate a simple text-based HTML dashboard of all tracked jobs.
 """
 import re
 from datetime import datetime
 from pathlib import Path
 from db import Database
 # Substrings that mark a job location as relevant. They are matched
 # case-insensitively against the lower-cased location text.
 DESIRED_REGIONS = [
    # Canada
    "canada", "toronto", "vancouver",
    # Germany
    "germany", "berlin", "munich",
    # Multi-country regions
    "emea",
    "americas",  # covers both North and South America
    "north america",
    # No geographic restriction
    "worldwide", "global", "anywhere",
 ]
 # Substrings that mark a location (on-site, or remote limited to that place)
 # as unwanted when no desired region is mentioned alongside it.
 EXCLUDED_LOCATIONS = [
    # United States: cities
    "san francisco", "new york", "nyc", "seattle", "austin", "boston",
    "chicago", "denver", "los angeles", "atlanta", "dallas", "houston",
    # United States: states
    "california", "washington", "texas", "massachusetts", "colorado",
    # United States: country names and prefixes
    "united states", "usa", "u.s.", "us-", "usa-",
    # United Kingdom and Ireland
    "london", "united kingdom", "uk", "dublin", "ireland",
    # Australia and Asia-Pacific
    "sydney", "melbourne", "australia", "singapore", "tokyo", "japan",
    "india", "bangalore", "bengaluru", "hyderabad", "delhi",
    "korea", "seoul", "taiwan", "taipei", "china", "beijing", "shenzhen",
    # Middle East and Latin America
    "israel", "tel aviv", "brazil", "sao paulo", "mexico",
    # Individual European countries and their cities
    "netherlands", "amsterdam", "france", "paris", "spain", "madrid",
    "portugal", "lisbon", "poland", "warsaw", "italy",
    "czech", "prague", "serbia", "belgrade", "cyprus", "limassol",
    "austria", "vienna", "sweden", "stockholm", "denmark", "copenhagen",
    "switzerland", "romania", "bucharest", "hungary", "greece",
    # Africa and South-East Asia
    "south africa", "indonesia", "jakarta", "malaysia",
 ]
 def is_location_relevant(location: str, remote_type: str) -> bool:
    """
    Decide whether a job's location text passes the strict region filter.

    A job is kept when its location mentions any entry of DESIRED_REGIONS,
    even if excluded places are listed too (for example
    "Remote (United States | Canada)" is kept because of Canada), or when
    the location is exactly "remote" ignoring case and surrounding
    whitespace. Every other location, including an empty one, is rejected.

    remote_type is accepted for interface compatibility but is not consulted.
    """
    if not location:
        # The original author's assumption: no location info is most likely
        # a US-based posting, so it is dropped.
        return False

    normalized = location.lower()

    # A desired region always wins, regardless of what else is mentioned.
    wanted = False
    for region in DESIRED_REGIONS:
        if region in normalized:
            wanted = True
            break

    # A bare "Remote" with no qualifier is treated as unrestricted remote.
    if wanted or normalized.strip() == "remote":
        return True

    # Explicit rejection: the location names an excluded place.
    if any(blocked in normalized for blocked in EXCLUDED_LOCATIONS):
        return False

    # Explicit rejection: on-site style wording without a desired region.
    onsite_markers = ("in-office", "hybrid", "on-site", "onsite", "office based")
    for marker in onsite_markers:
        if marker in normalized:
            return False

    # Anything unrecognised is rejected as the conservative default.
    return False
 def extract_location_tags(location: str, remote_type: str) -> tuple[list[str], str]:
    """
    Extract relevant location tags and a short display location.

    Args:
        location: Free-text location of the job; matched case-insensitively
            by substring.
        remote_type: Remote classification of the job; only the exact value
            "remote" is recognised here.

    Returns:
        A (tags, short_location) tuple. tags holds any of "remote",
        "canada", "germany", "emea", "americas" and "worldwide", in that
        order. short_location is the label of the last matching region, or
        "Remote" when the job is remote and no region matched, or "" when
        nothing matched. An empty location yields ([], "").
    """
    if not location:
        return [], ""
    loc_lower = location.lower()
    tags = []
    short_loc = ""
    # Remote is detected from either the structured field or the free text
    is_remote = remote_type == "remote" or "remote" in loc_lower
    if is_remote:
        tags.append("remote")
    # Region checks run from most specific to broadest; each later match
    # overwrites short_loc, so the broadest matching region is displayed.
    if any(x in loc_lower for x in ["canada", "toronto", "vancouver"]):
        tags.append("canada")
        short_loc = "Canada"
    if any(x in loc_lower for x in ["germany", "berlin", "munich"]):
        tags.append("germany")
        # Prefer the country name; otherwise show the city that actually
        # matched, so a Munich-only location is not mislabelled as Berlin.
        if "germany" in loc_lower:
            short_loc = "Germany"
        elif "berlin" in loc_lower:
            short_loc = "Berlin"
        else:
            short_loc = "Munich"
    if "emea" in loc_lower:
        tags.append("emea")
        short_loc = "EMEA"
    if "americas" in loc_lower or "north america" in loc_lower:
        tags.append("americas")
        short_loc = "Americas"
    if any(x in loc_lower for x in ["worldwide", "global", "anywhere"]):
        tags.append("worldwide")
        short_loc = "Worldwide"
    # Fall back to a generic label when the job is remote but names no region
    if not short_loc and is_remote:
        short_loc = "Remote"
    return tags, short_loc
 def generate_dashboard(output_path: str = "data/dashboard.html"):
    """Generate a static HTML dashboard."""
    db = Database()
    jobs = db.get_all_active_jobs()
-    # Group by company
+    # Group by company, filtering out irrelevant remote locations
    companies = {}
    filtered_count = 0
    for company_name, job in jobs:
        if not is_location_relevant(job.location, job.remote_type):
            filtered_count += 1
            continue
        if company_name not in companies:
            companies[company_name] = []
        companies[company_name].append(job)
    total_shown = sum(len(jobs) for jobs in companies.values())
    # Sort companies by name
    sorted_companies = sorted(companies.items())
@ -116,14 +241,14 @@ def generate_dashboard(output_path: str = "data/dashboard.html"):
            font-size: 12px;
        }}
        .jobs {{
-            margin-left: 20px;
+            margin-left: 0;
        }}
        .job {{
            padding: 6px 0;
            border-bottom: 1px solid var(--border);
-            display: grid;
+            display: flex;
-            grid-template-columns: 1fr 180px;
+            justify-content: space-between;
-            gap: 10px;
+            gap: 20px;
            align-items: baseline;
        }}
        .job:last-child {{
@ -148,6 +273,8 @@ def generate_dashboard(output_path: str = "data/dashboard.html"):
            color: var(--muted);
            font-size: 12px;
            text-align: right;
            white-space: nowrap;
            flex-shrink: 0;
        }}
        .tag {{
            display: inline-block;
@ -168,6 +295,18 @@ def generate_dashboard(output_path: str = "data/dashboard.html"):
            background: #4a4a1a;
            color: #facc15;
        }}
        .tag-emea {{
            background: #1a3a4a;
            color: #60a5fa;
        }}
        .tag-americas {{
            background: #3a1a4a;
            color: #c084fc;
        }}
        .tag-worldwide {{
            background: #1a4a3a;
            color: #34d399;
        }}
        .hidden {{
            display: none;
        }}
@ -228,7 +367,7 @@ def generate_dashboard(output_path: str = "data/dashboard.html"):
        <h1>$ job-board</h1>
        <div class="meta">
            Last updated: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")} |
-            {len(jobs)} jobs across {len(companies)} companies
+            {total_shown} jobs across {len(companies)} companies
        </div>
    </header>
@ -244,11 +383,14 @@ def generate_dashboard(output_path: str = "data/dashboard.html"):
            <button class="filter-btn" data-filter="design">Design</button>
            <button class="filter-btn" data-filter="security">Security</button>
            <button class="filter-btn" data-filter="remote">Remote</button>
-            <button class="filter-btn" data-filter="canada toronto vancouver">Canada</button>
+            <button class="filter-btn" data-filter="canada">Canada</button>
-            <button class="filter-btn" data-filter="berlin germany">Berlin</button>
+            <button class="filter-btn" data-filter="germany">Germany</button>
            <button class="filter-btn" data-filter="emea">EMEA</button>
            <button class="filter-btn" data-filter="americas">Americas</button>
            <button class="filter-btn" data-filter="worldwide">Worldwide</button>
        </div>
        <div class="stats">
-            <span id="visible-count">{len(jobs)} jobs shown</span>
+            <span id="visible-count">{total_shown} jobs shown</span>
        </div>
    </div>
@ -283,18 +425,27 @@ def generate_dashboard(output_path: str = "data/dashboard.html"):
            location = job.location or ""
            location_lower = location.lower()
-            # Tags
+            # Extract tags and short location
-            tags = ""
+            tag_list, short_loc = extract_location_tags(location, job.remote_type)
            if job.remote_type == "remote" or "remote" in location_lower:
                tags += '<span class="tag tag-remote">remote</span>'
            if "canada" in location_lower or "toronto" in location_lower or "vancouver" in location_lower:
                tags += '<span class="tag tag-canada">canada</span>'
            if "berlin" in location_lower or "germany" in location_lower:
                tags += '<span class="tag tag-berlin">berlin</span>'
-            html += f"""                <div class="job" data-search="{job.title.lower()} {location_lower} {(job.department or '').lower()}">
+            # Build tag HTML
            tags = ""
            if "remote" in tag_list:
                tags += '<span class="tag tag-remote">remote</span>'
            if "canada" in tag_list:
                tags += '<span class="tag tag-canada">canada</span>'
            if "germany" in tag_list:
                tags += '<span class="tag tag-berlin">germany</span>'
            if "emea" in tag_list:
                tags += '<span class="tag tag-emea">emea</span>'
            if "americas" in tag_list:
                tags += '<span class="tag tag-americas">americas</span>'
            if "worldwide" in tag_list:
                tags += '<span class="tag tag-worldwide">worldwide</span>'
            html += f"""                <div class="job" data-search="{job.title.lower()} {location_lower} {(job.department or '').lower()} {' '.join(tag_list)}">
                    <span class="job-title"><a href="{job.url}" target="_blank">{job.title}</a>{tags}</span>
-                    <span class="job-location">{location}</span>
+                    <span class="job-location">{short_loc}</span>
                </div>
 """
        html += """            </div>
--- a/data/dashboard.html
+++ b/data/dashboard.html
--- a/data/jobs.db-journal
+++ b/data/jobs.db-journal
--- a/docker-compose.dev.yaml
+++ b/docker-compose.dev.yaml
@ -0,0 +1,35 @@
 # Development compose stack: two scraper variants built from the local
 # Dockerfile plus an nginx container that serves the generated files.
 services:
  # Run scraper once (for manual/cron triggering)
  # No command override, so the image's default command is used.
  scraper:
    build: .
    container_name: job-scraper
    volumes:
      # Shared output directory, also mounted by the dashboard service below
      - ./data:/app/data
      # Company configuration, mounted read-only
      - ./config.yaml:/app/config.yaml:ro
    environment:
      - TZ=America/Toronto
  # Scheduled scraper - runs daily at 9 AM
  # NOTE(review): the schedule is decided by `main.py --schedule`, which is
  # not visible here - confirm the 9 AM claim (presumably in the TZ below).
  scraper-scheduled:
    build: .
    container_name: job-scraper-scheduled
    volumes:
      - ./data:/app/data
      - ./config.yaml:/app/config.yaml:ro
    environment:
      - TZ=America/Toronto
    command: ["python", "main.py", "--schedule"]
    restart: unless-stopped
  # Web dashboard - lightweight static file server
  dashboard:
    image: nginx:alpine
    container_name: job-dashboard
    ports:
      # Host port 8080 -> container port 80, published on all host interfaces
      - "8080:80"
    volumes:
      # NOTE(review): the whole ./data directory is exposed as the web root;
      # it appears to hold database files as well as dashboard.html - confirm
      # that nginx.conf restricts what is served.
      - ./data:/usr/share/nginx/html:ro
      - ./nginx.conf:/etc/nginx/conf.d/default.conf:ro
    restart: unless-stopped
    # Starts after the one-shot scraper container has been started
    depends_on:
      - scraper
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@ -1,5 +1,5 @@
 services:
-  # Run scraper once (for manual/cron triggering)
+  # Run scraper once (for manual triggering)
  scraper:
    build: .
    container_name: job-scraper
@ -20,16 +20,24 @@ services:
      - TZ=America/Toronto
    command: ["python", "main.py", "--schedule"]
    restart: unless-stopped
    logging:
      driver: json-file
      options:
        max-size: "10m"
        max-file: "3"
  # Web dashboard - lightweight static file server
  dashboard:
    image: nginx:alpine
    container_name: job-dashboard
    ports:
-      - "8080:80"
+      - "127.0.0.1:8085:80"
    volumes:
      - ./data:/usr/share/nginx/html:ro
      - ./nginx.conf:/etc/nginx/conf.d/default.conf:ro
    restart: unless-stopped
-    depends_on:
+    logging:
-      - scraper
+      driver: json-file
      options:
        max-size: "10m"
        max-file: "3"