Refine and add more companies

This commit is contained in:
Bastian Gruber 2026-01-20 18:08:11 +00:00
parent e8eb9d3fcf
commit 185b5ce2f1
Signed by: gruberb
GPG key ID: 426AF1CBA0530691
6 changed files with 5539 additions and 27 deletions

View file

@ -67,10 +67,6 @@ companies:
platform: greenhouse platform: greenhouse
board_token: tailscale board_token: tailscale
- name: HashiCorp
platform: greenhouse
board_token: hashicorp
# Developer Tools & Platforms # Developer Tools & Platforms
- name: Automattic - name: Automattic
platform: greenhouse platform: greenhouse
@ -88,6 +84,71 @@ companies:
platform: greenhouse platform: greenhouse
board_token: cloudflare board_token: cloudflare
- name: Fastly
platform: greenhouse
board_token: fastly
# Database & Data Infrastructure
- name: Materialize
platform: greenhouse
board_token: materialize
- name: PingCAP
platform: greenhouse
board_token: pingcap
- name: CockroachLabs
platform: greenhouse
board_token: cockroachlabs
- name: TigerData
platform: ashby
ashby_company: tigerdata
# Observability & Monitoring
- name: Honeycomb
platform: greenhouse
board_token: honeycomb
- name: Datadog
platform: greenhouse
board_token: datadog
- name: Sentry
platform: ashby
ashby_company: sentry
# Cloud & Developer Platforms
- name: Render
platform: ashby
ashby_company: render
- name: Railway
platform: ashby
ashby_company: Railway
- name: Stripe
platform: greenhouse
board_token: stripe
- name: JetBrains
platform: greenhouse
board_token: jetbrains
# Rust-heavy / Visualization
- name: Rerun
platform: ashby
ashby_company: rerun
# Big Tech (Selective)
- name: Discord
platform: greenhouse
board_token: discord
- name: Dropbox
platform: greenhouse
board_token: dropbox
# Notification settings (optional - configure as needed) # Notification settings (optional - configure as needed)
notifications: notifications:
# Console output is always enabled # Console output is always enabled

View file

@ -3,24 +3,149 @@
Generate a simple text-based HTML dashboard of all tracked jobs. Generate a simple text-based HTML dashboard of all tracked jobs.
""" """
import re
from datetime import datetime from datetime import datetime
from pathlib import Path from pathlib import Path
from db import Database from db import Database
# Regions/locations we care about (case-insensitive matching).
# Every entry is written in lowercase; is_location_relevant() lowercases the
# job's location text and tests each entry as a plain substring of it.
DESIRED_REGIONS = [
    "canada", "toronto", "vancouver",
    "germany", "berlin", "munich",
    "emea",
    "americas",  # includes North/South America
    "north america",
    "worldwide", "global", "anywhere",
]
# Locations to explicitly exclude (on-site or remote restricted to these).
# Every entry is written in lowercase so it can be compared against lowercased
# location text. NOTE(review): when matched as substrings, short entries such
# as "uk" or "us-" will also hit inside longer words - confirm that is acceptable.
EXCLUDED_LOCATIONS = [
    # US cities/states (we don't want US-only jobs)
    "san francisco", "new york", "nyc", "seattle", "austin", "boston",
    "chicago", "denver", "los angeles", "atlanta", "dallas", "houston",
    "california", "washington", "texas", "massachusetts", "colorado",
    "united states", "usa", "u.s.", "us-", "usa-",
    # UK & Ireland
    "london", "united kingdom", "uk", "dublin", "ireland",
    # Australia/APAC (not EMEA)
    "sydney", "melbourne", "australia", "singapore", "tokyo", "japan",
    "india", "bangalore", "bengaluru", "hyderabad", "delhi",
    "korea", "seoul", "taiwan", "taipei", "china", "beijing", "shenzhen",
    # Other excluded countries/cities (Middle East, Latin America, rest of
    # Europe, Africa, South-East Asia)
    "israel", "tel aviv", "brazil", "sao paulo", "mexico",
    "netherlands", "amsterdam", "france", "paris", "spain", "madrid",
    "portugal", "lisbon", "poland", "warsaw", "italy",
    "czech", "prague", "serbia", "belgrade", "cyprus", "limassol",
    "austria", "vienna", "sweden", "stockholm", "denmark", "copenhagen",
    "switzerland", "romania", "bucharest", "hungary", "greece",
    "south africa", "indonesia", "jakarta", "malaysia",
]
def is_location_relevant(location: str, remote_type: str) -> bool:
    """
    Strict allow-list location filter.

    A job is kept only when its location text either mentions one of
    DESIRED_REGIONS (case-insensitive substring match) or is exactly
    "remote" once lowercased and stripped. Everything else is filtered out:
    missing location, US-only jobs, UK jobs, APAC jobs, on-site/hybrid
    postings, and any text that cannot be classified.

    Args:
        location: Free-form location text of the job posting. Empty/falsy
            values are rejected.
        remote_type: Accepted for interface compatibility; the decision does
            not depend on it.

    Returns:
        True if the job should be shown, False if it should be filtered out.
    """
    if not location:
        return False  # No location info = probably US-based, filter out

    loc_lower = location.lower()

    # A desired region wins even when other places are listed alongside it,
    # e.g. "Remote (United States | Canada)" is kept because of Canada.
    if any(region in loc_lower for region in DESIRED_REGIONS):
        return True

    # A bare "Remote" with nothing else is treated as truly remote and kept.
    # Anything else is rejected. The former explicit scans for excluded
    # locations and for on-site/hybrid keywords were dropped here because
    # every path after this point already returned False, so they could
    # never change the outcome.
    return loc_lower.strip() == "remote"
def extract_location_tags(location: str, remote_type: str) -> tuple[list[str], str]:
    """
    Extract relevant location tags and a short display location.

    Tags are appended in a fixed order (remote, canada, germany, emea,
    americas, worldwide). The short display location is taken from the last
    region that matched, so broader regions override narrower ones; it falls
    back to "Remote" when the job is remote but no region matched.

    Args:
        location: Free-form location text of the job posting. Empty/falsy
            values yield no tags and an empty display string.
        remote_type: Remote classification of the job; the value "remote"
            marks the job as remote even if the location text does not.

    Returns:
        (list of tag names, short location string)
    """
    if not location:
        return [], ""

    loc_lower = location.lower()
    tags = []
    short_loc = ""

    # Check for remote
    is_remote = remote_type == "remote" or "remote" in loc_lower
    if is_remote:
        tags.append("remote")

    # Check for Canada
    if any(x in loc_lower for x in ("canada", "toronto", "vancouver")):
        tags.append("canada")
        short_loc = "Canada"

    # Check for Germany/Berlin/Munich
    if any(x in loc_lower for x in ("germany", "berlin", "munich")):
        tags.append("germany")
        # Prefer the country name; otherwise show the city that actually
        # matched. Previously a Munich-only location was labelled "Berlin".
        if "germany" in loc_lower:
            short_loc = "Germany"
        elif "berlin" in loc_lower:
            short_loc = "Berlin"
        else:
            short_loc = "Munich"

    # Check for EMEA
    if "emea" in loc_lower:
        tags.append("emea")
        short_loc = "EMEA"

    # Check for Americas/North America
    if "americas" in loc_lower or "north america" in loc_lower:
        tags.append("americas")
        short_loc = "Americas"

    # Check for Worldwide
    if any(x in loc_lower for x in ("worldwide", "global", "anywhere")):
        tags.append("worldwide")
        short_loc = "Worldwide"

    # If no specific region found but it's remote
    if not short_loc and is_remote:
        short_loc = "Remote"

    return tags, short_loc
def generate_dashboard(output_path: str = "data/dashboard.html"): def generate_dashboard(output_path: str = "data/dashboard.html"):
"""Generate a static HTML dashboard.""" """Generate a static HTML dashboard."""
db = Database() db = Database()
jobs = db.get_all_active_jobs() jobs = db.get_all_active_jobs()
# Group by company # Group by company, filtering out irrelevant remote locations
companies = {} companies = {}
filtered_count = 0
for company_name, job in jobs: for company_name, job in jobs:
if not is_location_relevant(job.location, job.remote_type):
filtered_count += 1
continue
if company_name not in companies: if company_name not in companies:
companies[company_name] = [] companies[company_name] = []
companies[company_name].append(job) companies[company_name].append(job)
total_shown = sum(len(jobs) for jobs in companies.values())
# Sort companies by name # Sort companies by name
sorted_companies = sorted(companies.items()) sorted_companies = sorted(companies.items())
@ -116,14 +241,14 @@ def generate_dashboard(output_path: str = "data/dashboard.html"):
font-size: 12px; font-size: 12px;
}} }}
.jobs {{ .jobs {{
margin-left: 20px; margin-left: 0;
}} }}
.job {{ .job {{
padding: 6px 0; padding: 6px 0;
border-bottom: 1px solid var(--border); border-bottom: 1px solid var(--border);
display: grid; display: flex;
grid-template-columns: 1fr 180px; justify-content: space-between;
gap: 10px; gap: 20px;
align-items: baseline; align-items: baseline;
}} }}
.job:last-child {{ .job:last-child {{
@ -148,6 +273,8 @@ def generate_dashboard(output_path: str = "data/dashboard.html"):
color: var(--muted); color: var(--muted);
font-size: 12px; font-size: 12px;
text-align: right; text-align: right;
white-space: nowrap;
flex-shrink: 0;
}} }}
.tag {{ .tag {{
display: inline-block; display: inline-block;
@ -168,6 +295,18 @@ def generate_dashboard(output_path: str = "data/dashboard.html"):
background: #4a4a1a; background: #4a4a1a;
color: #facc15; color: #facc15;
}} }}
.tag-emea {{
background: #1a3a4a;
color: #60a5fa;
}}
.tag-americas {{
background: #3a1a4a;
color: #c084fc;
}}
.tag-worldwide {{
background: #1a4a3a;
color: #34d399;
}}
.hidden {{ .hidden {{
display: none; display: none;
}} }}
@ -228,7 +367,7 @@ def generate_dashboard(output_path: str = "data/dashboard.html"):
<h1>$ job-board</h1> <h1>$ job-board</h1>
<div class="meta"> <div class="meta">
Last updated: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")} | Last updated: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")} |
{len(jobs)} jobs across {len(companies)} companies {total_shown} jobs across {len(companies)} companies
</div> </div>
</header> </header>
@ -244,11 +383,14 @@ def generate_dashboard(output_path: str = "data/dashboard.html"):
<button class="filter-btn" data-filter="design">Design</button> <button class="filter-btn" data-filter="design">Design</button>
<button class="filter-btn" data-filter="security">Security</button> <button class="filter-btn" data-filter="security">Security</button>
<button class="filter-btn" data-filter="remote">Remote</button> <button class="filter-btn" data-filter="remote">Remote</button>
<button class="filter-btn" data-filter="canada toronto vancouver">Canada</button> <button class="filter-btn" data-filter="canada">Canada</button>
<button class="filter-btn" data-filter="berlin germany">Berlin</button> <button class="filter-btn" data-filter="germany">Germany</button>
<button class="filter-btn" data-filter="emea">EMEA</button>
<button class="filter-btn" data-filter="americas">Americas</button>
<button class="filter-btn" data-filter="worldwide">Worldwide</button>
</div> </div>
<div class="stats"> <div class="stats">
<span id="visible-count">{len(jobs)} jobs shown</span> <span id="visible-count">{total_shown} jobs shown</span>
</div> </div>
</div> </div>
@ -283,18 +425,27 @@ def generate_dashboard(output_path: str = "data/dashboard.html"):
location = job.location or "" location = job.location or ""
location_lower = location.lower() location_lower = location.lower()
# Tags # Extract tags and short location
tags = "" tag_list, short_loc = extract_location_tags(location, job.remote_type)
if job.remote_type == "remote" or "remote" in location_lower:
tags += '<span class="tag tag-remote">remote</span>'
if "canada" in location_lower or "toronto" in location_lower or "vancouver" in location_lower:
tags += '<span class="tag tag-canada">canada</span>'
if "berlin" in location_lower or "germany" in location_lower:
tags += '<span class="tag tag-berlin">berlin</span>'
html += f""" <div class="job" data-search="{job.title.lower()} {location_lower} {(job.department or '').lower()}"> # Build tag HTML
tags = ""
if "remote" in tag_list:
tags += '<span class="tag tag-remote">remote</span>'
if "canada" in tag_list:
tags += '<span class="tag tag-canada">canada</span>'
if "germany" in tag_list:
tags += '<span class="tag tag-berlin">germany</span>'
if "emea" in tag_list:
tags += '<span class="tag tag-emea">emea</span>'
if "americas" in tag_list:
tags += '<span class="tag tag-americas">americas</span>'
if "worldwide" in tag_list:
tags += '<span class="tag tag-worldwide">worldwide</span>'
html += f""" <div class="job" data-search="{job.title.lower()} {location_lower} {(job.department or '').lower()} {' '.join(tag_list)}">
<span class="job-title"><a href="{job.url}" target="_blank">{job.title}</a>{tags}</span> <span class="job-title"><a href="{job.url}" target="_blank">{job.title}</a>{tags}</span>
<span class="job-location">{location}</span> <span class="job-location">{short_loc}</span>
</div> </div>
""" """
html += """ </div> html += """ </div>

5257
data/dashboard.html Normal file

File diff suppressed because it is too large Load diff

BIN
data/jobs.db-journal Normal file

Binary file not shown.

35
docker-compose.dev.yaml Normal file
View file

@ -0,0 +1,35 @@
services:
# Run scraper once (for manual/cron triggering)
scraper:
build: .
container_name: job-scraper
volumes:
- ./data:/app/data
- ./config.yaml:/app/config.yaml:ro
environment:
- TZ=America/Toronto
# Scheduled scraper - runs daily at 9 AM
scraper-scheduled:
build: .
container_name: job-scraper-scheduled
volumes:
- ./data:/app/data
- ./config.yaml:/app/config.yaml:ro
environment:
- TZ=America/Toronto
command: ["python", "main.py", "--schedule"]
restart: unless-stopped
# Web dashboard - lightweight static file server
dashboard:
image: nginx:alpine
container_name: job-dashboard
ports:
- "8080:80"
volumes:
- ./data:/usr/share/nginx/html:ro
- ./nginx.conf:/etc/nginx/conf.d/default.conf:ro
restart: unless-stopped
depends_on:
- scraper

View file

@ -1,5 +1,5 @@
services: services:
# Run scraper once (for manual/cron triggering) # Run scraper once (for manual triggering)
scraper: scraper:
build: . build: .
container_name: job-scraper container_name: job-scraper
@ -20,16 +20,24 @@ services:
- TZ=America/Toronto - TZ=America/Toronto
command: ["python", "main.py", "--schedule"] command: ["python", "main.py", "--schedule"]
restart: unless-stopped restart: unless-stopped
logging:
driver: json-file
options:
max-size: "10m"
max-file: "3"
# Web dashboard - lightweight static file server # Web dashboard - lightweight static file server
dashboard: dashboard:
image: nginx:alpine image: nginx:alpine
container_name: job-dashboard container_name: job-dashboard
ports: ports:
- "8080:80" - "127.0.0.1:8085:80"
volumes: volumes:
- ./data:/usr/share/nginx/html:ro - ./data:/usr/share/nginx/html:ro
- ./nginx.conf:/etc/nginx/conf.d/default.conf:ro - ./nginx.conf:/etc/nginx/conf.d/default.conf:ro
restart: unless-stopped restart: unless-stopped
depends_on: logging:
- scraper driver: json-file
options:
max-size: "10m"
max-file: "3"