Refine and add more companies
This commit is contained in:
parent
e8eb9d3fcf
commit
185b5ce2f1
6 changed files with 5539 additions and 27 deletions
69
config.yaml
69
config.yaml
|
|
@ -67,10 +67,6 @@ companies:
|
||||||
platform: greenhouse
|
platform: greenhouse
|
||||||
board_token: tailscale
|
board_token: tailscale
|
||||||
|
|
||||||
- name: HashiCorp
|
|
||||||
platform: greenhouse
|
|
||||||
board_token: hashicorp
|
|
||||||
|
|
||||||
# Developer Tools & Platforms
|
# Developer Tools & Platforms
|
||||||
- name: Automattic
|
- name: Automattic
|
||||||
platform: greenhouse
|
platform: greenhouse
|
||||||
|
|
@ -88,6 +84,71 @@ companies:
|
||||||
platform: greenhouse
|
platform: greenhouse
|
||||||
board_token: cloudflare
|
board_token: cloudflare
|
||||||
|
|
||||||
|
- name: Fastly
|
||||||
|
platform: greenhouse
|
||||||
|
board_token: fastly
|
||||||
|
|
||||||
|
# Database & Data Infrastructure
|
||||||
|
- name: Materialize
|
||||||
|
platform: greenhouse
|
||||||
|
board_token: materialize
|
||||||
|
|
||||||
|
- name: PingCAP
|
||||||
|
platform: greenhouse
|
||||||
|
board_token: pingcap
|
||||||
|
|
||||||
|
- name: CockroachLabs
|
||||||
|
platform: greenhouse
|
||||||
|
board_token: cockroachlabs
|
||||||
|
|
||||||
|
- name: TigerData
|
||||||
|
platform: ashby
|
||||||
|
ashby_company: tigerdata
|
||||||
|
|
||||||
|
# Observability & Monitoring
|
||||||
|
- name: Honeycomb
|
||||||
|
platform: greenhouse
|
||||||
|
board_token: honeycomb
|
||||||
|
|
||||||
|
- name: Datadog
|
||||||
|
platform: greenhouse
|
||||||
|
board_token: datadog
|
||||||
|
|
||||||
|
- name: Sentry
|
||||||
|
platform: ashby
|
||||||
|
ashby_company: sentry
|
||||||
|
|
||||||
|
# Cloud & Developer Platforms
|
||||||
|
- name: Render
|
||||||
|
platform: ashby
|
||||||
|
ashby_company: render
|
||||||
|
|
||||||
|
- name: Railway
|
||||||
|
platform: ashby
|
||||||
|
ashby_company: Railway
|
||||||
|
|
||||||
|
- name: Stripe
|
||||||
|
platform: greenhouse
|
||||||
|
board_token: stripe
|
||||||
|
|
||||||
|
- name: JetBrains
|
||||||
|
platform: greenhouse
|
||||||
|
board_token: jetbrains
|
||||||
|
|
||||||
|
# Rust-heavy / Visualization
|
||||||
|
- name: Rerun
|
||||||
|
platform: ashby
|
||||||
|
ashby_company: rerun
|
||||||
|
|
||||||
|
# Big Tech (Selective)
|
||||||
|
- name: Discord
|
||||||
|
platform: greenhouse
|
||||||
|
board_token: discord
|
||||||
|
|
||||||
|
- name: Dropbox
|
||||||
|
platform: greenhouse
|
||||||
|
board_token: dropbox
|
||||||
|
|
||||||
# Notification settings (optional - configure as needed)
|
# Notification settings (optional - configure as needed)
|
||||||
notifications:
|
notifications:
|
||||||
# Console output is always enabled
|
# Console output is always enabled
|
||||||
|
|
|
||||||
189
dashboard.py
189
dashboard.py
|
|
@ -3,24 +3,149 @@
|
||||||
Generate a simple text-based HTML dashboard of all tracked jobs.
|
Generate a simple text-based HTML dashboard of all tracked jobs.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import re
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from db import Database
|
from db import Database
|
||||||
|
|
||||||
|
|
||||||
|
# Regions/locations we care about (case-insensitive matching).
# Every entry is written in lowercase, so callers should lowercase the
# location text before comparing against it.
DESIRED_REGIONS = [
    "canada", "toronto", "vancouver",
    "germany", "berlin", "munich",
    "emea",
    "americas",  # includes North/South America
    "north america",
    "worldwide", "global", "anywhere",
]

# Locations to explicitly exclude (on-site or remote restricted to these).
# Entries are lowercase fragments rather than full place names.
# NOTE(review): if these are matched as plain substrings, short entries can
# hit unrelated text ("uk" occurs in "ukraine", "india" in "indiana",
# "us-" in "campus-") -- confirm the matching strategy before relying on it.
EXCLUDED_LOCATIONS = [
    # US cities/states (we don't want US-only jobs)
    "san francisco", "new york", "nyc", "seattle", "austin", "boston",
    "chicago", "denver", "los angeles", "atlanta", "dallas", "houston",
    "california", "washington", "texas", "massachusetts", "colorado",
    "united states", "usa", "u.s.", "us-", "usa-",
    # UK and Ireland
    "london", "united kingdom", "uk", "dublin", "ireland",
    # Australia/APAC (not EMEA)
    "sydney", "melbourne", "australia", "singapore", "tokyo", "japan",
    "india", "bangalore", "bengaluru", "hyderabad", "delhi",
    "korea", "seoul", "taiwan", "taipei", "china", "beijing", "shenzhen",
    # Other excluded (individual countries/cities in Europe, Latin America,
    # Africa and Southeast Asia)
    "israel", "tel aviv", "brazil", "sao paulo", "mexico",
    "netherlands", "amsterdam", "france", "paris", "spain", "madrid",
    "portugal", "lisbon", "poland", "warsaw", "italy",
    "czech", "prague", "serbia", "belgrade", "cyprus", "limassol",
    "austria", "vienna", "sweden", "stockholm", "denmark", "copenhagen",
    "switzerland", "romania", "bucharest", "hungary", "greece",
    "south africa", "indonesia", "jakarta", "malaysia",
]
|
||||||
|
|
||||||
|
|
||||||
|
def is_location_relevant(location: str, remote_type: str) -> bool:
    """
    Strict location filter. Only keeps jobs available in Canada, Germany, EMEA,
    the Americas, or Worldwide, plus jobs whose location is just "Remote".

    A location is kept when either of these holds:

    * it is exactly "remote" after lowercasing and stripping whitespace, or
    * it contains any entry of DESIRED_REGIONS (case-insensitive substring
      match). A desired region wins even when other places are listed too,
      e.g. "Remote (United States | Canada)" is kept because of Canada.

    Everything else is filtered out, including US-only, UK and APAC jobs,
    on-site/hybrid roles without a desired region, and locations that cannot
    be classified. Because rejection is the default, EXCLUDED_LOCATIONS does
    not need to be consulted here: scanning it could never change the result.

    Args:
        location: Free-text location of the job; may be empty or None.
        remote_type: Currently unused; kept so existing callers keep working.

    Returns:
        True if the job should be shown, False if it should be filtered out.
    """
    if not location:
        return False  # No location info = probably US-based, filter out

    loc_lower = location.lower()

    # A bare "Remote" with nothing else is treated as truly remote.
    if loc_lower.strip() == "remote":
        return True

    # Otherwise the job is only relevant if a desired region is mentioned;
    # anything we cannot positively match is dropped (safer).
    return any(region in loc_lower for region in DESIRED_REGIONS)
|
||||||
|
|
||||||
|
|
||||||
|
def extract_location_tags(location: str, remote_type: str) -> tuple[list[str], str]:
    """
    Extract relevant location tags and a short display location.

    Matching is done on the lowercased location text using substring checks.
    Tags are appended in a fixed order: remote, canada, germany, emea,
    americas, worldwide. When several regions match, the short location is
    the label of the last one in that order (e.g. "EMEA / Americas" yields
    "Americas").

    Args:
        location: Free-text location of the job; may be empty or None.
        remote_type: Remote classification of the job; the value "remote"
            adds the "remote" tag even if the location text does not say so.

    Returns:
        (list of tag names, short location string). Both are empty when no
        location is given; the short location is "Remote" when the job is
        remote and no specific region was recognised.
    """
    if not location:
        return [], ""

    loc_lower = location.lower()
    tags: list[str] = []
    short_loc = ""

    # Check for remote (either flagged explicitly or mentioned in the text)
    is_remote = remote_type == "remote" or "remote" in loc_lower
    if is_remote:
        tags.append("remote")

    # Check for Canada
    if any(x in loc_lower for x in ("canada", "toronto", "vancouver")):
        tags.append("canada")
        short_loc = "Canada"

    # Check for Germany. Prefer the country name, then the city that was
    # actually mentioned, so a Munich-only posting is not labelled "Berlin".
    if "germany" in loc_lower:
        tags.append("germany")
        short_loc = "Germany"
    elif "berlin" in loc_lower:
        tags.append("germany")
        short_loc = "Berlin"
    elif "munich" in loc_lower:
        tags.append("germany")
        short_loc = "Munich"

    # Check for EMEA
    if "emea" in loc_lower:
        tags.append("emea")
        short_loc = "EMEA"

    # Check for Americas/North America
    if "americas" in loc_lower or "north america" in loc_lower:
        tags.append("americas")
        short_loc = "Americas"

    # Check for Worldwide
    if any(x in loc_lower for x in ("worldwide", "global", "anywhere")):
        tags.append("worldwide")
        short_loc = "Worldwide"

    # If no specific region found but it's remote
    if not short_loc and is_remote:
        short_loc = "Remote"

    return tags, short_loc
|
||||||
|
|
||||||
|
|
||||||
def generate_dashboard(output_path: str = "data/dashboard.html"):
|
def generate_dashboard(output_path: str = "data/dashboard.html"):
|
||||||
"""Generate a static HTML dashboard."""
|
"""Generate a static HTML dashboard."""
|
||||||
db = Database()
|
db = Database()
|
||||||
jobs = db.get_all_active_jobs()
|
jobs = db.get_all_active_jobs()
|
||||||
|
|
||||||
# Group by company
|
# Group by company, skipping jobs whose location fails the relevance filter
|
||||||
companies = {}
|
companies = {}
|
||||||
|
filtered_count = 0
|
||||||
for company_name, job in jobs:
|
for company_name, job in jobs:
|
||||||
|
if not is_location_relevant(job.location, job.remote_type):
|
||||||
|
filtered_count += 1
|
||||||
|
continue
|
||||||
if company_name not in companies:
|
if company_name not in companies:
|
||||||
companies[company_name] = []
|
companies[company_name] = []
|
||||||
companies[company_name].append(job)
|
companies[company_name].append(job)
|
||||||
|
|
||||||
|
total_shown = sum(len(jobs) for jobs in companies.values())
|
||||||
|
|
||||||
# Sort companies by name
|
# Sort companies by name
|
||||||
sorted_companies = sorted(companies.items())
|
sorted_companies = sorted(companies.items())
|
||||||
|
|
||||||
|
|
@ -116,14 +241,14 @@ def generate_dashboard(output_path: str = "data/dashboard.html"):
|
||||||
font-size: 12px;
|
font-size: 12px;
|
||||||
}}
|
}}
|
||||||
.jobs {{
|
.jobs {{
|
||||||
margin-left: 20px;
|
margin-left: 0;
|
||||||
}}
|
}}
|
||||||
.job {{
|
.job {{
|
||||||
padding: 6px 0;
|
padding: 6px 0;
|
||||||
border-bottom: 1px solid var(--border);
|
border-bottom: 1px solid var(--border);
|
||||||
display: grid;
|
display: flex;
|
||||||
grid-template-columns: 1fr 180px;
|
justify-content: space-between;
|
||||||
gap: 10px;
|
gap: 20px;
|
||||||
align-items: baseline;
|
align-items: baseline;
|
||||||
}}
|
}}
|
||||||
.job:last-child {{
|
.job:last-child {{
|
||||||
|
|
@ -148,6 +273,8 @@ def generate_dashboard(output_path: str = "data/dashboard.html"):
|
||||||
color: var(--muted);
|
color: var(--muted);
|
||||||
font-size: 12px;
|
font-size: 12px;
|
||||||
text-align: right;
|
text-align: right;
|
||||||
|
white-space: nowrap;
|
||||||
|
flex-shrink: 0;
|
||||||
}}
|
}}
|
||||||
.tag {{
|
.tag {{
|
||||||
display: inline-block;
|
display: inline-block;
|
||||||
|
|
@ -168,6 +295,18 @@ def generate_dashboard(output_path: str = "data/dashboard.html"):
|
||||||
background: #4a4a1a;
|
background: #4a4a1a;
|
||||||
color: #facc15;
|
color: #facc15;
|
||||||
}}
|
}}
|
||||||
|
.tag-emea {{
|
||||||
|
background: #1a3a4a;
|
||||||
|
color: #60a5fa;
|
||||||
|
}}
|
||||||
|
.tag-americas {{
|
||||||
|
background: #3a1a4a;
|
||||||
|
color: #c084fc;
|
||||||
|
}}
|
||||||
|
.tag-worldwide {{
|
||||||
|
background: #1a4a3a;
|
||||||
|
color: #34d399;
|
||||||
|
}}
|
||||||
.hidden {{
|
.hidden {{
|
||||||
display: none;
|
display: none;
|
||||||
}}
|
}}
|
||||||
|
|
@ -228,7 +367,7 @@ def generate_dashboard(output_path: str = "data/dashboard.html"):
|
||||||
<h1>$ job-board</h1>
|
<h1>$ job-board</h1>
|
||||||
<div class="meta">
|
<div class="meta">
|
||||||
Last updated: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")} |
|
Last updated: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")} |
|
||||||
{len(jobs)} jobs across {len(companies)} companies
|
{total_shown} jobs across {len(companies)} companies
|
||||||
</div>
|
</div>
|
||||||
</header>
|
</header>
|
||||||
|
|
||||||
|
|
@ -244,11 +383,14 @@ def generate_dashboard(output_path: str = "data/dashboard.html"):
|
||||||
<button class="filter-btn" data-filter="design">Design</button>
|
<button class="filter-btn" data-filter="design">Design</button>
|
||||||
<button class="filter-btn" data-filter="security">Security</button>
|
<button class="filter-btn" data-filter="security">Security</button>
|
||||||
<button class="filter-btn" data-filter="remote">Remote</button>
|
<button class="filter-btn" data-filter="remote">Remote</button>
|
||||||
<button class="filter-btn" data-filter="canada toronto vancouver">Canada</button>
|
<button class="filter-btn" data-filter="canada">Canada</button>
|
||||||
<button class="filter-btn" data-filter="berlin germany">Berlin</button>
|
<button class="filter-btn" data-filter="germany">Germany</button>
|
||||||
|
<button class="filter-btn" data-filter="emea">EMEA</button>
|
||||||
|
<button class="filter-btn" data-filter="americas">Americas</button>
|
||||||
|
<button class="filter-btn" data-filter="worldwide">Worldwide</button>
|
||||||
</div>
|
</div>
|
||||||
<div class="stats">
|
<div class="stats">
|
||||||
<span id="visible-count">{len(jobs)} jobs shown</span>
|
<span id="visible-count">{total_shown} jobs shown</span>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
|
@ -283,18 +425,27 @@ def generate_dashboard(output_path: str = "data/dashboard.html"):
|
||||||
location = job.location or ""
|
location = job.location or ""
|
||||||
location_lower = location.lower()
|
location_lower = location.lower()
|
||||||
|
|
||||||
# Tags
|
# Extract tags and short location
|
||||||
tags = ""
|
tag_list, short_loc = extract_location_tags(location, job.remote_type)
|
||||||
if job.remote_type == "remote" or "remote" in location_lower:
|
|
||||||
tags += '<span class="tag tag-remote">remote</span>'
|
|
||||||
if "canada" in location_lower or "toronto" in location_lower or "vancouver" in location_lower:
|
|
||||||
tags += '<span class="tag tag-canada">canada</span>'
|
|
||||||
if "berlin" in location_lower or "germany" in location_lower:
|
|
||||||
tags += '<span class="tag tag-berlin">berlin</span>'
|
|
||||||
|
|
||||||
html += f""" <div class="job" data-search="{job.title.lower()} {location_lower} {(job.department or '').lower()}">
|
# Build tag HTML
|
||||||
|
tags = ""
|
||||||
|
if "remote" in tag_list:
|
||||||
|
tags += '<span class="tag tag-remote">remote</span>'
|
||||||
|
if "canada" in tag_list:
|
||||||
|
tags += '<span class="tag tag-canada">canada</span>'
|
||||||
|
if "germany" in tag_list:
|
||||||
|
tags += '<span class="tag tag-berlin">germany</span>'
|
||||||
|
if "emea" in tag_list:
|
||||||
|
tags += '<span class="tag tag-emea">emea</span>'
|
||||||
|
if "americas" in tag_list:
|
||||||
|
tags += '<span class="tag tag-americas">americas</span>'
|
||||||
|
if "worldwide" in tag_list:
|
||||||
|
tags += '<span class="tag tag-worldwide">worldwide</span>'
|
||||||
|
|
||||||
|
html += f""" <div class="job" data-search="{job.title.lower()} {location_lower} {(job.department or '').lower()} {' '.join(tag_list)}">
|
||||||
<span class="job-title"><a href="{job.url}" target="_blank">{job.title}</a>{tags}</span>
|
<span class="job-title"><a href="{job.url}" target="_blank">{job.title}</a>{tags}</span>
|
||||||
<span class="job-location">{location}</span>
|
<span class="job-location">{short_loc}</span>
|
||||||
</div>
|
</div>
|
||||||
"""
|
"""
|
||||||
html += """ </div>
|
html += """ </div>
|
||||||
|
|
|
||||||
5257
data/dashboard.html
Normal file
5257
data/dashboard.html
Normal file
File diff suppressed because it is too large
Load diff
BIN
data/jobs.db-journal
Normal file
BIN
data/jobs.db-journal
Normal file
Binary file not shown.
35
docker-compose.dev.yaml
Normal file
35
docker-compose.dev.yaml
Normal file
|
|
@ -0,0 +1,35 @@
|
||||||
|
services:
|
||||||
|
# Run scraper once (for manual/cron triggering)
|
||||||
|
scraper:
|
||||||
|
build: .
|
||||||
|
container_name: job-scraper
|
||||||
|
volumes:
|
||||||
|
- ./data:/app/data
|
||||||
|
- ./config.yaml:/app/config.yaml:ro
|
||||||
|
environment:
|
||||||
|
- TZ=America/Toronto
|
||||||
|
|
||||||
|
# Scheduled scraper - runs daily at 9 AM
|
||||||
|
scraper-scheduled:
|
||||||
|
build: .
|
||||||
|
container_name: job-scraper-scheduled
|
||||||
|
volumes:
|
||||||
|
- ./data:/app/data
|
||||||
|
- ./config.yaml:/app/config.yaml:ro
|
||||||
|
environment:
|
||||||
|
- TZ=America/Toronto
|
||||||
|
command: ["python", "main.py", "--schedule"]
|
||||||
|
restart: unless-stopped
|
||||||
|
|
||||||
|
# Web dashboard - lightweight static file server
|
||||||
|
dashboard:
|
||||||
|
image: nginx:alpine
|
||||||
|
container_name: job-dashboard
|
||||||
|
ports:
|
||||||
|
- "8080:80"
|
||||||
|
volumes:
|
||||||
|
- ./data:/usr/share/nginx/html:ro
|
||||||
|
- ./nginx.conf:/etc/nginx/conf.d/default.conf:ro
|
||||||
|
restart: unless-stopped
|
||||||
|
depends_on:
|
||||||
|
- scraper
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
services:
|
services:
|
||||||
# Run scraper once (for manual/cron triggering)
|
# Run scraper once (for manual triggering)
|
||||||
scraper:
|
scraper:
|
||||||
build: .
|
build: .
|
||||||
container_name: job-scraper
|
container_name: job-scraper
|
||||||
|
|
@ -20,16 +20,24 @@ services:
|
||||||
- TZ=America/Toronto
|
- TZ=America/Toronto
|
||||||
command: ["python", "main.py", "--schedule"]
|
command: ["python", "main.py", "--schedule"]
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
logging:
|
||||||
|
driver: json-file
|
||||||
|
options:
|
||||||
|
max-size: "10m"
|
||||||
|
max-file: "3"
|
||||||
|
|
||||||
# Web dashboard - lightweight static file server
|
# Web dashboard - lightweight static file server
|
||||||
dashboard:
|
dashboard:
|
||||||
image: nginx:alpine
|
image: nginx:alpine
|
||||||
container_name: job-dashboard
|
container_name: job-dashboard
|
||||||
ports:
|
ports:
|
||||||
- "8080:80"
|
- "127.0.0.1:8085:80"
|
||||||
volumes:
|
volumes:
|
||||||
- ./data:/usr/share/nginx/html:ro
|
- ./data:/usr/share/nginx/html:ro
|
||||||
- ./nginx.conf:/etc/nginx/conf.d/default.conf:ro
|
- ./nginx.conf:/etc/nginx/conf.d/default.conf:ro
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
depends_on:
|
logging:
|
||||||
- scraper
|
driver: json-file
|
||||||
|
options:
|
||||||
|
max-size: "10m"
|
||||||
|
max-file: "3"
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue