- Scrapes job listings from Greenhouse, Lever, and Ashby platforms
- Tracks 14 companies (1Password, DuckDuckGo, GitLab, etc.)
- SQLite database for change detection (sketched below)
- Filters by engineering job titles and location preferences
- Generates static HTML dashboard with search/filter
- Docker support for deployment to Debian server
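The change-detection layer is not part of the file shown below; the sketch that follows shows one way the SQLite diffing could work, keyed on each job's `external_id`. The table name, schema, and function name are assumptions, not the project's actual code.

import sqlite3


def find_new_jobs(conn: sqlite3.Connection, company: str, jobs: list) -> list:
    """Return only jobs not seen in previous scrapes (hypothetical sketch)."""
    # Assumed schema: one row per (company, external_id) ever seen.
    conn.execute(
        "CREATE TABLE IF NOT EXISTS seen_jobs ("
        "company TEXT NOT NULL, external_id TEXT NOT NULL, "
        "PRIMARY KEY (company, external_id))"
    )
    new_jobs = []
    for job in jobs:
        row = conn.execute(
            "SELECT 1 FROM seen_jobs WHERE company = ? AND external_id = ?",
            (company, job.external_id),
        ).fetchone()
        if row is None:
            conn.execute(
                "INSERT INTO seen_jobs (company, external_id) VALUES (?, ?)",
                (company, job.external_id),
            )
            new_jobs.append(job)
    conn.commit()
    return new_jobs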
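The Greenhouse scraper below imports `BaseScraper` and `Job` from a sibling `base` module that is not shown here. A minimal sketch of what that contract might look like, assuming `Job` is a dataclass and `BaseScraper` supplies `fetch_json()` and `classify_remote()`; only those names are implied by the scraper, everything else is a guess:

# Hypothetical sketch of the sibling base module. Field names on Job
# mirror the Job(...) call the Greenhouse scraper makes below.
from dataclasses import dataclass

import requests


@dataclass
class Job:
    external_id: str
    title: str
    url: str
    location: str
    department: str | None = None
    remote_type: str | None = None


class BaseScraper:
    def __init__(self, company_name: str, jobs_url: str, timeout: float = 30.0):
        self.company_name = company_name
        self.jobs_url = jobs_url
        self.timeout = timeout

    def fetch_json(self) -> dict:
        # Fetch the listings endpoint and decode the JSON body.
        response = requests.get(self.jobs_url, timeout=self.timeout)
        response.raise_for_status()
        return response.json()

    def classify_remote(self, location: str) -> str:
        # Naive keyword heuristic; the real classifier may be richer.
        return "remote" if "remote" in location.lower() else "onsite"

    def scrape(self) -> list[Job]:
        raise NotImplementedError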
from .base import BaseScraper, Job


class GreenhouseScraper(BaseScraper):
    """
    Scraper for companies using Greenhouse.

    Greenhouse exposes a public Job Board API that returns each
    board's listings as JSON.

    Example: https://boards-api.greenhouse.io/v1/boards/{company}/jobs
    """
    def __init__(self, company_name: str, board_token: str, **kwargs):
        # Build the Greenhouse Job Board API endpoint for this board
        jobs_url = f"https://boards-api.greenhouse.io/v1/boards/{board_token}/jobs"
        super().__init__(company_name, jobs_url, **kwargs)
        self.board_token = board_token

    def scrape(self) -> list[Job]:
        """Scrape jobs from the Greenhouse API."""
        data = self.fetch_json()
        jobs = []

        for job_data in data.get("jobs", []):
            job_id = str(job_data.get("id", ""))
            title = job_data.get("title", "")
            # Guard against a null "location" value in the payload
            location = (job_data.get("location") or {}).get("name", "")
            absolute_url = job_data.get("absolute_url", "")

            # Take the first department if any are listed
            departments = job_data.get("departments", [])
            department = departments[0].get("name") if departments else None

            jobs.append(Job(
                external_id=job_id,
                title=title,
                url=absolute_url,
                location=location,
                department=department,
                remote_type=self.classify_remote(location),
            ))

        return jobs
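A usage sketch follows. The board token "gitlab" is illustrative (tokens vary per company), and the printed fields simply echo what the scrape() loop above populates. Each entry in the API's "jobs" array is expected to carry the fields that loop reads: id, title, absolute_url, location.name, and departments[].name.

# Illustrative only: the board token is an assumption, not verified.
scraper = GreenhouseScraper(company_name="GitLab", board_token="gitlab")

for job in scraper.scrape():
    print(f"{job.title} | {job.location} | {job.remote_type}")
    print(f"  {job.url}")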