# job-scraper/scrapers/greenhouse.py

from .base import BaseScraper, Job


class GreenhouseScraper(BaseScraper):
"""
Scraper for companies using Greenhouse.
Greenhouse provides a JSON API at /embed/job_board/jobs endpoint.
Example: https://boards-api.greenhouse.io/v1/boards/{company}/jobs
"""

    def __init__(self, company_name: str, board_token: str, **kwargs):
        # Build the Job Board API endpoint for this company's board token.
        jobs_url = f"https://boards-api.greenhouse.io/v1/boards/{board_token}/jobs"
        super().__init__(company_name, jobs_url, **kwargs)
        self.board_token = board_token

    def scrape(self) -> list[Job]:
        """Scrape jobs from the Greenhouse Job Board API."""
        data = self.fetch_json()
        jobs = []
        for job_data in data.get("jobs", []):
            job_id = str(job_data.get("id", ""))
            title = job_data.get("title", "")
            # "location" can be present but null, so guard before .get("name").
            location = (job_data.get("location") or {}).get("name", "")
            absolute_url = job_data.get("absolute_url", "")
            # Department is optional; take the first one if any are listed.
            departments = job_data.get("departments", [])
            department = departments[0].get("name") if departments else None
            jobs.append(Job(
                external_id=job_id,
                title=title,
                url=absolute_url,
                location=location,
                department=department,
                remote_type=self.classify_remote(location),
            ))
        return jobs
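

# A minimal usage sketch. The board token "duckduckgo" is illustrative; it must
# match the company's actual Greenhouse board slug. It also assumes BaseScraper
# provides fetch_json() and classify_remote() as the calls above imply. Because
# of the relative import, run it as a module from the project root, e.g.
# `python -m scrapers.greenhouse`, rather than executing the file directly.
if __name__ == "__main__":
    scraper = GreenhouseScraper("DuckDuckGo", "duckduckgo")
    for job in scraper.scrape():
        print(f"{job.title} [{job.location}] -> {job.url}")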