# job-scraper/scrapers/greenhouse.py

from .base import BaseScraper, Job
class GreenhouseScraper(BaseScraper):
    """
    Scraper for companies using Greenhouse.

    Greenhouse provides a public JSON API per board:
    https://boards-api.greenhouse.io/v1/boards/{board_token}/jobs
    """

    def __init__(self, company_name: str, board_token: str, **kwargs):
        """
        Args:
            company_name: Human-readable company name, passed to BaseScraper.
            board_token: Greenhouse board identifier used in the API URL.
            **kwargs: Forwarded to BaseScraper.__init__.
        """
        jobs_url = f"https://boards-api.greenhouse.io/v1/boards/{board_token}/jobs"
        super().__init__(company_name, jobs_url, **kwargs)
        self.board_token = board_token

    def scrape(self) -> list[Job]:
        """Fetch the board's job list and return it as Job objects.

        Returns an empty list when the response has no "jobs" key.
        """
        data = self.fetch_json()
        return [self._parse_job(job_data) for job_data in data.get("jobs", [])]

    def _parse_job(self, job_data: dict) -> Job:
        """Convert one Greenhouse job dict into a Job.

        Uses `or {}` / `or []` guards because the API may return explicit
        nulls (e.g. "location": null), in which case dict.get's default
        does NOT apply and a plain .get(...).get(...) chain would raise
        AttributeError.
        """
        location = (job_data.get("location") or {}).get("name", "")
        # First department, when any are listed; otherwise None.
        departments = job_data.get("departments") or []
        department = departments[0].get("name") if departments else None
        return Job(
            external_id=str(job_data.get("id", "")),
            title=job_data.get("title", ""),
            url=job_data.get("absolute_url", ""),
            location=location,
            department=department,
            remote_type=self.classify_remote(location),
        )