diff --git a/CLAUDE.md b/CLAUDE.md
new file mode 100644
index 0000000..d1f2847
--- /dev/null
+++ b/CLAUDE.md
@@ -0,0 +1,65 @@
+# Job Scraper
+
+Job board monitoring for privacy-focused and open-source companies.
+
+## Quick Reference
+
+| Item | Value |
+|------|-------|
+| URL | https://jobs.novanexus.ca |
+| Port | 8085 |
+| Containers | job-scraper-scheduled, job-dashboard |
+| Data | ./data/jobs.db, ./data/dashboard.html |
+
+## Development Workflow
+
+After making code changes, rebuild and deploy:
+
+```bash
+cd ~/job-scraper && sudo docker compose build && sudo docker compose up -d
+```
+
+## Common Commands
+
+```bash
+# View status
+sudo docker compose ps
+
+# View logs
+sudo docker compose logs -f scraper-scheduled
+
+# Run scraper manually (one-time)
+sudo docker compose run --rm scraper
+
+# Restart services
+sudo docker compose restart
+
+# Rebuild without cache (if having issues)
+sudo docker compose build --no-cache && sudo docker compose up -d
+```
+
+## Configuration
+
+- `config.yaml` - Companies to monitor, filters, notifications
+- After config changes: `sudo docker compose restart scraper-scheduled`
+
+## Email Notifications
+
+Uses msmtp with system config (`~/.msmtprc`). The container mounts this file.
+
+To test email manually, trigger a one-time scraper run:
+```bash
+sudo docker compose run --rm scraper
+```
+
+## Files
+
+- `main.py` - CLI entry point, scheduling
+- `db.py` - SQLite database operations
+- `notify.py` - Console, msmtp, SMTP email, and Slack notifications
+- `dashboard.py` - HTML dashboard generator
+- `scrapers/` - Platform-specific scrapers (greenhouse, lever, ashby)
+
+## Documentation
+
+Full docs: ~/maple-docs/docs/04-services/job-scraper.md
diff --git a/Dockerfile b/Dockerfile
index 9b9bdb3..d9d5d89 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -2,6 +2,12 @@ FROM python:3.12-slim
WORKDIR /app
+# Install msmtp for email notifications
+RUN apt-get update && apt-get install -y --no-install-recommends \
+ msmtp \
+ ca-certificates \
+ && rm -rf /var/lib/apt/lists/*
+
# Install dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
diff --git a/config.yaml b/config.yaml
index 0f71664..e7a412a 100644
--- a/config.yaml
+++ b/config.yaml
@@ -154,7 +154,12 @@ notifications:
# Console output is always enabled
console: true
- # Uncomment and configure for email notifications
+ # msmtp - uses system msmtp config (~/.msmtprc)
+ msmtp:
+ from_addr: admin@novanexus.ca
+ to_addr: me@bastiangruber.ca
+
+ # Uncomment and configure for SMTP email notifications (alternative to msmtp)
# email:
# smtp_host: smtp.gmail.com
# smtp_port: 587
diff --git a/dashboard.py b/dashboard.py
index dc746a0..cbe77d2 100644
--- a/dashboard.py
+++ b/dashboard.py
@@ -133,6 +133,14 @@ def generate_dashboard(output_path: str = "data/dashboard.html"):
db = Database()
jobs = db.get_all_active_jobs()
+ # Get all monitored companies
+ all_company_names = db.get_all_companies()
+
+ # Track total jobs per company (before location filtering)
+ total_per_company = {}
+ for company_name, job in jobs:
+ total_per_company[company_name] = total_per_company.get(company_name, 0) + 1
+
# Group by company, filtering out irrelevant remote locations
companies = {}
filtered_count = 0
@@ -144,7 +152,15 @@ def generate_dashboard(output_path: str = "data/dashboard.html"):
companies[company_name] = []
companies[company_name].append(job)
+ # Ensure all monitored companies are in the dict (even with 0 jobs)
+ for name in all_company_names:
+ if name not in companies:
+ companies[name] = []
+ if name not in total_per_company:
+ total_per_company[name] = 0
+
total_shown = sum(len(jobs) for jobs in companies.values())
+ total_scraped = sum(total_per_company.values())
# Sort companies by name
sorted_companies = sorted(companies.items())
@@ -334,6 +350,13 @@ def generate_dashboard(output_path: str = "data/dashboard.html"):
.toc-links a:hover {{
text-decoration: underline;
}}
+ .toc-links .empty {{
+ color: var(--muted);
+ cursor: default;
+ }}
+ .toc-links .empty:hover {{
+ text-decoration: none;
+ }}
.filter-buttons {{
display: flex;
flex-wrap: wrap;
@@ -367,7 +390,7 @@ def generate_dashboard(output_path: str = "data/dashboard.html"):
$ job-board
Last updated: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")} |
- {total_shown} jobs across {len(companies)} companies
+ {total_shown}/{total_scraped} jobs (location filtered) | Monitoring {len(all_company_names)} companies
@@ -402,7 +425,12 @@ def generate_dashboard(output_path: str = "data/dashboard.html"):
# Table of contents
for company_name, company_jobs in sorted_companies:
anchor = company_name.lower().replace(" ", "-")
- html += f' {company_name} ({len(company_jobs)})\n'
+ filtered = len(company_jobs)
+ total = total_per_company.get(company_name, 0)
+ if filtered > 0:
+ html += f' {company_name} ({filtered}/{total})\n'
+ else:
+ html += f' {company_name} (0/{total})\n'
html += """
@@ -410,8 +438,10 @@ def generate_dashboard(output_path: str = "data/dashboard.html"):
"""
- # Job listings
+ # Job listings (only for companies with jobs)
for company_name, company_jobs in sorted_companies:
+ if not company_jobs:
+ continue # Skip companies with no jobs after filtering
anchor = company_name.lower().replace(" ", "-")
html += f"""