Update filters and add cleanup

This commit is contained in:
Bastian Gruber 2026-01-29 16:24:44 +00:00
parent fd4254df3e
commit c655f2e078
Signed by: gruberb
GPG key ID: 426AF1CBA0530691
7 changed files with 4694 additions and 1242 deletions

View file

@ -72,10 +72,6 @@ companies:
platform: greenhouse platform: greenhouse
board_token: automatticcareers board_token: automatticcareers
- name: Canonical
platform: greenhouse
board_token: canonical
- name: ClickHouse - name: ClickHouse
platform: greenhouse platform: greenhouse
board_token: clickhouse board_token: clickhouse

View file

@ -3,168 +3,193 @@
Generate a simple text-based HTML dashboard of all tracked jobs. Generate a simple text-based HTML dashboard of all tracked jobs.
""" """
import re
from datetime import datetime from datetime import datetime
from pathlib import Path from pathlib import Path
from collections import Counter
from db import Database from db import Database
# Regions/locations we care about (case-insensitive matching) # Location grouping rules: keyword -> (group_id, display_name)
DESIRED_REGIONS = [ # Order matters - first match wins
"canada", "toronto", "vancouver", LOCATION_RULES = [
"germany", "berlin", "munich", # Canada
"emea", (["canada", "toronto", "vancouver", "montreal", "ottawa", "calgary", "waterloo"], "canada", "Canada"),
"americas", # includes North/South America # Germany
"north america", (["germany", "berlin", "munich", "frankfurt", "hamburg"], "germany", "Germany"),
"worldwide", "global", "anywhere",
]
# Locations to explicitly exclude (on-site or remote restricted to these)
EXCLUDED_LOCATIONS = [
# US cities/states (we don't want US-only jobs)
"san francisco", "new york", "nyc", "seattle", "austin", "boston",
"chicago", "denver", "los angeles", "atlanta", "dallas", "houston",
"california", "washington", "texas", "massachusetts", "colorado",
"united states", "usa", "u.s.", "us-", "usa-",
# UK # UK
"london", "united kingdom", "uk", "dublin", "ireland", (["united kingdom", " uk", "uk ", "london", "england", "manchester", "edinburgh"], "uk", "UK"),
# Australia/APAC (not EMEA) # Ireland
"sydney", "melbourne", "australia", "singapore", "tokyo", "japan", (["ireland", "dublin"], "ireland", "Ireland"),
"india", "bangalore", "bengaluru", "hyderabad", "delhi", # Netherlands
"korea", "seoul", "taiwan", "taipei", "china", "beijing", "shenzhen", (["netherlands", "amsterdam", "rotterdam"], "netherlands", "Netherlands"),
# Other excluded # France
"israel", "tel aviv", "brazil", "sao paulo", "mexico", (["france", "paris"], "france", "France"),
"netherlands", "amsterdam", "france", "paris", "spain", "madrid", # Spain
"portugal", "lisbon", "poland", "warsaw", "italy", (["spain", "madrid", "barcelona"], "spain", "Spain"),
"czech", "prague", "serbia", "belgrade", "cyprus", "limassol", # Poland
"austria", "vienna", "sweden", "stockholm", "denmark", "copenhagen", (["poland", "warsaw", "krakow", "wroclaw"], "poland", "Poland"),
"switzerland", "romania", "bucharest", "hungary", "greece", # Sweden
"south africa", "indonesia", "jakarta", "malaysia", (["sweden", "stockholm"], "sweden", "Sweden"),
# Switzerland
(["switzerland", "zurich", "geneva"], "switzerland", "Switzerland"),
# Australia
(["australia", "sydney", "melbourne"], "australia", "Australia"),
# India
(["india", "bangalore", "bengaluru", "hyderabad", "delhi", "mumbai", "pune"], "india", "India"),
# Japan
(["japan", "tokyo"], "japan", "Japan"),
# Singapore
(["singapore"], "singapore", "Singapore"),
# Israel
(["israel", "tel aviv"], "israel", "Israel"),
# Brazil
(["brazil", "sao paulo"], "brazil", "Brazil"),
# US (must be after other countries to avoid false matches)
(["united states", "usa", "u.s.", "san francisco", "new york", "nyc", "seattle",
"austin", "boston", "chicago", "denver", "los angeles", "atlanta", "dallas",
"houston", "california", "washington", "texas", "massachusetts", "colorado",
"portland", "miami", "phoenix", "san diego", "san jose", "palo alto",
"mountain view", "sunnyvale", "menlo park", "cupertino"], "us", "US"),
# Regions
(["emea"], "emea", "EMEA"),
(["americas", "north america", "latam"], "americas", "Americas"),
(["apac", "asia pacific", "asia-pacific"], "apac", "APAC"),
(["worldwide", "global", "anywhere", "earth"], "worldwide", "Worldwide"),
] ]
def is_location_relevant(location: str, remote_type: str) -> bool: def extract_location_info(location: str, remote_type: str) -> tuple[list[str], str]:
""" """
Strict location filter. Only keeps jobs available in Canada, Germany, EMEA, or Worldwide. Extract location tags and short display text from a job's location.
Filters out US-only jobs, UK jobs, APAC jobs, etc. Returns (list of tag ids, short display location)
""" """
if not location:
return False # No location info = probably US-based, filter out
loc_lower = location.lower()
# Check if any desired region is mentioned FIRST
has_desired = any(region in loc_lower for region in DESIRED_REGIONS)
# If it has a desired region, keep it (even if it also mentions excluded locations)
# e.g., "Remote (United States | Canada)" should be kept because of Canada
if has_desired:
return True
# If it just says "Remote" with nothing else, keep it (truly remote)
if loc_lower.strip() == "remote":
return True
# Check for excluded locations
has_excluded = any(excl in loc_lower for excl in EXCLUDED_LOCATIONS)
if has_excluded:
return False
# Check for patterns like "In-Office", "Hybrid", "On-site" without desired region
if any(x in loc_lower for x in ["in-office", "hybrid", "on-site", "onsite", "office based"]):
return False
# If we can't determine, filter it out (safer)
return False
def extract_location_tags(location: str, remote_type: str) -> tuple[list[str], str]:
"""
Extract relevant location tags and a short display location.
Returns (list of tag names, short location string)
"""
if not location:
return [], ""
loc_lower = location.lower()
tags = [] tags = []
short_loc = "" display = ""
if not location:
return tags, display
loc_lower = location.lower()
# Check for remote # Check for remote
is_remote = remote_type == "remote" or "remote" in loc_lower is_remote = remote_type == "remote" or "remote" in loc_lower
if is_remote: if is_remote:
tags.append("remote") tags.append("remote")
# Check for Canada # Check against location rules
if any(x in loc_lower for x in ["canada", "toronto", "vancouver"]): for keywords, tag_id, display_name in LOCATION_RULES:
tags.append("canada") if any(kw in loc_lower for kw in keywords):
short_loc = "Canada" if tag_id not in tags:
tags.append(tag_id)
if not display:
display = display_name
# Check for Germany/Berlin # Fallback display
if any(x in loc_lower for x in ["germany", "berlin", "munich"]): if not display:
tags.append("germany") if is_remote:
short_loc = "Germany" if "germany" in loc_lower else "Berlin" display = "Remote"
elif location:
display = location[:25] + "..." if len(location) > 25 else location
# Check for EMEA return tags, display
if "emea" in loc_lower:
tags.append("emea")
short_loc = "EMEA"
# Check for Americas/North America
if "americas" in loc_lower or "north america" in loc_lower:
tags.append("americas")
short_loc = "Americas"
# Check for Worldwide
if any(x in loc_lower for x in ["worldwide", "global", "anywhere"]):
tags.append("worldwide")
short_loc = "Worldwide"
# If no specific region found but it's remote
if not short_loc and is_remote:
short_loc = "Remote"
return tags, short_loc
def generate_dashboard(output_path: str = "data/dashboard.html"): def generate_dashboard(output_path: str = "data/dashboard.html"):
"""Generate a static HTML dashboard.""" """Generate a static HTML dashboard."""
db = Database() db = Database()
jobs = db.get_all_active_jobs() jobs = db.get_all_active_jobs()
# Get all monitored companies
all_company_names = db.get_all_companies() all_company_names = db.get_all_companies()
# Track total jobs per company (before location filtering) # Process all jobs and collect location data
total_per_company = {}
for company_name, job in jobs:
total_per_company[company_name] = total_per_company.get(company_name, 0) + 1
# Group by company, filtering out irrelevant remote locations
companies = {} companies = {}
filtered_count = 0 location_counts = Counter()
for company_name, job in jobs: for company_name, job in jobs:
if not is_location_relevant(job.location, job.remote_type): # Extract location info
filtered_count += 1 tags, display = extract_location_info(job.location, job.remote_type)
continue
# Count locations for filter generation
for tag in tags:
location_counts[tag] += 1
# Store processed job data
if company_name not in companies: if company_name not in companies:
companies[company_name] = [] companies[company_name] = []
companies[company_name].append(job)
# Ensure all monitored companies are in the dict (even with 0 jobs) companies[company_name].append({
"job": job,
"tags": tags,
"display": display,
"search_text": f"{job.title.lower()} {(job.location or '').lower()} {(job.department or '').lower()} {' '.join(tags)}"
})
# Ensure all companies exist (even with 0 jobs)
for name in all_company_names: for name in all_company_names:
if name not in companies: if name not in companies:
companies[name] = [] companies[name] = []
if name not in total_per_company:
total_per_company[name] = 0
total_shown = sum(len(jobs) for jobs in companies.values()) total_jobs = sum(len(j) for j in companies.values())
total_scraped = sum(total_per_company.values())
# Sort companies by name
sorted_companies = sorted(companies.items()) sorted_companies = sorted(companies.items())
# Generate dynamic location filters (only show locations that exist in data)
# Order: Remote first, then by count descending
location_filters = []
if "remote" in location_counts:
location_filters.append(("remote", "Remote", location_counts["remote"]))
# Add other locations sorted by count
other_locations = [(tag, count) for tag, count in location_counts.items() if tag != "remote"]
other_locations.sort(key=lambda x: -x[1])
# Map tag_id to display name
tag_display = {tag_id: display for keywords, tag_id, display in LOCATION_RULES}
tag_display["remote"] = "Remote"
for tag_id, count in other_locations:
display = tag_display.get(tag_id, tag_id.title())
location_filters.append((tag_id, display, count))
# Generate location filter buttons HTML
location_buttons = ""
for tag_id, display, count in location_filters:
location_buttons += f' <button class="filter-btn" data-filter="{tag_id}" data-category="location">{display} ({count})</button>\n'
# Generate tag colors dynamically
tag_colors = {
"remote": ("#1a4a1a", "#4ade80"),
"canada": ("#4a1a1a", "#f87171"),
"germany": ("#4a4a1a", "#facc15"),
"uk": ("#2a1a3a", "#a78bfa"),
"us": ("#3a2a1a", "#fb923c"),
"emea": ("#1a3a4a", "#60a5fa"),
"americas": ("#3a1a4a", "#c084fc"),
"worldwide": ("#1a4a3a", "#34d399"),
"apac": ("#1a2a4a", "#38bdf8"),
"ireland": ("#1a4a2a", "#4ade80"),
"netherlands": ("#3a3a1a", "#fbbf24"),
"france": ("#2a2a4a", "#818cf8"),
"spain": ("#4a2a1a", "#fb7185"),
"poland": ("#3a1a2a", "#f472b6"),
"sweden": ("#1a3a3a", "#2dd4bf"),
"switzerland": ("#4a1a2a", "#fb7185"),
"australia": ("#2a3a1a", "#a3e635"),
"india": ("#4a3a1a", "#fcd34d"),
"japan": ("#4a1a3a", "#e879f9"),
"singapore": ("#1a4a4a", "#22d3d1"),
"israel": ("#3a2a2a", "#fca5a5"),
"brazil": ("#2a4a1a", "#86efac"),
}
# Generate CSS for tags
tag_css = ""
for tag_id, (bg, fg) in tag_colors.items():
tag_css += f""" .tag-{tag_id} {{
background: {bg};
color: {fg};
}}
"""
html = f"""<!DOCTYPE html> html = f"""<!DOCTYPE html>
<html lang="en"> <html lang="en">
<head> <head>
@ -299,30 +324,7 @@ def generate_dashboard(output_path: str = "data/dashboard.html"):
font-size: 11px; font-size: 11px;
margin-left: 5px; margin-left: 5px;
}} }}
.tag-remote {{ {tag_css}
background: #1a4a1a;
color: #4ade80;
}}
.tag-canada {{
background: #4a1a1a;
color: #f87171;
}}
.tag-berlin {{
background: #4a4a1a;
color: #facc15;
}}
.tag-emea {{
background: #1a3a4a;
color: #60a5fa;
}}
.tag-americas {{
background: #3a1a4a;
color: #c084fc;
}}
.tag-worldwide {{
background: #1a4a3a;
color: #34d399;
}}
.hidden {{ .hidden {{
display: none; display: none;
}} }}
@ -342,26 +344,32 @@ def generate_dashboard(output_path: str = "data/dashboard.html"):
flex-wrap: wrap; flex-wrap: wrap;
gap: 10px; gap: 10px;
}} }}
.toc-links a {{ .toc-link {{
color: var(--accent); color: var(--accent);
text-decoration: none; text-decoration: none;
font-size: 13px; font-size: 13px;
}} }}
.toc-links a:hover {{ .toc-link:hover {{
text-decoration: underline; text-decoration: underline;
}} }}
.toc-links .empty {{ .toc-link.empty {{
color: var(--muted); color: var(--muted);
cursor: default;
}} }}
.toc-links .empty:hover {{ .toc-link.hidden {{
text-decoration: none; display: none;
}} }}
.filter-buttons {{ .filter-section {{
display: flex; display: flex;
flex-wrap: wrap; flex-wrap: wrap;
gap: 8px; gap: 8px;
margin-top: 10px; margin-top: 10px;
align-items: center;
}}
.filter-label {{
color: var(--muted);
font-size: 12px;
margin-right: 4px;
min-width: 60px;
}} }}
.filter-btn {{ .filter-btn {{
background: var(--bg); background: var(--bg);
@ -383,6 +391,13 @@ def generate_dashboard(output_path: str = "data/dashboard.html"):
border-color: var(--accent); border-color: var(--accent);
color: var(--bg); color: var(--bg);
}} }}
.clear-btn {{
border-color: #666;
}}
.clear-btn:hover {{
border-color: #f87171;
color: #f87171;
}}
</style> </style>
</head> </head>
<body> <body>
@ -390,47 +405,47 @@ def generate_dashboard(output_path: str = "data/dashboard.html"):
<h1>$ job-board</h1> <h1>$ job-board</h1>
<div class="meta"> <div class="meta">
Last updated: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")} | Last updated: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")} |
{total_shown}/{total_scraped} jobs (location filtered) | Monitoring {len(all_company_names)} companies {total_jobs} jobs | {len(all_company_names)} companies
</div> </div>
</header> </header>
<div class="filters"> <div class="filters">
<input type="text" id="search" placeholder="Filter jobs... (e.g. 'senior engineer', 'remote', 'canada')" autofocus> <input type="text" id="search" placeholder="Filter jobs... (press / to focus, Esc to clear)" autofocus>
<div class="filter-buttons"> <div class="filter-section">
<button class="filter-btn" data-filter="">All</button> <span class="filter-label">Quick:</span>
<button class="filter-btn" data-filter="engineer">Engineering</button> <button class="filter-btn" data-filter="" data-category="all">All ({total_jobs})</button>
<button class="filter-btn" data-filter="senior engineer">Senior Eng</button> <button class="filter-btn clear-btn" data-action="clear">Clear Filters</button>
<button class="filter-btn" data-filter="staff principal">Staff+</button> </div>
<button class="filter-btn" data-filter="manager director">Management</button> <div class="filter-section">
<button class="filter-btn" data-filter="product">Product</button> <span class="filter-label">Location:</span>
<button class="filter-btn" data-filter="design">Design</button> {location_buttons} </div>
<button class="filter-btn" data-filter="security">Security</button> <div class="filter-section">
<button class="filter-btn" data-filter="remote">Remote</button> <span class="filter-label">Role:</span>
<button class="filter-btn" data-filter="canada">Canada</button> <button class="filter-btn" data-filter="engineer" data-category="role">Engineering</button>
<button class="filter-btn" data-filter="germany">Germany</button> <button class="filter-btn" data-filter="senior" data-category="role">Senior</button>
<button class="filter-btn" data-filter="emea">EMEA</button> <button class="filter-btn" data-filter="staff principal" data-category="role">Staff+</button>
<button class="filter-btn" data-filter="americas">Americas</button> <button class="filter-btn" data-filter="backend" data-category="role">Backend</button>
<button class="filter-btn" data-filter="worldwide">Worldwide</button> <button class="filter-btn" data-filter="frontend" data-category="role">Frontend</button>
<button class="filter-btn" data-filter="infrastructure platform sre" data-category="role">Infra/Platform</button>
<button class="filter-btn" data-filter="security" data-category="role">Security</button>
<button class="filter-btn" data-filter="manager director" data-category="role">Management</button>
</div> </div>
<div class="stats"> <div class="stats">
<span id="visible-count">{total_shown} jobs shown</span> <span id="visible-count">{total_jobs} jobs shown</span>
</div> </div>
</div> </div>
<div class="toc"> <div class="toc">
<div class="toc-title">Jump to company:</div> <div class="toc-title">Jump to company:</div>
<div class="toc-links"> <div class="toc-links" id="toc-links">
""" """
# Table of contents # Table of contents with data attributes for JS updating
for company_name, company_jobs in sorted_companies: for company_name, company_jobs in sorted_companies:
anchor = company_name.lower().replace(" ", "-") anchor = company_name.lower().replace(" ", "-").replace("'", "")
filtered = len(company_jobs) count = len(company_jobs)
total = total_per_company.get(company_name, 0) css_class = "toc-link" if count > 0 else "toc-link empty"
if filtered > 0: html += f' <a href="#{anchor}" class="{css_class}" data-company="{anchor}" data-total="{count}">{company_name} ({count})</a>\n'
html += f' <a href="#{anchor}">{company_name} ({filtered}/{total})</a>\n'
else:
html += f' <span class="empty">{company_name} (0/{total})</span>\n'
html += """ </div> html += """ </div>
</div> </div>
@ -438,44 +453,34 @@ def generate_dashboard(output_path: str = "data/dashboard.html"):
<main id="job-list"> <main id="job-list">
""" """
# Job listings (only for companies with jobs) # Job listings
for company_name, company_jobs in sorted_companies: for company_name, company_jobs in sorted_companies:
if not company_jobs: if not company_jobs:
continue # Skip companies with no jobs after filtering continue
anchor = company_name.lower().replace(" ", "-") anchor = company_name.lower().replace(" ", "-").replace("'", "")
total = len(company_jobs)
html += f""" html += f"""
<div class="company" id="{anchor}"> <div class="company" id="{anchor}" data-company="{anchor}" data-total="{total}">
<div class="company-header"> <div class="company-header">
<span class="company-name">{company_name}</span> <span class="company-name">{company_name}</span>
<span class="company-count">{len(company_jobs)} positions</span> <span class="company-count" data-total="{total}">{total} positions</span>
</div> </div>
<div class="jobs"> <div class="jobs">
""" """
for job in sorted(company_jobs, key=lambda j: j.title): for job_data in sorted(company_jobs, key=lambda j: j["job"].title):
location = job.location or "" job = job_data["job"]
location_lower = location.lower() tags = job_data["tags"]
display = job_data["display"]
# Extract tags and short location search_text = job_data["search_text"]
tag_list, short_loc = extract_location_tags(location, job.remote_type)
# Build tag HTML # Build tag HTML
tags = "" tag_html = ""
if "remote" in tag_list: for tag in tags:
tags += '<span class="tag tag-remote">remote</span>' tag_html += f'<span class="tag tag-{tag}">{tag}</span>'
if "canada" in tag_list:
tags += '<span class="tag tag-canada">canada</span>'
if "germany" in tag_list:
tags += '<span class="tag tag-berlin">germany</span>'
if "emea" in tag_list:
tags += '<span class="tag tag-emea">emea</span>'
if "americas" in tag_list:
tags += '<span class="tag tag-americas">americas</span>'
if "worldwide" in tag_list:
tags += '<span class="tag tag-worldwide">worldwide</span>'
html += f""" <div class="job" data-search="{job.title.lower()} {location_lower} {(job.department or '').lower()} {' '.join(tag_list)}"> html += f""" <div class="job" data-search="{search_text}">
<span class="job-title"><a href="{job.url}" target="_blank">{job.title}</a>{tags}</span> <span class="job-title"><a href="{job.url}" target="_blank">{job.title}</a>{tag_html}</span>
<span class="job-location">{short_loc}</span> <span class="job-location">{display}</span>
</div> </div>
""" """
html += """ </div> html += """ </div>
@ -488,67 +493,155 @@ def generate_dashboard(output_path: str = "data/dashboard.html"):
const search = document.getElementById('search'); const search = document.getElementById('search');
const jobs = document.querySelectorAll('.job'); const jobs = document.querySelectorAll('.job');
const companies = document.querySelectorAll('.company'); const companies = document.querySelectorAll('.company');
const tocLinks = document.querySelectorAll('.toc-link');
const visibleCount = document.getElementById('visible-count'); const visibleCount = document.getElementById('visible-count');
const filterBtns = document.querySelectorAll('.filter-btn'); const filterBtns = document.querySelectorAll('.filter-btn');
const clearBtn = document.querySelector('.clear-btn');
function filterJobs(query) { // Track active filters by category
let visible = 0; const activeFilters = {
const terms = query.toLowerCase().trim().split(/\\s+/).filter(t => t); location: null,
role: null
};
function applyFilters() {
let totalVisible = 0;
const searchTerms = search.value.toLowerCase().trim().split(/\\s+/).filter(t => t);
// Build filter terms from active category filters
const locationTerms = activeFilters.location ? activeFilters.location.split(/\\s+/) : [];
const roleTerms = activeFilters.role ? activeFilters.role.split(/\\s+/) : [];
const hasFilters = searchTerms.length > 0 || locationTerms.length > 0 || roleTerms.length > 0;
// Track visible counts per company
const companyCounts = {};
companies.forEach(company => { companies.forEach(company => {
const companyId = company.dataset.company;
const companyJobs = company.querySelectorAll('.job'); const companyJobs = company.querySelectorAll('.job');
let companyVisible = 0; let companyVisible = 0;
companyJobs.forEach(job => { companyJobs.forEach(job => {
const searchText = job.dataset.search; const searchText = job.dataset.search;
// Match if ANY term matches (OR logic for filter buttons)
const matches = terms.length === 0 || terms.some(term => searchText.includes(term)); // Match logic: AND between categories, OR within each category
let matches = true;
// Search box (OR within terms)
if (searchTerms.length > 0) {
matches = matches && searchTerms.some(term => searchText.includes(term));
}
// Location filter (OR within terms)
if (locationTerms.length > 0) {
matches = matches && locationTerms.some(term => searchText.includes(term));
}
// Role filter (OR within terms)
if (roleTerms.length > 0) {
matches = matches && roleTerms.some(term => searchText.includes(term));
}
job.classList.toggle('hidden', !matches); job.classList.toggle('hidden', !matches);
if (matches) { if (matches) {
companyVisible++; companyVisible++;
visible++; totalVisible++;
} }
}); });
company.classList.toggle('hidden', companyVisible === 0); company.classList.toggle('hidden', companyVisible === 0);
companyCounts[companyId] = companyVisible;
// Update company header count
const countSpan = company.querySelector('.company-count');
const total = parseInt(countSpan.dataset.total);
if (!hasFilters) {
countSpan.textContent = `${total} positions`;
} else {
countSpan.textContent = `${companyVisible}/${total} positions`;
}
}); });
visibleCount.textContent = `${visible} jobs shown`; // Update TOC links - always show all, grey out empty ones
tocLinks.forEach(link => {
const companyId = link.dataset.company;
const total = parseInt(link.dataset.total);
const visible = companyCounts[companyId] || 0;
const name = link.textContent.replace(/\\s*\\(.*\\)/, '');
if (!hasFilters) {
link.textContent = `${name} (${total})`;
link.classList.toggle('empty', total === 0);
} else {
link.textContent = `${name} (${visible}/${total})`;
link.classList.toggle('empty', visible === 0);
}
// Always show the link, never hide
link.classList.remove('hidden');
});
visibleCount.textContent = `${totalVisible} jobs shown`;
} }
search.addEventListener('input', (e) => { function clearAllFilters() {
// Clear active button when typing search.value = '';
activeFilters.location = null;
activeFilters.role = null;
filterBtns.forEach(btn => btn.classList.remove('active')); filterBtns.forEach(btn => btn.classList.remove('active'));
filterJobs(e.target.value); applyFilters();
}
search.addEventListener('input', () => {
applyFilters();
}); });
// Filter buttons
filterBtns.forEach(btn => { filterBtns.forEach(btn => {
btn.addEventListener('click', () => { btn.addEventListener('click', () => {
const filter = btn.dataset.filter; const filter = btn.dataset.filter;
search.value = filter; const category = btn.dataset.category;
filterBtns.forEach(b => b.classList.remove('active')); const action = btn.dataset.action;
btn.classList.add('active');
filterJobs(filter); // Handle clear button
if (action === 'clear') {
clearAllFilters();
return;
}
// Handle "All" button
if (category === 'all') {
clearAllFilters();
return;
}
// Toggle filter in category
const categoryBtns = document.querySelectorAll(`.filter-btn[data-category="${category}"]`);
if (btn.classList.contains('active')) {
// Deselect
btn.classList.remove('active');
activeFilters[category] = null;
} else {
// Select (deselect others in same category)
categoryBtns.forEach(b => b.classList.remove('active'));
btn.classList.add('active');
activeFilters[category] = filter;
}
applyFilters();
}); });
}); });
// Keyboard shortcut: / to focus search
document.addEventListener('keydown', (e) => { document.addEventListener('keydown', (e) => {
if (e.key === '/' && document.activeElement !== search) { if (e.key === '/' && document.activeElement !== search) {
e.preventDefault(); e.preventDefault();
search.focus(); search.focus();
} }
if (e.key === 'Escape') { if (e.key === 'Escape') {
search.value = ''; clearAllFilters();
filterBtns.forEach(b => b.classList.remove('active'));
filterJobs('');
search.blur(); search.blur();
} }
}); });
// Set "All" as active by default
filterBtns[0].classList.add('active');
</script> </script>
</body> </body>
</html> </html>

File diff suppressed because it is too large Load diff

23
db.py
View file

@ -247,3 +247,26 @@ class Database:
"SELECT name FROM companies WHERE active = TRUE ORDER BY name" "SELECT name FROM companies WHERE active = TRUE ORDER BY name"
) )
return [row["name"] for row in cursor.fetchall()] return [row["name"] for row in cursor.fetchall()]
def cleanup_removed_companies(self, active_company_names: list[str]) -> list[str]:
    """Delete companies (and their jobs) that are no longer in the config.

    Args:
        active_company_names: Names of every company currently configured.
            An empty list is treated as "no usable config" and nothing is
            deleted, so a missing or empty ``companies`` section can never
            wipe the whole database.

    Returns:
        Names of the companies that were removed, sorted by name.
    """
    if not active_company_names:
        return []

    # Compare in Python instead of building a ``NOT IN (?, ?, ...)`` clause:
    # that needs one bound parameter per configured company and fails once
    # the list outgrows the database driver's parameter limit.
    active = set(active_company_names)
    removed = []

    with self._get_conn() as conn:
        rows = conn.execute(
            "SELECT id, name FROM companies ORDER BY name"
        ).fetchall()
        for row in rows:
            company_name = row["name"]
            # Rows without a name never matched the old ``NOT IN`` test
            # (NULL comparisons are not true), so keep leaving them alone.
            if company_name is None or company_name in active:
                continue
            company_id = row["id"]
            # Jobs reference the company, so they have to go first.
            conn.execute("DELETE FROM jobs WHERE company_id = ?", (company_id,))
            conn.execute("DELETE FROM companies WHERE id = ?", (company_id,))
            removed.append(company_name)

    return removed

View file

@ -20,6 +20,7 @@ services:
- /home/gruberb/.msmtprc:/root/.msmtprc:ro - /home/gruberb/.msmtprc:/root/.msmtprc:ro
environment: environment:
- TZ=America/Toronto - TZ=America/Toronto
- PYTHONUNBUFFERED=1
command: ["python", "main.py", "--schedule"] command: ["python", "main.py", "--schedule"]
restart: unless-stopped restart: unless-stopped
logging: logging:

View file

@ -145,6 +145,13 @@ def run_scraper(config: dict):
notifier = Notifier(config.get("notifications", {})) notifier = Notifier(config.get("notifications", {}))
companies = config.get("companies", []) companies = config.get("companies", [])
# Cleanup companies no longer in config
active_names = [c["name"] for c in companies]
removed = db.cleanup_removed_companies(active_names)
if removed:
print(f"\n🧹 Removed {len(removed)} companies no longer in config: {', '.join(removed)}")
print(f"\nMonitoring {len(companies)} companies...") print(f"\nMonitoring {len(companies)} companies...")
reports = [] reports = []

108
notify.py
View file

@ -28,24 +28,23 @@ class Notifier:
if not reports_with_changes: if not reports_with_changes:
print("\n✓ No changes detected across all companies.") print("\n✓ No changes detected across all companies.")
return else:
# Console output for changes
self._notify_console(reports_with_changes)
# Console output (always) # Email (if configured) - only sends when there are changes
self._notify_console(reports_with_changes)
# Email (if configured)
email_config = self.config.get("email") email_config = self.config.get("email")
if email_config: if email_config and reports_with_changes:
self._notify_email(reports_with_changes, email_config) self._notify_email(reports_with_changes, email_config)
# msmtp (if configured - uses system msmtp config) # msmtp (if configured - sends daily summary always)
msmtp_config = self.config.get("msmtp") msmtp_config = self.config.get("msmtp")
if msmtp_config: if msmtp_config:
self._notify_msmtp(reports_with_changes, msmtp_config) self._notify_msmtp_daily_summary(reports, msmtp_config)
# Slack (if configured) # Slack (if configured) - only sends when there are changes
slack_config = self.config.get("slack") slack_config = self.config.get("slack")
if slack_config: if slack_config and reports_with_changes:
self._notify_slack(reports_with_changes, slack_config) self._notify_slack(reports_with_changes, slack_config)
def _notify_console(self, reports: list[ChangeReport]): def _notify_console(self, reports: list[ChangeReport]):
@ -180,6 +179,95 @@ Content-Type: text/plain; charset=UTF-8
except Exception as e: except Exception as e:
print(f"✗ Failed to send msmtp notification: {e}") print(f"✗ Failed to send msmtp notification: {e}")
def _notify_msmtp_daily_summary(self, reports: list[ChangeReport], config: dict):
    """Send the daily summary email via system msmtp (always sends).

    The mail is produced even when no report contains changes; in that case
    the subject and body say so explicitly.

    Args:
        reports: One report per company. Each is read for ``company_name``,
            ``total_active``, ``new_jobs`` and ``removed_jobs``; each job is
            read for ``title``, ``location`` and ``remote_type``.
        config: msmtp settings. ``to_addr`` and ``from_addr`` are optional
            and fall back to the built-in defaults below.

    Delivery problems (msmtp missing, non-zero exit, timeout) are printed
    rather than raised.
    """
    import subprocess
    from datetime import datetime

    to_addr = config.get("to_addr", "me@bastiangruber.ca")
    from_addr = config.get("from_addr", "admin@novanexus.ca")
    send_timeout = 60  # seconds; a stalled SMTP session must not hang the run

    # Calculate totals
    total_companies = sum(1 for r in reports if r.total_active > 0)
    total_jobs = sum(r.total_active for r in reports)
    total_new = sum(len(r.new_jobs) for r in reports)
    total_removed = sum(len(r.removed_jobs) for r in reports)

    # Build subject line
    if total_new or total_removed:
        changes = []
        if total_new:
            changes.append(f"+{total_new}")
        if total_removed:
            changes.append(f"-{total_removed}")
        subject = f"Job Board: {', '.join(changes)} | {total_jobs} jobs"
    else:
        subject = f"Job Board: No changes | {total_jobs} jobs"

    # Build plain text body
    body_lines = [
        "JOB BOARD DAILY SUMMARY",
        f"{datetime.now().strftime('%Y-%m-%d %H:%M')}",
        "",
        "OVERVIEW",
        f"  Companies with jobs: {total_companies}",
        f"  Total jobs tracked: {total_jobs}",
        "",
    ]

    # Changes section
    reports_with_changes = [r for r in reports if r.new_jobs or r.removed_jobs]
    if reports_with_changes:
        body_lines.append(f"CHANGES: +{total_new} new, -{total_removed} removed")
        body_lines.append("-" * 40)
        for report in reports_with_changes:
            for job in report.new_jobs:
                location_str = f" [{job.location}]" if job.location else ""
                remote_str = " (Remote)" if job.remote_type == "remote" else ""
                body_lines.append(
                    f"  + {report.company_name}: {job.title}{location_str}{remote_str}"
                )
            for job in report.removed_jobs:
                body_lines.append(f"  - {report.company_name}: {job.title}")
        body_lines.append("")
    else:
        body_lines.append("CHANGES: No changes detected")
        body_lines.append("")

    body_lines.append("---")
    body_lines.append("https://jobs.novanexus.ca")
    body = "\n".join(body_lines)

    # Build email message. Job titles may contain non-ASCII text, so the
    # message is declared as 8-bit UTF-8 MIME, and the empty line that ends
    # the header section is written explicitly.
    header_lines = [
        f"Subject: {subject}",
        f"From: {from_addr}",
        f"To: {to_addr}",
        "MIME-Version: 1.0",
        "Content-Type: text/plain; charset=UTF-8",
        "Content-Transfer-Encoding: 8bit",
    ]
    email_msg = "\n".join(header_lines) + "\n\n" + body + "\n"

    try:
        result = subprocess.run(
            ["msmtp", to_addr],
            input=email_msg,
            capture_output=True,
            text=True,
            # Match the charset announced in the headers instead of relying
            # on whatever the process locale happens to be.
            encoding="utf-8",
            timeout=send_timeout,
        )
    except FileNotFoundError:
        print("✗ msmtp not found - install with: apt install msmtp")
    except subprocess.TimeoutExpired:
        print(f"✗ msmtp timed out after {send_timeout}s")
    except Exception as e:
        print(f"✗ Failed to send daily summary: {e}")
    else:
        if result.returncode == 0:
            print("✓ Daily summary email sent")
        else:
            print(f"✗ msmtp failed: {result.stderr}")
def _notify_slack(self, reports: list[ChangeReport], config: dict): def _notify_slack(self, reports: list[ChangeReport], config: dict):
"""Send Slack notification.""" """Send Slack notification."""
import httpx import httpx