job-scraper/dashboard.py

659 lines
22 KiB
Python

#!/usr/bin/env python3
"""
Generate a simple text-based HTML dashboard of all tracked jobs.
"""
import re
from collections import Counter
from datetime import datetime
from html import escape
from pathlib import Path

from db import Database
# Location grouping rules: keyword -> (group_id, display_name)
# Order matters - first match wins
LOCATION_RULES = [
# Canada
(["canada", "toronto", "vancouver", "montreal", "ottawa", "calgary", "waterloo"], "canada", "Canada"),
# Germany
(["germany", "berlin", "munich", "frankfurt", "hamburg"], "germany", "Germany"),
# UK
(["united kingdom", " uk", "uk ", "london", "england", "manchester", "edinburgh"], "uk", "UK"),
# Ireland
(["ireland", "dublin"], "ireland", "Ireland"),
# Netherlands
(["netherlands", "amsterdam", "rotterdam"], "netherlands", "Netherlands"),
# France
(["france", "paris"], "france", "France"),
# Spain
(["spain", "madrid", "barcelona"], "spain", "Spain"),
# Poland
(["poland", "warsaw", "krakow", "wroclaw"], "poland", "Poland"),
# Sweden
(["sweden", "stockholm"], "sweden", "Sweden"),
# Switzerland
(["switzerland", "zurich", "geneva"], "switzerland", "Switzerland"),
# Australia
(["australia", "sydney", "melbourne"], "australia", "Australia"),
# India
(["india", "bangalore", "bengaluru", "hyderabad", "delhi", "mumbai", "pune"], "india", "India"),
# Japan
(["japan", "tokyo"], "japan", "Japan"),
# Singapore
(["singapore"], "singapore", "Singapore"),
# Israel
(["israel", "tel aviv"], "israel", "Israel"),
# Brazil
(["brazil", "sao paulo"], "brazil", "Brazil"),
# US (must be after other countries to avoid false matches)
(["united states", "usa", "u.s.", "san francisco", "new york", "nyc", "seattle",
"austin", "boston", "chicago", "denver", "los angeles", "atlanta", "dallas",
"houston", "california", "washington", "texas", "massachusetts", "colorado",
"portland", "miami", "phoenix", "san diego", "san jose", "palo alto",
"mountain view", "sunnyvale", "menlo park", "cupertino"], "us", "US"),
# Regions
(["emea"], "emea", "EMEA"),
(["americas", "north america", "latam"], "americas", "Americas"),
(["apac", "asia pacific", "asia-pacific"], "apac", "APAC"),
(["worldwide", "global", "anywhere", "earth"], "worldwide", "Worldwide"),
]
def _keyword_matches(keyword: str, text: str) -> bool:
    """Return True if *keyword* occurs in *text* as a whole word or phrase.

    Surrounding whitespace on the keyword is ignored (rules such as " uk"
    used padding as a poor man's word boundary). A ``\\b`` anchor is only
    added on an edge that is alphanumeric, so keywords ending in
    punctuation such as "u.s." still match.
    """
    needle = keyword.strip()
    if not needle:
        # An empty keyword would match every location.
        return False
    prefix = r"\b" if needle[0].isalnum() else ""
    suffix = r"\b" if needle[-1].isalnum() else ""
    return re.search(prefix + re.escape(needle) + suffix, text) is not None


def extract_location_info(location: str, remote_type: str, rules=None) -> tuple[list[str], str]:
    """
    Extract location tags and short display text from a job's location.

    Args:
        location: Free-form location string; may be empty or None.
        remote_type: The job's remote classification; the value "remote"
            marks the job as remote regardless of the location text.
        rules: Optional list of (keywords, tag_id, display_name) tuples.
            Defaults to the module-level LOCATION_RULES.

    Returns (list of tag ids, short display location). "remote" is always
    the first tag when present. The display text is the display name of the
    first matching rule, else "Remote" for remote jobs, else the location
    itself truncated to 25 characters plus "...".
    """
    loc_lower = (location or "").lower()
    is_remote = remote_type == "remote" or "remote" in loc_lower

    if not location:
        # No location text to match rules against, but a job flagged as
        # remote must still be tagged so it shows up under the Remote filter.
        return (["remote"], "Remote") if is_remote else ([], "")

    if rules is None:
        rules = LOCATION_RULES

    tags = ["remote"] if is_remote else []
    display = ""
    for keywords, tag_id, display_name in rules:
        # Whole-word matching avoids false hits such as "Ukraine" -> UK or
        # "Indiana" -> India that plain substring tests produce.
        if not any(_keyword_matches(kw, loc_lower) for kw in keywords):
            continue
        if tag_id not in tags:
            tags.append(tag_id)
        if not display:
            # First matching rule wins the display slot.
            display = display_name

    if not display:
        if is_remote:
            display = "Remote"
        else:
            display = location[:25] + "..." if len(location) > 25 else location
    return tags, display
def _company_anchor(company_name: str) -> str:
    """Return the slug used as a company's element id and TOC link target."""
    return company_name.lower().replace(" ", "-").replace("'", "")


def generate_dashboard(output_path: str = "data/dashboard.html"):
    """Generate a static HTML dashboard.

    Loads every active job and every known company from the database,
    groups the jobs per company, derives location tags for each job, and
    writes a single self-contained HTML page (inline CSS and JavaScript)
    with a search box, location/role filter buttons and a per-company
    table of contents.

    All values that originate from job data (titles, URLs, locations,
    company names) are HTML-escaped before being placed into the page.

    Args:
        output_path: Destination file; parent directories are created.

    Returns:
        The ``output_path`` that was written.
    """
    db = Database()
    jobs = db.get_all_active_jobs()
    all_company_names = db.get_all_companies()

    # Group jobs by company and count how many jobs carry each location tag.
    companies = {}
    location_counts = Counter()
    for company_name, job in jobs:
        tags, display = extract_location_info(job.location, job.remote_type)
        # Tags are unique per job, so this adds one per tag.
        location_counts.update(tags)
        companies.setdefault(company_name, []).append({
            "job": job,
            "tags": tags,
            "display": display,
            "search_text": f"{job.title.lower()} {(job.location or '').lower()} {(job.department or '').lower()} {' '.join(tags)}",
        })

    # Ensure all companies exist (even with 0 jobs) so they appear in the TOC.
    for name in all_company_names:
        companies.setdefault(name, [])

    total_jobs = sum(len(company_jobs) for company_jobs in companies.values())
    sorted_companies = sorted(companies.items())

    # Location filters: only tags present in the data, Remote first, then
    # by count descending (ties keep first-seen order).
    tag_display = {tag_id: name for _, tag_id, name in LOCATION_RULES}
    tag_display["remote"] = "Remote"
    location_filters = []
    if "remote" in location_counts:
        location_filters.append(("remote", "Remote", location_counts["remote"]))
    location_filters.extend(
        (tag_id, tag_display.get(tag_id, tag_id.title()), count)
        for tag_id, count in location_counts.most_common()
        if tag_id != "remote"
    )
    location_buttons = "".join(
        f' <button class="filter-btn" data-filter="{tag_id}" data-category="location">{label} ({count})</button>\n'
        for tag_id, label, count in location_filters
    )

    # Per-tag (background, foreground) colours for the tag badges.
    tag_colors = {
        "remote": ("#1a4a1a", "#4ade80"),
        "canada": ("#4a1a1a", "#f87171"),
        "germany": ("#4a4a1a", "#facc15"),
        "uk": ("#2a1a3a", "#a78bfa"),
        "us": ("#3a2a1a", "#fb923c"),
        "emea": ("#1a3a4a", "#60a5fa"),
        "americas": ("#3a1a4a", "#c084fc"),
        "worldwide": ("#1a4a3a", "#34d399"),
        "apac": ("#1a2a4a", "#38bdf8"),
        "ireland": ("#1a4a2a", "#4ade80"),
        "netherlands": ("#3a3a1a", "#fbbf24"),
        "france": ("#2a2a4a", "#818cf8"),
        "spain": ("#4a2a1a", "#fb7185"),
        "poland": ("#3a1a2a", "#f472b6"),
        "sweden": ("#1a3a3a", "#2dd4bf"),
        "switzerland": ("#4a1a2a", "#fb7185"),
        "australia": ("#2a3a1a", "#a3e635"),
        "india": ("#4a3a1a", "#fcd34d"),
        "japan": ("#4a1a3a", "#e879f9"),
        "singapore": ("#1a4a4a", "#22d3d1"),
        "israel": ("#3a2a2a", "#fca5a5"),
        "brazil": ("#2a4a1a", "#86efac"),
    }
    tag_css = "".join(
        f""" .tag-{tag_id} {{
background: {bg};
color: {fg};
}}
"""
        for tag_id, (bg, fg) in tag_colors.items()
    )

    generated_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # The page is assembled as a list of fragments and joined once at the end.
    parts = [f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Job Board</title>
<style>
:root {{
--bg: #1a1a1a;
--fg: #e0e0e0;
--accent: #4a9eff;
--muted: #888;
--border: #333;
--highlight: #2a2a2a;
}}
* {{ box-sizing: border-box; margin: 0; padding: 0; }}
body {{
font-family: "SF Mono", "Monaco", "Inconsolata", "Fira Code", monospace;
font-size: 14px;
line-height: 1.6;
background: var(--bg);
color: var(--fg);
padding: 20px;
max-width: 1200px;
margin: 0 auto;
}}
header {{
border-bottom: 1px solid var(--border);
padding-bottom: 15px;
margin-bottom: 20px;
}}
h1 {{
font-size: 18px;
font-weight: normal;
color: var(--accent);
}}
.meta {{
color: var(--muted);
font-size: 12px;
margin-top: 5px;
}}
.filters {{
margin: 15px 0;
padding: 10px;
background: var(--highlight);
border-radius: 4px;
}}
.filters input {{
background: var(--bg);
border: 1px solid var(--border);
color: var(--fg);
padding: 8px 12px;
width: 100%;
max-width: 400px;
font-family: inherit;
font-size: 14px;
border-radius: 4px;
}}
.filters input:focus {{
outline: none;
border-color: var(--accent);
}}
.stats {{
display: flex;
gap: 20px;
margin: 10px 0;
font-size: 12px;
color: var(--muted);
}}
.company {{
margin-bottom: 25px;
}}
.company-header {{
display: flex;
align-items: baseline;
gap: 10px;
padding: 8px 0;
border-bottom: 1px solid var(--border);
cursor: pointer;
}}
.company-header:hover {{
color: var(--accent);
}}
.company-name {{
font-weight: bold;
color: var(--accent);
}}
.company-count {{
color: var(--muted);
font-size: 12px;
}}
.jobs {{
margin-left: 0;
}}
.job {{
padding: 6px 0;
border-bottom: 1px solid var(--border);
display: flex;
justify-content: space-between;
gap: 20px;
align-items: baseline;
}}
.job:last-child {{
border-bottom: none;
}}
.job:hover {{
background: var(--highlight);
}}
.job-title {{
overflow: hidden;
text-overflow: ellipsis;
}}
.job-title a {{
color: var(--fg);
text-decoration: none;
}}
.job-title a:hover {{
color: var(--accent);
text-decoration: underline;
}}
.job-location {{
color: var(--muted);
font-size: 12px;
text-align: right;
white-space: nowrap;
flex-shrink: 0;
}}
.tag {{
display: inline-block;
padding: 2px 6px;
border-radius: 3px;
font-size: 11px;
margin-left: 5px;
}}
{tag_css}
.hidden {{
display: none;
}}
.toc {{
margin: 20px 0;
padding: 15px;
background: var(--highlight);
border-radius: 4px;
}}
.toc-title {{
font-size: 12px;
color: var(--muted);
margin-bottom: 10px;
}}
.toc-links {{
display: flex;
flex-wrap: wrap;
gap: 10px;
}}
.toc-link {{
color: var(--accent);
text-decoration: none;
font-size: 13px;
}}
.toc-link:hover {{
text-decoration: underline;
}}
.toc-link.empty {{
color: var(--muted);
}}
.toc-link.hidden {{
display: none;
}}
.filter-section {{
display: flex;
flex-wrap: wrap;
gap: 8px;
margin-top: 10px;
align-items: center;
}}
.filter-label {{
color: var(--muted);
font-size: 12px;
margin-right: 4px;
min-width: 60px;
}}
.filter-btn {{
background: var(--bg);
border: 1px solid var(--border);
color: var(--muted);
padding: 4px 12px;
font-family: inherit;
font-size: 12px;
border-radius: 4px;
cursor: pointer;
transition: all 0.15s;
}}
.filter-btn:hover {{
border-color: var(--accent);
color: var(--fg);
}}
.filter-btn.active {{
background: var(--accent);
border-color: var(--accent);
color: var(--bg);
}}
.clear-btn {{
border-color: #666;
}}
.clear-btn:hover {{
border-color: #f87171;
color: #f87171;
}}
</style>
</head>
<body>
<header>
<h1>$ job-board</h1>
<div class="meta">
Last updated: {generated_at} |
{total_jobs} jobs | {len(all_company_names)} companies
</div>
</header>
<div class="filters">
<input type="text" id="search" placeholder="Filter jobs... (press / to focus, Esc to clear)" autofocus>
<div class="filter-section">
<span class="filter-label">Quick:</span>
<button class="filter-btn" data-filter="" data-category="all">All ({total_jobs})</button>
<button class="filter-btn clear-btn" data-action="clear">Clear Filters</button>
</div>
<div class="filter-section">
<span class="filter-label">Location:</span>
{location_buttons} </div>
<div class="filter-section">
<span class="filter-label">Role:</span>
<button class="filter-btn" data-filter="engineer" data-category="role">Engineering</button>
<button class="filter-btn" data-filter="senior" data-category="role">Senior</button>
<button class="filter-btn" data-filter="staff principal" data-category="role">Staff+</button>
<button class="filter-btn" data-filter="backend" data-category="role">Backend</button>
<button class="filter-btn" data-filter="frontend" data-category="role">Frontend</button>
<button class="filter-btn" data-filter="infrastructure platform sre" data-category="role">Infra/Platform</button>
<button class="filter-btn" data-filter="security" data-category="role">Security</button>
<button class="filter-btn" data-filter="manager director" data-category="role">Management</button>
</div>
<div class="stats">
<span id="visible-count">{total_jobs} jobs shown</span>
</div>
</div>
<div class="toc">
<div class="toc-title">Jump to company:</div>
<div class="toc-links" id="toc-links">
"""]

    # Table of contents with data attributes the page script updates.
    for company_name, company_jobs in sorted_companies:
        anchor = escape(_company_anchor(company_name))
        count = len(company_jobs)
        css_class = "toc-link" if count > 0 else "toc-link empty"
        parts.append(
            f' <a href="#{anchor}" class="{css_class}" data-company="{anchor}" data-total="{count}">{escape(company_name)} ({count})</a>\n'
        )
    parts.append(""" </div>
</div>
<main id="job-list">
""")

    # Job listings; companies without jobs appear only in the TOC.
    for company_name, company_jobs in sorted_companies:
        if not company_jobs:
            continue
        anchor = escape(_company_anchor(company_name))
        total = len(company_jobs)
        parts.append(f"""
<div class="company" id="{anchor}" data-company="{anchor}" data-total="{total}">
<div class="company-header">
<span class="company-name">{escape(company_name)}</span>
<span class="company-count" data-total="{total}">{total} positions</span>
</div>
<div class="jobs">
""")
        for job_data in sorted(company_jobs, key=lambda j: j["job"].title):
            job = job_data["job"]
            tags = job_data["tags"]
            tag_html = "".join(
                f'<span class="tag tag-{tag}">{tag}</span>' for tag in tags
            )
            # data-tags carries the exact tag ids so the location buttons can
            # match whole tags instead of substrings of the search text.
            parts.append(f""" <div class="job" data-search="{escape(job_data["search_text"])}" data-tags="{escape(' '.join(tags))}">
<span class="job-title"><a href="{escape(job.url or "")}" target="_blank" rel="noopener">{escape(job.title)}</a>{tag_html}</span>
<span class="job-location">{escape(job_data["display"])}</span>
</div>
""")
        parts.append(""" </div>
</div>
""")

    # Client-side filtering script (plain string: braces are literal here).
    parts.append(""" </main>
<script>
const search = document.getElementById('search');
const jobs = document.querySelectorAll('.job');
const companies = document.querySelectorAll('.company');
const tocLinks = document.querySelectorAll('.toc-link');
const visibleCount = document.getElementById('visible-count');
const filterBtns = document.querySelectorAll('.filter-btn');
const clearBtn = document.querySelector('.clear-btn');
// Track active filters by category
const activeFilters = {
location: null,
role: null
};
function applyFilters() {
let totalVisible = 0;
const searchTerms = search.value.toLowerCase().trim().split(/\\s+/).filter(t => t);
// Build filter terms from active category filters
const locationTerms = activeFilters.location ? activeFilters.location.split(/\\s+/) : [];
const roleTerms = activeFilters.role ? activeFilters.role.split(/\\s+/) : [];
const hasFilters = searchTerms.length > 0 || locationTerms.length > 0 || roleTerms.length > 0;
// Track visible counts per company
const companyCounts = {};
companies.forEach(company => {
const companyId = company.dataset.company;
const companyJobs = company.querySelectorAll('.job');
let companyVisible = 0;
companyJobs.forEach(job => {
const searchText = job.dataset.search;
// Match logic: AND between categories, OR within each category
let matches = true;
// Search box (OR within terms)
if (searchTerms.length > 0) {
matches = matches && searchTerms.some(term => searchText.includes(term));
}
// Location filter: exact match against the job's tags (OR within terms)
if (locationTerms.length > 0) {
const jobTags = (job.dataset.tags || '').split(' ');
matches = matches && locationTerms.some(term => jobTags.includes(term));
}
// Role filter (OR within terms)
if (roleTerms.length > 0) {
matches = matches && roleTerms.some(term => searchText.includes(term));
}
job.classList.toggle('hidden', !matches);
if (matches) {
companyVisible++;
totalVisible++;
}
});
company.classList.toggle('hidden', companyVisible === 0);
companyCounts[companyId] = companyVisible;
// Update company header count
const countSpan = company.querySelector('.company-count');
const total = parseInt(countSpan.dataset.total);
if (!hasFilters) {
countSpan.textContent = `${total} positions`;
} else {
countSpan.textContent = `${companyVisible}/${total} positions`;
}
});
// Update TOC links - always show all, grey out empty ones
tocLinks.forEach(link => {
const companyId = link.dataset.company;
const total = parseInt(link.dataset.total);
const visible = companyCounts[companyId] || 0;
// Strip only the trailing "(count)" so names containing parentheses survive
const name = link.textContent.replace(/\\s*\\([^()]*\\)$/, '');
if (!hasFilters) {
link.textContent = `${name} (${total})`;
link.classList.toggle('empty', total === 0);
} else {
link.textContent = `${name} (${visible}/${total})`;
link.classList.toggle('empty', visible === 0);
}
// Always show the link, never hide
link.classList.remove('hidden');
});
visibleCount.textContent = `${totalVisible} jobs shown`;
}
function clearAllFilters() {
search.value = '';
activeFilters.location = null;
activeFilters.role = null;
filterBtns.forEach(btn => btn.classList.remove('active'));
applyFilters();
}
search.addEventListener('input', () => {
applyFilters();
});
filterBtns.forEach(btn => {
btn.addEventListener('click', () => {
const filter = btn.dataset.filter;
const category = btn.dataset.category;
const action = btn.dataset.action;
// Handle clear button
if (action === 'clear') {
clearAllFilters();
return;
}
// Handle "All" button
if (category === 'all') {
clearAllFilters();
return;
}
// Toggle filter in category
const categoryBtns = document.querySelectorAll(`.filter-btn[data-category="${category}"]`);
if (btn.classList.contains('active')) {
// Deselect
btn.classList.remove('active');
activeFilters[category] = null;
} else {
// Select (deselect others in same category)
categoryBtns.forEach(b => b.classList.remove('active'));
btn.classList.add('active');
activeFilters[category] = filter;
}
applyFilters();
});
});
document.addEventListener('keydown', (e) => {
if (e.key === '/' && document.activeElement !== search) {
e.preventDefault();
search.focus();
}
if (e.key === 'Escape') {
clearAllFilters();
search.blur();
}
});
</script>
</body>
</html>
""")

    # Write the file. The page declares UTF-8, so encode it explicitly
    # instead of relying on the platform default encoding.
    output = Path(output_path)
    output.parent.mkdir(parents=True, exist_ok=True)
    output.write_text("".join(parts), encoding="utf-8")
    print(f"Dashboard generated: {output_path}")
    return output_path
# Script entry point: when run directly (not imported), build the dashboard
# at its default output path.
if __name__ == "__main__":
    generate_dashboard()