Update filters and add cleanup

2026-01-29 16:24:44 +00:00 · 2026-01-29 16:24:44 +00:00 · c655f2e078
commit c655f2e078
parent fd4254df3e
7 changed files with 4694 additions and 1242 deletions
--- a/config.yaml
+++ b/config.yaml
@ -72,10 +72,6 @@ companies:
    platform: greenhouse
    board_token: automatticcareers

-  - name: Canonical
-    platform: greenhouse
-    board_token: canonical
-
  - name: ClickHouse
    platform: greenhouse
    board_token: clickhouse
--- a/dashboard.py
+++ b/dashboard.py
@ -3,168 +3,193 @@
 Generate a simple text-based HTML dashboard of all tracked jobs.
 """

-import re
 from datetime import datetime
 from pathlib import Path
+from collections import Counter

 from db import Database


-# Regions/locations we care about (case-insensitive matching)
-DESIRED_REGIONS = [
-    "canada", "toronto", "vancouver",
-    "germany", "berlin", "munich",
-    "emea",
-    "americas",  # includes North/South America
-    "north america",
-    "worldwide", "global", "anywhere",
-]
-
-# Locations to explicitly exclude (on-site or remote restricted to these)
-EXCLUDED_LOCATIONS = [
-    # US cities/states (we don't want US-only jobs)
-    "san francisco", "new york", "nyc", "seattle", "austin", "boston",
-    "chicago", "denver", "los angeles", "atlanta", "dallas", "houston",
-    "california", "washington", "texas", "massachusetts", "colorado",
-    "united states", "usa", "u.s.", "us-", "usa-",
+# Location grouping rules: (keywords, group_id, display_name)
+# Every matching rule adds its tag; order only decides which display name is used (first match)
+LOCATION_RULES = [
+    # Canada
+    (["canada", "toronto", "vancouver", "montreal", "ottawa", "calgary", "waterloo"], "canada", "Canada"),
+    # Germany
+    (["germany", "berlin", "munich", "frankfurt", "hamburg"], "germany", "Germany"),
    # UK
-    "london", "united kingdom", "uk", "dublin", "ireland",
-    # Australia/APAC (not EMEA)
-    "sydney", "melbourne", "australia", "singapore", "tokyo", "japan",
-    "india", "bangalore", "bengaluru", "hyderabad", "delhi",
-    "korea", "seoul", "taiwan", "taipei", "china", "beijing", "shenzhen",
-    # Other excluded
-    "israel", "tel aviv", "brazil", "sao paulo", "mexico",
-    "netherlands", "amsterdam", "france", "paris", "spain", "madrid",
-    "portugal", "lisbon", "poland", "warsaw", "italy",
-    "czech", "prague", "serbia", "belgrade", "cyprus", "limassol",
-    "austria", "vienna", "sweden", "stockholm", "denmark", "copenhagen",
-    "switzerland", "romania", "bucharest", "hungary", "greece",
-    "south africa", "indonesia", "jakarta", "malaysia",
+    (["united kingdom", " uk", "uk ", "london", "england", "manchester", "edinburgh"], "uk", "UK"),
+    # Ireland
+    (["ireland", "dublin"], "ireland", "Ireland"),
+    # Netherlands
+    (["netherlands", "amsterdam", "rotterdam"], "netherlands", "Netherlands"),
+    # France
+    (["france", "paris"], "france", "France"),
+    # Spain
+    (["spain", "madrid", "barcelona"], "spain", "Spain"),
+    # Poland
+    (["poland", "warsaw", "krakow", "wroclaw"], "poland", "Poland"),
+    # Sweden
+    (["sweden", "stockholm"], "sweden", "Sweden"),
+    # Switzerland
+    (["switzerland", "zurich", "geneva"], "switzerland", "Switzerland"),
+    # Australia
+    (["australia", "sydney", "melbourne"], "australia", "Australia"),
+    # India
+    (["india", "bangalore", "bengaluru", "hyderabad", "delhi", "mumbai", "pune"], "india", "India"),
+    # Japan
+    (["japan", "tokyo"], "japan", "Japan"),
+    # Singapore
+    (["singapore"], "singapore", "Singapore"),
+    # Israel
+    (["israel", "tel aviv"], "israel", "Israel"),
+    # Brazil
+    (["brazil", "sao paulo"], "brazil", "Brazil"),
+    # US (kept after the other countries so a more specific country supplies the display name)
+    (["united states", "usa", "u.s.", "san francisco", "new york", "nyc", "seattle",
+      "austin", "boston", "chicago", "denver", "los angeles", "atlanta", "dallas",
+      "houston", "california", "washington", "texas", "massachusetts", "colorado",
+      "portland", "miami", "phoenix", "san diego", "san jose", "palo alto",
+      "mountain view", "sunnyvale", "menlo park", "cupertino"], "us", "US"),
+    # Regions
+    (["emea"], "emea", "EMEA"),
+    (["americas", "north america", "latam"], "americas", "Americas"),
+    (["apac", "asia pacific", "asia-pacific"], "apac", "APAC"),
+    (["worldwide", "global", "anywhere", "earth"], "worldwide", "Worldwide"),
 ]


-def is_location_relevant(location: str, remote_type: str) -> bool:
+def extract_location_info(location: str, remote_type: str) -> tuple[list[str], str]:
    """
-    Strict location filter. Only keeps jobs available in Canada, Germany, EMEA, or Worldwide.
-    Filters out US-only jobs, UK jobs, APAC jobs, etc.
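+    Extract location tags and short display text from a job's location.
+    Every LOCATION_RULES entry with a keyword found in the location adds its tag id
+    ("remote" is added first when remote_type or the location text says so).
+    The display text is the first matching rule's name, else "Remote" for remote
+    jobs, else the raw location truncated to 25 characters plus "...".
+    Returns (list of tag ids, short display location); both empty if location is empty.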
    """
-    if not location:
-        return False  # No location info = probably US-based, filter out
-
-    loc_lower = location.lower()
-
-    # Check if any desired region is mentioned FIRST
-    has_desired = any(region in loc_lower for region in DESIRED_REGIONS)
-
-    # If it has a desired region, keep it (even if it also mentions excluded locations)
-    # e.g., "Remote (United States | Canada)" should be kept because of Canada
-    if has_desired:
-        return True
-
-    # If it just says "Remote" with nothing else, keep it (truly remote)
-    if loc_lower.strip() == "remote":
-        return True
-
-    # Check for excluded locations
-    has_excluded = any(excl in loc_lower for excl in EXCLUDED_LOCATIONS)
-    if has_excluded:
-        return False
-
-    # Check for patterns like "In-Office", "Hybrid", "On-site" without desired region
-    if any(x in loc_lower for x in ["in-office", "hybrid", "on-site", "onsite", "office based"]):
-        return False
-
-    # If we can't determine, filter it out (safer)
-    return False
-
-
-def extract_location_tags(location: str, remote_type: str) -> tuple[list[str], str]:
-    """
-    Extract relevant location tags and a short display location.
-    Returns (list of tag names, short location string)
-    """
-    if not location:
-        return [], ""
-
-    loc_lower = location.lower()
    tags = []
-    short_loc = ""
+    display = ""
+
+    if not location:
+        return tags, display
+
+    loc_lower = location.lower()

    # Check for remote
    is_remote = remote_type == "remote" or "remote" in loc_lower
    if is_remote:
        tags.append("remote")

-    # Check for Canada
-    if any(x in loc_lower for x in ["canada", "toronto", "vancouver"]):
-        tags.append("canada")
-        short_loc = "Canada"
+    # Check against location rules
+    for keywords, tag_id, display_name in LOCATION_RULES:
+        if any(kw in loc_lower for kw in keywords):
+            if tag_id not in tags:
+                tags.append(tag_id)
+            if not display:
+                display = display_name

-    # Check for Germany/Berlin
-    if any(x in loc_lower for x in ["germany", "berlin", "munich"]):
-        tags.append("germany")
-        short_loc = "Germany" if "germany" in loc_lower else "Berlin"
+    # Fallback display
+    if not display:
+        if is_remote:
+            display = "Remote"
+        elif location:
+            display = location[:25] + "..." if len(location) > 25 else location

-    # Check for EMEA
-    if "emea" in loc_lower:
-        tags.append("emea")
-        short_loc = "EMEA"
-
-    # Check for Americas/North America
-    if "americas" in loc_lower or "north america" in loc_lower:
-        tags.append("americas")
-        short_loc = "Americas"
-
-    # Check for Worldwide
-    if any(x in loc_lower for x in ["worldwide", "global", "anywhere"]):
-        tags.append("worldwide")
-        short_loc = "Worldwide"
-
-    # If no specific region found but it's remote
-    if not short_loc and is_remote:
-        short_loc = "Remote"
-
-    return tags, short_loc
+    return tags, display


 def generate_dashboard(output_path: str = "data/dashboard.html"):
    """Generate a static HTML dashboard."""
    db = Database()
    jobs = db.get_all_active_jobs()
-
-    # Get all monitored companies
    all_company_names = db.get_all_companies()

-    # Track total jobs per company (before location filtering)
-    total_per_company = {}
-    for company_name, job in jobs:
-        total_per_company[company_name] = total_per_company.get(company_name, 0) + 1
-
-    # Group by company, filtering out irrelevant remote locations
+    # Process all jobs and collect location data
    companies = {}
-    filtered_count = 0
+    location_counts = Counter()
+
    for company_name, job in jobs:
-        if not is_location_relevant(job.location, job.remote_type):
-            filtered_count += 1
-            continue
+        # Extract location info
+        tags, display = extract_location_info(job.location, job.remote_type)
+
+        # Count locations for filter generation
+        for tag in tags:
+            location_counts[tag] += 1
+
+        # Store processed job data
        if company_name not in companies:
            companies[company_name] = []
-        companies[company_name].append(job)

-    # Ensure all monitored companies are in the dict (even with 0 jobs)
+        companies[company_name].append({
+            "job": job,
+            "tags": tags,
+            "display": display,
+            "search_text": f"{job.title.lower()} {(job.location or '').lower()} {(job.department or '').lower()} {' '.join(tags)}"
+        })
+
+    # Ensure all companies exist (even with 0 jobs)
    for name in all_company_names:
        if name not in companies:
            companies[name] = []
-        if name not in total_per_company:
-            total_per_company[name] = 0

-    total_shown = sum(len(jobs) for jobs in companies.values())
-    total_scraped = sum(total_per_company.values())
-
-    # Sort companies by name
+    total_jobs = sum(len(j) for j in companies.values())
    sorted_companies = sorted(companies.items())

+    # Generate dynamic location filters (only show locations that exist in data)
+    # Order: Remote first, then by count descending
+    location_filters = []
+    if "remote" in location_counts:
+        location_filters.append(("remote", "Remote", location_counts["remote"]))
+
+    # Add other locations sorted by count
+    other_locations = [(tag, count) for tag, count in location_counts.items() if tag != "remote"]
+    other_locations.sort(key=lambda x: -x[1])
+
+    # Map tag_id to display name
+    tag_display = {tag_id: display for keywords, tag_id, display in LOCATION_RULES}
+    tag_display["remote"] = "Remote"
+
+    for tag_id, count in other_locations:
+        display = tag_display.get(tag_id, tag_id.title())
+        location_filters.append((tag_id, display, count))
+
+    # Generate location filter buttons HTML
+    location_buttons = ""
+    for tag_id, display, count in location_filters:
+        location_buttons += f'            <button class="filter-btn" data-filter="{tag_id}" data-category="location">{display} ({count})</button>\n'
+
+    # Per-tag colours as (background, foreground) pairs; a tag id without an entry gets no .tag-<id> rule
+    tag_colors = {
+        "remote": ("#1a4a1a", "#4ade80"),
+        "canada": ("#4a1a1a", "#f87171"),
+        "germany": ("#4a4a1a", "#facc15"),
+        "uk": ("#2a1a3a", "#a78bfa"),
+        "us": ("#3a2a1a", "#fb923c"),
+        "emea": ("#1a3a4a", "#60a5fa"),
+        "americas": ("#3a1a4a", "#c084fc"),
+        "worldwide": ("#1a4a3a", "#34d399"),
+        "apac": ("#1a2a4a", "#38bdf8"),
+        "ireland": ("#1a4a2a", "#4ade80"),
+        "netherlands": ("#3a3a1a", "#fbbf24"),
+        "france": ("#2a2a4a", "#818cf8"),
+        "spain": ("#4a2a1a", "#fb7185"),
+        "poland": ("#3a1a2a", "#f472b6"),
+        "sweden": ("#1a3a3a", "#2dd4bf"),
+        "switzerland": ("#4a1a2a", "#fb7185"),
+        "australia": ("#2a3a1a", "#a3e635"),
+        "india": ("#4a3a1a", "#fcd34d"),
+        "japan": ("#4a1a3a", "#e879f9"),
+        "singapore": ("#1a4a4a", "#22d3d1"),
+        "israel": ("#3a2a2a", "#fca5a5"),
+        "brazil": ("#2a4a1a", "#86efac"),
+    }
+
+    # Generate CSS for tags
+    tag_css = ""
+    for tag_id, (bg, fg) in tag_colors.items():
+        tag_css += f"""        .tag-{tag_id} {{
+            background: {bg};
+            color: {fg};
+        }}
+"""
+
    html = f"""<!DOCTYPE html>
 <html lang="en">
 <head>
@ -299,30 +324,7 @@ def generate_dashboard(output_path: str = "data/dashboard.html"):
            font-size: 11px;
            margin-left: 5px;
        }}
-        .tag-remote {{
-            background: #1a4a1a;
-            color: #4ade80;
-        }}
-        .tag-canada {{
-            background: #4a1a1a;
-            color: #f87171;
-        }}
-        .tag-berlin {{
-            background: #4a4a1a;
-            color: #facc15;
-        }}
-        .tag-emea {{
-            background: #1a3a4a;
-            color: #60a5fa;
-        }}
-        .tag-americas {{
-            background: #3a1a4a;
-            color: #c084fc;
-        }}
-        .tag-worldwide {{
-            background: #1a4a3a;
-            color: #34d399;
-        }}
+{tag_css}
        .hidden {{
            display: none;
        }}
@ -342,26 +344,32 @@ def generate_dashboard(output_path: str = "data/dashboard.html"):
            flex-wrap: wrap;
            gap: 10px;
        }}
-        .toc-links a {{
+        .toc-link {{
            color: var(--accent);
            text-decoration: none;
            font-size: 13px;
        }}
-        .toc-links a:hover {{
+        .toc-link:hover {{
            text-decoration: underline;
        }}
-        .toc-links .empty {{
+        .toc-link.empty {{
            color: var(--muted);
-            cursor: default;
        }}
-        .toc-links .empty:hover {{
-            text-decoration: none;
+        .toc-link.hidden {{
+            display: none;
        }}
-        .filter-buttons {{
+        .filter-section {{
            display: flex;
            flex-wrap: wrap;
            gap: 8px;
            margin-top: 10px;
+            align-items: center;
+        }}
+        .filter-label {{
+            color: var(--muted);
+            font-size: 12px;
+            margin-right: 4px;
+            min-width: 60px;
        }}
        .filter-btn {{
            background: var(--bg);
@ -383,6 +391,13 @@ def generate_dashboard(output_path: str = "data/dashboard.html"):
            border-color: var(--accent);
            color: var(--bg);
        }}
+        .clear-btn {{
+            border-color: #666;
+        }}
+        .clear-btn:hover {{
+            border-color: #f87171;
+            color: #f87171;
+        }}
    </style>
 </head>
 <body>
@ -390,47 +405,47 @@ def generate_dashboard(output_path: str = "data/dashboard.html"):
        <h1>$ job-board</h1>
        <div class="meta">
            Last updated: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")} |
-            {total_shown}/{total_scraped} jobs (location filtered) | Monitoring {len(all_company_names)} companies
+            {total_jobs} jobs | {len(all_company_names)} companies
        </div>
    </header>

    <div class="filters">
-        <input type="text" id="search" placeholder="Filter jobs... (e.g. 'senior engineer', 'remote', 'canada')" autofocus>
-        <div class="filter-buttons">
-            <button class="filter-btn" data-filter="">All</button>
-            <button class="filter-btn" data-filter="engineer">Engineering</button>
-            <button class="filter-btn" data-filter="senior engineer">Senior Eng</button>
-            <button class="filter-btn" data-filter="staff principal">Staff+</button>
-            <button class="filter-btn" data-filter="manager director">Management</button>
-            <button class="filter-btn" data-filter="product">Product</button>
-            <button class="filter-btn" data-filter="design">Design</button>
-            <button class="filter-btn" data-filter="security">Security</button>
-            <button class="filter-btn" data-filter="remote">Remote</button>
-            <button class="filter-btn" data-filter="canada">Canada</button>
-            <button class="filter-btn" data-filter="germany">Germany</button>
-            <button class="filter-btn" data-filter="emea">EMEA</button>
-            <button class="filter-btn" data-filter="americas">Americas</button>
-            <button class="filter-btn" data-filter="worldwide">Worldwide</button>
+        <input type="text" id="search" placeholder="Filter jobs... (press / to focus, Esc to clear)" autofocus>
+        <div class="filter-section">
+            <span class="filter-label">Quick:</span>
+            <button class="filter-btn" data-filter="" data-category="all">All ({total_jobs})</button>
+            <button class="filter-btn clear-btn" data-action="clear">Clear Filters</button>
+        </div>
+        <div class="filter-section">
+            <span class="filter-label">Location:</span>
+{location_buttons}        </div>
+        <div class="filter-section">
+            <span class="filter-label">Role:</span>
+            <button class="filter-btn" data-filter="engineer" data-category="role">Engineering</button>
+            <button class="filter-btn" data-filter="senior" data-category="role">Senior</button>
+            <button class="filter-btn" data-filter="staff principal" data-category="role">Staff+</button>
+            <button class="filter-btn" data-filter="backend" data-category="role">Backend</button>
+            <button class="filter-btn" data-filter="frontend" data-category="role">Frontend</button>
+            <button class="filter-btn" data-filter="infrastructure platform sre" data-category="role">Infra/Platform</button>
+            <button class="filter-btn" data-filter="security" data-category="role">Security</button>
+            <button class="filter-btn" data-filter="manager director" data-category="role">Management</button>
        </div>
        <div class="stats">
-            <span id="visible-count">{total_shown} jobs shown</span>
+            <span id="visible-count">{total_jobs} jobs shown</span>
        </div>
    </div>

    <div class="toc">
        <div class="toc-title">Jump to company:</div>
-        <div class="toc-links">
+        <div class="toc-links" id="toc-links">
 """

-    # Table of contents
+    # Table of contents with data attributes for JS updating
    for company_name, company_jobs in sorted_companies:
-        anchor = company_name.lower().replace(" ", "-")
-        filtered = len(company_jobs)
-        total = total_per_company.get(company_name, 0)
-        if filtered > 0:
-            html += f'            <a href="#{anchor}">{company_name} ({filtered}/{total})</a>\n'
-        else:
-            html += f'            <span class="empty">{company_name} (0/{total})</span>\n'
+        anchor = company_name.lower().replace(" ", "-").replace("'", "")
+        count = len(company_jobs)
+        css_class = "toc-link" if count > 0 else "toc-link empty"
+        html += f'            <a href="#{anchor}" class="{css_class}" data-company="{anchor}" data-total="{count}">{company_name} ({count})</a>\n'

    html += """        </div>
    </div>
@ -438,44 +453,34 @@ def generate_dashboard(output_path: str = "data/dashboard.html"):
    <main id="job-list">
 """

-    # Job listings (only for companies with jobs)
+    # Job listings
    for company_name, company_jobs in sorted_companies:
        if not company_jobs:
-            continue  # Skip companies with no jobs after filtering
-        anchor = company_name.lower().replace(" ", "-")
+            continue
+        anchor = company_name.lower().replace(" ", "-").replace("'", "")
+        total = len(company_jobs)
        html += f"""
-        <div class="company" id="{anchor}">
+        <div class="company" id="{anchor}" data-company="{anchor}" data-total="{total}">
            <div class="company-header">
                <span class="company-name">{company_name}</span>
-                <span class="company-count">{len(company_jobs)} positions</span>
+                <span class="company-count" data-total="{total}">{total} positions</span>
            </div>
            <div class="jobs">
 """
-        for job in sorted(company_jobs, key=lambda j: j.title):
-            location = job.location or ""
-            location_lower = location.lower()
-
-            # Extract tags and short location
-            tag_list, short_loc = extract_location_tags(location, job.remote_type)
+        for job_data in sorted(company_jobs, key=lambda j: j["job"].title):
+            job = job_data["job"]
+            tags = job_data["tags"]
+            display = job_data["display"]
+            search_text = job_data["search_text"]

            # Build tag HTML
-            tags = ""
-            if "remote" in tag_list:
-                tags += '<span class="tag tag-remote">remote</span>'
-            if "canada" in tag_list:
-                tags += '<span class="tag tag-canada">canada</span>'
-            if "germany" in tag_list:
-                tags += '<span class="tag tag-berlin">germany</span>'
-            if "emea" in tag_list:
-                tags += '<span class="tag tag-emea">emea</span>'
-            if "americas" in tag_list:
-                tags += '<span class="tag tag-americas">americas</span>'
-            if "worldwide" in tag_list:
-                tags += '<span class="tag tag-worldwide">worldwide</span>'
+            tag_html = ""
+            for tag in tags:
+                tag_html += f'<span class="tag tag-{tag}">{tag}</span>'

-            html += f"""                <div class="job" data-search="{job.title.lower()} {location_lower} {(job.department or '').lower()} {' '.join(tag_list)}">
-                    <span class="job-title"><a href="{job.url}" target="_blank">{job.title}</a>{tags}</span>
-                    <span class="job-location">{short_loc}</span>
+            html += f"""                <div class="job" data-search="{search_text}">
+                    <span class="job-title"><a href="{job.url}" target="_blank">{job.title}</a>{tag_html}</span>
+                    <span class="job-location">{display}</span>
                </div>
 """
        html += """            </div>
@ -488,67 +493,155 @@ def generate_dashboard(output_path: str = "data/dashboard.html"):
        const search = document.getElementById('search');
        const jobs = document.querySelectorAll('.job');
        const companies = document.querySelectorAll('.company');
+        const tocLinks = document.querySelectorAll('.toc-link');
        const visibleCount = document.getElementById('visible-count');
        const filterBtns = document.querySelectorAll('.filter-btn');
+        const clearBtn = document.querySelector('.clear-btn');

-        function filterJobs(query) {
-            let visible = 0;
-            const terms = query.toLowerCase().trim().split(/\\s+/).filter(t => t);
+        // Track active filters by category
+        const activeFilters = {
+            location: null,
+            role: null
+        };
+
+        function applyFilters() {
+            let totalVisible = 0;
+            const searchTerms = search.value.toLowerCase().trim().split(/\\s+/).filter(t => t);
+
+            // Build filter terms from active category filters
+            const locationTerms = activeFilters.location ? activeFilters.location.split(/\\s+/) : [];
+            const roleTerms = activeFilters.role ? activeFilters.role.split(/\\s+/) : [];
+
+            const hasFilters = searchTerms.length > 0 || locationTerms.length > 0 || roleTerms.length > 0;
+
+            // Track visible counts per company
+            const companyCounts = {};

            companies.forEach(company => {
+                const companyId = company.dataset.company;
                const companyJobs = company.querySelectorAll('.job');
                let companyVisible = 0;

                companyJobs.forEach(job => {
                    const searchText = job.dataset.search;
-                    // Match if ANY term matches (OR logic for filter buttons)
-                    const matches = terms.length === 0 || terms.some(term => searchText.includes(term));
+
+                    // Match logic: AND between categories, OR within each category
+                    let matches = true;
+
+                    // Search box (OR within terms)
+                    if (searchTerms.length > 0) {
+                        matches = matches && searchTerms.some(term => searchText.includes(term));
+                    }
+
+                    // Location filter (OR within terms)
+                    if (locationTerms.length > 0) {
+                        matches = matches && locationTerms.some(term => searchText.includes(term));
+                    }
+
+                    // Role filter (OR within terms)
+                    if (roleTerms.length > 0) {
+                        matches = matches && roleTerms.some(term => searchText.includes(term));
+                    }
+
                    job.classList.toggle('hidden', !matches);
                    if (matches) {
                        companyVisible++;
-                        visible++;
+                        totalVisible++;
                    }
                });

                company.classList.toggle('hidden', companyVisible === 0);
+                companyCounts[companyId] = companyVisible;
+
+                // Update company header count
+                const countSpan = company.querySelector('.company-count');
+                const total = parseInt(countSpan.dataset.total);
+                if (!hasFilters) {
+                    countSpan.textContent = `${total} positions`;
+                } else {
+                    countSpan.textContent = `${companyVisible}/${total} positions`;
+                }
            });

-            visibleCount.textContent = `${visible} jobs shown`;
+            // Update TOC links - always show all, grey out empty ones
+            tocLinks.forEach(link => {
+                const companyId = link.dataset.company;
+                const total = parseInt(link.dataset.total);
+                const visible = companyCounts[companyId] || 0;
+                const name = link.textContent.replace(/\\s*\\(.*\\)/, '');
+
+                if (!hasFilters) {
+                    link.textContent = `${name} (${total})`;
+                    link.classList.toggle('empty', total === 0);
+                } else {
+                    link.textContent = `${name} (${visible}/${total})`;
+                    link.classList.toggle('empty', visible === 0);
+                }
+                // Always show the link, never hide
+                link.classList.remove('hidden');
+            });
+
+            visibleCount.textContent = `${totalVisible} jobs shown`;
        }

-        search.addEventListener('input', (e) => {
-            // Clear active button when typing
+        function clearAllFilters() {
+            search.value = '';
+            activeFilters.location = null;
+            activeFilters.role = null;
            filterBtns.forEach(btn => btn.classList.remove('active'));
-            filterJobs(e.target.value);
+            applyFilters();
+        }
+
+        search.addEventListener('input', () => {
+            applyFilters();
        });

-        // Filter buttons
        filterBtns.forEach(btn => {
            btn.addEventListener('click', () => {
                const filter = btn.dataset.filter;
-                search.value = filter;
-                filterBtns.forEach(b => b.classList.remove('active'));
+                const category = btn.dataset.category;
+                const action = btn.dataset.action;
+
+                // Handle clear button
+                if (action === 'clear') {
+                    clearAllFilters();
+                    return;
+                }
+
+                // Handle "All" button
+                if (category === 'all') {
+                    clearAllFilters();
+                    return;
+                }
+
+                // Toggle filter in category
+                const categoryBtns = document.querySelectorAll(`.filter-btn[data-category="${category}"]`);
+
+                if (btn.classList.contains('active')) {
+                    // Deselect
+                    btn.classList.remove('active');
+                    activeFilters[category] = null;
+                } else {
+                    // Select (deselect others in same category)
+                    categoryBtns.forEach(b => b.classList.remove('active'));
                    btn.classList.add('active');
-                filterJobs(filter);
+                    activeFilters[category] = filter;
+                }
+
+                applyFilters();
            });
        });

-        // Keyboard shortcut: / to focus search
        document.addEventListener('keydown', (e) => {
            if (e.key === '/' && document.activeElement !== search) {
                e.preventDefault();
                search.focus();
            }
            if (e.key === 'Escape') {
-                search.value = '';
-                filterBtns.forEach(b => b.classList.remove('active'));
-                filterJobs('');
+                clearAllFilters();
                search.blur();
            }
        });
-
-        // Set "All" as active by default
-        filterBtns[0].classList.add('active');
    </script>
 </body>
 </html>
--- a/data/dashboard.html
+++ b/data/dashboard.html
--- a/db.py
+++ b/db.py
@ -247,3 +247,26 @@ class Database:
                "SELECT name FROM companies WHERE active = TRUE ORDER BY name"
            )
            return [row["name"] for row in cursor.fetchall()]
+
+    def cleanup_removed_companies(self, active_company_names: list[str]) -> list[str]:
+        """
+        Remove companies (and their jobs) that are no longer in the config.
+
+        active_company_names: names to keep; every other company row is deleted.
+        An empty list is treated as "config not loaded": nothing is deleted.
+        Returns list of removed company names.
+        """
+        if not active_company_names:
+            # Empty "NOT IN ()" is invalid SQL or matches every row, depending on the engine
+            return []
+        with self._get_conn() as conn:
+            # Only "?" marks are interpolated; the names themselves are bound parameters
+            placeholders = ",".join("?" * len(active_company_names))
+            query = f"SELECT id, name FROM companies WHERE name NOT IN ({placeholders})"
+            removed = []
+            for row in conn.execute(query, active_company_names).fetchall():
+                # Delete jobs first (foreign key), then the company itself
+                conn.execute("DELETE FROM jobs WHERE company_id = ?", (row["id"],))
+                conn.execute("DELETE FROM companies WHERE id = ?", (row["id"],))
+                removed.append(row["name"])
+            return removed
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@ -20,6 +20,7 @@ services:
      - /home/gruberb/.msmtprc:/root/.msmtprc:ro
    environment:
      - TZ=America/Toronto
+      - PYTHONUNBUFFERED=1
    command: ["python", "main.py", "--schedule"]
    restart: unless-stopped
    logging:
--- a/main.py
+++ b/main.py
@ -145,6 +145,13 @@ def run_scraper(config: dict):
    notifier = Notifier(config.get("notifications", {}))

    companies = config.get("companies", [])
+
+    # Cleanup companies no longer in config
+    active_names = [c["name"] for c in companies]
+    removed = db.cleanup_removed_companies(active_names)
+    if removed:
+        print(f"\n🧹 Removed {len(removed)} companies no longer in config: {', '.join(removed)}")
+
    print(f"\nMonitoring {len(companies)} companies...")

    reports = []
--- a/notify.py
+++ b/notify.py
@ -28,24 +28,23 @@ class Notifier:

        if not reports_with_changes:
            print("\n✓ No changes detected across all companies.")
-            return
-
-        # Console output (always)
+        else:
+            # Console output for changes
            self._notify_console(reports_with_changes)

-        # Email (if configured)
+        # Email (if configured) - only sends when there are changes
        email_config = self.config.get("email")
-        if email_config:
+        if email_config and reports_with_changes:
            self._notify_email(reports_with_changes, email_config)

-        # msmtp (if configured - uses system msmtp config)
+        # msmtp (if configured - sends daily summary always)
        msmtp_config = self.config.get("msmtp")
        if msmtp_config:
-            self._notify_msmtp(reports_with_changes, msmtp_config)
+            self._notify_msmtp_daily_summary(reports, msmtp_config)

-        # Slack (if configured)
+        # Slack (if configured) - only sends when there are changes
        slack_config = self.config.get("slack")
-        if slack_config:
+        if slack_config and reports_with_changes:
            self._notify_slack(reports_with_changes, slack_config)

    def _notify_console(self, reports: list[ChangeReport]):
@ -180,6 +179,95 @@ Content-Type: text/plain; charset=UTF-8
        except Exception as e:
            print(f"✗ Failed to send msmtp notification: {e}")

+    def _notify_msmtp_daily_summary(self, reports: list[ChangeReport], config: dict):
+        """
+        Send daily summary email via system msmtp (always sends).
+
+        Unlike the change-only notifications, this is sent even when no report
+        contains changes; the subject and body then say "No changes".
+
+        Args:
+            reports: One report per company. Reads ``total_active``,
+                ``new_jobs``, ``removed_jobs`` and ``company_name`` from each
+                report, and ``title``, ``location`` and ``remote_type`` from
+                each job.
+            config: The ``msmtp`` notification config; optional keys
+                ``to_addr`` and ``from_addr`` override the default addresses.
+
+        Failures (msmtp missing, non-zero exit, any other exception) are
+        printed and not raised.
+        """
+        import subprocess
+        from datetime import datetime
+
+        to_addr = config.get("to_addr", "me@bastiangruber.ca")
+        from_addr = config.get("from_addr", "admin@novanexus.ca")
+
+        # Calculate totals
+        total_companies = sum(1 for r in reports if r.total_active > 0)
+        total_jobs = sum(r.total_active for r in reports)
+        total_new = sum(len(r.new_jobs) for r in reports)
+        total_removed = sum(len(r.removed_jobs) for r in reports)
+
+        # Build subject line, e.g. "Job Board: +3, -1 | 120 jobs"
+        if total_new or total_removed:
+            changes = []
+            if total_new:
+                changes.append(f"+{total_new}")
+            if total_removed:
+                changes.append(f"-{total_removed}")
+            subject = f"Job Board: {', '.join(changes)} | {total_jobs} jobs"
+        else:
+            subject = f"Job Board: No changes | {total_jobs} jobs"
+
+        # Build plain text body
+        body_lines = [
+            "JOB BOARD DAILY SUMMARY",
+            datetime.now().strftime("%Y-%m-%d %H:%M"),
+            "",
+            "OVERVIEW",
+            f"  Companies with jobs: {total_companies}",
+            f"  Total jobs tracked:  {total_jobs}",
+            "",
+        ]
+
+        # Changes section
+        reports_with_changes = [r for r in reports if r.new_jobs or r.removed_jobs]
+
+        if reports_with_changes:
+            body_lines.append(f"CHANGES: +{total_new} new, -{total_removed} removed")
+            body_lines.append("-" * 40)
+
+            for report in reports_with_changes:
+                # Iterating an empty list is a no-op, so no emptiness guard is needed.
+                for job in report.new_jobs:
+                    location_str = f" [{job.location}]" if job.location else ""
+                    remote_str = " (Remote)" if job.remote_type == "remote" else ""
+                    body_lines.append(f"  + {report.company_name}: {job.title}{location_str}{remote_str}")
+
+                for job in report.removed_jobs:
+                    body_lines.append(f"  - {report.company_name}: {job.title}")
+        else:
+            body_lines.append("CHANGES: No changes detected")
+
+        body_lines.append("")
+        body_lines.append("---")
+        body_lines.append("https://jobs.novanexus.ca")
+
+        body = "\n".join(body_lines)
+
+        # Build email message: headers, blank separator line, body, trailing newline
+        email_msg = "\n".join([
+            f"Subject: {subject}",
+            f"From: {from_addr}",
+            f"To: {to_addr}",
+            "Content-Type: text/plain; charset=UTF-8",
+            "",
+            body,
+            "",
+        ])
+
+        try:
+            result = subprocess.run(
+                ["msmtp", to_addr],
+                input=email_msg,
+                capture_output=True,
+                text=True,
+                # The message declares charset=UTF-8, so encode it as UTF-8
+                # explicitly instead of relying on the process locale; otherwise
+                # non-ASCII job titles can fail to encode or be mis-declared.
+                encoding="utf-8",
+            )
+            if result.returncode == 0:
+                print("✓ Daily summary email sent")
+            else:
+                print(f"✗ msmtp failed: {result.stderr}")
+        except FileNotFoundError:
+            print("✗ msmtp not found - install with: apt install msmtp")
+        except Exception as e:
+            print(f"✗ Failed to send daily summary: {e}")
+
    def _notify_slack(self, reports: list[ChangeReport], config: dict):
        """Send Slack notification."""
        import httpx