Update filters and add cleanup
This commit is contained in:
parent
fd4254df3e
commit
c655f2e078
7 changed files with 4694 additions and 1242 deletions
|
|
@ -72,10 +72,6 @@ companies:
|
||||||
platform: greenhouse
|
platform: greenhouse
|
||||||
board_token: automatticcareers
|
board_token: automatticcareers
|
||||||
|
|
||||||
- name: Canonical
|
|
||||||
platform: greenhouse
|
|
||||||
board_token: canonical
|
|
||||||
|
|
||||||
- name: ClickHouse
|
- name: ClickHouse
|
||||||
platform: greenhouse
|
platform: greenhouse
|
||||||
board_token: clickhouse
|
board_token: clickhouse
|
||||||
|
|
|
||||||
551
dashboard.py
551
dashboard.py
|
|
@ -3,168 +3,193 @@
|
||||||
Generate a simple text-based HTML dashboard of all tracked jobs.
|
Generate a simple text-based HTML dashboard of all tracked jobs.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import re
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from collections import Counter
|
||||||
|
|
||||||
from db import Database
|
from db import Database
|
||||||
|
|
||||||
|
|
||||||
# Regions/locations we care about (case-insensitive matching)
|
# Location grouping rules: keyword -> (group_id, display_name)
|
||||||
DESIRED_REGIONS = [
|
# Order matters - first match wins
|
||||||
"canada", "toronto", "vancouver",
|
LOCATION_RULES = [
|
||||||
"germany", "berlin", "munich",
|
# Canada
|
||||||
"emea",
|
(["canada", "toronto", "vancouver", "montreal", "ottawa", "calgary", "waterloo"], "canada", "Canada"),
|
||||||
"americas", # includes North/South America
|
# Germany
|
||||||
"north america",
|
(["germany", "berlin", "munich", "frankfurt", "hamburg"], "germany", "Germany"),
|
||||||
"worldwide", "global", "anywhere",
|
|
||||||
]
|
|
||||||
|
|
||||||
# Locations to explicitly exclude (on-site or remote restricted to these)
|
|
||||||
EXCLUDED_LOCATIONS = [
|
|
||||||
# US cities/states (we don't want US-only jobs)
|
|
||||||
"san francisco", "new york", "nyc", "seattle", "austin", "boston",
|
|
||||||
"chicago", "denver", "los angeles", "atlanta", "dallas", "houston",
|
|
||||||
"california", "washington", "texas", "massachusetts", "colorado",
|
|
||||||
"united states", "usa", "u.s.", "us-", "usa-",
|
|
||||||
# UK
|
# UK
|
||||||
"london", "united kingdom", "uk", "dublin", "ireland",
|
(["united kingdom", " uk", "uk ", "london", "england", "manchester", "edinburgh"], "uk", "UK"),
|
||||||
# Australia/APAC (not EMEA)
|
# Ireland
|
||||||
"sydney", "melbourne", "australia", "singapore", "tokyo", "japan",
|
(["ireland", "dublin"], "ireland", "Ireland"),
|
||||||
"india", "bangalore", "bengaluru", "hyderabad", "delhi",
|
# Netherlands
|
||||||
"korea", "seoul", "taiwan", "taipei", "china", "beijing", "shenzhen",
|
(["netherlands", "amsterdam", "rotterdam"], "netherlands", "Netherlands"),
|
||||||
# Other excluded
|
# France
|
||||||
"israel", "tel aviv", "brazil", "sao paulo", "mexico",
|
(["france", "paris"], "france", "France"),
|
||||||
"netherlands", "amsterdam", "france", "paris", "spain", "madrid",
|
# Spain
|
||||||
"portugal", "lisbon", "poland", "warsaw", "italy",
|
(["spain", "madrid", "barcelona"], "spain", "Spain"),
|
||||||
"czech", "prague", "serbia", "belgrade", "cyprus", "limassol",
|
# Poland
|
||||||
"austria", "vienna", "sweden", "stockholm", "denmark", "copenhagen",
|
(["poland", "warsaw", "krakow", "wroclaw"], "poland", "Poland"),
|
||||||
"switzerland", "romania", "bucharest", "hungary", "greece",
|
# Sweden
|
||||||
"south africa", "indonesia", "jakarta", "malaysia",
|
(["sweden", "stockholm"], "sweden", "Sweden"),
|
||||||
|
# Switzerland
|
||||||
|
(["switzerland", "zurich", "geneva"], "switzerland", "Switzerland"),
|
||||||
|
# Australia
|
||||||
|
(["australia", "sydney", "melbourne"], "australia", "Australia"),
|
||||||
|
# India
|
||||||
|
(["india", "bangalore", "bengaluru", "hyderabad", "delhi", "mumbai", "pune"], "india", "India"),
|
||||||
|
# Japan
|
||||||
|
(["japan", "tokyo"], "japan", "Japan"),
|
||||||
|
# Singapore
|
||||||
|
(["singapore"], "singapore", "Singapore"),
|
||||||
|
# Israel
|
||||||
|
(["israel", "tel aviv"], "israel", "Israel"),
|
||||||
|
# Brazil
|
||||||
|
(["brazil", "sao paulo"], "brazil", "Brazil"),
|
||||||
|
# US (must be after other countries to avoid false matches)
|
||||||
|
(["united states", "usa", "u.s.", "san francisco", "new york", "nyc", "seattle",
|
||||||
|
"austin", "boston", "chicago", "denver", "los angeles", "atlanta", "dallas",
|
||||||
|
"houston", "california", "washington", "texas", "massachusetts", "colorado",
|
||||||
|
"portland", "miami", "phoenix", "san diego", "san jose", "palo alto",
|
||||||
|
"mountain view", "sunnyvale", "menlo park", "cupertino"], "us", "US"),
|
||||||
|
# Regions
|
||||||
|
(["emea"], "emea", "EMEA"),
|
||||||
|
(["americas", "north america", "latam"], "americas", "Americas"),
|
||||||
|
(["apac", "asia pacific", "asia-pacific"], "apac", "APAC"),
|
||||||
|
(["worldwide", "global", "anywhere", "earth"], "worldwide", "Worldwide"),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
def is_location_relevant(location: str, remote_type: str) -> bool:
|
def extract_location_info(location: str, remote_type: str) -> tuple[list[str], str]:
|
||||||
"""
|
"""
|
||||||
Strict location filter. Only keeps jobs available in Canada, Germany, EMEA, or Worldwide.
|
Extract location tags and short display text from a job's location.
|
||||||
Filters out US-only jobs, UK jobs, APAC jobs, etc.
|
Returns (list of tag ids, short display location)
|
||||||
"""
|
"""
|
||||||
if not location:
|
|
||||||
return False # No location info = probably US-based, filter out
|
|
||||||
|
|
||||||
loc_lower = location.lower()
|
|
||||||
|
|
||||||
# Check if any desired region is mentioned FIRST
|
|
||||||
has_desired = any(region in loc_lower for region in DESIRED_REGIONS)
|
|
||||||
|
|
||||||
# If it has a desired region, keep it (even if it also mentions excluded locations)
|
|
||||||
# e.g., "Remote (United States | Canada)" should be kept because of Canada
|
|
||||||
if has_desired:
|
|
||||||
return True
|
|
||||||
|
|
||||||
# If it just says "Remote" with nothing else, keep it (truly remote)
|
|
||||||
if loc_lower.strip() == "remote":
|
|
||||||
return True
|
|
||||||
|
|
||||||
# Check for excluded locations
|
|
||||||
has_excluded = any(excl in loc_lower for excl in EXCLUDED_LOCATIONS)
|
|
||||||
if has_excluded:
|
|
||||||
return False
|
|
||||||
|
|
||||||
# Check for patterns like "In-Office", "Hybrid", "On-site" without desired region
|
|
||||||
if any(x in loc_lower for x in ["in-office", "hybrid", "on-site", "onsite", "office based"]):
|
|
||||||
return False
|
|
||||||
|
|
||||||
# If we can't determine, filter it out (safer)
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
|
||||||
def extract_location_tags(location: str, remote_type: str) -> tuple[list[str], str]:
|
|
||||||
"""
|
|
||||||
Extract relevant location tags and a short display location.
|
|
||||||
Returns (list of tag names, short location string)
|
|
||||||
"""
|
|
||||||
if not location:
|
|
||||||
return [], ""
|
|
||||||
|
|
||||||
loc_lower = location.lower()
|
|
||||||
tags = []
|
tags = []
|
||||||
short_loc = ""
|
display = ""
|
||||||
|
|
||||||
|
if not location:
|
||||||
|
return tags, display
|
||||||
|
|
||||||
|
loc_lower = location.lower()
|
||||||
|
|
||||||
# Check for remote
|
# Check for remote
|
||||||
is_remote = remote_type == "remote" or "remote" in loc_lower
|
is_remote = remote_type == "remote" or "remote" in loc_lower
|
||||||
if is_remote:
|
if is_remote:
|
||||||
tags.append("remote")
|
tags.append("remote")
|
||||||
|
|
||||||
# Check for Canada
|
# Check against location rules
|
||||||
if any(x in loc_lower for x in ["canada", "toronto", "vancouver"]):
|
for keywords, tag_id, display_name in LOCATION_RULES:
|
||||||
tags.append("canada")
|
if any(kw in loc_lower for kw in keywords):
|
||||||
short_loc = "Canada"
|
if tag_id not in tags:
|
||||||
|
tags.append(tag_id)
|
||||||
|
if not display:
|
||||||
|
display = display_name
|
||||||
|
|
||||||
# Check for Germany/Berlin
|
# Fallback display
|
||||||
if any(x in loc_lower for x in ["germany", "berlin", "munich"]):
|
if not display:
|
||||||
tags.append("germany")
|
if is_remote:
|
||||||
short_loc = "Germany" if "germany" in loc_lower else "Berlin"
|
display = "Remote"
|
||||||
|
elif location:
|
||||||
|
display = location[:25] + "..." if len(location) > 25 else location
|
||||||
|
|
||||||
# Check for EMEA
|
return tags, display
|
||||||
if "emea" in loc_lower:
|
|
||||||
tags.append("emea")
|
|
||||||
short_loc = "EMEA"
|
|
||||||
|
|
||||||
# Check for Americas/North America
|
|
||||||
if "americas" in loc_lower or "north america" in loc_lower:
|
|
||||||
tags.append("americas")
|
|
||||||
short_loc = "Americas"
|
|
||||||
|
|
||||||
# Check for Worldwide
|
|
||||||
if any(x in loc_lower for x in ["worldwide", "global", "anywhere"]):
|
|
||||||
tags.append("worldwide")
|
|
||||||
short_loc = "Worldwide"
|
|
||||||
|
|
||||||
# If no specific region found but it's remote
|
|
||||||
if not short_loc and is_remote:
|
|
||||||
short_loc = "Remote"
|
|
||||||
|
|
||||||
return tags, short_loc
|
|
||||||
|
|
||||||
|
|
||||||
def generate_dashboard(output_path: str = "data/dashboard.html"):
|
def generate_dashboard(output_path: str = "data/dashboard.html"):
|
||||||
"""Generate a static HTML dashboard."""
|
"""Generate a static HTML dashboard."""
|
||||||
db = Database()
|
db = Database()
|
||||||
jobs = db.get_all_active_jobs()
|
jobs = db.get_all_active_jobs()
|
||||||
|
|
||||||
# Get all monitored companies
|
|
||||||
all_company_names = db.get_all_companies()
|
all_company_names = db.get_all_companies()
|
||||||
|
|
||||||
# Track total jobs per company (before location filtering)
|
# Process all jobs and collect location data
|
||||||
total_per_company = {}
|
|
||||||
for company_name, job in jobs:
|
|
||||||
total_per_company[company_name] = total_per_company.get(company_name, 0) + 1
|
|
||||||
|
|
||||||
# Group by company, filtering out irrelevant remote locations
|
|
||||||
companies = {}
|
companies = {}
|
||||||
filtered_count = 0
|
location_counts = Counter()
|
||||||
|
|
||||||
for company_name, job in jobs:
|
for company_name, job in jobs:
|
||||||
if not is_location_relevant(job.location, job.remote_type):
|
# Extract location info
|
||||||
filtered_count += 1
|
tags, display = extract_location_info(job.location, job.remote_type)
|
||||||
continue
|
|
||||||
|
# Count locations for filter generation
|
||||||
|
for tag in tags:
|
||||||
|
location_counts[tag] += 1
|
||||||
|
|
||||||
|
# Store processed job data
|
||||||
if company_name not in companies:
|
if company_name not in companies:
|
||||||
companies[company_name] = []
|
companies[company_name] = []
|
||||||
companies[company_name].append(job)
|
|
||||||
|
|
||||||
# Ensure all monitored companies are in the dict (even with 0 jobs)
|
companies[company_name].append({
|
||||||
|
"job": job,
|
||||||
|
"tags": tags,
|
||||||
|
"display": display,
|
||||||
|
"search_text": f"{job.title.lower()} {(job.location or '').lower()} {(job.department or '').lower()} {' '.join(tags)}"
|
||||||
|
})
|
||||||
|
|
||||||
|
# Ensure all companies exist (even with 0 jobs)
|
||||||
for name in all_company_names:
|
for name in all_company_names:
|
||||||
if name not in companies:
|
if name not in companies:
|
||||||
companies[name] = []
|
companies[name] = []
|
||||||
if name not in total_per_company:
|
|
||||||
total_per_company[name] = 0
|
|
||||||
|
|
||||||
total_shown = sum(len(jobs) for jobs in companies.values())
|
total_jobs = sum(len(j) for j in companies.values())
|
||||||
total_scraped = sum(total_per_company.values())
|
|
||||||
|
|
||||||
# Sort companies by name
|
|
||||||
sorted_companies = sorted(companies.items())
|
sorted_companies = sorted(companies.items())
|
||||||
|
|
||||||
|
# Generate dynamic location filters (only show locations that exist in data)
|
||||||
|
# Order: Remote first, then by count descending
|
||||||
|
location_filters = []
|
||||||
|
if "remote" in location_counts:
|
||||||
|
location_filters.append(("remote", "Remote", location_counts["remote"]))
|
||||||
|
|
||||||
|
# Add other locations sorted by count
|
||||||
|
other_locations = [(tag, count) for tag, count in location_counts.items() if tag != "remote"]
|
||||||
|
other_locations.sort(key=lambda x: -x[1])
|
||||||
|
|
||||||
|
# Map tag_id to display name
|
||||||
|
tag_display = {tag_id: display for keywords, tag_id, display in LOCATION_RULES}
|
||||||
|
tag_display["remote"] = "Remote"
|
||||||
|
|
||||||
|
for tag_id, count in other_locations:
|
||||||
|
display = tag_display.get(tag_id, tag_id.title())
|
||||||
|
location_filters.append((tag_id, display, count))
|
||||||
|
|
||||||
|
# Generate location filter buttons HTML
|
||||||
|
location_buttons = ""
|
||||||
|
for tag_id, display, count in location_filters:
|
||||||
|
location_buttons += f' <button class="filter-btn" data-filter="{tag_id}" data-category="location">{display} ({count})</button>\n'
|
||||||
|
|
||||||
|
# Generate tag colors dynamically
|
||||||
|
tag_colors = {
|
||||||
|
"remote": ("#1a4a1a", "#4ade80"),
|
||||||
|
"canada": ("#4a1a1a", "#f87171"),
|
||||||
|
"germany": ("#4a4a1a", "#facc15"),
|
||||||
|
"uk": ("#2a1a3a", "#a78bfa"),
|
||||||
|
"us": ("#3a2a1a", "#fb923c"),
|
||||||
|
"emea": ("#1a3a4a", "#60a5fa"),
|
||||||
|
"americas": ("#3a1a4a", "#c084fc"),
|
||||||
|
"worldwide": ("#1a4a3a", "#34d399"),
|
||||||
|
"apac": ("#1a2a4a", "#38bdf8"),
|
||||||
|
"ireland": ("#1a4a2a", "#4ade80"),
|
||||||
|
"netherlands": ("#3a3a1a", "#fbbf24"),
|
||||||
|
"france": ("#2a2a4a", "#818cf8"),
|
||||||
|
"spain": ("#4a2a1a", "#fb7185"),
|
||||||
|
"poland": ("#3a1a2a", "#f472b6"),
|
||||||
|
"sweden": ("#1a3a3a", "#2dd4bf"),
|
||||||
|
"switzerland": ("#4a1a2a", "#fb7185"),
|
||||||
|
"australia": ("#2a3a1a", "#a3e635"),
|
||||||
|
"india": ("#4a3a1a", "#fcd34d"),
|
||||||
|
"japan": ("#4a1a3a", "#e879f9"),
|
||||||
|
"singapore": ("#1a4a4a", "#22d3d1"),
|
||||||
|
"israel": ("#3a2a2a", "#fca5a5"),
|
||||||
|
"brazil": ("#2a4a1a", "#86efac"),
|
||||||
|
}
|
||||||
|
|
||||||
|
# Generate CSS for tags
|
||||||
|
tag_css = ""
|
||||||
|
for tag_id, (bg, fg) in tag_colors.items():
|
||||||
|
tag_css += f""" .tag-{tag_id} {{
|
||||||
|
background: {bg};
|
||||||
|
color: {fg};
|
||||||
|
}}
|
||||||
|
"""
|
||||||
|
|
||||||
html = f"""<!DOCTYPE html>
|
html = f"""<!DOCTYPE html>
|
||||||
<html lang="en">
|
<html lang="en">
|
||||||
<head>
|
<head>
|
||||||
|
|
@ -299,30 +324,7 @@ def generate_dashboard(output_path: str = "data/dashboard.html"):
|
||||||
font-size: 11px;
|
font-size: 11px;
|
||||||
margin-left: 5px;
|
margin-left: 5px;
|
||||||
}}
|
}}
|
||||||
.tag-remote {{
|
{tag_css}
|
||||||
background: #1a4a1a;
|
|
||||||
color: #4ade80;
|
|
||||||
}}
|
|
||||||
.tag-canada {{
|
|
||||||
background: #4a1a1a;
|
|
||||||
color: #f87171;
|
|
||||||
}}
|
|
||||||
.tag-berlin {{
|
|
||||||
background: #4a4a1a;
|
|
||||||
color: #facc15;
|
|
||||||
}}
|
|
||||||
.tag-emea {{
|
|
||||||
background: #1a3a4a;
|
|
||||||
color: #60a5fa;
|
|
||||||
}}
|
|
||||||
.tag-americas {{
|
|
||||||
background: #3a1a4a;
|
|
||||||
color: #c084fc;
|
|
||||||
}}
|
|
||||||
.tag-worldwide {{
|
|
||||||
background: #1a4a3a;
|
|
||||||
color: #34d399;
|
|
||||||
}}
|
|
||||||
.hidden {{
|
.hidden {{
|
||||||
display: none;
|
display: none;
|
||||||
}}
|
}}
|
||||||
|
|
@ -342,26 +344,32 @@ def generate_dashboard(output_path: str = "data/dashboard.html"):
|
||||||
flex-wrap: wrap;
|
flex-wrap: wrap;
|
||||||
gap: 10px;
|
gap: 10px;
|
||||||
}}
|
}}
|
||||||
.toc-links a {{
|
.toc-link {{
|
||||||
color: var(--accent);
|
color: var(--accent);
|
||||||
text-decoration: none;
|
text-decoration: none;
|
||||||
font-size: 13px;
|
font-size: 13px;
|
||||||
}}
|
}}
|
||||||
.toc-links a:hover {{
|
.toc-link:hover {{
|
||||||
text-decoration: underline;
|
text-decoration: underline;
|
||||||
}}
|
}}
|
||||||
.toc-links .empty {{
|
.toc-link.empty {{
|
||||||
color: var(--muted);
|
color: var(--muted);
|
||||||
cursor: default;
|
|
||||||
}}
|
}}
|
||||||
.toc-links .empty:hover {{
|
.toc-link.hidden {{
|
||||||
text-decoration: none;
|
display: none;
|
||||||
}}
|
}}
|
||||||
.filter-buttons {{
|
.filter-section {{
|
||||||
display: flex;
|
display: flex;
|
||||||
flex-wrap: wrap;
|
flex-wrap: wrap;
|
||||||
gap: 8px;
|
gap: 8px;
|
||||||
margin-top: 10px;
|
margin-top: 10px;
|
||||||
|
align-items: center;
|
||||||
|
}}
|
||||||
|
.filter-label {{
|
||||||
|
color: var(--muted);
|
||||||
|
font-size: 12px;
|
||||||
|
margin-right: 4px;
|
||||||
|
min-width: 60px;
|
||||||
}}
|
}}
|
||||||
.filter-btn {{
|
.filter-btn {{
|
||||||
background: var(--bg);
|
background: var(--bg);
|
||||||
|
|
@ -383,6 +391,13 @@ def generate_dashboard(output_path: str = "data/dashboard.html"):
|
||||||
border-color: var(--accent);
|
border-color: var(--accent);
|
||||||
color: var(--bg);
|
color: var(--bg);
|
||||||
}}
|
}}
|
||||||
|
.clear-btn {{
|
||||||
|
border-color: #666;
|
||||||
|
}}
|
||||||
|
.clear-btn:hover {{
|
||||||
|
border-color: #f87171;
|
||||||
|
color: #f87171;
|
||||||
|
}}
|
||||||
</style>
|
</style>
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
|
|
@ -390,47 +405,47 @@ def generate_dashboard(output_path: str = "data/dashboard.html"):
|
||||||
<h1>$ job-board</h1>
|
<h1>$ job-board</h1>
|
||||||
<div class="meta">
|
<div class="meta">
|
||||||
Last updated: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")} |
|
Last updated: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")} |
|
||||||
{total_shown}/{total_scraped} jobs (location filtered) | Monitoring {len(all_company_names)} companies
|
{total_jobs} jobs | {len(all_company_names)} companies
|
||||||
</div>
|
</div>
|
||||||
</header>
|
</header>
|
||||||
|
|
||||||
<div class="filters">
|
<div class="filters">
|
||||||
<input type="text" id="search" placeholder="Filter jobs... (e.g. 'senior engineer', 'remote', 'canada')" autofocus>
|
<input type="text" id="search" placeholder="Filter jobs... (press / to focus, Esc to clear)" autofocus>
|
||||||
<div class="filter-buttons">
|
<div class="filter-section">
|
||||||
<button class="filter-btn" data-filter="">All</button>
|
<span class="filter-label">Quick:</span>
|
||||||
<button class="filter-btn" data-filter="engineer">Engineering</button>
|
<button class="filter-btn" data-filter="" data-category="all">All ({total_jobs})</button>
|
||||||
<button class="filter-btn" data-filter="senior engineer">Senior Eng</button>
|
<button class="filter-btn clear-btn" data-action="clear">Clear Filters</button>
|
||||||
<button class="filter-btn" data-filter="staff principal">Staff+</button>
|
</div>
|
||||||
<button class="filter-btn" data-filter="manager director">Management</button>
|
<div class="filter-section">
|
||||||
<button class="filter-btn" data-filter="product">Product</button>
|
<span class="filter-label">Location:</span>
|
||||||
<button class="filter-btn" data-filter="design">Design</button>
|
{location_buttons} </div>
|
||||||
<button class="filter-btn" data-filter="security">Security</button>
|
<div class="filter-section">
|
||||||
<button class="filter-btn" data-filter="remote">Remote</button>
|
<span class="filter-label">Role:</span>
|
||||||
<button class="filter-btn" data-filter="canada">Canada</button>
|
<button class="filter-btn" data-filter="engineer" data-category="role">Engineering</button>
|
||||||
<button class="filter-btn" data-filter="germany">Germany</button>
|
<button class="filter-btn" data-filter="senior" data-category="role">Senior</button>
|
||||||
<button class="filter-btn" data-filter="emea">EMEA</button>
|
<button class="filter-btn" data-filter="staff principal" data-category="role">Staff+</button>
|
||||||
<button class="filter-btn" data-filter="americas">Americas</button>
|
<button class="filter-btn" data-filter="backend" data-category="role">Backend</button>
|
||||||
<button class="filter-btn" data-filter="worldwide">Worldwide</button>
|
<button class="filter-btn" data-filter="frontend" data-category="role">Frontend</button>
|
||||||
|
<button class="filter-btn" data-filter="infrastructure platform sre" data-category="role">Infra/Platform</button>
|
||||||
|
<button class="filter-btn" data-filter="security" data-category="role">Security</button>
|
||||||
|
<button class="filter-btn" data-filter="manager director" data-category="role">Management</button>
|
||||||
</div>
|
</div>
|
||||||
<div class="stats">
|
<div class="stats">
|
||||||
<span id="visible-count">{total_shown} jobs shown</span>
|
<span id="visible-count">{total_jobs} jobs shown</span>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="toc">
|
<div class="toc">
|
||||||
<div class="toc-title">Jump to company:</div>
|
<div class="toc-title">Jump to company:</div>
|
||||||
<div class="toc-links">
|
<div class="toc-links" id="toc-links">
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Table of contents
|
# Table of contents with data attributes for JS updating
|
||||||
for company_name, company_jobs in sorted_companies:
|
for company_name, company_jobs in sorted_companies:
|
||||||
anchor = company_name.lower().replace(" ", "-")
|
anchor = company_name.lower().replace(" ", "-").replace("'", "")
|
||||||
filtered = len(company_jobs)
|
count = len(company_jobs)
|
||||||
total = total_per_company.get(company_name, 0)
|
css_class = "toc-link" if count > 0 else "toc-link empty"
|
||||||
if filtered > 0:
|
html += f' <a href="#{anchor}" class="{css_class}" data-company="{anchor}" data-total="{count}">{company_name} ({count})</a>\n'
|
||||||
html += f' <a href="#{anchor}">{company_name} ({filtered}/{total})</a>\n'
|
|
||||||
else:
|
|
||||||
html += f' <span class="empty">{company_name} (0/{total})</span>\n'
|
|
||||||
|
|
||||||
html += """ </div>
|
html += """ </div>
|
||||||
</div>
|
</div>
|
||||||
|
|
@ -438,44 +453,34 @@ def generate_dashboard(output_path: str = "data/dashboard.html"):
|
||||||
<main id="job-list">
|
<main id="job-list">
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Job listings (only for companies with jobs)
|
# Job listings
|
||||||
for company_name, company_jobs in sorted_companies:
|
for company_name, company_jobs in sorted_companies:
|
||||||
if not company_jobs:
|
if not company_jobs:
|
||||||
continue # Skip companies with no jobs after filtering
|
continue
|
||||||
anchor = company_name.lower().replace(" ", "-")
|
anchor = company_name.lower().replace(" ", "-").replace("'", "")
|
||||||
|
total = len(company_jobs)
|
||||||
html += f"""
|
html += f"""
|
||||||
<div class="company" id="{anchor}">
|
<div class="company" id="{anchor}" data-company="{anchor}" data-total="{total}">
|
||||||
<div class="company-header">
|
<div class="company-header">
|
||||||
<span class="company-name">{company_name}</span>
|
<span class="company-name">{company_name}</span>
|
||||||
<span class="company-count">{len(company_jobs)} positions</span>
|
<span class="company-count" data-total="{total}">{total} positions</span>
|
||||||
</div>
|
</div>
|
||||||
<div class="jobs">
|
<div class="jobs">
|
||||||
"""
|
"""
|
||||||
for job in sorted(company_jobs, key=lambda j: j.title):
|
for job_data in sorted(company_jobs, key=lambda j: j["job"].title):
|
||||||
location = job.location or ""
|
job = job_data["job"]
|
||||||
location_lower = location.lower()
|
tags = job_data["tags"]
|
||||||
|
display = job_data["display"]
|
||||||
# Extract tags and short location
|
search_text = job_data["search_text"]
|
||||||
tag_list, short_loc = extract_location_tags(location, job.remote_type)
|
|
||||||
|
|
||||||
# Build tag HTML
|
# Build tag HTML
|
||||||
tags = ""
|
tag_html = ""
|
||||||
if "remote" in tag_list:
|
for tag in tags:
|
||||||
tags += '<span class="tag tag-remote">remote</span>'
|
tag_html += f'<span class="tag tag-{tag}">{tag}</span>'
|
||||||
if "canada" in tag_list:
|
|
||||||
tags += '<span class="tag tag-canada">canada</span>'
|
|
||||||
if "germany" in tag_list:
|
|
||||||
tags += '<span class="tag tag-berlin">germany</span>'
|
|
||||||
if "emea" in tag_list:
|
|
||||||
tags += '<span class="tag tag-emea">emea</span>'
|
|
||||||
if "americas" in tag_list:
|
|
||||||
tags += '<span class="tag tag-americas">americas</span>'
|
|
||||||
if "worldwide" in tag_list:
|
|
||||||
tags += '<span class="tag tag-worldwide">worldwide</span>'
|
|
||||||
|
|
||||||
html += f""" <div class="job" data-search="{job.title.lower()} {location_lower} {(job.department or '').lower()} {' '.join(tag_list)}">
|
html += f""" <div class="job" data-search="{search_text}">
|
||||||
<span class="job-title"><a href="{job.url}" target="_blank">{job.title}</a>{tags}</span>
|
<span class="job-title"><a href="{job.url}" target="_blank">{job.title}</a>{tag_html}</span>
|
||||||
<span class="job-location">{short_loc}</span>
|
<span class="job-location">{display}</span>
|
||||||
</div>
|
</div>
|
||||||
"""
|
"""
|
||||||
html += """ </div>
|
html += """ </div>
|
||||||
|
|
@ -488,67 +493,155 @@ def generate_dashboard(output_path: str = "data/dashboard.html"):
|
||||||
const search = document.getElementById('search');
|
const search = document.getElementById('search');
|
||||||
const jobs = document.querySelectorAll('.job');
|
const jobs = document.querySelectorAll('.job');
|
||||||
const companies = document.querySelectorAll('.company');
|
const companies = document.querySelectorAll('.company');
|
||||||
|
const tocLinks = document.querySelectorAll('.toc-link');
|
||||||
const visibleCount = document.getElementById('visible-count');
|
const visibleCount = document.getElementById('visible-count');
|
||||||
const filterBtns = document.querySelectorAll('.filter-btn');
|
const filterBtns = document.querySelectorAll('.filter-btn');
|
||||||
|
const clearBtn = document.querySelector('.clear-btn');
|
||||||
|
|
||||||
function filterJobs(query) {
|
// Track active filters by category
|
||||||
let visible = 0;
|
const activeFilters = {
|
||||||
const terms = query.toLowerCase().trim().split(/\\s+/).filter(t => t);
|
location: null,
|
||||||
|
role: null
|
||||||
|
};
|
||||||
|
|
||||||
|
function applyFilters() {
|
||||||
|
let totalVisible = 0;
|
||||||
|
const searchTerms = search.value.toLowerCase().trim().split(/\\s+/).filter(t => t);
|
||||||
|
|
||||||
|
// Build filter terms from active category filters
|
||||||
|
const locationTerms = activeFilters.location ? activeFilters.location.split(/\\s+/) : [];
|
||||||
|
const roleTerms = activeFilters.role ? activeFilters.role.split(/\\s+/) : [];
|
||||||
|
|
||||||
|
const hasFilters = searchTerms.length > 0 || locationTerms.length > 0 || roleTerms.length > 0;
|
||||||
|
|
||||||
|
// Track visible counts per company
|
||||||
|
const companyCounts = {};
|
||||||
|
|
||||||
companies.forEach(company => {
|
companies.forEach(company => {
|
||||||
|
const companyId = company.dataset.company;
|
||||||
const companyJobs = company.querySelectorAll('.job');
|
const companyJobs = company.querySelectorAll('.job');
|
||||||
let companyVisible = 0;
|
let companyVisible = 0;
|
||||||
|
|
||||||
companyJobs.forEach(job => {
|
companyJobs.forEach(job => {
|
||||||
const searchText = job.dataset.search;
|
const searchText = job.dataset.search;
|
||||||
// Match if ANY term matches (OR logic for filter buttons)
|
|
||||||
const matches = terms.length === 0 || terms.some(term => searchText.includes(term));
|
// Match logic: AND between categories, OR within each category
|
||||||
|
let matches = true;
|
||||||
|
|
||||||
|
// Search box (OR within terms)
|
||||||
|
if (searchTerms.length > 0) {
|
||||||
|
matches = matches && searchTerms.some(term => searchText.includes(term));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Location filter (OR within terms)
|
||||||
|
if (locationTerms.length > 0) {
|
||||||
|
matches = matches && locationTerms.some(term => searchText.includes(term));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Role filter (OR within terms)
|
||||||
|
if (roleTerms.length > 0) {
|
||||||
|
matches = matches && roleTerms.some(term => searchText.includes(term));
|
||||||
|
}
|
||||||
|
|
||||||
job.classList.toggle('hidden', !matches);
|
job.classList.toggle('hidden', !matches);
|
||||||
if (matches) {
|
if (matches) {
|
||||||
companyVisible++;
|
companyVisible++;
|
||||||
visible++;
|
totalVisible++;
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
company.classList.toggle('hidden', companyVisible === 0);
|
company.classList.toggle('hidden', companyVisible === 0);
|
||||||
|
companyCounts[companyId] = companyVisible;
|
||||||
|
|
||||||
|
// Update company header count
|
||||||
|
const countSpan = company.querySelector('.company-count');
|
||||||
|
const total = parseInt(countSpan.dataset.total);
|
||||||
|
if (!hasFilters) {
|
||||||
|
countSpan.textContent = `${total} positions`;
|
||||||
|
} else {
|
||||||
|
countSpan.textContent = `${companyVisible}/${total} positions`;
|
||||||
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
visibleCount.textContent = `${visible} jobs shown`;
|
// Update TOC links - always show all, grey out empty ones
|
||||||
|
tocLinks.forEach(link => {
|
||||||
|
const companyId = link.dataset.company;
|
||||||
|
const total = parseInt(link.dataset.total);
|
||||||
|
const visible = companyCounts[companyId] || 0;
|
||||||
|
const name = link.textContent.replace(/\\s*\\(.*\\)/, '');
|
||||||
|
|
||||||
|
if (!hasFilters) {
|
||||||
|
link.textContent = `${name} (${total})`;
|
||||||
|
link.classList.toggle('empty', total === 0);
|
||||||
|
} else {
|
||||||
|
link.textContent = `${name} (${visible}/${total})`;
|
||||||
|
link.classList.toggle('empty', visible === 0);
|
||||||
|
}
|
||||||
|
// Always show the link, never hide
|
||||||
|
link.classList.remove('hidden');
|
||||||
|
});
|
||||||
|
|
||||||
|
visibleCount.textContent = `${totalVisible} jobs shown`;
|
||||||
}
|
}
|
||||||
|
|
||||||
search.addEventListener('input', (e) => {
|
function clearAllFilters() {
|
||||||
// Clear active button when typing
|
search.value = '';
|
||||||
|
activeFilters.location = null;
|
||||||
|
activeFilters.role = null;
|
||||||
filterBtns.forEach(btn => btn.classList.remove('active'));
|
filterBtns.forEach(btn => btn.classList.remove('active'));
|
||||||
filterJobs(e.target.value);
|
applyFilters();
|
||||||
|
}
|
||||||
|
|
||||||
|
search.addEventListener('input', () => {
|
||||||
|
applyFilters();
|
||||||
});
|
});
|
||||||
|
|
||||||
// Filter buttons
|
|
||||||
filterBtns.forEach(btn => {
|
filterBtns.forEach(btn => {
|
||||||
btn.addEventListener('click', () => {
|
btn.addEventListener('click', () => {
|
||||||
const filter = btn.dataset.filter;
|
const filter = btn.dataset.filter;
|
||||||
search.value = filter;
|
const category = btn.dataset.category;
|
||||||
filterBtns.forEach(b => b.classList.remove('active'));
|
const action = btn.dataset.action;
|
||||||
btn.classList.add('active');
|
|
||||||
filterJobs(filter);
|
// Handle clear button
|
||||||
|
if (action === 'clear') {
|
||||||
|
clearAllFilters();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handle "All" button
|
||||||
|
if (category === 'all') {
|
||||||
|
clearAllFilters();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Toggle filter in category
|
||||||
|
const categoryBtns = document.querySelectorAll(`.filter-btn[data-category="${category}"]`);
|
||||||
|
|
||||||
|
if (btn.classList.contains('active')) {
|
||||||
|
// Deselect
|
||||||
|
btn.classList.remove('active');
|
||||||
|
activeFilters[category] = null;
|
||||||
|
} else {
|
||||||
|
// Select (deselect others in same category)
|
||||||
|
categoryBtns.forEach(b => b.classList.remove('active'));
|
||||||
|
btn.classList.add('active');
|
||||||
|
activeFilters[category] = filter;
|
||||||
|
}
|
||||||
|
|
||||||
|
applyFilters();
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
// Keyboard shortcut: / to focus search
|
|
||||||
document.addEventListener('keydown', (e) => {
|
document.addEventListener('keydown', (e) => {
|
||||||
if (e.key === '/' && document.activeElement !== search) {
|
if (e.key === '/' && document.activeElement !== search) {
|
||||||
e.preventDefault();
|
e.preventDefault();
|
||||||
search.focus();
|
search.focus();
|
||||||
}
|
}
|
||||||
if (e.key === 'Escape') {
|
if (e.key === 'Escape') {
|
||||||
search.value = '';
|
clearAllFilters();
|
||||||
filterBtns.forEach(b => b.classList.remove('active'));
|
|
||||||
filterJobs('');
|
|
||||||
search.blur();
|
search.blur();
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
// Set "All" as active by default
|
|
||||||
filterBtns[0].classList.add('active');
|
|
||||||
</script>
|
</script>
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
|
|
|
||||||
5242
data/dashboard.html
5242
data/dashboard.html
File diff suppressed because it is too large
Load diff
23
db.py
23
db.py
|
|
@ -247,3 +247,26 @@ class Database:
|
||||||
"SELECT name FROM companies WHERE active = TRUE ORDER BY name"
|
"SELECT name FROM companies WHERE active = TRUE ORDER BY name"
|
||||||
)
|
)
|
||||||
return [row["name"] for row in cursor.fetchall()]
|
return [row["name"] for row in cursor.fetchall()]
|
||||||
|
|
||||||
|
def cleanup_removed_companies(self, active_company_names: list[str]) -> list[str]:
    """
    Remove companies (and their jobs) that are no longer in the config.

    Args:
        active_company_names: Names of every company currently present in
            the config. An empty list is treated as "no usable config" and
            nothing is deleted, so a missing or empty ``companies`` section
            cannot wipe the whole database.

    Returns list of removed company names.
    """
    # Guard: with zero names the old "NOT IN ()" filter matched every row
    # on SQLite, deleting all companies and all of their jobs.
    if not active_company_names:
        return []

    keep = set(active_company_names)

    with self._get_conn() as conn:
        # Fetch everything and diff in Python. This keeps the SQL static
        # and avoids binding one parameter per configured company, which
        # can exceed the engine's bound-variable limit on large configs.
        rows = conn.execute("SELECT id, name FROM companies").fetchall()

        # Rows with a NULL name are left alone, mirroring SQL "NOT IN",
        # which never matches NULL.
        stale = [
            (row["id"], row["name"])
            for row in rows
            if row["name"] is not None and row["name"] not in keep
        ]

        for company_id, _name in stale:
            # Delete jobs first (foreign key)
            conn.execute("DELETE FROM jobs WHERE company_id = ?", (company_id,))
            # Delete company
            conn.execute("DELETE FROM companies WHERE id = ?", (company_id,))

        return [name for _company_id, name in stale]
|
||||||
|
|
|
||||||
|
|
@ -20,6 +20,7 @@ services:
|
||||||
- /home/gruberb/.msmtprc:/root/.msmtprc:ro
|
- /home/gruberb/.msmtprc:/root/.msmtprc:ro
|
||||||
environment:
|
environment:
|
||||||
- TZ=America/Toronto
|
- TZ=America/Toronto
|
||||||
|
- PYTHONUNBUFFERED=1
|
||||||
command: ["python", "main.py", "--schedule"]
|
command: ["python", "main.py", "--schedule"]
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
logging:
|
logging:
|
||||||
|
|
|
||||||
7
main.py
7
main.py
|
|
@ -145,6 +145,13 @@ def run_scraper(config: dict):
|
||||||
notifier = Notifier(config.get("notifications", {}))
|
notifier = Notifier(config.get("notifications", {}))
|
||||||
|
|
||||||
companies = config.get("companies", [])
|
companies = config.get("companies", [])
|
||||||
|
|
||||||
|
# Cleanup companies no longer in config
|
||||||
|
active_names = [c["name"] for c in companies]
|
||||||
|
removed = db.cleanup_removed_companies(active_names)
|
||||||
|
if removed:
|
||||||
|
print(f"\n🧹 Removed {len(removed)} companies no longer in config: {', '.join(removed)}")
|
||||||
|
|
||||||
print(f"\nMonitoring {len(companies)} companies...")
|
print(f"\nMonitoring {len(companies)} companies...")
|
||||||
|
|
||||||
reports = []
|
reports = []
|
||||||
|
|
|
||||||
108
notify.py
108
notify.py
|
|
@ -28,24 +28,23 @@ class Notifier:
|
||||||
|
|
||||||
if not reports_with_changes:
|
if not reports_with_changes:
|
||||||
print("\n✓ No changes detected across all companies.")
|
print("\n✓ No changes detected across all companies.")
|
||||||
return
|
else:
|
||||||
|
# Console output for changes
|
||||||
|
self._notify_console(reports_with_changes)
|
||||||
|
|
||||||
# Console output (always)
|
# Email (if configured) - only sends when there are changes
|
||||||
self._notify_console(reports_with_changes)
|
|
||||||
|
|
||||||
# Email (if configured)
|
|
||||||
email_config = self.config.get("email")
|
email_config = self.config.get("email")
|
||||||
if email_config:
|
if email_config and reports_with_changes:
|
||||||
self._notify_email(reports_with_changes, email_config)
|
self._notify_email(reports_with_changes, email_config)
|
||||||
|
|
||||||
# msmtp (if configured - uses system msmtp config)
|
# msmtp (if configured - sends daily summary always)
|
||||||
msmtp_config = self.config.get("msmtp")
|
msmtp_config = self.config.get("msmtp")
|
||||||
if msmtp_config:
|
if msmtp_config:
|
||||||
self._notify_msmtp(reports_with_changes, msmtp_config)
|
self._notify_msmtp_daily_summary(reports, msmtp_config)
|
||||||
|
|
||||||
# Slack (if configured)
|
# Slack (if configured) - only sends when there are changes
|
||||||
slack_config = self.config.get("slack")
|
slack_config = self.config.get("slack")
|
||||||
if slack_config:
|
if slack_config and reports_with_changes:
|
||||||
self._notify_slack(reports_with_changes, slack_config)
|
self._notify_slack(reports_with_changes, slack_config)
|
||||||
|
|
||||||
def _notify_console(self, reports: list[ChangeReport]):
|
def _notify_console(self, reports: list[ChangeReport]):
|
||||||
|
|
@ -180,6 +179,95 @@ Content-Type: text/plain; charset=UTF-8
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"✗ Failed to send msmtp notification: {e}")
|
print(f"✗ Failed to send msmtp notification: {e}")
|
||||||
|
|
||||||
|
def _notify_msmtp_daily_summary(self, reports: list[ChangeReport], config: dict):
    """Send daily summary email via system msmtp (always sends).

    Args:
        reports: Reports for the current run. Each one is read for
            ``company_name``, ``total_active``, ``new_jobs`` and
            ``removed_jobs``; jobs are read for ``title``, ``location``
            and ``remote_type``.
        config: msmtp settings. Recognised keys are ``to_addr``,
            ``from_addr`` and ``timeout`` (seconds to wait for msmtp,
            default 60).

    Failures (msmtp missing, non-zero exit, timeout, invalid header value)
    are reported on stdout and never raised.
    """
    import subprocess
    from datetime import datetime
    from email.message import EmailMessage

    to_addr = config.get("to_addr", "me@bastiangruber.ca")
    from_addr = config.get("from_addr", "admin@novanexus.ca")
    # Without an upper bound a stalled SMTP session would block the caller
    # for as long as msmtp keeps waiting.
    timeout = config.get("timeout", 60)

    # Calculate totals
    total_companies = sum(1 for r in reports if r.total_active > 0)
    total_jobs = sum(r.total_active for r in reports)
    total_new = sum(len(r.new_jobs) for r in reports)
    total_removed = sum(len(r.removed_jobs) for r in reports)

    # Build subject line
    changes = []
    if total_new:
        changes.append(f"+{total_new}")
    if total_removed:
        changes.append(f"-{total_removed}")
    change_summary = ", ".join(changes) if changes else "No changes"
    subject = f"Job Board: {change_summary} | {total_jobs} jobs"

    # Build plain text body
    body_lines = [
        "JOB BOARD DAILY SUMMARY",
        f"{datetime.now().strftime('%Y-%m-%d %H:%M')}",
        "",
        "OVERVIEW",
        f"  Companies with jobs: {total_companies}",
        f"  Total jobs tracked:  {total_jobs}",
        "",
    ]

    # Changes section
    reports_with_changes = [r for r in reports if r.new_jobs or r.removed_jobs]

    if reports_with_changes:
        body_lines.append(f"CHANGES: +{total_new} new, -{total_removed} removed")
        body_lines.append("-" * 40)

        for report in reports_with_changes:
            for job in report.new_jobs:
                location_str = f" [{job.location}]" if job.location else ""
                remote_str = " (Remote)" if job.remote_type == "remote" else ""
                body_lines.append(f"  + {report.company_name}: {job.title}{location_str}{remote_str}")

            for job in report.removed_jobs:
                body_lines.append(f"  - {report.company_name}: {job.title}")
    else:
        body_lines.append("CHANGES: No changes detected")

    body_lines.append("")
    body_lines.append("---")
    body_lines.append("https://jobs.novanexus.ca")

    body = "\n".join(body_lines)

    try:
        # EmailMessage adds MIME-Version and a transfer encoding suited to
        # the body (job titles may be non-ASCII) and rejects header values
        # containing line breaks.
        msg = EmailMessage()
        msg["Subject"] = subject
        msg["From"] = from_addr
        msg["To"] = to_addr
        msg.set_content(body)

        result = subprocess.run(
            # "--" ends option parsing so the recipient is never read as
            # an msmtp option.
            ["msmtp", "--", to_addr],
            # Bytes avoid depending on the process locale for encoding.
            input=msg.as_bytes(),
            capture_output=True,
            timeout=timeout,
        )
        if result.returncode == 0:
            print("✓ Daily summary email sent")
        else:
            stderr_text = result.stderr.decode("utf-8", errors="replace")
            print(f"✗ msmtp failed: {stderr_text}")
    except FileNotFoundError:
        print("✗ msmtp not found - install with: apt install msmtp")
    except subprocess.TimeoutExpired:
        print(f"✗ msmtp timed out after {timeout}s")
    except Exception as e:
        print(f"✗ Failed to send daily summary: {e}")
|
||||||
|
|
||||||
def _notify_slack(self, reports: list[ChangeReport], config: dict):
|
def _notify_slack(self, reports: list[ChangeReport], config: dict):
|
||||||
"""Send Slack notification."""
|
"""Send Slack notification."""
|
||||||
import httpx
|
import httpx
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue