dotfiles/pkgs/play-nijmegen-calendar/main.py

274 lines
13 KiB
Python

#!/usr/bin/env python
import locale
from datetime import datetime, timedelta
from bs4 import BeautifulSoup
from icalendar import Calendar, Event
import pytz
import requests
import sys
import math
# The scraped date strings use Dutch weekday and month names, so strptime's
# %A / %B need a Dutch LC_TIME. Try the UTF-8 variant first, then the plain one;
# if neither is installed, warn and carry on with the current locale.
for _dutch_locale in ('nl_NL.UTF-8', 'nl_NL'):
    try:
        locale.setlocale(locale.LC_TIME, _dutch_locale)
    except locale.Error:
        continue
    break
else:
    # The loop finished without a successful setlocale() call.
    print("Warning: Could not set locale to nl_NL.UTF-8 or nl_NL. Using default.", file=sys.stderr)
# Download the Play Nijmegen shop page. Any network failure or non-2xx status
# is fatal: report it on stderr and exit with status 1.
url = "https://hipsy.nl/shop/play-nijmegen"
try:
    response = requests.get(url, timeout=15)
    # Turn HTTP error statuses into RequestException subclasses.
    response.raise_for_status()
    html_content = response.text
except requests.exceptions.RequestException as fetch_error:
    print(f"Error fetching URL {url}: {fetch_error}", file=sys.stderr)
    sys.exit(1)

# Build the parse tree that the event extraction below walks.
soup = BeautifulSoup(html_content, 'html.parser')
# Calendar container plus its header properties (product id, iCalendar version,
# and the non-standard display name / default timezone hints).
cal = Calendar()
for _prop_name, _prop_value in (
    ('prodid', '-//Play Nijmegen Events//hipsy.nl//'),
    ('version', '2.0'),
    ('X-WR-CALNAME', 'Play Nijmegen Events'),
    ('X-WR-TIMEZONE', 'Europe/Amsterdam'),
):
    cal.add(_prop_name, _prop_value)

# Timezone used to localize the naive datetimes parsed from the page.
amsterdam_tz = pytz.timezone('Europe/Amsterdam')

# One <div> per event card, selected by the card's class string.
event_divs = soup.find_all(
    'div',
    class_='bg-white py-4 md:py-0 w-full rounded-lg md:rounded-bl-3xl overflow-hidden flex shadow-xs cursor-pointer relative',
)

# Running count of VEVENT components added to `cal`.
total_events_added = 0
# --- Event extraction -------------------------------------------------------
# strptime formats for the date strings shown on the page (names are parsed
# according to the LC_TIME locale configured at the top of the script).
_DATE_FORMAT = '%A %d %B %Y'
_DATETIME_FORMAT = '%A %d %B %Y %H:%M'
_DATETIME_OM_FORMAT = '%A %d %B %Y om %H:%M'
# Duration assumed when the page only gives a start time.
_ASSUMED_DURATION = timedelta(hours=2)


def _parse_event_dates(date_str, title):
    """Parse a date string from an event card into naive start/end datetimes.

    Recognized shapes:
      * "<date> van HH:MM tot HH:MM"          (single day, start and end time)
      * "<date> om HH:MM tot <date> om HH:MM" (multi-day, with times)
      * "<date> tot <date>"                   (multi-day, 09:00-17:00 assumed)
      * "<date> van HH:MM", "<date> vanaf HH:MM", "<date> om HH:MM"
                                              (start only, 2 hours assumed)
      * "<date>"                              (09:00 + 2 hours assumed)

    Args:
        date_str: The stripped date text of the event card.
        title: Event title, used only in diagnostic messages.

    Returns:
        A ``(start, end)`` tuple of naive datetimes, or ``None`` when the
        string could not be parsed and the failure was already reported on
        stderr (the caller should skip the event).

    Raises:
        ValueError: If a start-only or "van ... tot" string does not match its
            expected format; the caller reports these.
    """
    if ' tot ' in date_str:
        parts = date_str.split(' tot ')
        start_str = parts[0].strip()
        end_str = parts[1].strip()

        if ' van ' in start_str:
            # Single day with both times; the end time shares the start's date.
            date_part, start_time_str = start_str.split(' van ')
            start_date = datetime.strptime(f"{date_part} {start_time_str}", _DATETIME_FORMAT)
            end_date = datetime.strptime(f"{date_part} {end_str}", _DATETIME_FORMAT)
            if end_date < start_date:
                # The end time is earlier than the start time, so the event
                # runs past midnight; without this DTEND would precede DTSTART.
                end_date += timedelta(days=1)
            return start_date, end_date

        # Multi-day range: first try both sides with an explicit time.
        try:
            start_format = _DATETIME_OM_FORMAT if ' om ' in start_str else _DATETIME_FORMAT
            end_format = _DATETIME_OM_FORMAT if ' om ' in end_str else _DATETIME_FORMAT
            return (
                datetime.strptime(start_str, start_format),
                datetime.strptime(end_str, end_format),
            )
        except ValueError:
            pass  # fall through to the date-only attempt

        try:
            start_date = datetime.strptime(start_str, _DATE_FORMAT)
            end_date = datetime.strptime(end_str, _DATE_FORMAT)
        except ValueError as e_date:
            print(f"Could not parse multi-day format for event: {title}", file=sys.stderr)
            print(f"Date string: {date_str}", file=sys.stderr)
            print(f"Error: {e_date}", file=sys.stderr)
            return None
        # Only dates were given: fall back to a 09:00-17:00 window.
        start_date = start_date.replace(hour=9, minute=0)
        end_date = end_date.replace(hour=17, minute=0)
        print(f"Warning: Parsed multi-day event '{title}' without specific times, assuming 09:00-17:00.", file=sys.stderr)
        return start_date, end_date

    if ' van ' in date_str:
        # "Zondag 18 mei 2025 van 14:15": start time only.
        date_part, start_time_str = date_str.split(' van ')
        start_date = datetime.strptime(f"{date_part} {start_time_str}", _DATETIME_FORMAT)
        print(f"Warning: Event '{title}' only had start time, assuming 2-hour duration.", file=sys.stderr)
        return start_date, start_date + _ASSUMED_DURATION

    if ' vanaf ' in date_str:
        # "Dinsdag 1 januari 2025 vanaf 19:00"
        start_date = datetime.strptime(date_str, '%A %d %B %Y vanaf %H:%M')
        print(f"Warning: Event '{title}' used 'vanaf', assuming 2-hour duration.", file=sys.stderr)
        return start_date, start_date + _ASSUMED_DURATION

    if ' om ' in date_str:
        # "Woensdag 1 januari 2025 om 10:00"
        parts = date_str.split(' om ')
        start_date = datetime.strptime(f"{parts[0]} {parts[1]}", _DATETIME_FORMAT)
        print(f"Warning: Event '{title}' used 'om', assuming 2-hour duration.", file=sys.stderr)
        return start_date, start_date + _ASSUMED_DURATION

    # No known keyword: last resort is a bare date.
    try:
        start_date = datetime.strptime(date_str, _DATE_FORMAT)
    except ValueError:
        print(f"Unrecognized date format for event: {title}", file=sys.stderr)
        print(f"Date string: {date_str}", file=sys.stderr)
        return None
    start_date = start_date.replace(hour=9, minute=0)
    print(f"Warning: Unrecognized date format for event '{title}', parsed as date only, assuming 09:00 + 2 hours.", file=sys.stderr)
    return start_date, start_date + _ASSUMED_DURATION


def _build_event(summary, description, start, end, uid, event_url, tags, stamp):
    """Create a VEVENT from naive Amsterdam-local start/end datetimes.

    Args:
        summary: Event summary (title) text.
        description: Full description text.
        start: Naive start datetime, interpreted as Europe/Amsterdam.
        end: Naive end datetime, interpreted as Europe/Amsterdam.
        uid: Unique identifier for the event.
        event_url: Absolute URL of the event page.
        tags: List of category strings; omitted from the event when empty.
        stamp: Timezone-aware UTC datetime used as DTSTAMP.

    Returns:
        The populated ``Event`` component (not yet added to a calendar).
    """
    event = Event()
    event.add('summary', summary)
    event.add('description', description)
    event.add('dtstart', amsterdam_tz.localize(start))
    event.add('dtend', amsterdam_tz.localize(end))
    event.add('dtstamp', stamp)
    event.add('uid', uid)
    event.add('url', event_url)
    if tags:
        event.add('categories', tags)
    return event


# DTSTAMP has to be a UTC value. Taking the current time directly in UTC also
# avoids mislabelling the host's local clock as Amsterdam time when the script
# runs on a machine in another timezone.
_generated_at = datetime.now(pytz.utc)
# Host part of the shop URL, used as the domain half of every UID.
_uid_domain = url.split('/')[2]

for event_div in event_divs:
    # Title link: supplies both the event name and its URL.
    title_elem = event_div.find('a', class_='text-xl')
    if not title_elem:
        print("Skipping div, title element not found.", file=sys.stderr)
        continue
    title = title_elem.text.strip()

    description_elem = event_div.find('p', class_='text-sm text-gray-800 py-2')
    description = description_elem.text.strip() if description_elem else "No description available"

    date_elem = event_div.find('div', class_='text-green text-sm md:font-bold')
    if not date_elem:
        print(f"Skipping event '{title}', date element not found.", file=sys.stderr)
        continue
    date_str = date_elem.text.strip()

    event_url = title_elem.get('href')
    if not event_url:
        print(f"Skipping event '{title}', URL not found in title element.", file=sys.stderr)
        continue
    if not event_url.startswith('http'):
        # Relative link: make it absolute against the site root.
        event_url = "https://hipsy.nl" + event_url

    # Category badges, if any.
    tags = [
        tag_elem.text.strip()
        for tag_elem in event_div.find_all('span', class_='bg-yellow-light rounded-sm md:rounded-full text-xs md:font-bold text-green py-0.5 px-1.5 md:px-2.5 md:py-1 whitespace-nowrap')
    ]

    try:
        parsed = _parse_event_dates(date_str, title)
        if parsed is None:
            continue  # already reported by the parser
        start_date, end_date = parsed

        # Last path segment of the event URL. Strip a trailing slash first so
        # that ".../some-event/" does not produce an empty (and therefore
        # non-unique) slug.
        uid_slug = event_url.rstrip('/').rsplit('/', 1)[-1]

        # A "cursus" whose range spans a whole number of weeks and whose end
        # time-of-day is later than its start time-of-day is treated as a
        # weekly series and expanded into one event per week.
        total_days_diff = (end_date.date() - start_date.date()).days
        is_weekly_cursus = (
            'cursus' in title.lower()
            and total_days_diff > 0
            and total_days_diff % 7 == 0
            and end_date.time() > start_date.time()
        )

        if is_weekly_cursus:
            print(f"Detected recurring cursus: {title} from {start_date.date()} to {end_date.date()}", file=sys.stderr)
            num_weeks = (total_days_diff // 7) + 1
            # Length of one session: the end time-of-day projected onto the
            # first day, minus the start. Positive because the end time is
            # strictly later than the start time (checked above).
            session_duration = end_date.replace(year=start_date.year, month=start_date.month, day=start_date.day) - start_date
            print(f" Generating {num_weeks} recurring events with duration {session_duration}", file=sys.stderr)
            for i in range(num_weeks):
                session_start = start_date + timedelta(weeks=i)
                cal.add_component(_build_event(
                    f"{title} (Week {i+1}/{num_weeks})",
                    f"{description}\n\n(Part {i+1} of {num_weeks})\n\nMore info: {event_url}",
                    session_start,
                    session_start + session_duration,
                    f"{uid_slug}-week{i+1}@{_uid_domain}",
                    event_url,
                    tags,
                    _generated_at,
                ))
                total_events_added += 1
            # The series replaces the single multi-week event.
            continue

        # Ordinary event: emit it as a single VEVENT.
        cal.add_component(_build_event(
            title,
            f"{description}\n\nMore info: {event_url}",
            start_date,
            end_date,
            f"{uid_slug}@{_uid_domain}",
            event_url,
            tags,
            _generated_at,
        ))
        total_events_added += 1
    except ValueError as e:
        # A date string that matched a keyword but not its strptime format.
        print(f"Could not parse date for event: {title}", file=sys.stderr)
        print(f"Error: {e}", file=sys.stderr)
        print(f"Date string: {date_str}", file=sys.stderr)
    except Exception as ex:
        # Per-event boundary: one bad card must not abort the whole export.
        print(f"An unexpected error occurred processing event: {title}", file=sys.stderr)
        print(f"Error: {ex}", file=sys.stderr)
        print(f"Date string: {date_str}", file=sys.stderr)
# Serialize the assembled calendar to iCalendar bytes. A failure here leaves
# nothing to write, so report it on stderr and exit with status 1.
try:
    ical_content = cal.to_ical()
except Exception as serialization_error:
    print(f"Error generating iCal content: {serialization_error}", file=sys.stderr)
    sys.exit(1)
def main(file_name='play_nijmegen_events.ics'):
    """Write the generated calendar to ``file_name`` and report the result.

    Reads the module-level ``ical_content`` (serialized calendar bytes) and
    ``total_events_added`` (number of events generated).

    Args:
        file_name: Path of the .ics file to create or overwrite.

    On an I/O error the problem is printed to stderr and the process exits
    with status 1.
    """
    try:
        with open(file_name, 'wb') as ics_file:
            ics_file.write(ical_content)
        # Report the counter kept while generating events rather than
        # inspecting the calendar's subcomponents.
        print(f"iCal file '{file_name}' has been created with {total_events_added} events.")
    except OSError as write_error:
        print(f"Error writing iCal file '{file_name}': {write_error}", file=sys.stderr)
        sys.exit(1)
if __name__ == '__main__':
    # At most one optional positional argument: the output file name.
    if len(sys.argv) > 2:
        print("Usage: python script_name.py [output_filename.ics]", file=sys.stderr)
        sys.exit(1)
    output_file = sys.argv[1] if len(sys.argv) == 2 else 'play_nijmegen_events.ics'
    main(output_file)