"""Extract room numbers and time slots from an image via OCR."""
import re
from datetime import datetime
from io import BytesIO

import pytz
from PIL import Image
from pytesseract import image_to_string

from lib.config import load_config
from lib.room import Room

__all__ = ["get_room_data"]

config = load_config()

RE_STRING = re.compile(
    # https://regex101.com/r/ELsqrO/1
    r"(L-[0-9]{4}): "  # room number (group 1)
    # time-slot (group 2,3,4 - group 5,6,7)
    # The hour has to be an alternation: a character class like "[1-12]"
    # only matches the single characters "1" and "2", not the range 1..12.
    # Likewise "[0,3]" would accept a literal comma, so minutes are matched
    # as two digits (00-59) instead.
    r"(1[0-2]|0?[1-9]):([0-5][0-9])(am|pm) - "
    r"(1[0-2]|0?[1-9]):([0-5][0-9])(am|pm), "
    # weekday (group 8)
    r"(Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday), "
    # month (group 9)
    r"(January|February|March|April|May|June|July|August|September|October"
    r"|November|December) "
    r"([1-9][0-9]|[1-9]), ([0-9]{4})",  # day & year (group 10 & 11)
    flags=re.M
)


class NotAMatchException(Exception):
    """Kept so existing imports keep working; get_room_data no longer raises it."""


class NoMatchException(Exception):
    """Raised by get_room_data when the OCR text contains no matching entry."""


def add_zero_padding(value: str) -> str:
    """Adds zero-padding to a single digit value.

    Values whose integer value is below 10 are returned as "0" followed by
    that integer (so an existing leading zero is not doubled); anything else
    is returned unchanged.

    Raises:
        ValueError: if ``value`` cannot be parsed as an integer.
    """
    number = int(value)
    if number < 10:
        return f"0{number}"  # int() already dropped any leading zero
    return value


def libcal_to_datetime(year: str, month: str, day: str,
                       hour: str, minute: str, am_pm: str) -> datetime:
    """Takes date information as displayed by LibCal and turns it into a
    datetime object.

    All values should be given in an unmodified string format: ``month`` is
    the full month name, ``hour`` is on a 12-hour clock and ``am_pm`` is the
    matching "am"/"pm" marker.

    Returns:
        A timezone-aware datetime in the ``config.time_zone`` zone.

    Raises:
        ValueError: if the combined values do not form a valid date/time.
    """
    date_string = (
        f"{year}-{month}-{add_zero_padding(day)}-"
        f"{add_zero_padding(hour)}-{add_zero_padding(minute)}-{am_pm}"
    )
    date = datetime.strptime(date_string, "%Y-%B-%d-%I-%M-%p")
    tz = pytz.timezone(config.time_zone)
    return tz.localize(date)  # adds timezone info to object


def correct_commas(string: str) -> str:
    """Ensures all commas have a space after them in the given string.

    A lookahead is used so the character after the comma is not consumed;
    this way consecutive commas (e.g. "a,,b") are each corrected.
    """
    return re.sub(r",(?=[^ ])", ", ", string)


def correct_newlines(string: str) -> str:
    """Replaces all newlines with a space in the given string."""
    return string.replace("\n", " ")


def get_room_data(img: bytes) -> list[Room]:
    """Gets the room data from a given image.

    The image is run through OCR, the resulting text is normalised (newlines
    become spaces, commas get a trailing space) and every entry matching
    ``RE_STRING`` is converted into a ``Room``.

    Args:
        img: Raw bytes of an image file.

    Returns:
        One ``Room`` per matched entry, in the order they appear in the text.

    Raises:
        NoMatchException: if no entry in the OCR text matches ``RE_STRING``.
    """
    # Close the decoded image as soon as OCR is done.
    with Image.open(BytesIO(img)) as image:
        img_string = image_to_string(image)
    # Newlines must be flattened first so that a comma at a line end ends up
    # followed by exactly one space.
    img_string = correct_commas(correct_newlines(img_string))

    rooms: list[Room] = []
    for match in RE_STRING.finditer(img_string):
        (room_number,
         start_hour, start_minute, start_am_pm,
         end_hour, end_minute, end_am_pm,
         _weekday, month, day, year) = match.groups()
        start_time = libcal_to_datetime(
            year, month, day, start_hour, start_minute, start_am_pm)
        end_time = libcal_to_datetime(
            year, month, day, end_hour, end_minute, end_am_pm)
        rooms.append(Room(room_number, start_time, end_time))

    if not rooms:
        raise NoMatchException("no room entries found in the image text")
    return rooms