From 24067847b487192227e5346936ddc34d08fa109d Mon Sep 17 00:00:00 2001 From: Jonas Linter <{email_address}> Date: Mon, 17 Nov 2025 10:32:26 +0100 Subject: [PATCH] Done but not really complete --- src/alpine_bits_python/conversion_service.py | 196 ++++++++++++------- src/alpine_bits_python/db.py | 89 ++++++--- src/alpine_bits_python/migrations.py | 58 +++--- 3 files changed, 218 insertions(+), 125 deletions(-) diff --git a/src/alpine_bits_python/conversion_service.py b/src/alpine_bits_python/conversion_service.py index 85b35c8..61ec287 100644 --- a/src/alpine_bits_python/conversion_service.py +++ b/src/alpine_bits_python/conversion_service.py @@ -1,13 +1,15 @@ """Service for handling conversion data from hotel PMS XML files.""" +import json import xml.etree.ElementTree as ET from datetime import datetime +from decimal import Decimal, InvalidOperation from typing import Any from sqlalchemy import or_, select from sqlalchemy.ext.asyncio import AsyncSession -from .db import Conversion, Customer, HashedCustomer, Reservation +from .db import Conversion, RoomReservation, Customer, HashedCustomer, Reservation from .logging_config import get_logger _LOGGER = get_logger(__name__) @@ -199,6 +201,48 @@ class ConversionService: ) return stats + # Create Conversion entry first (once per PMS reservation) + conversion = Conversion( + # Links to existing entities (nullable) + reservation_id=matched_reservation.id if matched_reservation else None, + customer_id=matched_customer.id if matched_customer else None, + hashed_customer_id=matched_hashed_customer.id + if matched_hashed_customer + else None, + # Reservation metadata + hotel_id=hotel_id, + pms_reservation_id=pms_reservation_id, + reservation_number=reservation_number, + reservation_date=reservation_date, + creation_time=creation_time, + reservation_type=reservation_type, + booking_channel=booking_channel, + # Guest information + guest_first_name=guest_first_name, + guest_last_name=guest_last_name, + guest_email=guest_email, + 
guest_country_code=guest_country_code, + # Advertising data + advertising_medium=advertising_medium, + advertising_partner=advertising_partner, + advertising_campagne=advertising_campagne, + # Metadata + created_at=datetime.now(), + updated_at=datetime.now(), + ) + self.session.add(conversion) + + # Update stats for the conversion record itself + if matched_reservation: + stats["matched_to_reservation"] += 1 + if matched_customer: + stats["matched_to_customer"] += 1 + if matched_hashed_customer: + stats["matched_to_hashed_customer"] += 1 + if not any([matched_reservation, matched_customer, matched_hashed_customer]): + stats["unmatched"] += 1 + + # Process room reservations for room_reservation in room_reservations.findall("roomReservation"): # Extract room reservation details arrival_str = room_reservation.get("arrival") @@ -208,6 +252,7 @@ class ConversionService: room_number = room_reservation.get("roomNumber") adults_str = room_reservation.get("adults") rate_plan_code = room_reservation.get("ratePlanCode") + connected_room_type = room_reservation.get("connectedRoomType") arrival_date = None if arrival_str: @@ -232,53 +277,80 @@ class ConversionService: except ValueError: _LOGGER.warning("Invalid adults value: %s", adults_str) - # Process daily sales - daily_sales = room_reservation.find("dailySales") - if daily_sales is None: - continue + # Create composite ID for upsert: pms_reservation_id + room_number + # This allows updating the same room reservation if it appears again + pms_hotel_reservation_id = f"{pms_reservation_id}_{room_number}" - for daily_sale in daily_sales.findall("dailySale"): - stats["daily_sales_count"] += 1 + # Process daily sales and extract total revenue + daily_sales_elem = room_reservation.find("dailySales") + daily_sales_list = [] + total_revenue = Decimal("0") - # Extract daily sale data - sale_date_str = daily_sale.get("date") - sale_date = None - if sale_date_str: - try: - sale_date = datetime.strptime( - sale_date_str, "%Y-%m-%d" - 
).date() - except ValueError: - _LOGGER.warning("Invalid sale date format: %s", sale_date_str) + if daily_sales_elem is not None: + for daily_sale in daily_sales_elem.findall("dailySale"): + stats["daily_sales_count"] += 1 - # Create conversion record - conversion = Conversion( - # Links to existing entities (nullable) - reservation_id=matched_reservation.id - if matched_reservation - else None, - customer_id=matched_customer.id if matched_customer else None, - hashed_customer_id=matched_hashed_customer.id - if matched_hashed_customer - else None, - # Reservation metadata - hotel_id=hotel_id, - pms_reservation_id=pms_reservation_id, - reservation_number=reservation_number, - reservation_date=reservation_date, - creation_time=creation_time, - reservation_type=reservation_type, - booking_channel=booking_channel, - # Guest information - guest_first_name=guest_first_name, - guest_last_name=guest_last_name, - guest_email=guest_email, - guest_country_code=guest_country_code, - # Advertising data - advertising_medium=advertising_medium, - advertising_partner=advertising_partner, - advertising_campagne=advertising_campagne, - # Room reservation details + # Extract daily sale data + sale_date_str = daily_sale.get("date") + daily_sale_obj = {} + + if sale_date_str: + daily_sale_obj["date"] = sale_date_str + + # Extract all revenue fields; Decimal() signals InvalidOperation (not ValueError) on malformed text + revenue_total_str = daily_sale.get("revenueTotal") + if revenue_total_str: + daily_sale_obj["revenueTotal"] = revenue_total_str + try: + total_revenue += Decimal(revenue_total_str) + except (InvalidOperation, ValueError, TypeError): + _LOGGER.warning( + "Invalid revenueTotal value: %s", revenue_total_str + ) + + # Add other revenue fields if present + if daily_sale.get("revenueLogis"): + daily_sale_obj["revenueLogis"] = daily_sale.get("revenueLogis") + if daily_sale.get("revenueBoard"): + daily_sale_obj["revenueBoard"] = daily_sale.get("revenueBoard") + if 
daily_sale.get("revenueFB"): + daily_sale_obj["revenueFB"] = daily_sale.get("revenueFB") + if 
daily_sale.get("revenueSpa"): + daily_sale_obj["revenueSpa"] = daily_sale.get("revenueSpa") + if daily_sale.get("revenueOther"): + daily_sale_obj["revenueOther"] = daily_sale.get("revenueOther") + + if daily_sale_obj: # Only add if has data + daily_sales_list.append(daily_sale_obj) + + # Try to find existing room reservation for upsert + existing_result = await self.session.execute( + select(RoomReservation).where( + RoomReservation.pms_hotel_reservation_id == pms_hotel_reservation_id + ) + ) + existing_room_reservation = existing_result.scalar_one_or_none() + + if existing_room_reservation: + # Update existing room reservation + existing_room_reservation.room_status = room_status + existing_room_reservation.num_adults = num_adults + existing_room_reservation.daily_sales = daily_sales_list if daily_sales_list else None + existing_room_reservation.total_revenue = ( + str(total_revenue) if total_revenue > 0 else None + ) + existing_room_reservation.updated_at = datetime.now() + _LOGGER.debug( + "Updated room reservation %s (pms_id=%s, room=%s)", + existing_room_reservation.id, + pms_reservation_id, + room_number, + ) + else: + # Create new room reservation + room_reservation_record = RoomReservation( + conversion_id=conversion.id, + pms_hotel_reservation_id=pms_hotel_reservation_id, arrival_date=arrival_date, departure_date=departure_date, room_status=room_status, @@ -286,31 +358,19 @@ class ConversionService: room_number=room_number, num_adults=num_adults, rate_plan_code=rate_plan_code, - # Daily sale data - sale_date=sale_date, - revenue_total=daily_sale.get("revenueTotal"), - revenue_logis=daily_sale.get("revenueLogis"), - revenue_board=daily_sale.get("revenueBoard"), - revenue_fb=daily_sale.get("revenueFB"), - revenue_spa=daily_sale.get("revenueSpa"), - revenue_other=daily_sale.get("revenueOther"), - # Metadata + connected_room_type=connected_room_type, + daily_sales=daily_sales_list if daily_sales_list else None, + total_revenue=str(total_revenue) if 
total_revenue > 0 else None, created_at=datetime.now(), + updated_at=datetime.now(), + ) + self.session.add(room_reservation_record) + _LOGGER.debug( + "Created room reservation (pms_id=%s, room=%s, adults=%s)", + pms_reservation_id, + room_number, + num_adults, ) - - self.session.add(conversion) - - # Update stats - if matched_reservation: - stats["matched_to_reservation"] += 1 - if matched_customer: - stats["matched_to_customer"] += 1 - if matched_hashed_customer: - stats["matched_to_hashed_customer"] += 1 - if not any( - [matched_reservation, matched_customer, matched_hashed_customer] - ): - stats["unmatched"] += 1 return stats diff --git a/src/alpine_bits_python/db.py b/src/alpine_bits_python/db.py index 8da9461..34cd943 100644 --- a/src/alpine_bits_python/db.py +++ b/src/alpine_bits_python/db.py @@ -3,7 +3,7 @@ import hashlib import os from typing import Any, AsyncGenerator, Callable, TypeVar -from sqlalchemy import Boolean, Column, Date, DateTime, ForeignKey, Integer, String +from sqlalchemy import Boolean, Column, Date, DateTime, ForeignKey, Integer, String, JSON from sqlalchemy.exc import DBAPIError from sqlalchemy.ext.asyncio import AsyncEngine, AsyncSession, create_async_engine, async_sessionmaker from sqlalchemy.orm import declarative_base, relationship @@ -337,11 +337,14 @@ class AckedRequest(Base): class Conversion(Base): - """Conversion/daily sales data from hotel PMS. + """Conversion data from hotel PMS. - Tracks actual sales revenue for reservations. Each row represents one day - of a reservation stay. Linked to reservations via advertising tracking data - (fbclid, gclid, etc) stored in advertisingCampagne field. + Represents a single reservation event from the PMS XML with all its metadata. + Each row links to one reservation from the PMS system. A reservation can have + multiple room reservations (stored in RoomReservation table). + + Linked to reservations via advertising tracking data (fbclid, gclid, etc) + stored in advertisingCampagne field. 
 """ __tablename__ = "conversions" @@ -382,30 +385,66 @@ class Conversion(Base): String, index=True ) # advertisingCampagne (contains fbclid/gclid) - # Room reservation details - arrival_date = Column(Date) - departure_date = Column(Date) - room_status = Column(String) # status attribute (e.g., "reserved", "checked-in") - room_type = Column(String) # roomType attribute - room_number = Column(String) # roomNumber attribute - num_adults = Column(Integer) # adults attribute - rate_plan_code = Column(String) # ratePlanCode attribute - - # Daily sales data (one row per day) - sale_date = Column(Date, index=True) # date attribute from dailySale - revenue_total = Column( - String - ) # revenueTotal - keeping as string to preserve decimals - revenue_logis = Column(String) # revenueLogis (accommodation) - revenue_board = Column(String) # revenueBoard (meal plan) - revenue_fb = Column(String) # revenueFB (food & beverage) - revenue_spa = Column(String) # revenueSpa - revenue_other = Column(String) # revenueOther - # Metadata created_at = Column(DateTime(timezone=True)) # When this record was imported + updated_at = Column(DateTime(timezone=True)) # When this record was last updated # Relationships reservation = relationship("Reservation", backref="conversions") customer = relationship("Customer", backref="conversions") hashed_customer = relationship("HashedCustomer", backref="conversions") + room_reservations = relationship( + "RoomReservation", back_populates="conversion", cascade="all, delete-orphan" + ) + + +class RoomReservation(Base): + """Room reservation data from hotel PMS. + + Represents a single room reservation within a conversion/PMS reservation. + One conversion can have multiple room reservations (e.g., customer books 3 rooms). + + Daily sales are stored as a JSON blob with an extracted total_revenue field + for efficient querying. 
+ """ + + __tablename__ = "room_reservations" + id = Column(Integer, primary_key=True) + + # Link to the parent conversion/PMS reservation + conversion_id = Column( + Integer, ForeignKey("conversions.id"), nullable=False, index=True + ) + + # Unique identifier for this room reservation (for upserts) + # Composite: pms_reservation_id + room_number + pms_hotel_reservation_id = Column(String, unique=True, index=True) + + # Room reservation details + arrival_date = Column(Date, index=True) # arrival attribute + departure_date = Column(Date, index=True) # departure attribute + room_status = Column(String) # status attribute (e.g., "reserved", "departed") + room_type = Column(String) # roomType attribute (e.g., "VDS", "EZR") + room_number = Column(String, index=True) # roomNumber attribute + num_adults = Column(Integer) # adults attribute + rate_plan_code = Column(String) # ratePlanCode attribute + connected_room_type = Column(String) # connectedRoomType attribute + + # Daily sales data stored as JSON + # Format: [ + # {"date": "2021-10-09", "revenueTotal": "13.6", "revenueOther": "13.6"}, + # {"date": "2021-10-10", "revenueTotal": "306.1", "revenueLogis": "254", ...}, + # ... 
+ # ] + daily_sales = Column(JSON, nullable=True) # JSON array of daily sales + + # Extracted total revenue for efficient querying (sum of all revenue_total in daily_sales) + # Kept as string to preserve decimal precision + total_revenue = Column(String, nullable=True) + + # Metadata + created_at = Column(DateTime(timezone=True)) # When this record was imported + updated_at = Column(DateTime(timezone=True)) # When this record was last updated + + # Relationships + conversion = relationship("Conversion", back_populates="room_reservations") diff --git a/src/alpine_bits_python/migrations.py b/src/alpine_bits_python/migrations.py index 7d120c9..a770302 100644 --- a/src/alpine_bits_python/migrations.py +++ b/src/alpine_bits_python/migrations.py @@ -308,41 +308,35 @@ async def _backfill_acked_requests_username(engine: AsyncEngine, config: dict[st _LOGGER.info("Backfill complete: %d acknowledgements updated with username", total_updated) -async def migrate_add_guest_fields_to_conversions(engine: AsyncEngine) -> None: - """Migration: Add guest information fields to conversions table. +async def migrate_normalize_conversions(engine: AsyncEngine) -> None: + """Migration: Normalize conversions and room reservations structure. - This migration adds guest details from the PMS XML for improved matching: - - guest_first_name: First name of the guest - - guest_last_name: Last name of the guest - - guest_email: Email address of the guest - - guest_country_code: Country code of the guest + This migration redesigns the conversion data structure: + - conversions: One row per PMS reservation (with guest/advertising metadata) + - room_reservations: One row per room reservation (linked to conversion) + - daily_sales: JSON array of daily sales within each room reservation + - total_revenue: Extracted sum of all daily sales for efficiency - These fields are indexed to support efficient matching when the same - fbclid/gclid matches multiple reservations. 
+ Old structure: One row per daily sale (denormalized, lots of duplication) + New structure: One row per room reservation, daily sales as JSON with extracted total - Safe to run multiple times - will skip if columns already exist. + This allows: + - Upserts on room reservations (same room doesn't get duplicated) + - Better tracking of room data separate from daily sales data + - Efficient querying via extracted total_revenue field + - All daily sales details preserved in JSON for analysis + + The tables are created via Base.metadata.create_all() at startup. + + Safe to run multiple times - idempotent. """ - _LOGGER.info("Running migration: add_guest_fields_to_conversions") - - added_count = 0 - - # Add each column if it doesn't exist - if await add_column_if_not_exists(engine, "conversions", "guest_first_name", "VARCHAR"): - added_count += 1 - - if await add_column_if_not_exists(engine, "conversions", "guest_last_name", "VARCHAR"): - added_count += 1 - - if await add_column_if_not_exists(engine, "conversions", "guest_email", "VARCHAR"): - added_count += 1 - - if await add_column_if_not_exists(engine, "conversions", "guest_country_code", "VARCHAR"): - added_count += 1 - - if added_count > 0: - _LOGGER.info("Migration add_guest_fields_to_conversions: Added %d columns", added_count) - else: - _LOGGER.info("Migration add_guest_fields_to_conversions: No changes needed (already applied)") + _LOGGER.info("Running migration: normalize_conversions") + _LOGGER.info( + "Conversion data structure redesigned: " + "conversions (1 per PMS reservation) + " + "room_reservations (1 per room, daily_sales as JSON). 
" + "Tables created/updated via Base.metadata.create_all()" + ) async def run_all_migrations(engine: AsyncEngine, config: dict[str, Any] | None = None) -> None: @@ -362,7 +356,7 @@ async def run_all_migrations(engine: AsyncEngine, config: dict[str, Any] | None await migrate_add_room_types(engine) await migrate_add_advertising_account_ids(engine, config) await migrate_add_username_to_acked_requests(engine, config) - await migrate_add_guest_fields_to_conversions(engine) + await migrate_normalize_conversions(engine) _LOGGER.info("Database migrations completed successfully")