Files
alpinebits_python/src/alpine_bits_python/conversion_service.py
2025-11-19 14:17:13 +01:00

1228 lines
48 KiB
Python

"""Service for handling conversion data from hotel PMS XML files."""
import asyncio
import hashlib
import xml.etree.ElementTree as ET
from datetime import UTC, datetime
from decimal import Decimal
from typing import Any
from sqlalchemy import or_, select
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload
from .db import (
Conversion,
ConversionGuest,
ConversionRoom,
Customer,
HashedCustomer,
Reservation,
SessionMaker,
)
from .logging_config import get_logger
# Module-level logger obtained from the project's logging configuration.
_LOGGER = get_logger(__name__)
# Limit concurrent reservation processing to avoid overwhelming the database.
# Used as the semaphore size when reservations are processed concurrently.
MAX_CONCURRENT_RESERVATIONS = 10
class ConversionService:
    """Process conversion / daily-sales data and persist it.

    The processing mode is chosen by what is handed to the constructor:

    1. Sequential mode: a single AsyncSession is shared by all work.
    2. Concurrent mode: a SessionMaker creates an independent session
       for every task.
    """

    def __init__(self, session: AsyncSession | SessionMaker | None = None):
        """Set up the service and select the processing mode.

        Args:
            session: One of:
                - AsyncSession: single session, sequential processing
                - SessionMaker: session factory, concurrent processing
                - None: no session configured (not recommended, but
                  allowed for subclassing)

        Raises:
            TypeError: If ``session`` is not None and is neither an
                AsyncSession nor a SessionMaker.
        """
        # Per-run cache of reservation/customer data used for matching.
        # Maps hotel_code -> list of (reservation, hashed_customer) tuples.
        # Hashed customer data is cached so matching never needs raw data.
        self._reservation_cache: dict[
            str | None, list[tuple[Reservation, HashedCustomer | None]]
        ] = {}
        self._cache_initialized = False

        # Defaults describe the "no session supplied" configuration.
        self.session = None
        self.session_maker = None
        self.supports_concurrent = False

        if session is None:
            return

        if isinstance(session, SessionMaker):
            self.session_maker = session
            self.supports_concurrent = True
            _LOGGER.info(
                "ConversionService initialized in concurrent mode with SessionMaker"
            )
            return

        if isinstance(session, AsyncSession):
            self.session = session
            self.supports_concurrent = False
            _LOGGER.info(
                "ConversionService initialized in sequential mode with single session"
            )
            return

        raise TypeError(
            f"session must be AsyncSession or SessionMaker, got {type(session)}"
        )
async def _get_or_create_conversion_guest(
    self,
    hotel_id: str,
    guest_id: str | None,
    guest_first_name: str | None,
    guest_last_name: str | None,
    guest_email: str | None,
    guest_country_code: str | None,
    guest_birth_date,
    session: AsyncSession,
) -> ConversionGuest | None:
    """Return the ConversionGuest for this guest, creating it if needed.

    (hotel_id, guest_id) is treated as the natural key. When a record with
    that key is found it is updated in place with the supplied data;
    otherwise (or when ``guest_id`` is empty) a new record is added to the
    session and flushed.

    NOTE: There is no database-level unique constraint on
    (hotel_id, guest_id), so several records may share a key. The lookup
    orders by ``last_seen`` descending and takes the first row instead of
    using scalar_one_or_none(), so duplicates do not raise.

    Returns:
        The found or newly created ConversionGuest, or None when none of
        the name, email, country code, or birth date values is provided.
    """
    # Nothing worth storing if every descriptive field is empty.
    descriptive_fields = (
        guest_first_name,
        guest_last_name,
        guest_email,
        guest_country_code,
        guest_birth_date,
    )
    if not any(descriptive_fields):
        return None

    timestamp = datetime.now(UTC)

    if guest_id:
        same_hotel = ConversionGuest.hotel_id == hotel_id
        same_guest = ConversionGuest.guest_id == guest_id
        lookup = (
            select(ConversionGuest)
            .where((same_hotel) & (same_guest))
            .order_by(ConversionGuest.last_seen.desc())  # newest first
        )
        lookup_result = await session.execute(lookup)
        known_guest = lookup_result.scalars().first()
        if known_guest:
            # Refresh the stored record with the latest data.
            known_guest.update_from_conversion_data(
                guest_first_name,
                guest_last_name,
                guest_email,
                guest_country_code,
                guest_birth_date,
                timestamp,
            )
            return known_guest

    # No usable existing record: create one.
    created_guest = ConversionGuest.create_from_conversion_data(
        hotel_id=hotel_id,
        guest_id=guest_id,
        guest_first_name=guest_first_name,
        guest_last_name=guest_last_name,
        guest_email=guest_email,
        guest_country_code=guest_country_code,
        guest_birth_date=guest_birth_date,
        now=timestamp,
    )
    session.add(created_guest)
    # Flush so the new guest has an ID the caller can reference.
    await session.flush()
    return created_guest
async def process_conversion_xml(self, xml_content: str) -> dict[str, Any]:
    """Parse conversion XML and save daily sales data to database.

    Deleted reservations (``Deletedreservation`` elements) are handled
    first, each in its own commit; then ``reservation`` elements are
    processed concurrently or sequentially depending on how the service
    was constructed.

    Args:
        xml_content: XML string containing reservation and daily sales data

    Returns:
        Dictionary with processing statistics

    Raises:
        ValueError: If ``xml_content`` is not well-formed XML.
    """
    # NOTE(security): xml.etree.ElementTree is not hardened against
    # maliciously constructed documents. If this XML can come from an
    # untrusted source, consider a hardened parser.
    try:
        root = ET.fromstring(xml_content)
    except ET.ParseError as e:
        _LOGGER.error("Failed to parse conversion XML: %s", e)
        raise ValueError(f"Invalid XML content: {e}") from e

    # Initialize cache for this XML processing run
    await self._load_reservation_cache()

    stats = {
        "total_reservations": 0,
        "deleted_reservations": 0,
        "total_daily_sales": 0,
        "matched_to_reservation": 0,
        "matched_to_customer": 0,
        "matched_to_hashed_customer": 0,
        "unmatched": 0,
        "errors": 0,
    }

    # Process deleted reservations. A session is only acquired when there
    # is something to delete.
    deleted_elems = root.findall("Deletedreservation")
    if deleted_elems:
        if self.session_maker:
            session = await self.session_maker.create_session()
        else:
            session = self.session
        try:
            for deleted_res in deleted_elems:
                stats["deleted_reservations"] += 1
                pms_reservation_id = deleted_res.get("ID")
                try:
                    await self._handle_deleted_reservation(
                        pms_reservation_id, session
                    )
                    await session.commit()
                except Exception as e:
                    await session.rollback()
                    _LOGGER.exception(
                        "Error deleting reservation %s: %s",
                        pms_reservation_id,
                        e,
                    )
                    stats["errors"] += 1
        finally:
            # Close a session created by SessionMaker even when an error
            # (e.g. a failing rollback) escapes the loop above.
            if self.session_maker:
                await session.close()

    # Process active reservations
    reservations = root.findall("reservation")
    stats["total_reservations"] = len(reservations)
    if not reservations:
        return stats

    # Use concurrent processing if supported, otherwise sequential
    if self.supports_concurrent:
        await self._process_reservations_concurrent(reservations, stats)
    else:
        await self._process_reservations_sequential(reservations, stats)
    return stats
async def _load_reservation_cache(self) -> None:
    """Load all reservations and hashed customers into cache for fast matching.

    Runs one query that fetches every Reservation together with its
    customer and the customer's hashed version, then groups the rows by
    ``hotel_code`` in ``self._reservation_cache``:

    - Key: hotel_code (str or None)
    - Value: List of (reservation, hashed_customer) tuples

    The load happens at most once per service instance; later calls
    return immediately. If loading fails, the error is logged, the cache
    is left empty and marked initialized, so guest matching falls back to
    direct database queries and never sees a half-populated cache.

    In concurrent mode a dedicated session is created and always closed;
    in sequential mode the shared session is used and left open.
    """
    if self._cache_initialized:
        _LOGGER.debug("Reservation cache already initialized, skipping reload")
        return

    # Get a session for loading the cache
    if self.session_maker:
        session = await self.session_maker.create_session()
        close_session = True
    else:
        session = self.session
        close_session = False

    if not session:
        _LOGGER.warning("No session available for cache loading")
        return

    try:
        # Load all reservations with their hashed customers in one query
        query = select(Reservation).options(
            selectinload(Reservation.customer).selectinload(Customer.hashed_version)
        )
        result = await session.execute(query)
        reservations = result.scalars().all()
        _LOGGER.info("Loaded %d reservations into cache", len(reservations))

        # Organize by hotel_code for efficient lookup
        for reservation in reservations:
            customer = reservation.customer
            # Cache the hashed customer instead of the raw customer
            hashed_customer = None
            if customer and customer.hashed_version:
                hashed_customer = customer.hashed_version
            self._reservation_cache.setdefault(reservation.hotel_code, []).append(
                (reservation, hashed_customer)
            )

        self._cache_initialized = True
        _LOGGER.info(
            "Reservation cache initialized with %d hotel codes",
            len(self._reservation_cache),
        )
    except Exception as e:
        _LOGGER.exception("Failed to load reservation cache: %s", e)
        # Drop anything cached before the failure so the cache really is
        # empty; matching then falls back to direct queries.
        self._reservation_cache.clear()
        self._cache_initialized = True
    finally:
        # Close session if we created it
        if close_session:
            await session.close()
async def _process_reservations_sequential(
self, reservations: list, stats: dict[str, int]
) -> None:
"""Process reservations one at a time (original behavior)."""
semaphore = asyncio.Semaphore(1) # Process one at a time
async with asyncio.TaskGroup() as tg:
for reservation in reservations:
tg.create_task(
self._process_reservation_safe(reservation, semaphore, stats)
)
async def _process_reservations_concurrent(
    self, reservations: list, stats: dict[str, int]
) -> None:
    """Process reservations concurrently, bounded by a semaphore.

    One task is created per reservation; at most
    MAX_CONCURRENT_RESERVATIONS of them hold the semaphore at a time.
    Each task obtains its own database session from the SessionMaker.
    Without a SessionMaker this logs an error and delegates to
    sequential processing instead.

    Args:
        reservations: Reservation XML elements to process
        stats: Shared statistics dictionary updated by each task
    """
    if self.session_maker:
        limiter = asyncio.Semaphore(MAX_CONCURRENT_RESERVATIONS)
        async with asyncio.TaskGroup() as group:
            for reservation_elem in reservations:
                work = self._process_reservation_safe(
                    reservation_elem, limiter, stats
                )
                group.create_task(work)
        return

    _LOGGER.error(
        "Concurrent processing requested but SessionMaker not available. "
        "Falling back to sequential processing."
    )
    await self._process_reservations_sequential(reservations, stats)
async def _process_reservation_safe(
    self,
    reservation_elem: Any,
    semaphore: asyncio.Semaphore,
    stats: dict[str, int],
) -> None:
    """Safely process a single reservation with semaphore and transaction management.

    In concurrent mode, creates its own session from SessionMaker.
    In sequential mode, uses the shared session.

    The shared ``stats`` counters are updated only after the transaction
    has been committed, so a reservation whose commit fails is counted
    once under ``errors`` and not also as processed or matched.

    Args:
        reservation_elem: XML element for the reservation
        semaphore: Semaphore to limit concurrent operations
        stats: Shared stats dictionary. Each update is a plain in-place
            increment with no ``await`` in between.
    """
    pms_reservation_id = reservation_elem.get("id")
    async with semaphore:
        # In concurrent mode, create a new session for this task
        if self.session_maker:
            session = await self.session_maker.create_session()
        else:
            session = self.session
        try:
            # Process reservation with this task's session
            reservation_stats = await self._process_reservation(
                reservation_elem, session
            )
            # Commit before touching the shared stats so the counters
            # only reflect work that was actually persisted.
            await session.commit()
        except Exception as e:
            # Rollback this task's transaction
            await session.rollback()
            _LOGGER.exception(
                "Error processing reservation %s: %s",
                pms_reservation_id,
                e,
            )
            stats["errors"] += 1
        else:
            stats["total_daily_sales"] += reservation_stats.get(
                "daily_sales_count", 0
            )
            for key in (
                "matched_to_reservation",
                "matched_to_customer",
                "matched_to_hashed_customer",
                "unmatched",
            ):
                stats[key] += reservation_stats.get(key, 0)
            _LOGGER.debug(
                "Successfully processed and committed reservation %s",
                pms_reservation_id,
            )
        finally:
            # Close the session if it was created by SessionMaker
            if self.session_maker:
                await session.close()
async def _handle_deleted_reservation(
    self, pms_reservation_id: str, session: AsyncSession
) -> None:
    """Handle a deleted reservation by removing its conversion records.

    Every Conversion whose ``pms_reservation_id`` equals the given ID is
    deleted through ``session``. The caller is responsible for committing.

    A missing or empty ID is skipped with a warning: comparing the column
    against ``None`` would select every conversion that has no PMS ID and
    delete them all.

    Args:
        pms_reservation_id: PMS reservation ID to delete
        session: AsyncSession to use for the operation
    """
    if not pms_reservation_id:
        _LOGGER.warning("Skipping deleted reservation without a PMS ID")
        return

    _LOGGER.info("Processing deleted reservation: PMS ID %s", pms_reservation_id)

    # Conversion records are removed outright (there is no 'deleted' flag).
    # NOTE(review): whether related ConversionRoom rows are removed depends
    # on cascade settings defined elsewhere - confirm.
    result = await session.execute(
        select(Conversion).where(
            Conversion.pms_reservation_id == pms_reservation_id
        )
    )
    conversions = result.scalars().all()
    for conversion in conversions:
        await session.delete(conversion)

    if conversions:
        _LOGGER.info(
            "Deleted %d conversion records for PMS reservation %s",
            len(conversions),
            pms_reservation_id,
        )
async def _process_reservation(
    self, reservation_elem: ET.Element, session: AsyncSession | None = None
) -> dict[str, int]:
    """Process a single reservation element and its daily sales.

    Steps, in order:

    1. Read reservation, guest and advertising attributes from the XML.
    2. Upsert the Conversion identified by the PMS reservation ID.
    3. Get or create the ConversionGuest and link it to the conversion.
    4. Upsert one ConversionRoom per ``roomReservation`` element, keyed by
       ``"<pms_reservation_id>_<roomNumber>"``, storing its daily sales
       and summed ``revenueTotal``.
    5. Try to match the conversion to an existing reservation/customer
       using the hashed guest data and advertising data.

    Malformed dates, adult counts and revenue values are logged as
    warnings and treated as missing; they do not abort the reservation.

    Args:
        reservation_elem: XML element to process
        session: AsyncSession to use. If None, uses self.session.
            In concurrent mode, each task passes its own session.

    Returns:
        Statistics about what was processed and matched. All counters stay
        zero when the element has no ``roomReservations`` child.
    """
    # Local import: the module imports only ``Decimal`` from ``decimal``.
    from decimal import InvalidOperation

    def _parse_date(raw: str | None, label: str):
        """Parse a YYYY-MM-DD string; warn and return None when invalid."""
        if not raw:
            return None
        try:
            return datetime.strptime(raw, "%Y-%m-%d").date()
        except ValueError:
            _LOGGER.warning("Invalid %s format: %s", label, raw)
            return None

    if session is None:
        session = self.session

    stats = {
        "daily_sales_count": 0,
        "matched_to_reservation": 0,
        "matched_to_customer": 0,
        "matched_to_hashed_customer": 0,
        "unmatched": 0,
    }

    # Extract reservation metadata
    hotel_id = reservation_elem.get("hotelID")
    pms_reservation_id = reservation_elem.get("id")
    reservation_number = reservation_elem.get("number")
    reservation_date_str = reservation_elem.get("date")
    creation_time_str = reservation_elem.get("creationTime")
    reservation_type = reservation_elem.get("type")
    booking_channel = reservation_elem.get("bookingChannel")

    # Extract guest information from guest element
    guest_elem = reservation_elem.find("guest")
    guest_first_name = None
    guest_last_name = None
    guest_email = None
    guest_country_code = None
    guest_birth_date_str = None
    guest_id = None
    if guest_elem is not None:
        guest_first_name = guest_elem.get("firstName")
        guest_last_name = guest_elem.get("lastName")
        guest_email = guest_elem.get("email", None)
        guest_country_code = guest_elem.get("countryCode", None)
        guest_birth_date_str = guest_elem.get("dateOfBirth", None)
        guest_id = guest_elem.get("id")
    # A malformed birth date is handled like every other date here:
    # warn and continue instead of failing the whole reservation.
    guest_birth_date = _parse_date(guest_birth_date_str, "birth date")

    # Advertising/tracking data
    advertising_medium = reservation_elem.get("advertisingMedium")
    advertising_partner = reservation_elem.get("advertisingPartner")
    advertising_campagne = reservation_elem.get("advertisingCampagne")

    # Parse dates
    reservation_date = _parse_date(reservation_date_str, "reservation date")
    creation_time = None
    if creation_time_str:
        try:
            creation_time = datetime.fromisoformat(
                creation_time_str.replace("Z", "+00:00")
            )
        except ValueError:
            _LOGGER.warning("Invalid creation time format: %s", creation_time_str)

    # Process all room reservations
    room_reservations = reservation_elem.find("roomReservations")
    if room_reservations is None:
        _LOGGER.debug(
            "No roomReservations found for reservation %s", pms_reservation_id
        )
        return stats
    room_elems = room_reservations.findall("roomReservation")

    # Check if conversion already exists (upsert logic)
    existing_result = await session.execute(
        select(Conversion).where(
            Conversion.pms_reservation_id == pms_reservation_id
        )
    )
    existing_conversion = existing_result.scalar_one_or_none()
    if existing_conversion:
        # Update existing conversion (matching will be done later)
        # NOTE(review): hotel_id and guest_id are not refreshed on update -
        # confirm that is intended.
        existing_conversion.reservation_number = reservation_number
        existing_conversion.reservation_date = reservation_date
        existing_conversion.creation_time = creation_time
        existing_conversion.reservation_type = reservation_type
        existing_conversion.booking_channel = booking_channel
        existing_conversion.guest_first_name = guest_first_name
        existing_conversion.guest_last_name = guest_last_name
        existing_conversion.guest_email = guest_email
        existing_conversion.guest_country_code = guest_country_code
        existing_conversion.guest_birth_date = guest_birth_date
        existing_conversion.advertising_medium = advertising_medium
        existing_conversion.advertising_partner = advertising_partner
        existing_conversion.advertising_campagne = advertising_campagne
        existing_conversion.updated_at = datetime.now()
        conversion = existing_conversion
        _LOGGER.info(
            "Updated conversion %s (pms_id=%s)",
            conversion.id,
            pms_reservation_id,
        )
    else:
        # Create new conversion entry (without matching - will be done later)
        conversion = Conversion(
            # Links to existing entities (nullable, will be filled in after matching)
            reservation_id=None,
            customer_id=None,
            hashed_customer_id=None,
            # Reservation metadata
            hotel_id=hotel_id,
            pms_reservation_id=pms_reservation_id,
            reservation_number=reservation_number,
            reservation_date=reservation_date,
            creation_time=creation_time,
            reservation_type=reservation_type,
            booking_channel=booking_channel,
            # Guest information
            guest_first_name=guest_first_name,
            guest_last_name=guest_last_name,
            guest_email=guest_email,
            guest_country_code=guest_country_code,
            guest_birth_date=guest_birth_date,
            guest_id=guest_id,
            # Advertising data
            advertising_medium=advertising_medium,
            advertising_partner=advertising_partner,
            advertising_campagne=advertising_campagne,
            # Metadata
            created_at=datetime.now(),
            updated_at=datetime.now(),
        )
        session.add(conversion)
        _LOGGER.debug(
            "Created conversion (pms_id=%s)",
            pms_reservation_id,
        )

    # Flush to ensure conversion has an ID before creating room reservations
    await session.flush()

    # Create or update ConversionGuest and link it to the conversion
    conversion_guest = await self._get_or_create_conversion_guest(
        hotel_id=hotel_id,
        guest_id=guest_id,
        guest_first_name=guest_first_name,
        guest_last_name=guest_last_name,
        guest_email=guest_email,
        guest_country_code=guest_country_code,
        guest_birth_date=guest_birth_date,
        session=session,
    )
    if conversion_guest:
        conversion.conversion_guest_id = conversion_guest.id

    # Batch-load existing room reservations to avoid N+1 queries
    pms_hotel_reservation_ids = [
        f"{pms_reservation_id}_{room_elem.get('roomNumber')}"
        for room_elem in room_elems
    ]
    existing_rooms = {}
    if pms_hotel_reservation_ids:
        existing_rooms_result = await session.execute(
            select(ConversionRoom).where(
                ConversionRoom.pms_hotel_reservation_id.in_(pms_hotel_reservation_ids)
            )
        )
        existing_rooms = {
            room.pms_hotel_reservation_id: room
            for room in existing_rooms_result.scalars().all()
        }

    # Revenue attributes copied verbatim into each daily-sale dict
    extra_revenue_fields = (
        "revenueLogis",
        "revenueBoard",
        "revenueFB",
        "revenueSpa",
        "revenueOther",
    )

    # Process room reservations
    for room_reservation in room_elems:
        # Extract room reservation details
        arrival_str = room_reservation.get("arrival")
        departure_str = room_reservation.get("departure")
        room_status = room_reservation.get("status")
        room_type = room_reservation.get("roomType")
        room_number = room_reservation.get("roomNumber")
        adults_str = room_reservation.get("adults")
        rate_plan_code = room_reservation.get("ratePlanCode")
        connected_room_type = room_reservation.get("connectedRoomType")

        arrival_date = _parse_date(arrival_str, "arrival date")
        departure_date = _parse_date(departure_str, "departure date")

        num_adults = None
        if adults_str:
            try:
                num_adults = int(adults_str)
            except ValueError:
                _LOGGER.warning("Invalid adults value: %s", adults_str)

        # Create composite ID for upsert: pms_reservation_id + room_number
        # This allows updating the same room reservation if it appears again
        pms_hotel_reservation_id = f"{pms_reservation_id}_{room_number}"

        # Process daily sales and extract total revenue
        daily_sales_elem = room_reservation.find("dailySales")
        daily_sales_list = []
        total_revenue = Decimal(0)
        if daily_sales_elem is not None:
            for daily_sale in daily_sales_elem.findall("dailySale"):
                stats["daily_sales_count"] += 1
                daily_sale_obj = {}

                sale_date_str = daily_sale.get("date")
                if sale_date_str:
                    daily_sale_obj["date"] = sale_date_str

                revenue_total_str = daily_sale.get("revenueTotal")
                if revenue_total_str:
                    daily_sale_obj["revenueTotal"] = revenue_total_str
                    # Decimal() signals bad input with InvalidOperation,
                    # which is not a ValueError. NaN/Infinity parse fine
                    # but would break the comparisons below, so they are
                    # rejected as well.
                    try:
                        amount = Decimal(revenue_total_str)
                    except (InvalidOperation, ValueError, TypeError):
                        amount = None
                    if amount is None or not amount.is_finite():
                        _LOGGER.warning(
                            "Invalid revenueTotal value: %s", revenue_total_str
                        )
                    else:
                        total_revenue += amount

                # Add other revenue fields if present
                for field_name in extra_revenue_fields:
                    field_value = daily_sale.get(field_name)
                    if field_value:
                        daily_sale_obj[field_name] = field_value

                if daily_sale_obj:  # Only add if has data
                    daily_sales_list.append(daily_sale_obj)

        # Check if room reservation already exists using batch-loaded data
        existing_room_reservation = existing_rooms.get(pms_hotel_reservation_id)

        if total_revenue > 0 and (
            guest_first_name is None
            and guest_last_name is None
            and guest_email is None
        ):
            _LOGGER.info(
                "Guest info missing but total revenue > 0 for PMS ID %s",
                pms_reservation_id,
            )

        if existing_room_reservation:
            # Update existing room reservation with all fields
            existing_room_reservation.arrival_date = arrival_date
            existing_room_reservation.departure_date = departure_date
            existing_room_reservation.room_status = room_status
            existing_room_reservation.room_type = room_type
            existing_room_reservation.num_adults = num_adults
            existing_room_reservation.rate_plan_code = rate_plan_code
            existing_room_reservation.connected_room_type = connected_room_type
            existing_room_reservation.daily_sales = (
                daily_sales_list if daily_sales_list else None
            )
            existing_room_reservation.total_revenue = (
                total_revenue if total_revenue > 0 else None
            )
            existing_room_reservation.updated_at = datetime.now()
            _LOGGER.debug(
                "Updated room reservation %s (pms_id=%s, room=%s)",
                existing_room_reservation.id,
                pms_reservation_id,
                room_number,
            )
        else:
            # Create new room reservation
            room_reservation_record = ConversionRoom(
                conversion_id=conversion.id,
                pms_hotel_reservation_id=pms_hotel_reservation_id,
                arrival_date=arrival_date,
                departure_date=departure_date,
                room_status=room_status,
                room_type=room_type,
                room_number=room_number,
                num_adults=num_adults,
                rate_plan_code=rate_plan_code,
                connected_room_type=connected_room_type,
                daily_sales=daily_sales_list if daily_sales_list else None,
                total_revenue=total_revenue if total_revenue > 0 else None,
                created_at=datetime.now(),
                updated_at=datetime.now(),
            )
            session.add(room_reservation_record)
            _LOGGER.debug(
                "Created room reservation (pms_id=%s, room=%s, adults=%s)",
                pms_reservation_id,
                room_number,
                num_adults,
            )

    # Now that conversion, conversion_guest, and conversion_room records exist,
    # perform matching using hashed guest data from conversion_guest.
    # Matching always runs, with or without advertising data.
    hashed_first_name = conversion_guest.hashed_first_name if conversion_guest else None
    hashed_last_name = conversion_guest.hashed_last_name if conversion_guest else None
    hashed_email = conversion_guest.hashed_email if conversion_guest else None
    match_result = await self._find_matching_entities(
        advertising_campagne,
        hotel_id,
        reservation_date,
        hashed_first_name,
        hashed_last_name,
        hashed_email,
        advertising_partner,
        session,
    )
    matched_reservation = match_result["reservation"]
    matched_customer = match_result["customer"]
    matched_hashed_customer = match_result["hashed_customer"]

    # Update the conversion with matched entities if found
    if matched_reservation or matched_customer or matched_hashed_customer:
        conversion.reservation_id = (
            matched_reservation.id if matched_reservation else None
        )
        conversion.customer_id = (
            matched_customer.id if matched_customer else None
        )
        conversion.hashed_customer_id = (
            matched_hashed_customer.id if matched_hashed_customer else None
        )
        conversion.updated_at = datetime.now()

    # Update stats for the conversion record
    if matched_reservation:
        stats["matched_to_reservation"] += 1
    if matched_customer:
        stats["matched_to_customer"] += 1
    if matched_hashed_customer:
        stats["matched_to_hashed_customer"] += 1
    if not any([matched_reservation, matched_customer, matched_hashed_customer]):
        stats["unmatched"] += 1
    return stats
async def _find_matching_entities(
    self,
    advertising_campagne: str,
    hotel_id: str | None,
    reservation_date: Any,
    guest_first_name: str | None = None,
    guest_last_name: str | None = None,
    guest_email: str | None = None,
    advertising_partner: str | None = None,
    session: AsyncSession | None = None,
) -> dict[str, Any]:
    """Find matching Reservation, Customer, and HashedCustomer.

    Two strategies are tried in order:

    1. Advertising data matching, when ``advertising_campagne`` is set.
    2. Guest email/name matching, when strategy 1 found nothing and at
       least one guest detail is available.

    If a reservation is found and it has a ``customer_id``, its Customer
    is looked up, followed by that customer's HashedCustomer.

    Args:
        advertising_campagne: Truncated tracking ID from conversion XML
        hotel_id: Hotel ID for additional filtering
        reservation_date: Reservation date (not used by the lookups here)
        guest_first_name: Guest first name for matching
        guest_last_name: Guest last name for matching
        guest_email: Guest email for matching
        advertising_partner: Partner info (matches utm_medium for additional filtering)
        session: AsyncSession to use. If None, uses self.session.

    Returns:
        Dictionary with 'reservation', 'customer', and 'hashed_customer'
        keys; each value is None when nothing was found.
    """
    if session is None:
        session = self.session

    reservation = None
    customer = None
    hashed_customer = None

    # Strategy 1: advertising data (fbclid/gclid/utm_campaign)
    if advertising_campagne:
        found_by_advertising = await self._match_by_advertising(
            advertising_campagne,
            hotel_id,
            guest_first_name,
            guest_last_name,
            guest_email,
            advertising_partner,
            session,
        )
        if found_by_advertising:
            reservation = found_by_advertising
            _LOGGER.info(
                "Matched conversion by advertising data (advertisingCampagne=%s, hotel=%s)",
                advertising_campagne,
                hotel_id,
            )
        else:
            _LOGGER.debug(
                "No match found by advertising data (advertisingCampagne=%s), "
                "falling back to email/name matching",
                advertising_campagne,
            )

    # Strategy 2: guest details, only when strategy 1 came up empty
    has_guest_details = guest_email or guest_first_name or guest_last_name
    if not reservation and has_guest_details:
        found_by_guest = await self._match_by_guest_details(
            hotel_id, guest_first_name, guest_last_name, guest_email, session
        )
        if found_by_guest:
            reservation = found_by_guest
            _LOGGER.info(
                "Matched conversion by guest details (name=%s %s, email=%s, hotel=%s)",
                guest_first_name,
                guest_last_name,
                guest_email,
                hotel_id,
            )
        else:
            _LOGGER.debug(
                "No match found by guest details (name=%s %s, email=%s)",
                guest_first_name,
                guest_last_name,
                guest_email,
            )

    # Resolve the reservation's customer, then that customer's hashed record
    if reservation and reservation.customer_id:
        customer_rows = await session.execute(
            select(Customer).where(Customer.id == reservation.customer_id)
        )
        customer = customer_rows.scalar_one_or_none()
        if customer:
            hashed_rows = await session.execute(
                select(HashedCustomer).where(
                    HashedCustomer.customer_id == customer.id
                )
            )
            hashed_customer = hashed_rows.scalar_one_or_none()

    return {
        "reservation": reservation,
        "customer": customer,
        "hashed_customer": hashed_customer,
    }
async def _match_by_advertising(
    self,
    advertising_campagne: str,
    hotel_id: str | None,
    guest_first_name: str | None,
    guest_last_name: str | None,
    guest_email: str | None,
    advertising_partner: str | None,
    session: AsyncSession | None = None,
) -> Reservation | None:
    """Match reservation by advertising tracking data (fbclid/gclid/md5_unique_id).

    A reservation is a candidate when its ``fbclid`` or ``gclid`` starts
    with ``advertising_campagne`` or its ``md5_unique_id`` equals it. The
    prefix comparison treats the tracking ID as literal text: ``%`` and
    ``_`` inside it are escaped and do not act as LIKE wildcards.

    With several candidates, guest details are used to pick one. The
    hash-based matcher is tried first because the caller in this module
    passes hashed guest values; the plaintext/``utm_medium`` filter is the
    fallback. If neither decides, the first candidate is returned and a
    warning is logged.

    Args:
        advertising_campagne: Tracking ID from PMS (could be truncated click_id or md5_unique_id)
        hotel_id: Hotel ID for filtering
        guest_first_name: Guest first name for disambiguation
        guest_last_name: Guest last name for disambiguation
        guest_email: Guest email for disambiguation
        advertising_partner: Partner info (matches utm_medium)
        session: AsyncSession to use. If None, uses self.session.

    Returns:
        Matched Reservation or None
    """
    # An empty prefix would match every reservation that has a click ID.
    if not advertising_campagne:
        return None

    if session is None:
        session = self.session

    # Find reservations where:
    # - fbclid/gclid starts with the advertising_campagne value, OR
    # - md5_unique_id matches exactly (for direct ID matching)
    query = select(Reservation).where(
        or_(
            Reservation.fbclid.startswith(advertising_campagne, autoescape=True),
            Reservation.gclid.startswith(advertising_campagne, autoescape=True),
            Reservation.md5_unique_id == advertising_campagne,
        )
    )
    # Eagerly load the customer and its hashed version; both are read
    # during disambiguation and cannot be lazy-loaded on an async session.
    query = query.options(
        selectinload(Reservation.customer).selectinload(Customer.hashed_version)
    )
    # Add hotel filter if available
    if hotel_id:
        query = query.where(Reservation.hotel_code == hotel_id)

    db_result = await session.execute(query)
    reservations = db_result.scalars().all()
    if not reservations:
        return None
    # If single match, return it
    if len(reservations) == 1:
        return reservations[0]

    # If multiple matches, try to narrow down using guest details
    _LOGGER.debug(
        "Multiple reservations match advertisingCampagne %s (hotel=%s): found %d matches. "
        "Attempting to narrow down using guest details.",
        advertising_campagne,
        hotel_id,
        len(reservations),
    )
    # Hashed guest values can only match hashed customer fields.
    matched_reservation = self._match_reservations_by_guest_details(
        reservations,
        guest_first_name,
        guest_last_name,
        guest_email,
    )
    if matched_reservation is None:
        # Plaintext comparison and advertising_partner/utm_medium fallback
        matched_reservation = self._filter_reservations_by_guest_details(
            reservations,
            guest_first_name,
            guest_last_name,
            guest_email,
            advertising_partner,
        )
    if matched_reservation is None:
        # If we still can't narrow it down, use the first match and log warning
        _LOGGER.warning(
            "Could not narrow down multiple reservations for advertisingCampagne %s "
            "(hotel=%s, guest=%s %s, email=%s). Using first match.",
            advertising_campagne,
            hotel_id,
            guest_first_name,
            guest_last_name,
            guest_email,
        )
        matched_reservation = reservations[0]
    return matched_reservation
async def _match_by_guest_details(
    self,
    hotel_id: str | None,
    guest_first_name: str | None,
    guest_last_name: str | None,
    guest_email: str | None,
    session: AsyncSession | None = None,
) -> Reservation | None:
    """Match reservation by guest name and email, preferring cached data.

    When the reservation cache is initialized and non-empty, candidates
    come from it: the entries for ``hotel_id`` if given, or every cached
    reservation if ``hotel_id`` is empty. If that yields no candidates
    (or there is no cache), reservations are queried from the database.

    Args:
        hotel_id: Hotel ID for filtering
        guest_first_name: Guest first name
        guest_last_name: Guest last name
        guest_email: Guest email
        session: AsyncSession to use. If None, uses self.session.

    Returns:
        Matched Reservation or None
    """
    if session is None:
        session = self.session

    # Try the cache first
    if self._cache_initialized and self._reservation_cache:
        cache = self._reservation_cache
        if not hotel_id:
            # No hotel given: consider every cached reservation
            cached_reservations = [
                res for entries in cache.values() for res, _ in entries
            ]
        elif hotel_id in cache:
            cached_reservations = [res for res, _ in cache[hotel_id]]
        else:
            cached_reservations = []

        if cached_reservations:
            _LOGGER.debug(
                "Using cached reservations for matching (hotel=%s, count=%d)",
                hotel_id,
                len(cached_reservations),
            )
            return self._match_reservations_by_guest_details(
                cached_reservations,
                guest_first_name,
                guest_last_name,
                guest_email,
            )

    # Fallback: query the database when the cache gave no candidates
    _LOGGER.debug(
        "Cache unavailable or empty, falling back to database query (hotel=%s)",
        hotel_id,
    )
    from sqlalchemy.orm import selectinload

    eager_customer = selectinload(Reservation.customer).selectinload(
        Customer.hashed_version
    )
    query = select(Reservation).options(eager_customer)
    if hotel_id:
        query = query.where(Reservation.hotel_code == hotel_id)
    db_result = await session.execute(query)
    db_reservations = db_result.scalars().all()
    return self._match_reservations_by_guest_details(
        db_reservations, guest_first_name, guest_last_name, guest_email
    )
def _match_reservations_by_guest_details(
    self,
    reservations: list[Reservation],
    guest_first_name: str | None,
    guest_last_name: str | None,
    guest_email: str | None,
) -> Reservation | None:
    """Pick the best reservation for a guest by comparing hashes (non-async).

    The guest arguments are expected to be pre-hashed values from
    ConversionGuest; they are compared for equality with the hash fields
    of each reservation's hashed customer. An email match scores 3, a
    first-and-last-name match scores 2. The highest score wins, and among
    equal scores the earliest reservation in ``reservations`` wins.

    Args:
        reservations: List of reservations to search through
        guest_first_name: Pre-hashed guest first name (or None)
        guest_last_name: Pre-hashed guest last name (or None)
        guest_email: Pre-hashed guest email (or None)

    Returns:
        Matched Reservation or None
    """
    best_reservation = None
    best_score = 0

    for reservation in reservations:
        customer = reservation.customer
        if not customer:
            continue

        hashed_customer = customer.hashed_version
        if not hashed_customer:
            continue

        # Defensive check: skip anything that is not a HashedCustomer-like
        # object (e.g. an InstrumentedList or other SQLAlchemy construct).
        if not hasattr(hashed_customer, 'hashed_email'):
            _LOGGER.debug(
                "Customer %s has invalid hashed_version type: %s",
                customer.id,
                type(hashed_customer),
            )
            continue

        score = 0
        stored_email = hashed_customer.hashed_email
        if guest_email and stored_email and stored_email == guest_email:
            # Email match has the highest priority
            _LOGGER.info(
                "Found exact email match (reservation_id=%s) via hash",
                reservation.id,
            )
            score = 3
        elif guest_first_name and guest_last_name:
            stored_given = hashed_customer.hashed_given_name
            stored_surname = hashed_customer.hashed_surname
            given_matches = stored_given and stored_given == guest_first_name
            surname_matches = stored_surname and stored_surname == guest_last_name
            if given_matches and surname_matches:
                _LOGGER.info(
                    "Found exact name match (reservation_id=%s) via hash",
                    reservation.id,
                )
                score = 2

        # Strictly greater keeps the earliest reservation among ties.
        if score > best_score:
            best_score = score
            best_reservation = reservation

    return best_reservation
def _filter_reservations_by_guest_details(
    self,
    reservations: list[Reservation],
    guest_first_name: str | None,
    guest_last_name: str | None,
    guest_email: str | None,
    advertising_partner: str | None,
) -> Reservation | None:
    """Narrow candidate reservations down to one using guest details.

    Pass 1 returns the first reservation whose customer matches every
    guest detail that was supplied (case-insensitive comparison against
    the customer's given name, surname and email address). Pass 2 returns
    the first reservation whose ``utm_medium`` equals
    ``advertising_partner`` case-insensitively.

    NOTE(review): the values are compared with plaintext customer fields;
    the caller visible in this module passes hashed guest values, which
    pass 1 would not match - confirm the intended input.

    Args:
        reservations: List of candidate reservations
        guest_first_name: Guest first name
        guest_last_name: Guest last name
        guest_email: Guest email
        advertising_partner: Partner info (e.g., "Facebook_Mobile_Feed")

    Returns:
        The first reservation satisfying pass 1 or pass 2, or None if no
        reservation does.
    """

    def _same_text(stored, wanted):
        """Case-insensitive equality; falsy when ``stored`` is empty."""
        return stored and stored.lower() == wanted.lower()

    # Pass 1: every supplied guest detail must match the customer
    if guest_first_name or guest_last_name or guest_email:
        for reservation in reservations:
            customer = reservation.customer
            if not customer:
                continue
            if guest_first_name and not _same_text(
                customer.given_name, guest_first_name
            ):
                continue
            if guest_last_name and not _same_text(
                customer.surname, guest_last_name
            ):
                continue
            if guest_email and not _same_text(
                customer.email_address, guest_email
            ):
                continue
            _LOGGER.debug(
                "Found exact match on guest name/email for %s %s",
                guest_first_name,
                guest_last_name,
            )
            return reservation

    # Pass 2: advertising_partner against utm_medium
    if advertising_partner:
        for reservation in reservations:
            medium = reservation.utm_medium
            if medium and medium.lower() == advertising_partner.lower():
                _LOGGER.debug(
                    "Found match on advertising_partner=%s matching utm_medium",
                    advertising_partner,
                )
                return reservation

    # No single clear match found
    return None