1228 lines
48 KiB
Python
1228 lines
48 KiB
Python
"""Service for handling conversion data from hotel PMS XML files."""
|
|
|
|
import asyncio
|
|
import hashlib
|
|
import xml.etree.ElementTree as ET
|
|
from datetime import UTC, datetime
|
|
from decimal import Decimal
|
|
from typing import Any
|
|
|
|
from sqlalchemy import or_, select
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
from sqlalchemy.orm import selectinload
|
|
|
|
from .db import (
|
|
Conversion,
|
|
ConversionGuest,
|
|
ConversionRoom,
|
|
Customer,
|
|
HashedCustomer,
|
|
Reservation,
|
|
SessionMaker,
|
|
)
|
|
from .logging_config import get_logger
|
|
|
|
# Module-level logger shared by everything in this file.
_LOGGER = get_logger(__name__)

# Limit concurrent reservation processing to avoid overwhelming the database.
# Used as the semaphore size in concurrent mode, where every in-flight
# reservation holds its own session created from the SessionMaker.
MAX_CONCURRENT_RESERVATIONS = 10
|
|
|
|
|
|
class ConversionService:
|
|
"""Service for processing and storing conversion/daily sales data.
|
|
|
|
Supports two modes of operation:
|
|
1. Sequential mode: Single AsyncSession passed in, uses sequential processing
|
|
2. Concurrent mode: SessionMaker passed in, creates independent sessions per task
|
|
"""
|
|
|
|
def __init__(self, session: AsyncSession | SessionMaker | None = None):
    """Set up the service in sequential or concurrent mode.

    Args:
        session: Selects the mode of operation:
            - SessionMaker: concurrent mode; sessions are created on demand.
            - AsyncSession: sequential mode; the one session is reused.
            - None: no session is stored (allowed for subclassing).

    Raises:
        TypeError: If ``session`` is neither of the supported types nor None.
    """
    # Defaults describe the "no session" state; the branches below refine it.
    self.session = None
    self.session_maker = None
    self.supports_concurrent = False

    # Per-run lookup cache: hotel_code -> [(reservation, hashed_customer), ...].
    # Holds hashed customer data so matching does not need raw customer fields.
    self._reservation_cache: dict[
        str | None, list[tuple[Reservation, HashedCustomer | None]]
    ] = {}
    self._cache_initialized = False

    if session is None:
        return

    if isinstance(session, SessionMaker):
        self.session_maker = session
        self.supports_concurrent = True
        _LOGGER.info(
            "ConversionService initialized in concurrent mode with SessionMaker"
        )
        return

    if not isinstance(session, AsyncSession):
        raise TypeError(
            f"session must be AsyncSession or SessionMaker, got {type(session)}"
        )

    self.session = session
    _LOGGER.info(
        "ConversionService initialized in sequential mode with single session"
    )
|
|
|
|
async def _get_or_create_conversion_guest(
|
|
self,
|
|
hotel_id: str,
|
|
guest_id: str | None,
|
|
guest_first_name: str | None,
|
|
guest_last_name: str | None,
|
|
guest_email: str | None,
|
|
guest_country_code: str | None,
|
|
guest_birth_date,
|
|
session: AsyncSession,
|
|
) -> ConversionGuest | None:
|
|
"""Get or create a ConversionGuest record for the given guest data.
|
|
|
|
Uses (hotel_id, guest_id) as the natural key to identify a guest.
|
|
If a guest with this key exists, updates it with new data.
|
|
If not, creates a new guest record.
|
|
|
|
NOTE: There is no database-level unique constraint on (hotel_id, guest_id),
|
|
so multiple ConversionGuest records can exist with the same key. This method
|
|
uses first() instead of scalar_one_or_none() to handle this gracefully and
|
|
update the most recently created record when duplicates exist.
|
|
|
|
Returns the ConversionGuest record, or None if no guest data provided.
|
|
"""
|
|
# Don't create a ConversionGuest if we have no guest information
|
|
if not any(
|
|
[guest_first_name, guest_last_name, guest_email, guest_country_code, guest_birth_date]
|
|
):
|
|
return None
|
|
|
|
now = datetime.now(UTC)
|
|
|
|
# Try to find existing guest by (hotel_id, guest_id)
|
|
if guest_id:
|
|
result = await session.execute(
|
|
select(ConversionGuest)
|
|
.where(
|
|
(ConversionGuest.hotel_id == hotel_id)
|
|
& (ConversionGuest.guest_id == guest_id)
|
|
)
|
|
.order_by(ConversionGuest.last_seen.desc()) # Get most recently updated
|
|
)
|
|
existing_guest = result.scalars().first()
|
|
|
|
if existing_guest:
|
|
# Update with new data
|
|
existing_guest.update_from_conversion_data(
|
|
guest_first_name,
|
|
guest_last_name,
|
|
guest_email,
|
|
guest_country_code,
|
|
guest_birth_date,
|
|
now,
|
|
)
|
|
return existing_guest
|
|
|
|
# Create new ConversionGuest
|
|
new_guest = ConversionGuest.create_from_conversion_data(
|
|
hotel_id=hotel_id,
|
|
guest_id=guest_id,
|
|
guest_first_name=guest_first_name,
|
|
guest_last_name=guest_last_name,
|
|
guest_email=guest_email,
|
|
guest_country_code=guest_country_code,
|
|
guest_birth_date=guest_birth_date,
|
|
now=now,
|
|
)
|
|
session.add(new_guest)
|
|
await session.flush() # Ensure the guest has an ID
|
|
return new_guest
|
|
|
|
async def process_conversion_xml(self, xml_content: str) -> dict[str, Any]:
    """Parse conversion XML and save daily sales data to database.

    Processing happens in two passes over the children of the root element:
    ``Deletedreservation`` elements are handled first (one commit per
    element), then ``reservation`` elements are processed either
    concurrently or sequentially depending on how the service was built.

    Args:
        xml_content: XML string containing reservation and daily sales data

    Returns:
        Dictionary with processing statistics (totals, match counters,
        ``unmatched`` and ``errors``).

    Raises:
        ValueError: If ``xml_content`` is not well-formed XML.
    """
    # NOTE(review): xml.etree is not hardened against maliciously constructed
    # XML. If xml_content can come from an untrusted source, consider a
    # hardened parser (see the "XML vulnerabilities" section of the Python docs).
    try:
        root = ET.fromstring(xml_content)
    except ET.ParseError as e:
        _LOGGER.error("Failed to parse conversion XML: %s", e)
        raise ValueError(f"Invalid XML content: {e}") from e

    # Initialize cache for this XML processing run
    await self._load_reservation_cache()

    stats = {
        "total_reservations": 0,
        "deleted_reservations": 0,
        "total_daily_sales": 0,
        "matched_to_reservation": 0,
        "matched_to_customer": 0,
        "matched_to_hashed_customer": 0,
        "unmatched": 0,
        "errors": 0,
    }

    # Process deleted reservations. A session is only acquired when there is
    # something to delete.
    deleted_elements = root.findall("Deletedreservation")
    if deleted_elements:
        if self.session_maker:
            session = await self.session_maker.create_session()
        else:
            session = self.session

        try:
            for deleted_res in deleted_elements:
                stats["deleted_reservations"] += 1
                pms_reservation_id = deleted_res.get("ID")
                try:
                    await self._handle_deleted_reservation(
                        pms_reservation_id, session
                    )
                    await session.commit()
                except Exception as e:
                    await session.rollback()
                    _LOGGER.exception(
                        "Error deleting reservation %s: %s",
                        pms_reservation_id,
                        e,
                    )
                    stats["errors"] += 1
        finally:
            # Close the session if it was created by SessionMaker, even when
            # an error (e.g. a failing rollback) escapes the loop above.
            if self.session_maker:
                await session.close()

    # Process active reservations
    reservations = root.findall("reservation")
    stats["total_reservations"] = len(reservations)

    if not reservations:
        return stats

    # Use concurrent processing if supported, otherwise sequential
    if self.supports_concurrent:
        await self._process_reservations_concurrent(reservations, stats)
    else:
        await self._process_reservations_sequential(reservations, stats)

    return stats
|
|
|
|
async def _load_reservation_cache(self) -> None:
    """Load all reservations and hashed customers into the in-memory cache.

    Called at the start of ``process_conversion_xml``. Runs a single query
    for every Reservation (eager-loading customer and its hashed version)
    and groups the rows by ``hotel_code``.

    The cache structure:
    - Key: hotel_code (str or None)
    - Value: List of (reservation, hashed_customer) tuples, where
      hashed_customer is None when the reservation has no customer or the
      customer has no hashed version

    The load happens at most once per service instance: once
    ``_cache_initialized`` is set, later calls return immediately. A failed
    load is logged, leaves the cache empty and is not retried.
    """
    if self._cache_initialized:
        _LOGGER.debug("Reservation cache already initialized, skipping reload")
        return

    # Get a session for loading the cache
    if self.session_maker:
        session = await self.session_maker.create_session()
        close_session = True
    else:
        session = self.session
        close_session = False

    if not session:
        _LOGGER.warning("No session available for cache loading")
        return

    try:
        # Load all reservations with their hashed customers in one query
        query = select(Reservation).options(
            selectinload(Reservation.customer).selectinload(Customer.hashed_version)
        )
        result = await session.execute(query)
        reservations = result.scalars().all()

        _LOGGER.info("Loaded %d reservations into cache", len(reservations))

        # Organize by hotel_code for efficient lookup
        for reservation in reservations:
            customer = reservation.customer
            # Cache the hashed customer instead of raw customer
            hashed_customer = (
                customer.hashed_version
                if customer and customer.hashed_version
                else None
            )
            self._reservation_cache.setdefault(reservation.hotel_code, []).append(
                (reservation, hashed_customer)
            )

        self._cache_initialized = True
        _LOGGER.info(
            "Reservation cache initialized with %d hotel codes",
            len(self._reservation_cache),
        )
    except Exception as e:
        # Keep the traceback, and never leave a half-filled cache behind:
        # an empty cache is the documented fallback state.
        _LOGGER.exception("Failed to load reservation cache: %s", e)
        self._reservation_cache.clear()
        self._cache_initialized = True
    finally:
        # Close session if we created it
        if close_session:
            await session.close()
|
|
|
|
async def _process_reservations_sequential(
|
|
self, reservations: list, stats: dict[str, int]
|
|
) -> None:
|
|
"""Process reservations one at a time (original behavior)."""
|
|
semaphore = asyncio.Semaphore(1) # Process one at a time
|
|
async with asyncio.TaskGroup() as tg:
|
|
for reservation in reservations:
|
|
tg.create_task(
|
|
self._process_reservation_safe(reservation, semaphore, stats)
|
|
)
|
|
|
|
async def _process_reservations_concurrent(
    self, reservations: list, stats: dict[str, int]
) -> None:
    """Process the reservation elements concurrently, bounded by a semaphore.

    One task is created per element; at most MAX_CONCURRENT_RESERVATIONS of
    them run their processing section at once. Each worker obtains its own
    session from the SessionMaker. Without a SessionMaker the call is
    delegated to the sequential implementation.

    Args:
        reservations: XML reservation elements to process.
        stats: Shared statistics dictionary, updated by the workers.
    """
    if self.session_maker:
        limiter = asyncio.Semaphore(MAX_CONCURRENT_RESERVATIONS)
        async with asyncio.TaskGroup() as group:
            for element in reservations:
                group.create_task(
                    self._process_reservation_safe(element, limiter, stats)
                )
        return

    # No factory for per-task sessions: concurrency is not possible.
    _LOGGER.error(
        "Concurrent processing requested but SessionMaker not available. "
        "Falling back to sequential processing."
    )
    await self._process_reservations_sequential(reservations, stats)
|
|
|
|
async def _process_reservation_safe(
    self,
    reservation_elem: Any,
    semaphore: asyncio.Semaphore,
    stats: dict[str, int],
) -> None:
    """Process one reservation inside its own transaction, never raising for it.

    In concurrent mode, creates its own session from SessionMaker.
    In sequential mode, uses the shared session.

    The reservation's counters are added to ``stats`` only after the commit
    succeeded, so a reservation whose transaction fails is counted once, in
    ``errors``, and nowhere else.

    Args:
        reservation_elem: XML element for the reservation
        semaphore: Semaphore to limit concurrent operations
        stats: Shared stats dictionary. Callers in this file run this
            coroutine as asyncio tasks, and each update below is a plain
            ``+=`` with no await in between.
    """
    pms_reservation_id = reservation_elem.get("id")

    async with semaphore:
        # In concurrent mode, create a new session for this task
        if self.session_maker:
            session = await self.session_maker.create_session()
        else:
            session = self.session

        try:
            # Process reservation with this task's session
            reservation_stats = await self._process_reservation(
                reservation_elem, session
            )

            # Commit first: the counters must reflect persisted work only.
            await session.commit()

            stats["total_daily_sales"] += reservation_stats["daily_sales_count"]
            for counter in (
                "matched_to_reservation",
                "matched_to_customer",
                "matched_to_hashed_customer",
                "unmatched",
            ):
                stats[counter] += reservation_stats.get(counter, 0)

            _LOGGER.debug(
                "Successfully processed and committed reservation %s",
                pms_reservation_id,
            )

        except Exception as e:
            # Rollback this task's transaction
            await session.rollback()
            _LOGGER.exception(
                "Error processing reservation %s: %s",
                pms_reservation_id,
                e,
            )
            stats["errors"] += 1
        finally:
            # Close the session if it was created by SessionMaker
            if self.session_maker:
                await session.close()
|
|
|
|
async def _handle_deleted_reservation(
    self, pms_reservation_id: str, session: AsyncSession
):
    """Delete every Conversion stored for a reservation the PMS reports as deleted.

    The deletes are issued on ``session`` but not committed here; the caller
    owns the transaction.

    Args:
        pms_reservation_id: PMS reservation ID whose conversions are removed
        session: AsyncSession to use for the operation
    """
    _LOGGER.info("Processing deleted reservation: PMS ID %s", pms_reservation_id)

    lookup = select(Conversion).where(
        Conversion.pms_reservation_id == pms_reservation_id
    )
    doomed = (await session.execute(lookup)).scalars().all()

    # Nothing stored for this PMS ID: nothing to delete or report.
    if not doomed:
        return

    for record in doomed:
        await session.delete(record)

    _LOGGER.info(
        "Deleted %d conversion records for PMS reservation %s",
        len(doomed),
        pms_reservation_id,
    )
|
|
|
|
async def _process_reservation(
    self, reservation_elem: ET.Element, session: AsyncSession | None = None
) -> dict[str, int]:
    """Process a single reservation element and its daily sales.

    Steps, in order:
    1. Read reservation, guest and advertising attributes from the element.
    2. Upsert the Conversion identified by the PMS reservation id.
    3. Get or create the ConversionGuest and link it to the conversion.
    4. Upsert one ConversionRoom per ``roomReservation`` child, keyed by
       ``"<pms_reservation_id>_<roomNumber>"``, including its daily sales
       and summed ``revenueTotal``.
    5. Try to match the conversion to a Reservation / Customer /
       HashedCustomer using the hashed guest data.

    Malformed dates, adult counts and revenue values are logged and treated
    as missing; they do not abort the reservation.

    Args:
        reservation_elem: XML element to process
        session: AsyncSession to use. If None, uses self.session.
            In concurrent mode, each task passes its own session.

    Returns:
        Statistics for this reservation: ``daily_sales_count`` plus the
        ``matched_to_*`` / ``unmatched`` counters (each 0 or 1). When the
        element has no ``roomReservations`` child, nothing is written and
        all counters are 0.
    """
    # Local import: the module only imports Decimal from decimal.
    from decimal import InvalidOperation

    def parse_date(raw: str | None, warning: str):
        """Parse a YYYY-MM-DD string; log ``warning`` and return None if invalid."""
        if not raw:
            return None
        try:
            return datetime.strptime(raw, "%Y-%m-%d").date()
        except ValueError:
            _LOGGER.warning(warning, raw)
            return None

    if session is None:
        session = self.session
    stats = {
        "daily_sales_count": 0,
        "matched_to_reservation": 0,
        "matched_to_customer": 0,
        "matched_to_hashed_customer": 0,
        "unmatched": 0,
    }

    # Extract reservation metadata
    hotel_id = reservation_elem.get("hotelID")
    pms_reservation_id = reservation_elem.get("id")
    reservation_number = reservation_elem.get("number")
    reservation_date_str = reservation_elem.get("date")
    creation_time_str = reservation_elem.get("creationTime")
    reservation_type = reservation_elem.get("type")
    booking_channel = reservation_elem.get("bookingChannel")

    # Extract guest information from guest element
    guest_elem = reservation_elem.find("guest")
    guest_first_name = None
    guest_last_name = None
    guest_email = None
    guest_country_code = None
    guest_birth_date_str = None
    guest_id = None

    if guest_elem is not None:
        guest_first_name = guest_elem.get("firstName")
        guest_last_name = guest_elem.get("lastName")
        guest_email = guest_elem.get("email", None)
        guest_country_code = guest_elem.get("countryCode", None)
        guest_birth_date_str = guest_elem.get("dateOfBirth", None)
        guest_id = guest_elem.get("id")

    # Guarded like every other date: a malformed birth date must not make
    # the whole reservation fail.
    guest_birth_date = parse_date(
        guest_birth_date_str, "Invalid guest birth date format: %s"
    )

    # Advertising/tracking data
    advertising_medium = reservation_elem.get("advertisingMedium")
    advertising_partner = reservation_elem.get("advertisingPartner")
    advertising_campagne = reservation_elem.get("advertisingCampagne")

    # Parse dates
    reservation_date = parse_date(
        reservation_date_str, "Invalid reservation date format: %s"
    )

    creation_time = None
    if creation_time_str:
        try:
            creation_time = datetime.fromisoformat(
                creation_time_str.replace("Z", "+00:00")
            )
        except ValueError:
            _LOGGER.warning("Invalid creation time format: %s", creation_time_str)

    # Process all room reservations
    room_reservations = reservation_elem.find("roomReservations")
    if room_reservations is None:
        _LOGGER.debug(
            "No roomReservations found for reservation %s", pms_reservation_id
        )
        return stats

    # Check if conversion already exists (upsert logic)
    existing_result = await session.execute(
        select(Conversion).where(
            Conversion.pms_reservation_id == pms_reservation_id
        )
    )
    existing_conversion = existing_result.scalar_one_or_none()

    # NOTE(review): timestamps written below are naive local time, whereas
    # _get_or_create_conversion_guest uses datetime.now(UTC) - confirm which
    # one the columns expect before unifying.
    if existing_conversion:
        # Update existing conversion (matching will be done later)
        # NOTE(review): hotel_id and guest_id are not refreshed on update -
        # confirm this is intentional.
        existing_conversion.reservation_number = reservation_number
        existing_conversion.reservation_date = reservation_date
        existing_conversion.creation_time = creation_time
        existing_conversion.reservation_type = reservation_type
        existing_conversion.booking_channel = booking_channel
        existing_conversion.guest_first_name = guest_first_name
        existing_conversion.guest_last_name = guest_last_name
        existing_conversion.guest_email = guest_email
        existing_conversion.guest_country_code = guest_country_code
        existing_conversion.guest_birth_date = guest_birth_date
        existing_conversion.advertising_medium = advertising_medium
        existing_conversion.advertising_partner = advertising_partner
        existing_conversion.advertising_campagne = advertising_campagne
        existing_conversion.updated_at = datetime.now()
        conversion = existing_conversion
        _LOGGER.info(
            "Updated conversion %s (pms_id=%s)",
            conversion.id,
            pms_reservation_id,
        )
    else:
        # Create new conversion entry (without matching - will be done later)
        conversion = Conversion(
            # Links to existing entities (nullable, will be filled in after matching)
            reservation_id=None,
            customer_id=None,
            hashed_customer_id=None,
            # Reservation metadata
            hotel_id=hotel_id,
            pms_reservation_id=pms_reservation_id,
            reservation_number=reservation_number,
            reservation_date=reservation_date,
            creation_time=creation_time,
            reservation_type=reservation_type,
            booking_channel=booking_channel,
            # Guest information
            guest_first_name=guest_first_name,
            guest_last_name=guest_last_name,
            guest_email=guest_email,
            guest_country_code=guest_country_code,
            guest_birth_date=guest_birth_date,
            guest_id=guest_id,
            # Advertising data
            advertising_medium=advertising_medium,
            advertising_partner=advertising_partner,
            advertising_campagne=advertising_campagne,
            # Metadata
            created_at=datetime.now(),
            updated_at=datetime.now(),
        )
        session.add(conversion)
        _LOGGER.debug(
            "Created conversion (pms_id=%s)",
            pms_reservation_id,
        )

    # Flush to ensure conversion has an ID before creating room reservations
    await session.flush()

    # Create or update ConversionGuest and link it to the conversion
    conversion_guest = await self._get_or_create_conversion_guest(
        hotel_id=hotel_id,
        guest_id=guest_id,
        guest_first_name=guest_first_name,
        guest_last_name=guest_last_name,
        guest_email=guest_email,
        guest_country_code=guest_country_code,
        guest_birth_date=guest_birth_date,
        session=session,
    )
    if conversion_guest:
        conversion.conversion_guest_id = conversion_guest.id

    # Batch-load existing room reservations to avoid N+1 queries
    room_elements = room_reservations.findall("roomReservation")
    pms_hotel_reservation_ids = [
        f"{pms_reservation_id}_{rm.get('roomNumber')}" for rm in room_elements
    ]

    existing_rooms_result = await session.execute(
        select(ConversionRoom).where(
            ConversionRoom.pms_hotel_reservation_id.in_(pms_hotel_reservation_ids)
        )
    )
    existing_rooms = {
        room.pms_hotel_reservation_id: room
        for room in existing_rooms_result.scalars().all()
    }

    # Process room reservations
    for room_reservation in room_elements:
        # Extract room reservation details
        arrival_str = room_reservation.get("arrival")
        departure_str = room_reservation.get("departure")
        room_status = room_reservation.get("status")
        room_type = room_reservation.get("roomType")
        room_number = room_reservation.get("roomNumber")
        adults_str = room_reservation.get("adults")
        rate_plan_code = room_reservation.get("ratePlanCode")
        connected_room_type = room_reservation.get("connectedRoomType")

        arrival_date = parse_date(arrival_str, "Invalid arrival date format: %s")
        departure_date = parse_date(
            departure_str, "Invalid departure date format: %s"
        )

        num_adults = None
        if adults_str:
            try:
                num_adults = int(adults_str)
            except ValueError:
                _LOGGER.warning("Invalid adults value: %s", adults_str)

        # Create composite ID for upsert: pms_reservation_id + room_number
        # This allows updating the same room reservation if it appears again
        pms_hotel_reservation_id = f"{pms_reservation_id}_{room_number}"

        # Process daily sales and extract total revenue
        daily_sales_elem = room_reservation.find("dailySales")
        daily_sales_list = []
        total_revenue = Decimal(0)

        if daily_sales_elem is not None:
            for daily_sale in daily_sales_elem.findall("dailySale"):
                stats["daily_sales_count"] += 1

                # Extract daily sale data
                sale_date_str = daily_sale.get("date")
                daily_sale_obj = {}

                if sale_date_str:
                    daily_sale_obj["date"] = sale_date_str

                # Extract all revenue fields
                revenue_total_str = daily_sale.get("revenueTotal")
                if revenue_total_str:
                    daily_sale_obj["revenueTotal"] = revenue_total_str
                    try:
                        total_revenue += Decimal(revenue_total_str)
                    except (InvalidOperation, ValueError, TypeError):
                        # Decimal signals malformed input with
                        # InvalidOperation, not ValueError.
                        _LOGGER.warning(
                            "Invalid revenueTotal value: %s", revenue_total_str
                        )

                # Add other revenue fields if present
                for revenue_field in (
                    "revenueLogis",
                    "revenueBoard",
                    "revenueFB",
                    "revenueSpa",
                    "revenueOther",
                ):
                    revenue_value = daily_sale.get(revenue_field)
                    if revenue_value:
                        daily_sale_obj[revenue_field] = revenue_value

                if daily_sale_obj:  # Only add if has data
                    daily_sales_list.append(daily_sale_obj)

        # Check if room reservation already exists using batch-loaded data
        existing_room_reservation = existing_rooms.get(pms_hotel_reservation_id)

        if total_revenue > 0 and (
            guest_first_name is None
            and guest_last_name is None
            and guest_email is None
        ):
            _LOGGER.info(
                "Guest info missing but total revenue > 0 for PMS ID %s",
                pms_reservation_id,
            )

        if existing_room_reservation:
            # Update existing room reservation with all fields
            existing_room_reservation.arrival_date = arrival_date
            existing_room_reservation.departure_date = departure_date
            existing_room_reservation.room_status = room_status
            existing_room_reservation.room_type = room_type
            existing_room_reservation.num_adults = num_adults
            existing_room_reservation.rate_plan_code = rate_plan_code
            existing_room_reservation.connected_room_type = connected_room_type
            existing_room_reservation.daily_sales = (
                daily_sales_list if daily_sales_list else None
            )
            existing_room_reservation.total_revenue = (
                total_revenue if total_revenue > 0 else None
            )
            existing_room_reservation.updated_at = datetime.now()
            _LOGGER.debug(
                "Updated room reservation %s (pms_id=%s, room=%s)",
                existing_room_reservation.id,
                pms_reservation_id,
                room_number,
            )
        else:
            # Create new room reservation
            room_reservation_record = ConversionRoom(
                conversion_id=conversion.id,
                pms_hotel_reservation_id=pms_hotel_reservation_id,
                arrival_date=arrival_date,
                departure_date=departure_date,
                room_status=room_status,
                room_type=room_type,
                room_number=room_number,
                num_adults=num_adults,
                rate_plan_code=rate_plan_code,
                connected_room_type=connected_room_type,
                daily_sales=daily_sales_list if daily_sales_list else None,
                total_revenue=total_revenue if total_revenue > 0 else None,
                created_at=datetime.now(),
                updated_at=datetime.now(),
            )
            session.add(room_reservation_record)
            _LOGGER.debug(
                "Created room reservation (pms_id=%s, room=%s, adults=%s)",
                pms_reservation_id,
                room_number,
                num_adults,
            )

    # Now that conversion, conversion_guest, and conversion_room records exist,
    # perform matching using hashed guest data from conversion_guest.
    # Matching always runs; it does not require advertising data.
    hashed_first_name = conversion_guest.hashed_first_name if conversion_guest else None
    hashed_last_name = conversion_guest.hashed_last_name if conversion_guest else None
    hashed_email = conversion_guest.hashed_email if conversion_guest else None

    match_result = await self._find_matching_entities(
        advertising_campagne,
        hotel_id,
        reservation_date,
        hashed_first_name,
        hashed_last_name,
        hashed_email,
        advertising_partner,
        session,
    )
    matched_reservation = match_result["reservation"]
    matched_customer = match_result["customer"]
    matched_hashed_customer = match_result["hashed_customer"]

    # Update the conversion with matched entities if found
    if matched_reservation or matched_customer or matched_hashed_customer:
        conversion.reservation_id = (
            matched_reservation.id if matched_reservation else None
        )
        conversion.customer_id = (
            matched_customer.id if matched_customer else None
        )
        conversion.hashed_customer_id = (
            matched_hashed_customer.id if matched_hashed_customer else None
        )
        conversion.updated_at = datetime.now()

    # Update stats for the conversion record
    if matched_reservation:
        stats["matched_to_reservation"] += 1
    if matched_customer:
        stats["matched_to_customer"] += 1
    if matched_hashed_customer:
        stats["matched_to_hashed_customer"] += 1
    if not any([matched_reservation, matched_customer, matched_hashed_customer]):
        stats["unmatched"] += 1

    return stats
|
|
|
|
async def _find_matching_entities(
|
|
self,
|
|
advertising_campagne: str,
|
|
hotel_id: str | None,
|
|
reservation_date: Any,
|
|
guest_first_name: str | None = None,
|
|
guest_last_name: str | None = None,
|
|
guest_email: str | None = None,
|
|
advertising_partner: str | None = None,
|
|
session: AsyncSession | None = None,
|
|
) -> dict[str, Any]:
|
|
"""Find matching Reservation, Customer, and HashedCustomer.
|
|
|
|
Uses two strategies:
|
|
1. Advertising data matching (fbclid/gclid/utm_campaign) with guest details fallback
|
|
2. If no advertising data match, falls back to email/name-based matching
|
|
|
|
Args:
|
|
advertising_campagne: Truncated tracking ID from conversion XML
|
|
hotel_id: Hotel ID for additional filtering
|
|
reservation_date: Reservation date for additional filtering
|
|
guest_first_name: Guest first name for matching
|
|
guest_last_name: Guest last name for matching
|
|
guest_email: Guest email for matching
|
|
advertising_partner: Partner info (matches utm_medium for additional filtering)
|
|
session: AsyncSession to use. If None, uses self.session.
|
|
|
|
Returns:
|
|
Dictionary with 'reservation', 'customer', and 'hashed_customer' keys
|
|
|
|
"""
|
|
if session is None:
|
|
session = self.session
|
|
result = {
|
|
"reservation": None,
|
|
"customer": None,
|
|
"hashed_customer": None,
|
|
}
|
|
|
|
# Strategy 1: Try to match by advertising data (fbclid/gclid/utm_campaign)
|
|
if advertising_campagne:
|
|
matched_reservation = await self._match_by_advertising(
|
|
advertising_campagne,
|
|
hotel_id,
|
|
guest_first_name,
|
|
guest_last_name,
|
|
guest_email,
|
|
advertising_partner,
|
|
session,
|
|
)
|
|
|
|
if matched_reservation:
|
|
result["reservation"] = matched_reservation
|
|
_LOGGER.info(
|
|
"Matched conversion by advertising data (advertisingCampagne=%s, hotel=%s)",
|
|
advertising_campagne,
|
|
hotel_id,
|
|
)
|
|
else:
|
|
_LOGGER.debug(
|
|
"No match found by advertising data (advertisingCampagne=%s), "
|
|
"falling back to email/name matching",
|
|
advertising_campagne,
|
|
)
|
|
|
|
# Strategy 2: If no advertising match, try email/name-based matching
|
|
if not result["reservation"] and (
|
|
guest_email or guest_first_name or guest_last_name
|
|
):
|
|
matched_reservation = await self._match_by_guest_details(
|
|
hotel_id, guest_first_name, guest_last_name, guest_email, session
|
|
)
|
|
|
|
if matched_reservation:
|
|
result["reservation"] = matched_reservation
|
|
_LOGGER.info(
|
|
"Matched conversion by guest details (name=%s %s, email=%s, hotel=%s)",
|
|
guest_first_name,
|
|
guest_last_name,
|
|
guest_email,
|
|
hotel_id,
|
|
)
|
|
else:
|
|
_LOGGER.debug(
|
|
"No match found by guest details (name=%s %s, email=%s)",
|
|
guest_first_name,
|
|
guest_last_name,
|
|
guest_email,
|
|
)
|
|
|
|
# If we found a reservation, get its customer and hashed_customer
|
|
if result["reservation"]:
|
|
if result["reservation"].customer_id:
|
|
customer_query = select(Customer).where(
|
|
Customer.id == result["reservation"].customer_id
|
|
)
|
|
customer_result = await session.execute(customer_query)
|
|
result["customer"] = customer_result.scalar_one_or_none()
|
|
|
|
# Get hashed customer
|
|
if result["customer"]:
|
|
hashed_query = select(HashedCustomer).where(
|
|
HashedCustomer.customer_id == result["customer"].id
|
|
)
|
|
hashed_result = await session.execute(hashed_query)
|
|
result["hashed_customer"] = hashed_result.scalar_one_or_none()
|
|
|
|
return result
|
|
|
|
async def _match_by_advertising(
|
|
self,
|
|
advertising_campagne: str,
|
|
hotel_id: str | None,
|
|
guest_first_name: str | None,
|
|
guest_last_name: str | None,
|
|
guest_email: str | None,
|
|
advertising_partner: str | None,
|
|
session: AsyncSession | None = None,
|
|
) -> Reservation | None:
|
|
"""Match reservation by advertising tracking data (fbclid/gclid/md5_unique_id).
|
|
|
|
Args:
|
|
advertising_campagne: Tracking ID from PMS (could be truncated click_id or md5_unique_id)
|
|
hotel_id: Hotel ID for filtering
|
|
guest_first_name: Guest first name for disambiguation
|
|
guest_last_name: Guest last name for disambiguation
|
|
guest_email: Guest email for disambiguation
|
|
advertising_partner: Partner info (matches utm_medium)
|
|
session: AsyncSession to use. If None, uses self.session.
|
|
|
|
Returns:
|
|
Matched Reservation or None
|
|
|
|
"""
|
|
if session is None:
|
|
session = self.session
|
|
# Find reservations where:
|
|
# - fbclid/gclid starts with the advertising_campagne value, OR
|
|
# - md5_unique_id matches exactly (for direct ID matching)
|
|
query = select(Reservation).where(
|
|
or_(
|
|
Reservation.fbclid.like(f"{advertising_campagne}%"),
|
|
Reservation.gclid.like(f"{advertising_campagne}%"),
|
|
Reservation.md5_unique_id == advertising_campagne,
|
|
)
|
|
)
|
|
|
|
# Eagerly load the customer relationship
|
|
query = query.options(selectinload(Reservation.customer))
|
|
|
|
# Add hotel filter if available
|
|
if hotel_id:
|
|
query = query.where(Reservation.hotel_code == hotel_id)
|
|
|
|
# Execute query
|
|
db_result = await session.execute(query)
|
|
reservations = db_result.scalars().all()
|
|
|
|
if not reservations:
|
|
return None
|
|
|
|
# If single match, return it
|
|
if len(reservations) == 1:
|
|
return reservations[0]
|
|
|
|
# If multiple matches, try to narrow down using guest details
|
|
_LOGGER.debug(
|
|
"Multiple reservations match advertisingCampagne %s (hotel=%s): found %d matches. "
|
|
"Attempting to narrow down using guest details.",
|
|
advertising_campagne,
|
|
hotel_id,
|
|
len(reservations),
|
|
)
|
|
|
|
matched_reservation = self._filter_reservations_by_guest_details(
|
|
reservations,
|
|
guest_first_name,
|
|
guest_last_name,
|
|
guest_email,
|
|
advertising_partner,
|
|
)
|
|
|
|
if matched_reservation is None:
|
|
# If we still can't narrow it down, use the first match and log warning
|
|
_LOGGER.warning(
|
|
"Could not narrow down multiple reservations for advertisingCampagne %s "
|
|
"(hotel=%s, guest=%s %s, email=%s). Using first match.",
|
|
advertising_campagne,
|
|
hotel_id,
|
|
guest_first_name,
|
|
guest_last_name,
|
|
guest_email,
|
|
)
|
|
matched_reservation = reservations[0]
|
|
|
|
return matched_reservation
|
|
|
|
async def _match_by_guest_details(
    self,
    hotel_id: str | None,
    guest_first_name: str | None,
    guest_last_name: str | None,
    guest_email: str | None,
    session: AsyncSession | None = None,
) -> Reservation | None:
    """Match reservation by guest name and email using cached data.

    This method uses the reservation cache populated at the start of XML
    processing. If the cache is not initialized, or holds no reservations for
    the requested hotel, it falls back to a database query. When cached
    reservations exist but none matches, None is returned without querying
    the database.

    The guest values are forwarded unchanged to
    ``_match_reservations_by_guest_details``, which compares them against the
    customers' hashed fields.

    Args:
        hotel_id: Hotel ID for filtering. If falsy, all hotels are searched.
        guest_first_name: Guest first name
        guest_last_name: Guest last name
        guest_email: Guest email
        session: AsyncSession to use. If None, uses self.session.

    Returns:
        Matched Reservation or None

    """
    # Try to use cache first
    if self._cache_initialized and self._reservation_cache:
        if hotel_id:
            cached_pairs = self._reservation_cache.get(hotel_id, [])
        else:
            # If no hotel_id specified, use all cached reservations
            cached_pairs = [
                pair
                for hotel_pairs in self._reservation_cache.values()
                for pair in hotel_pairs
            ]
        cached_reservations = [reservation for reservation, _ in cached_pairs]

        if cached_reservations:
            _LOGGER.debug(
                "Using cached reservations for matching (hotel=%s, count=%d)",
                hotel_id,
                len(cached_reservations),
            )
            return self._match_reservations_by_guest_details(
                cached_reservations,
                guest_first_name,
                guest_last_name,
                guest_email,
            )

    # Fallback: Query database if cache is not available or empty
    _LOGGER.debug(
        "Cache unavailable or empty, falling back to database query (hotel=%s)",
        hotel_id,
    )

    # Only the fallback path needs a session.
    if session is None:
        session = self.session

    # Load customer and its hashed version up front: the matcher reads both,
    # and the eager load avoids lazy loading during matching.
    query = select(Reservation).options(
        selectinload(Reservation.customer).selectinload(Customer.hashed_version)
    )

    if hotel_id:
        query = query.where(Reservation.hotel_code == hotel_id)

    db_result = await session.execute(query)
    all_reservations = db_result.scalars().all()

    return self._match_reservations_by_guest_details(
        all_reservations, guest_first_name, guest_last_name, guest_email
    )
|
|
|
|
def _match_reservations_by_guest_details(
    self,
    reservations: list[Reservation],
    guest_first_name: str | None,
    guest_last_name: str | None,
    guest_email: str | None,
) -> Reservation | None:
    """Pick the best reservation from a list by hashed guest data (non-async).

    Every reservation is scored against the customer's hashed record: an
    equal email hash scores 3, otherwise an equal first-name and last-name
    hash pair scores 2. The first reservation with the highest score wins.
    Reservations without a customer, without a hashed version, or whose
    hashed version lacks a ``hashed_email`` attribute are skipped.

    The guest values are compared directly to the stored hashes, so they
    should be pre-hashed values from ConversionGuest.

    Args:
        reservations: List of reservations to search through
        guest_first_name: Pre-hashed guest first name (or None)
        guest_last_name: Pre-hashed guest last name (or None)
        guest_email: Pre-hashed guest email (or None)

    Returns:
        Matched Reservation or None

    """
    email_score = 3  # Highest score
    name_score = 2  # Medium-high score
    scored: list[tuple[Reservation, int]] = []

    for entry in reservations:
        owner = entry.customer
        if not owner:
            continue

        hashed = owner.hashed_version
        if not hashed:
            continue

        # Guard against hashed_version being an InstrumentedList or another
        # SQLAlchemy construct instead of a HashedCustomer row.
        if not hasattr(hashed, 'hashed_email'):
            _LOGGER.debug(
                "Customer %s has invalid hashed_version type: %s",
                owner.id,
                type(hashed),
            )
            continue

        email_hit = (
            guest_email
            and hashed.hashed_email
            and hashed.hashed_email == guest_email
        )

        if email_hit:
            # Email is the strongest signal
            _LOGGER.info(
                "Found exact email match (reservation_id=%s) via hash",
                entry.id,
            )
            scored.append((entry, email_score))
        elif guest_first_name and guest_last_name:
            # Both name parts must be supplied and both must agree
            given_ok = (
                hashed.hashed_given_name
                and hashed.hashed_given_name == guest_first_name
            )
            family_ok = (
                hashed.hashed_surname
                and hashed.hashed_surname == guest_last_name
            )
            if given_ok and family_ok:
                _LOGGER.info(
                    "Found exact name match (reservation_id=%s) via hash",
                    entry.id,
                )
                scored.append((entry, name_score))

    if not scored:
        return None

    # max() returns the first entry among equal top scores
    best, _ = max(scored, key=lambda pair: pair[1])
    return best
|
|
|
|
def _filter_reservations_by_guest_details(
    self,
    reservations: list[Reservation],
    guest_first_name: str | None,
    guest_last_name: str | None,
    guest_email: str | None,
    advertising_partner: str | None,
) -> Reservation | None:
    """Narrow candidate reservations down to one using guest details.

    Pass 1 runs only if at least one guest field is given. It returns the
    first reservation whose customer agrees, case-insensitively, with every
    supplied field (given_name, surname, email_address). Pass 2 runs if
    advertising_partner is given. It returns the first reservation whose
    utm_medium equals advertising_partner, case-insensitively.

    NOTE(review): values are compared with the plain Customer fields here,
    whereas ``_match_reservations_by_guest_details`` expects pre-hashed
    values - confirm which form callers pass.

    Args:
        reservations: List of candidate reservations
        guest_first_name: Guest first name
        guest_last_name: Guest last name
        guest_email: Guest email
        advertising_partner: Partner info (e.g., "Facebook_Mobile_Feed")

    Returns:
        Single best-match Reservation, or None if no good match found

    """

    def same_text(stored, wanted):
        # A missing stored value never matches; otherwise ignore case.
        return bool(stored) and stored.lower() == wanted.lower()

    # Pass 1: every supplied guest field has to agree with the customer
    if guest_first_name or guest_last_name or guest_email:
        for booking in reservations:
            guest = booking.customer
            if not guest:
                continue
            if guest_first_name and not same_text(guest.given_name, guest_first_name):
                continue
            if guest_last_name and not same_text(guest.surname, guest_last_name):
                continue
            if guest_email and not same_text(guest.email_address, guest_email):
                continue

            _LOGGER.debug(
                "Found exact match on guest name/email for %s %s",
                guest_first_name,
                guest_last_name,
            )
            return booking

    # Pass 2: advertising_partner against utm_medium
    if advertising_partner:
        for booking in reservations:
            if same_text(booking.utm_medium, advertising_partner):
                _LOGGER.debug(
                    "Found match on advertising_partner=%s matching utm_medium",
                    advertising_partner,
                )
                return booking

    # No single clear match found
    return None
|