Adding guests to conversion_import

This commit is contained in:
Jonas Linter
2025-11-17 09:22:35 +01:00
parent 9b82be9a6e
commit 0c37254317
6 changed files with 964551 additions and 8 deletions

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

46
format_xml.py Normal file
View File

@@ -0,0 +1,46 @@
#!/usr/bin/env python3
"""Format a large XML file for readability."""
import xml.dom.minidom
import sys
from pathlib import Path
def format_xml(input_path, output_path=None):
    """Pretty-print an XML file and write the result to disk.

    The input is handed to the parser as raw bytes so that the encoding
    named in the document's own XML declaration (or its BOM) is honoured.
    Decoding the file as UTF-8 up front would fail, or silently corrupt
    text, for documents declared in any other encoding.

    Progress and error messages are printed to stderr.

    Args:
        input_path: Path of the XML file to format.
        output_path: Destination path. When omitted, the result is written
            next to the input as ``<stem>_formatted<suffix>``.

    Raises:
        SystemExit: With status 1 when ``input_path`` does not exist.
        xml.parsers.expat.ExpatError: When the input is not well-formed XML.
    """
    input_file = Path(input_path)
    if not input_file.exists():
        print(f"Error: File {input_path} not found", file=sys.stderr)
        sys.exit(1)

    print(f"Reading {input_file.name}...", file=sys.stderr)
    # Bytes, not text: let the XML parser pick the encoding from the document.
    xml_content = input_file.read_bytes()

    print("Parsing XML...", file=sys.stderr)
    dom = xml.dom.minidom.parseString(xml_content)

    print("Formatting XML...", file=sys.stderr)
    pretty_xml = dom.toprettyxml(indent=" ")
    # toprettyxml re-emits the whitespace-only text nodes of the input on
    # lines of their own; drop every line that carries no content.
    pretty_xml = "\n".join(
        line for line in pretty_xml.split("\n") if line.strip()
    )

    if output_path is None:
        output_path = input_file.with_stem(input_file.stem + "_formatted")

    print(f"Writing formatted XML to {output_path}...", file=sys.stderr)
    # toprettyxml() without an ``encoding`` argument emits a declaration with
    # no encoding attribute, so the output must be written as UTF-8.
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(pretty_xml)

    print(f"Done! Formatted XML saved to {output_path}", file=sys.stderr)
if __name__ == "__main__":
    # Command-line entry point: format_xml.py <input_file> [output_file]
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python format_xml.py <input_file> [output_file]", file=sys.stderr)
        sys.exit(1)

    source_arg = cli_args[0]
    # The output path is optional; None makes format_xml derive one.
    target_arg = cli_args[1] if len(cli_args) > 1 else None
    format_xml(source_arg, target_arg)

View File

@@ -131,6 +131,19 @@ class ConversionService:
reservation_type = reservation_elem.get("type") reservation_type = reservation_elem.get("type")
booking_channel = reservation_elem.get("bookingChannel") booking_channel = reservation_elem.get("bookingChannel")
# Extract guest information from guest element
guest_elem = reservation_elem.find("guest")
guest_first_name = None
guest_last_name = None
guest_email = None
guest_country_code = None
if guest_elem is not None:
guest_first_name = guest_elem.get("firstName")
guest_last_name = guest_elem.get("lastName")
guest_email = guest_elem.get("email")
guest_country_code = guest_elem.get("countryCode")
# Advertising/tracking data # Advertising/tracking data
advertising_medium = reservation_elem.get("advertisingMedium") advertising_medium = reservation_elem.get("advertisingMedium")
advertising_partner = reservation_elem.get("advertisingPartner") advertising_partner = reservation_elem.get("advertisingPartner")
@@ -159,14 +172,20 @@ class ConversionService:
"Invalid creation time format: %s", creation_time_str "Invalid creation time format: %s", creation_time_str
) )
# Find matching reservation, customer, and hashed_customer using advertising data # Find matching reservation, customer, and hashed_customer using advertising data and guest details
matched_reservation = None matched_reservation = None
matched_customer = None matched_customer = None
matched_hashed_customer = None matched_hashed_customer = None
if advertising_campagne: if advertising_campagne:
match_result = await self._find_matching_entities( match_result = await self._find_matching_entities(
advertising_campagne, hotel_id, reservation_date advertising_campagne,
hotel_id,
reservation_date,
guest_first_name,
guest_last_name,
guest_email,
advertising_partner,
) )
matched_reservation = match_result["reservation"] matched_reservation = match_result["reservation"]
matched_customer = match_result["customer"] matched_customer = match_result["customer"]
@@ -250,6 +269,11 @@ class ConversionService:
creation_time=creation_time, creation_time=creation_time,
reservation_type=reservation_type, reservation_type=reservation_type,
booking_channel=booking_channel, booking_channel=booking_channel,
# Guest information
guest_first_name=guest_first_name,
guest_last_name=guest_last_name,
guest_email=guest_email,
guest_country_code=guest_country_code,
# Advertising data # Advertising data
advertising_medium=advertising_medium, advertising_medium=advertising_medium,
advertising_partner=advertising_partner, advertising_partner=advertising_partner,
@@ -295,16 +319,26 @@ class ConversionService:
advertising_campagne: str, advertising_campagne: str,
hotel_id: str | None, hotel_id: str | None,
reservation_date: Any, reservation_date: Any,
guest_first_name: str | None = None,
guest_last_name: str | None = None,
guest_email: str | None = None,
advertising_partner: str | None = None,
) -> dict[str, Any]: ) -> dict[str, Any]:
"""Find matching Reservation, Customer, and HashedCustomer using advertising data. """Find matching Reservation, Customer, and HashedCustomer using advertising data.
The advertisingCampagne field contains a truncated (64 char) version of The advertisingCampagne field contains a truncated (64 char) version of
fbclid/gclid, so we use prefix matching. fbclid/gclid, so we use prefix matching. When multiple matches exist,
uses guest details (first_name, last_name, email) and utm_medium
(matched against advertisingPartner) to narrow down to a single match.
Args: Args:
advertising_campagne: Truncated tracking ID from conversion XML advertising_campagne: Truncated tracking ID from conversion XML
hotel_id: Hotel ID for additional filtering hotel_id: Hotel ID for additional filtering
reservation_date: Reservation date for additional filtering reservation_date: Reservation date for additional filtering
guest_first_name: Guest first name for disambiguation
guest_last_name: Guest last name for disambiguation
guest_email: Guest email for disambiguation
advertising_partner: Partner info (matches utm_medium for additional filtering)
Returns: Returns:
Dictionary with 'reservation', 'customer', and 'hashed_customer' keys Dictionary with 'reservation', 'customer', and 'hashed_customer' keys
@@ -344,16 +378,39 @@ class ConversionService:
) )
return result return result
# If multiple matches, try to narrow down using guest details and advertising_partner
if len(reservations) > 1: if len(reservations) > 1:
_LOGGER.warning( _LOGGER.debug(
"Multiple reservations match advertisingCampagne %s (hotel=%s): found %d matches. Using first match.", "Multiple reservations match advertisingCampagne %s (hotel=%s): found %d matches. "
"Attempting to narrow down using guest details.",
advertising_campagne, advertising_campagne,
hotel_id, hotel_id,
len(reservations), len(reservations),
) )
# Use the first matching reservation matched_reservation = self._filter_reservations_by_guest_details(
matched_reservation = reservations[0] reservations,
guest_first_name,
guest_last_name,
guest_email,
advertising_partner,
)
if matched_reservation is None:
# If we still can't narrow it down, use the first match and log warning
_LOGGER.warning(
"Could not narrow down multiple reservations for advertisingCampagne %s "
"(hotel=%s, guest=%s %s, email=%s). Using first match.",
advertising_campagne,
hotel_id,
guest_first_name,
guest_last_name,
guest_email,
)
matched_reservation = reservations[0]
else:
matched_reservation = reservations[0]
result["reservation"] = matched_reservation result["reservation"] = matched_reservation
# Get associated customer and hashed_customer # Get associated customer and hashed_customer
@@ -373,11 +430,91 @@ class ConversionService:
result["hashed_customer"] = hashed_result.scalar_one_or_none() result["hashed_customer"] = hashed_result.scalar_one_or_none()
_LOGGER.info( _LOGGER.info(
"Matched conversion to reservation_id=%s, customer_id=%s, hashed_customer_id=%s (advertisingCampagne=%s)", "Matched conversion to reservation_id=%s, customer_id=%s, hashed_customer_id=%s "
"(advertisingCampagne=%s, guest=%s %s, email=%s)",
result["reservation"].id if result["reservation"] else None, result["reservation"].id if result["reservation"] else None,
result["customer"].id if result["customer"] else None, result["customer"].id if result["customer"] else None,
result["hashed_customer"].id if result["hashed_customer"] else None, result["hashed_customer"].id if result["hashed_customer"] else None,
advertising_campagne, advertising_campagne,
guest_first_name,
guest_last_name,
guest_email,
) )
return result return result
def _filter_reservations_by_guest_details(
    self,
    reservations: list[Reservation],
    guest_first_name: str | None,
    guest_last_name: str | None,
    guest_email: str | None,
    advertising_partner: str | None,
) -> Reservation | None:
    """Pick the best candidate reservation using guest details.

    Matching happens in two stages:

    1. Guest details. A reservation matches when its customer agrees with
       every guest field that was supplied (first name, last name, email).
       If exactly one reservation matches, it is returned.
    2. Advertising partner. The first remaining candidate whose
       ``utm_medium`` equals ``advertising_partner`` is returned. If stage 1
       found several guest matches, only those are considered here, so the
       partner acts as a tie-break among them.

    If several reservations matched the guest details and the partner could
    not separate them, the first guest match is returned.

    Comparisons ignore case (``str.casefold``) and surrounding whitespace.

    Args:
        reservations: List of candidate reservations
        guest_first_name: Guest first name
        guest_last_name: Guest last name
        guest_email: Guest email
        advertising_partner: Partner info (e.g., "Facebook_Mobile_Feed")

    Returns:
        Best-match Reservation, or None if neither the guest details nor the
        advertising partner matched any candidate.
    """

    def normalize(value):
        """Return a comparison key, or None when the value is empty/blank."""
        if not value:
            return None
        return value.strip().casefold() or None

    wanted_first = normalize(guest_first_name)
    wanted_last = normalize(guest_last_name)
    wanted_email = normalize(guest_email)
    wanted_partner = normalize(advertising_partner)

    def guest_matches(reservation) -> bool:
        """True when the customer agrees with every supplied guest field."""
        customer = reservation.customer
        if not customer:
            return False
        checks = (
            (wanted_first, customer.given_name),
            (wanted_last, customer.surname),
            (wanted_email, customer.email_address),
        )
        return all(
            normalize(stored) == wanted
            for wanted, stored in checks
            if wanted is not None
        )

    candidates = list(reservations)
    narrowed_by_guest = False

    # Stage 1: narrow down by guest name and email.
    if wanted_first or wanted_last or wanted_email:
        guest_matched = [r for r in candidates if guest_matches(r)]
        if len(guest_matched) == 1:
            _LOGGER.debug(
                "Found exact match on guest name/email for %s %s",
                guest_first_name,
                guest_last_name,
            )
            return guest_matched[0]
        if guest_matched:
            # Ambiguous: keep only the guest matches and let the
            # advertising partner try to break the tie.
            candidates = guest_matched
            narrowed_by_guest = True

    # Stage 2: narrow down by advertising_partner matching utm_medium.
    if wanted_partner:
        for reservation in candidates:
            if normalize(reservation.utm_medium) == wanted_partner:
                _LOGGER.debug(
                    "Found match on advertising_partner=%s matching utm_medium",
                    advertising_partner,
                )
                return reservation

    if narrowed_by_guest:
        # Several reservations fit the guest equally well. Return the first
        # of them rather than None, so the result is still a guest match.
        _LOGGER.debug(
            "Multiple reservations match guest %s %s; using first guest match",
            guest_first_name,
            guest_last_name,
        )
        return candidates[0]

    # No match on guest details or advertising partner.
    return None

View File

@@ -365,6 +365,12 @@ class Conversion(Base):
reservation_type = Column(String) # type attribute (e.g., "reservation") reservation_type = Column(String) # type attribute (e.g., "reservation")
booking_channel = Column(String) # bookingChannel attribute booking_channel = Column(String) # bookingChannel attribute
# Guest information from reservation XML - used for matching
guest_first_name = Column(String, index=True) # firstName from guest element
guest_last_name = Column(String, index=True) # lastName from guest element
guest_email = Column(String, index=True) # email from guest element
guest_country_code = Column(String) # countryCode from guest element
# Advertising/tracking data - used for matching to existing reservations # Advertising/tracking data - used for matching to existing reservations
advertising_medium = Column( advertising_medium = Column(
String, index=True String, index=True

View File

@@ -308,6 +308,43 @@ async def _backfill_acked_requests_username(engine: AsyncEngine, config: dict[st
_LOGGER.info("Backfill complete: %d acknowledgements updated with username", total_updated) _LOGGER.info("Backfill complete: %d acknowledgements updated with username", total_updated)
async def migrate_add_guest_fields_to_conversions(engine: AsyncEngine) -> None:
    """Migration: add the guest information columns to the conversions table.

    Adds four VARCHAR columns holding guest details from the PMS XML:
    - guest_first_name
    - guest_last_name
    - guest_email
    - guest_country_code

    Each column goes through ``add_column_if_not_exists``, whose truthy
    result is counted as "column added". Re-running is expected to be a
    no-op on that basis.

    NOTE(review): this function only adds columns. It does not create any
    index itself - confirm whether indexes for these fields are created
    elsewhere.
    """
    _LOGGER.info("Running migration: add_guest_fields_to_conversions")

    guest_columns = (
        "guest_first_name",
        "guest_last_name",
        "guest_email",
        "guest_country_code",
    )

    added_count = 0
    for column_name in guest_columns:
        # Columns are processed one at a time, in declaration order.
        was_added = await add_column_if_not_exists(engine, "conversions", column_name, "VARCHAR")
        if was_added:
            added_count += 1

    if not added_count:
        _LOGGER.info("Migration add_guest_fields_to_conversions: No changes needed (already applied)")
        return

    _LOGGER.info("Migration add_guest_fields_to_conversions: Added %d columns", added_count)
async def run_all_migrations(engine: AsyncEngine, config: dict[str, Any] | None = None) -> None: async def run_all_migrations(engine: AsyncEngine, config: dict[str, Any] | None = None) -> None:
"""Run all pending migrations. """Run all pending migrations.
@@ -325,6 +362,7 @@ async def run_all_migrations(engine: AsyncEngine, config: dict[str, Any] | None
await migrate_add_room_types(engine) await migrate_add_room_types(engine)
await migrate_add_advertising_account_ids(engine, config) await migrate_add_advertising_account_ids(engine, config)
await migrate_add_username_to_acked_requests(engine, config) await migrate_add_username_to_acked_requests(engine, config)
await migrate_add_guest_fields_to_conversions(engine)
_LOGGER.info("Database migrations completed successfully") _LOGGER.info("Database migrations completed successfully")