2 Commits

Author SHA1 Message Date
Jonas Linter
0c37254317 Adding guests to conversion_import 2025-11-17 09:22:35 +01:00
Jonas Linter
9b82be9a6e Fixed export 2025-11-17 09:13:33 +01:00
7 changed files with 964563 additions and 10 deletions

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

46
format_xml.py Normal file
View File

@@ -0,0 +1,46 @@
#!/usr/bin/env python3
"""Format a large XML file for readability."""
import xml.dom.minidom
import sys
from pathlib import Path
def format_xml(input_path, output_path=None):
    """Pretty-print an XML file and write the result to disk.

    The input is read as raw bytes and handed to the parser undecoded, so the
    document is interpreted with the encoding named in its own XML declaration
    (or BOM) instead of being force-decoded as UTF-8.  The formatted output is
    always written as UTF-8.  Progress messages are printed to stderr.

    Args:
        input_path: Path of the XML file to format.
        output_path: Destination path.  When ``None``, the result is written
            next to the input file as ``<stem>_formatted<suffix>``.

    Exits the process with status 1 when ``input_path`` does not exist.
    Parse errors raised by ``xml.dom.minidom`` propagate to the caller.
    """
    input_file = Path(input_path)
    if not input_file.exists():
        print(f"Error: File {input_path} not found", file=sys.stderr)
        sys.exit(1)

    print(f"Reading {input_file.name}...", file=sys.stderr)
    # Bytes, not text: a hard-coded text codec would raise UnicodeDecodeError
    # on any document that legitimately declares a non-UTF-8 encoding.
    xml_content = input_file.read_bytes()

    print("Parsing XML...", file=sys.stderr)
    dom = xml.dom.minidom.parseString(xml_content)

    print("Formatting XML...", file=sys.stderr)
    pretty_xml = dom.toprettyxml(indent=" ")

    # Remove extra blank lines that toprettyxml adds
    pretty_xml = "\n".join(
        line for line in pretty_xml.split("\n") if line.strip()
    )

    if output_path is None:
        output_path = input_file.with_stem(input_file.stem + "_formatted")

    print(f"Writing formatted XML to {output_path}...", file=sys.stderr)
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(pretty_xml)

    print(f"Done! Formatted XML saved to {output_path}", file=sys.stderr)
if __name__ == "__main__":
    # CLI entry point: one required positional argument (the input file)
    # and one optional argument (the output file).  Any further arguments
    # are ignored.
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python format_xml.py <input_file> [output_file]", file=sys.stderr)
        sys.exit(1)

    source_arg = cli_args[0]
    if len(cli_args) >= 2:
        target_arg = cli_args[1]
    else:
        target_arg = None

    format_xml(source_arg, target_arg)

View File

@@ -7,6 +7,7 @@ import multiprocessing
import os
import traceback
import urllib.parse
import xml.dom.minidom
from collections import defaultdict
from datetime import date, datetime
from functools import partial
@@ -1323,8 +1324,17 @@ async def handle_xml_upload(
extension = Path(filename).suffix or ".xml"
log_filename = logs_dir / f"{base_filename}_{username}_{timestamp}{extension}"
# Save XML content to file
log_filename.write_text(xml_content, encoding="utf-8")
# Format and save XML content to file
try:
dom = xml.dom.minidom.parseString(xml_content)
pretty_xml = dom.toprettyxml(indent=" ")
# Remove extra blank lines that toprettyxml adds
pretty_xml = "\n".join([line for line in pretty_xml.split("\n") if line.strip()])
log_filename.write_text(pretty_xml, encoding="utf-8")
except Exception as e:
# If formatting fails, save the original content
_LOGGER.warning("Failed to format XML: %s. Saving unformatted.", str(e))
log_filename.write_text(xml_content, encoding="utf-8")
_LOGGER.info(
"XML file saved to %s by user %s (original: %s)",

View File

@@ -131,6 +131,19 @@ class ConversionService:
reservation_type = reservation_elem.get("type")
booking_channel = reservation_elem.get("bookingChannel")
# Extract guest information from guest element
guest_elem = reservation_elem.find("guest")
guest_first_name = None
guest_last_name = None
guest_email = None
guest_country_code = None
if guest_elem is not None:
guest_first_name = guest_elem.get("firstName")
guest_last_name = guest_elem.get("lastName")
guest_email = guest_elem.get("email")
guest_country_code = guest_elem.get("countryCode")
# Advertising/tracking data
advertising_medium = reservation_elem.get("advertisingMedium")
advertising_partner = reservation_elem.get("advertisingPartner")
@@ -159,14 +172,20 @@ class ConversionService:
"Invalid creation time format: %s", creation_time_str
)
# Find matching reservation, customer, and hashed_customer using advertising data
# Find matching reservation, customer, and hashed_customer using advertising data and guest details
matched_reservation = None
matched_customer = None
matched_hashed_customer = None
if advertising_campagne:
match_result = await self._find_matching_entities(
advertising_campagne, hotel_id, reservation_date
advertising_campagne,
hotel_id,
reservation_date,
guest_first_name,
guest_last_name,
guest_email,
advertising_partner,
)
matched_reservation = match_result["reservation"]
matched_customer = match_result["customer"]
@@ -250,6 +269,11 @@ class ConversionService:
creation_time=creation_time,
reservation_type=reservation_type,
booking_channel=booking_channel,
# Guest information
guest_first_name=guest_first_name,
guest_last_name=guest_last_name,
guest_email=guest_email,
guest_country_code=guest_country_code,
# Advertising data
advertising_medium=advertising_medium,
advertising_partner=advertising_partner,
@@ -295,16 +319,26 @@ class ConversionService:
advertising_campagne: str,
hotel_id: str | None,
reservation_date: Any,
guest_first_name: str | None = None,
guest_last_name: str | None = None,
guest_email: str | None = None,
advertising_partner: str | None = None,
) -> dict[str, Any]:
"""Find matching Reservation, Customer, and HashedCustomer using advertising data.
The advertisingCampagne field contains a truncated (64 char) version of
fbclid/gclid, so we use prefix matching.
fbclid/gclid, so we use prefix matching. When multiple matches exist,
uses guest details (first_name, last_name, email) and utm_medium
(matched against advertisingPartner) to narrow down to a single match.
Args:
advertising_campagne: Truncated tracking ID from conversion XML
hotel_id: Hotel ID for additional filtering
reservation_date: Reservation date for additional filtering
guest_first_name: Guest first name for disambiguation
guest_last_name: Guest last name for disambiguation
guest_email: Guest email for disambiguation
advertising_partner: Partner info (matches utm_medium for additional filtering)
Returns:
Dictionary with 'reservation', 'customer', and 'hashed_customer' keys
@@ -344,16 +378,39 @@ class ConversionService:
)
return result
# If multiple matches, try to narrow down using guest details and advertising_partner
if len(reservations) > 1:
_LOGGER.warning(
"Multiple reservations match advertisingCampagne %s (hotel=%s): found %d matches. Using first match.",
_LOGGER.debug(
"Multiple reservations match advertisingCampagne %s (hotel=%s): found %d matches. "
"Attempting to narrow down using guest details.",
advertising_campagne,
hotel_id,
len(reservations),
)
# Use the first matching reservation
matched_reservation = reservations[0]
matched_reservation = self._filter_reservations_by_guest_details(
reservations,
guest_first_name,
guest_last_name,
guest_email,
advertising_partner,
)
if matched_reservation is None:
# If we still can't narrow it down, use the first match and log warning
_LOGGER.warning(
"Could not narrow down multiple reservations for advertisingCampagne %s "
"(hotel=%s, guest=%s %s, email=%s). Using first match.",
advertising_campagne,
hotel_id,
guest_first_name,
guest_last_name,
guest_email,
)
matched_reservation = reservations[0]
else:
matched_reservation = reservations[0]
result["reservation"] = matched_reservation
# Get associated customer and hashed_customer
@@ -373,11 +430,91 @@ class ConversionService:
result["hashed_customer"] = hashed_result.scalar_one_or_none()
_LOGGER.info(
"Matched conversion to reservation_id=%s, customer_id=%s, hashed_customer_id=%s (advertisingCampagne=%s)",
"Matched conversion to reservation_id=%s, customer_id=%s, hashed_customer_id=%s "
"(advertisingCampagne=%s, guest=%s %s, email=%s)",
result["reservation"].id if result["reservation"] else None,
result["customer"].id if result["customer"] else None,
result["hashed_customer"].id if result["hashed_customer"] else None,
advertising_campagne,
guest_first_name,
guest_last_name,
guest_email,
)
return result
def _filter_reservations_by_guest_details(
    self,
    reservations: list[Reservation],
    guest_first_name: str | None,
    guest_last_name: str | None,
    guest_email: str | None,
    advertising_partner: str | None,
) -> Reservation | None:
    """Pick the best reservation among several candidates using guest details.

    Candidates are narrowed progressively:

    1. Keep the reservations whose customer matches every guest field that
       was provided (first name, last name, email).  If exactly one remains,
       it is returned.
    2. Otherwise compare ``advertising_partner`` with each remaining
       candidate's ``utm_medium`` and return the first one that matches.
       When step 1 left several guest matches, only those are considered,
       so the partner acts as a tie-breaker; when step 1 matched nothing,
       all reservations are considered.
    3. If the partner does not decide and step 1 found guest matches,
       the first guest match is returned.

    All comparisons ignore case and surrounding whitespace.  Blank or
    missing values are treated as "not provided".

    Args:
        reservations: List of candidate reservations
        guest_first_name: Guest first name
        guest_last_name: Guest last name
        guest_email: Guest email
        advertising_partner: Partner info (e.g., "Facebook_Mobile_Feed")

    Returns:
        Best-match Reservation, or None if no criterion matched any candidate
    """

    def _norm(value):
        # None/empty/whitespace-only all collapse to "" (== not provided).
        return value.strip().casefold() if value else ""

    wanted_first = _norm(guest_first_name)
    wanted_last = _norm(guest_last_name)
    wanted_email = _norm(guest_email)
    wanted_partner = _norm(advertising_partner)

    def _guest_matches(reservation):
        # A reservation without a customer can never match on guest details.
        customer = reservation.customer
        if not customer:
            return False
        if wanted_first and _norm(customer.given_name) != wanted_first:
            return False
        if wanted_last and _norm(customer.surname) != wanted_last:
            return False
        if wanted_email and _norm(customer.email_address) != wanted_email:
            return False
        return True

    candidates = reservations
    guest_matches = []

    # Try to narrow down by guest name and email
    if wanted_first or wanted_last or wanted_email:
        guest_matches = [r for r in reservations if _guest_matches(r)]
        if len(guest_matches) == 1:
            _LOGGER.debug(
                "Found exact match on guest name/email for %s %s",
                guest_first_name,
                guest_last_name,
            )
            return guest_matches[0]
        if guest_matches:
            # Several reservations share these guest details: keep only
            # them and let the advertising partner break the tie.
            candidates = guest_matches

    # Try to narrow down by advertising_partner matching utm_medium
    if wanted_partner:
        for reservation in candidates:
            if _norm(reservation.utm_medium) == wanted_partner:
                _LOGGER.debug(
                    "Found match on advertising_partner=%s matching utm_medium",
                    advertising_partner,
                )
                return reservation

    if guest_matches:
        # Partner could not disambiguate; fall back to the first guest match.
        _LOGGER.debug(
            "Found exact match on guest name/email for %s %s",
            guest_first_name,
            guest_last_name,
        )
        return guest_matches[0]

    # No single clear match found
    return None

View File

@@ -365,6 +365,12 @@ class Conversion(Base):
reservation_type = Column(String) # type attribute (e.g., "reservation")
booking_channel = Column(String) # bookingChannel attribute
# Guest information from reservation XML - used for matching
guest_first_name = Column(String, index=True) # firstName from guest element
guest_last_name = Column(String, index=True) # lastName from guest element
guest_email = Column(String, index=True) # email from guest element
guest_country_code = Column(String) # countryCode from guest element
# Advertising/tracking data - used for matching to existing reservations
advertising_medium = Column(
String, index=True

View File

@@ -308,6 +308,43 @@ async def _backfill_acked_requests_username(engine: AsyncEngine, config: dict[st
_LOGGER.info("Backfill complete: %d acknowledgements updated with username", total_updated)
async def migrate_add_guest_fields_to_conversions(engine: AsyncEngine) -> None:
    """Migration: add the guest detail columns to the ``conversions`` table.

    Requests four ``VARCHAR`` columns, one after another, through
    ``add_column_if_not_exists``:

    - guest_first_name
    - guest_last_name
    - guest_email
    - guest_country_code

    Each truthy result from the helper is counted as a newly added column,
    and the total is logged at the end.  Re-running is expected to be
    harmless as long as the helper skips columns that already exist.

    NOTE(review): this function only requests the columns themselves; if
    indexes on the guest fields are required for existing tables, confirm
    that they are created elsewhere.
    """
    _LOGGER.info("Running migration: add_guest_fields_to_conversions")

    guest_columns = (
        "guest_first_name",
        "guest_last_name",
        "guest_email",
        "guest_country_code",
    )

    added_count = 0
    for column_name in guest_columns:
        # Sequential awaits keep the original column order.
        if await add_column_if_not_exists(engine, "conversions", column_name, "VARCHAR"):
            added_count += 1

    if not added_count:
        _LOGGER.info("Migration add_guest_fields_to_conversions: No changes needed (already applied)")
    else:
        _LOGGER.info("Migration add_guest_fields_to_conversions: Added %d columns", added_count)
async def run_all_migrations(engine: AsyncEngine, config: dict[str, Any] | None = None) -> None:
"""Run all pending migrations.
@@ -325,6 +362,7 @@ async def run_all_migrations(engine: AsyncEngine, config: dict[str, Any] | None
await migrate_add_room_types(engine)
await migrate_add_advertising_account_ids(engine, config)
await migrate_add_username_to_acked_requests(engine, config)
await migrate_add_guest_fields_to_conversions(engine)
_LOGGER.info("Database migrations completed successfully")