Adding guests to conversion_import

Fixed export
2025-11-17 09:22:35 +01:00 · 2025-11-17 09:13:33 +01:00
7 changed files with 964563 additions and 10 deletions
--- a/examples/Reservierungen_bemelman_20251117_064824.xml
+++ b/examples/Reservierungen_bemelman_20251117_064824.xml
--- a/examples/formatted_reservierungen.xml
+++ b/examples/formatted_reservierungen.xml
--- a/format_xml.py
+++ b/format_xml.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python3
+"""Format a large XML file for readability."""
+
+import xml.dom.minidom
+import sys
+from pathlib import Path
+
+def format_xml(input_path, output_path=None):
+    """Pretty-print an XML file with two-space indentation.
+
+    Progress messages are written to stderr. The result is always written
+    as UTF-8.
+
+    Args:
+        input_path: Path (str or Path) of the XML file to read.
+        output_path: Destination path. When None, the output is written next
+            to the input as ``<stem>_formatted<suffix>``.
+
+    Raises:
+        SystemExit: With status 1 when ``input_path`` does not exist.
+        xml.parsers.expat.ExpatError: When the input is not well-formed XML.
+    """
+    input_file = Path(input_path)
+
+    if not input_file.exists():
+        print(f"Error: File {input_path} not found", file=sys.stderr)
+        sys.exit(1)
+
+    print(f"Reading {input_file.name}...", file=sys.stderr)
+    # Read raw bytes instead of decoding as UTF-8 ourselves: the parser then
+    # honours the encoding named in the XML declaration (e.g. ISO-8859-1),
+    # which a hard-coded text-mode read would reject or mis-decode.
+    xml_bytes = input_file.read_bytes()
+
+    print("Parsing XML...", file=sys.stderr)
+    dom = xml.dom.minidom.parseString(xml_bytes)
+
+    print("Formatting XML...", file=sys.stderr)
+    pretty_xml = dom.toprettyxml(indent="  ")
+
+    # toprettyxml keeps whitespace-only text nodes from already-indented
+    # input, which show up as blank lines; drop them.
+    pretty_xml = "\n".join(line for line in pretty_xml.split("\n") if line.strip())
+
+    if output_path is None:
+        output_path = input_file.with_stem(input_file.stem + "_formatted")
+
+    print(f"Writing formatted XML to {output_path}...", file=sys.stderr)
+    # toprettyxml() without an encoding argument emits a declaration with no
+    # encoding attribute, so the file must be written as UTF-8 (XML default).
+    with open(output_path, "w", encoding="utf-8") as f:
+        f.write(pretty_xml)
+
+    print(f"Done! Formatted XML saved to {output_path}", file=sys.stderr)
+
+if __name__ == "__main__":
+    # Command-line entry point: one required input path and an optional
+    # output path; anything less prints the usage line and exits with 1.
+    cli_args = sys.argv[1:]
+    if not cli_args:
+        print("Usage: python format_xml.py <input_file> [output_file]", file=sys.stderr)
+        sys.exit(1)
+
+    input_file = cli_args[0]
+    # Without a second argument format_xml derives the output name itself.
+    output_file = None if len(cli_args) < 2 else cli_args[1]
+
+    format_xml(input_file, output_file)
--- a/src/alpine_bits_python/api.py
+++ b/src/alpine_bits_python/api.py
@@ -7,6 +7,7 @@ import multiprocessing
 import os
 import traceback
 import urllib.parse
+import xml.dom.minidom
 from collections import defaultdict
 from datetime import date, datetime
 from functools import partial
@@ -1323,8 +1324,17 @@ async def handle_xml_upload(
        extension = Path(filename).suffix or ".xml"
        log_filename = logs_dir / f"{base_filename}_{username}_{timestamp}{extension}"

-        # Save XML content to file
-        log_filename.write_text(xml_content, encoding="utf-8")
+        # Format and save XML content to file
+        try:
+            dom = xml.dom.minidom.parseString(xml_content)
+            pretty_xml = dom.toprettyxml(indent="  ")
+            # Remove extra blank lines that toprettyxml adds
+            pretty_xml = "\n".join([line for line in pretty_xml.split("\n") if line.strip()])
+            log_filename.write_text(pretty_xml, encoding="utf-8")
+        except Exception as e:
+            # If formatting fails, save the original content
+            _LOGGER.warning("Failed to format XML: %s. Saving unformatted.", str(e))
+            log_filename.write_text(xml_content, encoding="utf-8")

        _LOGGER.info(
            "XML file saved to %s by user %s (original: %s)",
--- a/src/alpine_bits_python/conversion_service.py
+++ b/src/alpine_bits_python/conversion_service.py
@@ -131,6 +131,19 @@ class ConversionService:
        reservation_type = reservation_elem.get("type")
        booking_channel = reservation_elem.get("bookingChannel")

+        # Extract guest information from guest element
+        guest_elem = reservation_elem.find("guest")
+        guest_first_name = None
+        guest_last_name = None
+        guest_email = None
+        guest_country_code = None
+
+        if guest_elem is not None:
+            guest_first_name = guest_elem.get("firstName")
+            guest_last_name = guest_elem.get("lastName")
+            guest_email = guest_elem.get("email")
+            guest_country_code = guest_elem.get("countryCode")
+
        # Advertising/tracking data
        advertising_medium = reservation_elem.get("advertisingMedium")
        advertising_partner = reservation_elem.get("advertisingPartner")
@@ -159,14 +172,20 @@ class ConversionService:
                    "Invalid creation time format: %s", creation_time_str
                )

-        # Find matching reservation, customer, and hashed_customer using advertising data
+        # Find matching reservation, customer, and hashed_customer using advertising data and guest details
        matched_reservation = None
        matched_customer = None
        matched_hashed_customer = None

        if advertising_campagne:
            match_result = await self._find_matching_entities(
-                advertising_campagne, hotel_id, reservation_date
+                advertising_campagne,
+                hotel_id,
+                reservation_date,
+                guest_first_name,
+                guest_last_name,
+                guest_email,
+                advertising_partner,
            )
            matched_reservation = match_result["reservation"]
            matched_customer = match_result["customer"]
@@ -250,6 +269,11 @@ class ConversionService:
                    creation_time=creation_time,
                    reservation_type=reservation_type,
                    booking_channel=booking_channel,
+                    # Guest information
+                    guest_first_name=guest_first_name,
+                    guest_last_name=guest_last_name,
+                    guest_email=guest_email,
+                    guest_country_code=guest_country_code,
                    # Advertising data
                    advertising_medium=advertising_medium,
                    advertising_partner=advertising_partner,
@@ -295,16 +319,26 @@ class ConversionService:
        advertising_campagne: str,
        hotel_id: str | None,
        reservation_date: Any,
+        guest_first_name: str | None = None,
+        guest_last_name: str | None = None,
+        guest_email: str | None = None,
+        advertising_partner: str | None = None,
    ) -> dict[str, Any]:
        """Find matching Reservation, Customer, and HashedCustomer using advertising data.

        The advertisingCampagne field contains a truncated (64 char) version of
-        fbclid/gclid, so we use prefix matching.
+        fbclid/gclid, so we use prefix matching. When multiple matches exist,
+        uses guest details (first_name, last_name, email) and utm_medium
+        (matched against advertisingPartner) to narrow down to a single match.

        Args:
            advertising_campagne: Truncated tracking ID from conversion XML
            hotel_id: Hotel ID for additional filtering
            reservation_date: Reservation date for additional filtering
+            guest_first_name: Guest first name for disambiguation
+            guest_last_name: Guest last name for disambiguation
+            guest_email: Guest email for disambiguation
+            advertising_partner: Partner info (matches utm_medium for additional filtering)

        Returns:
            Dictionary with 'reservation', 'customer', and 'hashed_customer' keys
@@ -344,16 +378,39 @@ class ConversionService:
            )
            return result

+        # If multiple matches, try to narrow down using guest details and advertising_partner
        if len(reservations) > 1:
-            _LOGGER.warning(
-                "Multiple reservations match advertisingCampagne %s (hotel=%s): found %d matches. Using first match.",
+            _LOGGER.debug(
+                "Multiple reservations match advertisingCampagne %s (hotel=%s): found %d matches. "
+                "Attempting to narrow down using guest details.",
                advertising_campagne,
                hotel_id,
                len(reservations),
            )

-        # Use the first matching reservation
-        matched_reservation = reservations[0]
+            matched_reservation = self._filter_reservations_by_guest_details(
+                reservations,
+                guest_first_name,
+                guest_last_name,
+                guest_email,
+                advertising_partner,
+            )
+
+            if matched_reservation is None:
+                # If we still can't narrow it down, use the first match and log warning
+                _LOGGER.warning(
+                    "Could not narrow down multiple reservations for advertisingCampagne %s "
+                    "(hotel=%s, guest=%s %s, email=%s). Using first match.",
+                    advertising_campagne,
+                    hotel_id,
+                    guest_first_name,
+                    guest_last_name,
+                    guest_email,
+                )
+                matched_reservation = reservations[0]
+        else:
+            matched_reservation = reservations[0]
+
        result["reservation"] = matched_reservation

        # Get associated customer and hashed_customer
@@ -373,11 +430,91 @@ class ConversionService:
                result["hashed_customer"] = hashed_result.scalar_one_or_none()

        _LOGGER.info(
-            "Matched conversion to reservation_id=%s, customer_id=%s, hashed_customer_id=%s (advertisingCampagne=%s)",
+            "Matched conversion to reservation_id=%s, customer_id=%s, hashed_customer_id=%s "
+            "(advertisingCampagne=%s, guest=%s %s, email=%s)",
            result["reservation"].id if result["reservation"] else None,
            result["customer"].id if result["customer"] else None,
            result["hashed_customer"].id if result["hashed_customer"] else None,
            advertising_campagne,
+            guest_first_name,
+            guest_last_name,
+            guest_email,
        )

        return result
+
+    def _filter_reservations_by_guest_details(
+        self,
+        reservations: list[Reservation],
+        guest_first_name: str | None,
+        guest_last_name: str | None,
+        guest_email: str | None,
+        advertising_partner: str | None,
+    ) -> Reservation | None:
+        """Pick the reservation that best fits the guest details of a conversion.
+
+        Candidates are narrowed in two stages:
+
+        1. Guest identity: every supplied field (first name, last name, email)
+           must equal the matching field on ``reservation.customer``.
+        2. Advertising partner: ``reservation.utm_medium`` must equal
+           ``advertising_partner``.
+
+        All comparisons ignore case and surrounding whitespace; blank values
+        are treated as "not supplied".
+
+        A unique stage-1 match is returned immediately. When stage 1 leaves
+        several candidates, stage 2 acts as a tie-breaker among them and the
+        first stage-1 match is the fallback. When stage 1 matches nothing,
+        stage 2 is applied to all reservations.
+
+        Args:
+            reservations: List of candidate reservations
+            guest_first_name: Guest first name
+            guest_last_name: Guest last name
+            guest_email: Guest email
+            advertising_partner: Partner info (e.g., "Facebook_Mobile_Feed")
+
+        Returns:
+            The best-matching Reservation, or None if neither stage matched
+            any candidate.
+        """
+
+        def _norm(value: str | None) -> str | None:
+            """Return *value* stripped and case-folded, or None when blank."""
+            if not value:
+                return None
+            return value.strip().casefold() or None
+
+        candidates = list(reservations)
+
+        # Stage 1: narrow down by guest name and email.
+        expected = (
+            _norm(guest_first_name),
+            _norm(guest_last_name),
+            _norm(guest_email),
+        )
+        guest_matches: list[Reservation] = []
+        if any(expected):
+            for reservation in candidates:
+                # NOTE(review): this reads the ``customer`` relationship
+                # synchronously - confirm the calling query eager-loads it.
+                customer = reservation.customer
+                if not customer:
+                    continue
+                actual = (
+                    customer.given_name,
+                    customer.surname,
+                    customer.email_address,
+                )
+                # Only fields present in the conversion take part in the match.
+                if all(
+                    want is None or _norm(have) == want
+                    for want, have in zip(expected, actual)
+                ):
+                    guest_matches.append(reservation)
+
+        if len(guest_matches) == 1:
+            _LOGGER.debug(
+                "Found exact match on guest name/email for %s %s",
+                guest_first_name,
+                guest_last_name,
+            )
+            return guest_matches[0]
+
+        if guest_matches:
+            # Several reservations share these guest details: keep only those
+            # and let the advertising partner break the tie.
+            candidates = guest_matches
+
+        # Stage 2: narrow down by advertising_partner matching utm_medium.
+        partner = _norm(advertising_partner)
+        if partner:
+            for reservation in candidates:
+                if _norm(reservation.utm_medium) == partner:
+                    _LOGGER.debug(
+                        "Found match on advertising_partner=%s matching utm_medium",
+                        advertising_partner,
+                    )
+                    return reservation
+
+        if guest_matches:
+            # Still ambiguous, but a guest-detail match beats an arbitrary
+            # candidate, so return the first one.
+            _LOGGER.debug(
+                "Guest details for %s %s match %d reservations; using the first",
+                guest_first_name,
+                guest_last_name,
+                len(guest_matches),
+            )
+            return guest_matches[0]
+
+        # No candidate matched on guest details or advertising partner.
+        return None
--- a/src/alpine_bits_python/db.py
+++ b/src/alpine_bits_python/db.py
@@ -365,6 +365,12 @@ class Conversion(Base):
    reservation_type = Column(String)  # type attribute (e.g., "reservation")
    booking_channel = Column(String)  # bookingChannel attribute

+    # Guest information from reservation XML - used for matching
+    guest_first_name = Column(String, index=True)  # firstName from guest element
+    guest_last_name = Column(String, index=True)  # lastName from guest element
+    guest_email = Column(String, index=True)  # email from guest element
+    guest_country_code = Column(String)  # countryCode from guest element
+
    # Advertising/tracking data - used for matching to existing reservations
    advertising_medium = Column(
        String, index=True
--- a/src/alpine_bits_python/migrations.py
+++ b/src/alpine_bits_python/migrations.py
@@ -308,6 +308,43 @@ async def _backfill_acked_requests_username(engine: AsyncEngine, config: dict[st
    _LOGGER.info("Backfill complete: %d acknowledgements updated with username", total_updated)


+async def migrate_add_guest_fields_to_conversions(engine: AsyncEngine) -> None:
+    """Migration: ensure the guest columns exist on the conversions table.
+
+    Requests the following VARCHAR columns, one after another:
+    - guest_first_name
+    - guest_last_name
+    - guest_email
+    - guest_country_code
+
+    The number of columns reported as added is logged at the end.
+
+    Re-running relies on add_column_if_not_exists skipping columns that are
+    already present.
+
+    NOTE(review): the Conversion model declares indexes on the name and email
+    columns, but this function only calls add_column_if_not_exists - confirm
+    whether those indexes get created on already-existing databases.
+    """
+    _LOGGER.info("Running migration: add_guest_fields_to_conversions")
+
+    guest_columns = (
+        "guest_first_name",
+        "guest_last_name",
+        "guest_email",
+        "guest_country_code",
+    )
+
+    added_count = 0
+    # Awaited sequentially, in the listed order.
+    for column_name in guest_columns:
+        was_added = await add_column_if_not_exists(
+            engine, "conversions", column_name, "VARCHAR"
+        )
+        if was_added:
+            added_count += 1
+
+    if added_count <= 0:
+        _LOGGER.info("Migration add_guest_fields_to_conversions: No changes needed (already applied)")
+    else:
+        _LOGGER.info("Migration add_guest_fields_to_conversions: Added %d columns", added_count)
+
+
 async def run_all_migrations(engine: AsyncEngine, config: dict[str, Any] | None = None) -> None:
    """Run all pending migrations.

@@ -325,6 +362,7 @@ async def run_all_migrations(engine: AsyncEngine, config: dict[str, Any] | None
        await migrate_add_room_types(engine)
        await migrate_add_advertising_account_ids(engine, config)
        await migrate_add_username_to_acked_requests(engine, config)
+        await migrate_add_guest_fields_to_conversions(engine)

        _LOGGER.info("Database migrations completed successfully")
Author	SHA1	Message	Date
Jonas Linter	0c37254317	Adding guests to conversion_import	2025-11-17 09:22:35 +01:00
Jonas Linter	9b82be9a6e	Fixed export	2025-11-17 09:13:33 +01:00