From 45452ac918d03110557b9a4be72f3be283b4bd65 Mon Sep 17 00:00:00 2001 From: Jonas Linter <{email_address}> Date: Wed, 19 Nov 2025 14:49:42 +0100 Subject: [PATCH] Significant refactorings --- src/alpine_bits_python/conversion_service.py | 176 ++++++++++++++----- src/alpine_bits_python/db.py | 8 +- 2 files changed, 132 insertions(+), 52 deletions(-) diff --git a/src/alpine_bits_python/conversion_service.py b/src/alpine_bits_python/conversion_service.py index c6ccfea..1b4b49a 100644 --- a/src/alpine_bits_python/conversion_service.py +++ b/src/alpine_bits_python/conversion_service.py @@ -515,20 +515,30 @@ class ConversionService: existing_conversion = existing_result.scalar_one_or_none() if existing_conversion: - # Update existing conversion (matching will be done later) + # Update existing conversion - only update reservation metadata and advertising data + # Don't overwrite guest info (will be handled by matching logic which uses hashed data) + # Don't clear reservation/customer links (matching logic will update if needed) existing_conversion.reservation_number = reservation_number existing_conversion.reservation_date = reservation_date existing_conversion.creation_time = creation_time existing_conversion.reservation_type = reservation_type existing_conversion.booking_channel = booking_channel - existing_conversion.guest_first_name = guest_first_name - existing_conversion.guest_last_name = guest_last_name - existing_conversion.guest_email = guest_email - existing_conversion.guest_country_code = guest_country_code - existing_conversion.guest_birth_date = guest_birth_date existing_conversion.advertising_medium = advertising_medium existing_conversion.advertising_partner = advertising_partner existing_conversion.advertising_campagne = advertising_campagne + + # Update guest info only if new data is provided (not None) + if guest_first_name: + existing_conversion.guest_first_name = guest_first_name + if guest_last_name: + existing_conversion.guest_last_name = guest_last_name + if guest_email: + existing_conversion.guest_email = guest_email + if guest_country_code: + existing_conversion.guest_country_code = guest_country_code + if guest_birth_date: + existing_conversion.guest_birth_date = guest_birth_date + existing_conversion.updated_at = datetime.now() conversion = existing_conversion _LOGGER.info( @@ -748,53 +758,112 @@ class ConversionService: ) # Now that conversion, conversion_guest, and conversion_room records exist, - # perform matching using hashed guest data from conversion_guest - matched_reservation = None - matched_customer = None - matched_hashed_customer = None + # perform matching using hashed guest data + match_stats = await self._match_conversion( + conversion, + guest_first_name, + guest_last_name, + guest_email, + advertising_campagne, + advertising_partner, + hotel_id, + reservation_date, + session, + ) - if advertising_campagne or True: - # Use hashed data from conversion_guest for matching - hashed_first_name = conversion_guest.hashed_first_name if conversion_guest else None - hashed_last_name = conversion_guest.hashed_last_name if conversion_guest else None - hashed_email = conversion_guest.hashed_email if conversion_guest else None + # Update stats + stats["matched_to_reservation"] = match_stats["matched_to_reservation"] + stats["matched_to_customer"] = match_stats["matched_to_customer"] + stats["matched_to_hashed_customer"] = match_stats["matched_to_hashed_customer"] + stats["unmatched"] = match_stats["unmatched"] - match_result = await self._find_matching_entities( - advertising_campagne, - hotel_id, - reservation_date, - hashed_first_name, - hashed_last_name, - hashed_email, - advertising_partner, - session, + return stats + + async def _match_conversion( + self, + conversion: Conversion, + guest_first_name: str | None, + guest_last_name: str | None, + guest_email: str | None, + advertising_campagne: str | None, + advertising_partner: str | None, + hotel_id: str | None, + reservation_date: Any, + session: AsyncSession | None = None, + ) -> dict[str, int]: + """Match a conversion to reservations and customers using guest and advertising data. + + This is the matching phase that runs AFTER conversion data has been stored. + It uses hashed guest data to match conversions to existing reservations/customers. + + Args: + conversion: The Conversion record to match + guest_first_name: Guest first name (will be hashed for matching) + guest_last_name: Guest last name (will be hashed for matching) + guest_email: Guest email (will be hashed for matching) + advertising_campagne: Advertising campaign identifier + advertising_partner: Advertising partner info + hotel_id: Hotel ID for filtering matches + reservation_date: Reservation date for additional filtering + session: AsyncSession to use for database queries + + Returns: + Dictionary with match statistics: matched_to_reservation, matched_to_customer, + matched_to_hashed_customer, and unmatched (all counts of 0 or 1) + """ + if session is None: + session = self.session + + stats = { + "matched_to_reservation": 0, + "matched_to_customer": 0, + "matched_to_hashed_customer": 0, + "unmatched": 0, + } + + # Hash guest data for matching (same hashing logic as ConversionGuest) + hashed_first_name = ConversionGuest._normalize_and_hash(guest_first_name) + hashed_last_name = ConversionGuest._normalize_and_hash(guest_last_name) + hashed_email = ConversionGuest._normalize_and_hash(guest_email) + + # Find matching entities + match_result = await self._find_matching_entities( + advertising_campagne, + hotel_id, + reservation_date, + hashed_first_name, + hashed_last_name, + hashed_email, + advertising_partner, + session, + ) + + matched_reservation = match_result["reservation"] + matched_customer = match_result["customer"] + matched_hashed_customer = match_result["hashed_customer"] + + # Update the conversion with matched entities if found + if matched_reservation or matched_customer or matched_hashed_customer: + conversion.reservation_id = ( + matched_reservation.id if matched_reservation else None ) - matched_reservation = match_result["reservation"] - matched_customer = match_result["customer"] - matched_hashed_customer = match_result["hashed_customer"] + conversion.customer_id = ( + matched_customer.id if matched_customer else None + ) + conversion.hashed_customer_id = ( + matched_hashed_customer.id if matched_hashed_customer else None + ) + conversion.updated_at = datetime.now() - # Update the conversion with matched entities if found - if matched_reservation or matched_customer or matched_hashed_customer: - conversion.reservation_id = ( - matched_reservation.id if matched_reservation else None - ) - conversion.customer_id = ( - matched_customer.id if matched_customer else None - ) - conversion.hashed_customer_id = ( - matched_hashed_customer.id if matched_hashed_customer else None - ) - conversion.updated_at = datetime.now() - - # Update stats for the conversion record + # Update stats if matched_reservation: - stats["matched_to_reservation"] += 1 + stats["matched_to_reservation"] = 1 if matched_customer: - stats["matched_to_customer"] += 1 + stats["matched_to_customer"] = 1 if matched_hashed_customer: - stats["matched_to_hashed_customer"] += 1 + stats["matched_to_hashed_customer"] = 1 if not any([matched_reservation, matched_customer, matched_hashed_customer]): - stats["unmatched"] += 1 + stats["unmatched"] = 1 return stats @@ -1028,11 +1097,20 @@ class ConversionService: # Get reservations from cache for this hotel if hotel_id and hotel_id in self._reservation_cache: - all_reservations = [res for res, _ in self._reservation_cache[hotel_id]] + # Extract reservations AND reattach their cached hashed_customer relationships + for reservation, hashed_customer in self._reservation_cache[hotel_id]: + if reservation.customer: + # Manually set the hashed_version from cache to ensure it's available + reservation.customer.hashed_version = hashed_customer + all_reservations.append(reservation) elif not hotel_id: # If no hotel_id specified, use all cached reservations for reservations_list in self._reservation_cache.values(): - all_reservations.extend([res for res, _ in reservations_list]) + for reservation, hashed_customer in reservations_list: + if reservation.customer: + # Manually set the hashed_version from cache to ensure it's available + reservation.customer.hashed_version = hashed_customer + all_reservations.append(reservation) if all_reservations: _LOGGER.debug( @@ -1117,8 +1195,10 @@ class ConversionService: if guest_email and hashed_customer.hashed_email: if hashed_customer.hashed_email == guest_email: _LOGGER.info( - "Found exact email match (reservation_id=%s) via hash", + "Found exact email match (reservation_id=%s, cust=%s, email=%s) via hash", reservation.id, + customer.email_address, + guest_email, ) candidates.append((reservation, 3)) # Highest score continue diff --git a/src/alpine_bits_python/db.py b/src/alpine_bits_python/db.py index b5c46bd..adb9651 100644 --- a/src/alpine_bits_python/db.py +++ b/src/alpine_bits_python/db.py @@ -22,7 +22,7 @@ from sqlalchemy.ext.asyncio import ( async_sessionmaker, create_async_engine, ) -from sqlalchemy.orm import declarative_base, relationship +from sqlalchemy.orm import backref, declarative_base, relationship from .logging_config import get_logger @@ -347,7 +347,7 @@ class HashedCustomer(Base): __tablename__ = "hashed_customers" id = Column(Integer, primary_key=True) customer_id = Column( - Integer, ForeignKey("customers.id"), unique=True, nullable=False + Integer, ForeignKey("customers.id", ondelete="SET NULL"), unique=True, nullable=True ) contact_id = Column(String, unique=True) # Keep unhashed for reference hashed_email = Column(String(64)) # SHA256 produces 64 hex chars @@ -361,7 +361,7 @@ class HashedCustomer(Base): hashed_birth_date = Column(String(64)) created_at = Column(DateTime(timezone=True)) - customer = relationship("Customer", backref="hashed_version") + customer = relationship("Customer", backref=backref("hashed_version", uselist=False, lazy="joined")) class ConversionGuest(Base): @@ -477,7 +477,7 @@ class ConversionGuest(Base): class Reservation(Base): __tablename__ = "reservations" id = Column(Integer, primary_key=True) - customer_id = Column(Integer, ForeignKey("customers.id")) + customer_id = Column(Integer, ForeignKey("customers.id", ondelete="SET NULL")) unique_id = Column(String, unique=True) md5_unique_id = Column(String(32), unique=True) # max length 32 guaranteed start_date = Column(Date)