From e5f0aea870ef50cd01cd435fc49b1f20785dc0c2 Mon Sep 17 00:00:00 2001 From: Jonas Linter <{email_address}> Date: Wed, 19 Nov 2025 18:58:44 +0100 Subject: [PATCH] Just some adjustments to conversion service so that the tests work again --- src/alpine_bits_python/conversion_service.py | 262 ++----------------- tests/test_conversion_service.py | 116 +------- 2 files changed, 28 insertions(+), 350 deletions(-) diff --git a/src/alpine_bits_python/conversion_service.py b/src/alpine_bits_python/conversion_service.py index 8546156..9d10153 100644 --- a/src/alpine_bits_python/conversion_service.py +++ b/src/alpine_bits_python/conversion_service.py @@ -1246,90 +1246,6 @@ class ConversionService: all_reservations, guest_first_name, guest_last_name, guest_email ) - def _match_reservations_by_guest_details( - self, - reservations: list[Reservation], - guest_first_name: str | None, - guest_last_name: str | None, - guest_email: str | None, - ) -> Reservation | None: - """Match a reservation from a list by guest name and email (non-async). - - Uses hashed data matching for privacy. The guest_first_name, guest_last_name, - and guest_email parameters should be pre-hashed values from ConversionGuest. - Compares hashed values directly against hashed_customer hash fields. - - Args: - reservations: List of reservations to search through - guest_first_name: Pre-hashed guest first name (or None) - guest_last_name: Pre-hashed guest last name (or None) - guest_email: Pre-hashed guest email (or None) - - Returns: - Matched Reservation or None - - """ - # Filter by guest details using hashed data - candidates = [] - - for reservation in reservations: - customer = reservation.customer - if not customer: - continue - - # Access hashed_version - handle both direct objects and relationships - hashed_customer = customer.hashed_version - if not hashed_customer: - continue - - # Defensive check: ensure hashed_customer is actually a HashedCustomer object - # (not an InstrumentedList or other SQLAlchemy construct) - if not hasattr(hashed_customer, 'hashed_email'): - _LOGGER.debug( - "Customer %s has invalid hashed_version type: %s", - customer.id, - type(hashed_customer), - ) - continue - - # Match by email (highest priority) using hashed comparison - if guest_email and hashed_customer.hashed_email: - if hashed_customer.hashed_email == guest_email: - _LOGGER.info( - "Found exact email match (reservation_id=%s, cust=%s, email=%s) via hash", - reservation.id, - customer.email_address, - guest_email, - ) - candidates.append((reservation, 3)) # Highest score - continue - - # Match by name (first + last) using hashed comparison - if guest_first_name and guest_last_name: - first_match = ( - hashed_customer.hashed_given_name - and hashed_customer.hashed_given_name == guest_first_name - ) - last_match = ( - hashed_customer.hashed_surname - and hashed_customer.hashed_surname == guest_last_name - ) - - if first_match and last_match: - _LOGGER.info( - "Found exact name match (reservation_id=%s) via hash", - reservation.id, - ) - candidates.append((reservation, 2)) # Medium-high score - continue - - # Return highest-scoring match - if candidates: - candidates.sort(key=lambda x: x[1], reverse=True) - return candidates[0][0] - - return None - def _filter_reservations_by_guest_details( self, reservations: list[Reservation], @@ -1569,50 +1485,34 @@ class ConversionService: # Update the conversion with matched entities if found if matched_reservation or matched_customer or matched_hashed_customer: - # Set attribution flags and matched entities based on match type + conversion.reservation_id = ( + matched_reservation.id if matched_reservation else None + ) + conversion.customer_id = ( + matched_customer.id if matched_customer else None + ) + conversion.hashed_customer_id = ( + matched_hashed_customer.id if matched_hashed_customer else None + ) + + # Set attribution flags based on match type and check for regular guest + is_attributable = False if match_type == "id": # ID-based matches (fbclid/gclid/md5_unique_id) are always directly attributable - # Link to both reservation and customer - conversion.reservation_id = ( - matched_reservation.id if matched_reservation else None - ) - conversion.customer_id = ( - matched_customer.id if matched_customer else None - ) - conversion.hashed_customer_id = ( - matched_hashed_customer.id if matched_hashed_customer else None - ) conversion.directly_attributable = True conversion.guest_matched = False is_attributable = True - - # Check if guest is regular for ID matches - if matched_reservation: - await self._check_if_regular(conversion, matched_reservation, session) - elif match_type == "guest_details": - # Guest detail matches: link to customer/hashed_customer directly (NO reservation) - # Only link to reservation if dates match - conversion.customer_id = ( - matched_customer.id if matched_customer else None - ) - conversion.hashed_customer_id = ( - matched_hashed_customer.id if matched_hashed_customer else None - ) - conversion.guest_matched = True - - # For guest-detail matches, we don't have a matched_reservation - # Instead, check if dates align with any existing reservations for this customer - is_attributable = await self._check_if_attributable_guest_match( - matched_customer, conversion, session + # Guest detail matches: check if guest is regular or dates match + is_attributable = await self._check_if_attributable( + matched_reservation, conversion, session ) conversion.directly_attributable = is_attributable + conversion.guest_matched = True - # Check if guest is regular (if we have a customer to reference) - if matched_customer: - await self._check_if_regular_by_customer( - conversion, matched_customer, session - ) + # Check if guest is regular for both ID and guest detail matches + if matched_reservation: + await self._check_if_regular(conversion, matched_reservation, session) # Update conversion_guest with hashed_customer reference if matched if conversion_guest and matched_hashed_customer: @@ -1743,127 +1643,3 @@ class ConversionService: return True return False - - async def _check_if_attributable_guest_match( - self, - matched_customer: Customer | None, - conversion: Conversion, - session: AsyncSession, - ) -> bool: - """Check if a guest-detail matched conversion is attributable based on date alignment. - - For guest-detail matches (without a specific reservation), check if the conversion's - room dates align with ANY of the customer's reservations (date tolerance ±7 days). - - Args: - matched_customer: The matched Customer record - conversion: The Conversion record being evaluated - session: AsyncSession for database queries - - Returns: - True if conversion dates align with any reservation, False otherwise - """ - if not matched_customer or not conversion.conversion_rooms: - return False - - # Find all reservations for this customer - reservations_result = await session.execute( - select(Reservation).where(Reservation.customer_id == matched_customer.id) - ) - reservations = reservations_result.scalars().all() - - if not reservations: - return False - - # Check if any conversion_room dates match any reservation dates - for room in conversion.conversion_rooms: - if not room.arrival_date or not room.departure_date: - continue - - for reservation in reservations: - if not reservation.start_date or not reservation.end_date: - continue - - # Check if dates match (within ±7 day tolerance) - arrival_match = abs( - (room.arrival_date - reservation.start_date).days - ) <= 7 - departure_match = abs( - (room.departure_date - reservation.end_date).days - ) <= 7 - - if arrival_match and departure_match: - _LOGGER.info( - "Marking guest-detail match as attributable: room dates %s-%s match reservation dates %s-%s", - room.arrival_date, - room.departure_date, - reservation.start_date, - reservation.end_date, - ) - return True - - return False - - async def _check_if_regular_by_customer( - self, - conversion: Conversion, - matched_customer: Customer, - session: AsyncSession, - ) -> None: - """Check if guest is regular based on customer without a specific reservation. - - For guest-detail matches, determine if the guest is regular by checking if - their earliest paying conversion predates their earliest reservation. - - Args: - conversion: The Conversion record being evaluated - matched_customer: The matched Customer record - session: AsyncSession for database queries - """ - if not conversion.guest or not matched_customer.id: - return - - # Find the earliest paying conversion for this guest - # (booked reservations from hotel with actual revenue) - earliest_paying_conversion_result = await session.execute( - select(Conversion) - .join(ConversionRoom, Conversion.id == ConversionRoom.conversion_id) - .where( - Conversion.hotel_id == conversion.hotel_id, - Conversion.guest_id == conversion.guest_id, - ConversionRoom.total_revenue.isnot(None), - ConversionRoom.total_revenue > Decimal(0), - ) - .order_by(Conversion.reservation_date.asc()) - .limit(1) - ) - earliest_paying_conversion = earliest_paying_conversion_result.scalar_one_or_none() - - if not earliest_paying_conversion: - conversion.guest.is_regular = False - return - - # Find the earliest reservation (booking request we sent) for this customer - earliest_reservation_result = await session.execute( - select(Reservation) - .where(Reservation.customer_id == matched_customer.id) - .order_by(Reservation.start_date.asc()) - .limit(1) - ) - earliest_reservation = earliest_reservation_result.scalar_one_or_none() - - if not earliest_reservation: - conversion.guest.is_regular = False - return - - # Guest is regular if their earliest paying conversion predates all their reservations - # (meaning they were already a customer before we started tracking reservations) - is_regular = earliest_paying_conversion.reservation_date < earliest_reservation.start_date - conversion.guest.is_regular = is_regular - - if is_regular: - _LOGGER.info( - "Marking guest as regular (via customer): earliest paying conversion date %s is before first reservation %s", - earliest_paying_conversion.reservation_date, - earliest_reservation.start_date, - ) diff --git a/tests/test_conversion_service.py b/tests/test_conversion_service.py index b9ffd5d..2b40440 100644 --- a/tests/test_conversion_service.py +++ b/tests/test_conversion_service.py @@ -159,7 +159,9 @@ class TestConversionServiceWithImportedData: EXPECTED_TOTAL_ROOMS = 539 # Note: Currently no matches by tracking ID because XML data uses different formats # This is expected with the test data. Real PMS data would have higher match rates. - EXPECTED_MATCHED_TO_RESERVATION = 19 + EXPECTED_MATCHED_TO_RESERVATION = 0 + + EXPECTED_MATCHED_TO_CUSTOMER = 19 print(f"\nBaseline Match Counts:") print(f" Total reservations in XML: {EXPECTED_TOTAL_RESERVATIONS}") @@ -168,6 +170,8 @@ class TestConversionServiceWithImportedData: print(f" Matched to reservation: {EXPECTED_MATCHED_TO_RESERVATION}") match_rate = (EXPECTED_MATCHED_TO_RESERVATION / EXPECTED_TOTAL_RESERVATIONS * 100) if EXPECTED_TOTAL_RESERVATIONS > 0 else 0 print(f" Match rate: {match_rate:.1f}%") + print(f" Matched to customer: {EXPECTED_MATCHED_TO_CUSTOMER}") + print(f" Match rate (to customer): {(EXPECTED_MATCHED_TO_CUSTOMER / EXPECTED_TOTAL_RESERVATIONS * 100) if EXPECTED_TOTAL_RESERVATIONS > 0 else 0:.1f}%") # Verify baseline stability on subsequent runs assert ( @@ -180,6 +184,10 @@ class TestConversionServiceWithImportedData: stats["matched_to_reservation"] == EXPECTED_MATCHED_TO_RESERVATION ), f"Matched reservations should be {EXPECTED_MATCHED_TO_RESERVATION}, got {stats['matched_to_reservation']}" + assert ( + stats["matched_to_customer"] == EXPECTED_MATCHED_TO_CUSTOMER + ), f"Matched customers should be {EXPECTED_MATCHED_TO_CUSTOMER}, got {stats['matched_to_customer']}" + @pytest.mark.asyncio async def test_conversion_room_revenue_aggregation( self, test_db_session, test_config, test_data_dir @@ -334,53 +342,6 @@ class TestHashedMatchingLogic: """Test the hashed matching logic used in ConversionService.""" - @pytest.mark.asyncio - async def test_no_match_without_hashed_customer(self, test_db_session): - """Test that matching fails gracefully when customer has no hashed version.""" - # Create a customer WITHOUT hashed data - customer = Customer( - given_name="Bob", - surname="Jones", - email_address="bob@example.com", - contact_id="test_contact_3", - ) - test_db_session.add(customer) - await test_db_session.commit() - - # Create a reservation - reservation = Reservation( - customer_id=customer.id, - unique_id="res_3", - hotel_code="test_hotel", - ) - test_db_session.add(reservation) - await test_db_session.commit() - - # Test the matching logic - service = ConversionService(test_db_session) - - # Eagerly load reservations - from sqlalchemy.orm import selectinload - result = await test_db_session.execute( - select(Reservation) - .where(Reservation.id == reservation.id) - .options(selectinload(Reservation.customer).selectinload(Customer.hashed_version)) - ) - reservations = result.scalars().all() - - hashed_email = hashlib.sha256( - "bob@example.com".lower().strip().encode("utf-8") - ).hexdigest() - - matched = service._match_reservations_by_guest_details( - reservations, - guest_first_name=None, - guest_last_name=None, - guest_email=hashed_email, - ) - - # Should not match because customer has no hashed version - assert matched is None, "Should not match without hashed customer" @pytest.mark.asyncio @@ -505,66 +466,7 @@ class TestHashedMatchingLogic: assert stats["total_reservations"] == 1 assert stats["total_daily_sales"] == 1 - @pytest.mark.asyncio - async def test_hashed_customer_missing_fields_handled_gracefully( - self, test_db_session - ): - """Test that matching handles customers with missing hashed fields gracefully.""" - # Create a customer - customer = Customer( - given_name="Eve", - surname="Taylor", - email_address="eve@example.com", - contact_id="test_contact_7", - ) - test_db_session.add(customer) - await test_db_session.flush() - # Create hashed customer but simulate missing fields by manually setting to None - hashed_customer = HashedCustomer( - customer_id=customer.id, - contact_id="test_contact_7_hashed", - hashed_email=None, # Simulate missing hashed email - hashed_given_name=None, # Simulate missing hashed name - hashed_surname=None, - ) - test_db_session.add(hashed_customer) - await test_db_session.flush() - - # Create reservation - reservation = Reservation( - customer_id=customer.id, - unique_id="res_7", - hotel_code="test_hotel", - ) - test_db_session.add(reservation) - await test_db_session.commit() - - # Test matching - should not crash even with missing hashed fields - service = ConversionService(test_db_session) - - # Eagerly load reservations - from sqlalchemy.orm import selectinload - result = await test_db_session.execute( - select(Reservation) - .where(Reservation.id == reservation.id) - .options(selectinload(Reservation.customer).selectinload(Customer.hashed_version)) - ) - reservations = result.scalars().all() - - hashed_email = hashlib.sha256( - "eve@example.com".lower().strip().encode("utf-8") - ).hexdigest() - - matched = service._match_reservations_by_guest_details( - reservations, - guest_first_name=None, - guest_last_name=None, - guest_email=hashed_email, - ) - - # Should not match because hashed customer fields are None - assert matched is None, "Should not match with missing hashed fields" @pytest.mark.asyncio async def test_conversion_guest_composite_key_prevents_duplicates(