From 1bdef3ee119c890aeb86466aae28917f9590059c Mon Sep 17 00:00:00 2001 From: Jonas Linter <{email_address}> Date: Wed, 3 Dec 2025 11:10:27 +0100 Subject: [PATCH] HashedCustomer is now no longer necessary --- src/alpine_bits_python/conversion_service.py | 53 ++++++++++---------- src/alpine_bits_python/customer_service.py | 6 +-- test_capi.py | 2 +- tests/test_conversion_service.py | 5 +- tests/test_customer_service.py | 40 +++++++++------ 5 files changed, 56 insertions(+), 50 deletions(-) diff --git a/src/alpine_bits_python/conversion_service.py b/src/alpine_bits_python/conversion_service.py index 43847f6..5a3de97 100644 --- a/src/alpine_bits_python/conversion_service.py +++ b/src/alpine_bits_python/conversion_service.py @@ -17,7 +17,6 @@ from .db import ( ConversionGuest, ConversionRoom, Customer, - HashedCustomer, Reservation, SessionMaker, ) @@ -95,11 +94,11 @@ class ConversionService: self.hotel_id = hotel_id # Cache for reservation and customer data within a single XML processing run - # Maps hotel_code -> list of (reservation, hashed_customer) tuples + # Maps hotel_code -> list of (reservation, customer) tuples # This significantly speeds up matching when processing large XML files # Uses hashed data for matching to preserve privacy self._reservation_cache: dict[ - str | None, list[tuple[Reservation, HashedCustomer | None]] + str | None, list[tuple[Reservation, Customer | None]] ] = {} self._cache_initialized = False @@ -1146,8 +1145,8 @@ class ConversionService: guest_last_name: str | None, guest_email: str | None, session: AsyncSession | None = None, - ) -> HashedCustomer | None: - """Match guest by name and email directly to HashedCustomer (no Reservation needed). + ) -> Customer | None: + """Match guest by name and email directly to Customer (no Reservation needed). This method bypasses the Reservation table entirely and matches directly against hashed customer data. Used for guest-detail matching where we don't need to link @@ -1160,23 +1159,23 @@ class ConversionService: session: AsyncSession to use. If None, uses self.session. Returns: - Matched HashedCustomer or None + Matched Customer or None """ if session is None: session = self.session # Query all hashed customers that match the guest details - query = select(HashedCustomer).options(selectinload(HashedCustomer.customer)) + query = select(Customer) # Build filter conditions conditions = [] if guest_email: - conditions.append(HashedCustomer.hashed_email == guest_email) + conditions.append(Customer.hashed_email == guest_email) if guest_first_name and guest_last_name: conditions.append( - (HashedCustomer.hashed_given_name == guest_first_name) - & (HashedCustomer.hashed_surname == guest_last_name) + (Customer.hashed_given_name == guest_first_name) + & (Customer.hashed_surname == guest_last_name) ) if not conditions: @@ -1288,10 +1287,10 @@ class ConversionService: async def _extract_unmatched_guests( self, session: AsyncSession - ) -> dict[str, HashedCustomer]: + ) -> dict[str, Customer]: """Phase 3b: Extract unique guests from unmatched conversions and match them to customers. - Returns a mapping of guest_id -> HashedCustomer for all unique guests found in + Returns a mapping of guest_id -> Customer for all unique guests found in unmatched conversions. Only processes each guest once. This includes: @@ -1303,7 +1302,7 @@ class ConversionService: session: AsyncSession for database queries Returns: - Dictionary mapping guest_id to matched HashedCustomer (or None if no match) + Dictionary mapping guest_id to matched Customer (or None if no match) """ # Find all conversions that either: @@ -1336,7 +1335,7 @@ class ConversionService: ) # Match each unique guest to a hashed customer - guest_to_hashed_customer: dict[str, HashedCustomer] = {} + guest_to_hashed_customer: dict[str, Customer] = {} for guest_id, conversion in unique_guests.items(): conversion_guest = conversion.guest if not conversion_guest: @@ -1365,7 +1364,7 @@ class ConversionService: async def _link_matched_guests_to_reservations( self, - guest_to_hashed_customer: dict[str, HashedCustomer], + guest_to_customer_dict: dict[str, Customer], session: AsyncSession, stats: dict[str, int], ) -> None: @@ -1383,13 +1382,13 @@ class ConversionService: by looking at whether they have paying conversions that predate any reservations. Args: - guest_to_hashed_customer: Mapping from guest_id to matched HashedCustomer + guest_to_customer: Mapping from guest_id to matched Customer session: AsyncSession for database queries stats: Shared stats dictionary to update """ - for guest_id, matched_hashed_customer in guest_to_hashed_customer.items(): - if not matched_hashed_customer or not matched_hashed_customer.customer_id: + for guest_id, matched_hashed_customer in guest_to_customer_dict.items(): + if not matched_hashed_customer or not matched_hashed_customer.id: continue # Find all conversions from this guest that don't have a reservation @@ -1414,7 +1413,7 @@ class ConversionService: "Phase 3c: Processing %d conversions for guest %s (customer_id=%d)", len(conversions), guest_id, - matched_hashed_customer.customer_id, + matched_hashed_customer.id, ) # Try to link each conversion to a reservation for this customer @@ -1423,7 +1422,7 @@ class ConversionService: matched_reservation, is_attributable, ) = await self._check_if_attributable( - matched_hashed_customer.customer_id, conversion, session + matched_hashed_customer.id, conversion, session ) if matched_reservation and is_attributable: @@ -1431,7 +1430,7 @@ class ConversionService: was_previously_matched = conversion.customer_id is not None conversion.reservation_id = matched_reservation.id - conversion.customer_id = matched_hashed_customer.customer_id + conversion.customer_id = matched_hashed_customer.id conversion.hashed_customer_id = matched_hashed_customer.id conversion.directly_attributable = True conversion.guest_matched = True @@ -1447,7 +1446,7 @@ class ConversionService: ) elif matched_hashed_customer and conversion.customer_id is None: # Only count new customer matches (conversions that didn't have a customer before) - conversion.customer_id = matched_hashed_customer.customer_id + conversion.customer_id = matched_hashed_customer.id conversion.hashed_customer_id = matched_hashed_customer.id conversion.directly_attributable = False conversion.guest_matched = True @@ -1458,7 +1457,7 @@ class ConversionService: # Look at ALL conversions from this guest to see if there are pre-dated payments if conversions and conversions[0].guest: await self._check_if_guest_is_regular( - guest_id, matched_hashed_customer.customer_id, session + guest_id, matched_hashed_customer.id, session ) async def _check_regularity_for_all_matched_guests( @@ -1503,15 +1502,15 @@ class ConversionService: # Get the customer ID from the hashed_customer hashed_customer_result = await session.execute( - select(HashedCustomer).where( - HashedCustomer.id == conversion_guest.hashed_customer_id + select(Customer).where( + Customer.id == conversion_guest.hashed_customer_id ) ) hashed_customer = hashed_customer_result.scalar_one_or_none() - if hashed_customer and hashed_customer.customer_id: + if hashed_customer and hashed_customer.id: await self._check_if_guest_is_regular( - conversion_guest.guest_id, hashed_customer.customer_id, session + conversion_guest.guest_id, hashed_customer.id, session ) async def _match_conversions_from_db_sequential( diff --git a/src/alpine_bits_python/customer_service.py b/src/alpine_bits_python/customer_service.py index ff73a83..80c4dc0 100644 --- a/src/alpine_bits_python/customer_service.py +++ b/src/alpine_bits_python/customer_service.py @@ -179,18 +179,18 @@ class CustomerService: # Create new customer (either no contact_id or customer doesn't exist) return await self.create_customer(customer_data, auto_commit=auto_commit) - async def get_hashed_customer(self, customer_id: int) -> HashedCustomer | None: + async def get_customer(self, customer_id: int) -> Customer | None: """Get the hashed version of a customer. Args: customer_id: The customer ID Returns: - HashedCustomer instance if found, None otherwise + Customer instance if found, None otherwise """ result = await self.session.execute( - select(HashedCustomer).where(HashedCustomer.customer_id == customer_id) + select(Customer).where(Customer.id == customer_id) ) return result.scalar_one_or_none() diff --git a/test_capi.py b/test_capi.py index 6eed501..1615991 100644 --- a/test_capi.py +++ b/test_capi.py @@ -59,7 +59,7 @@ async def load_test_data_from_db(): result = [] for reservation, customer in reservations_with_customers: # Get hashed customer data - hashed_customer = await customer_service.get_hashed_customer(customer.id) + hashed_customer = await customer_service.get_customer(customer.id) result.append( { diff --git a/tests/test_conversion_service.py b/tests/test_conversion_service.py index 8df6f34..c180198 100644 --- a/tests/test_conversion_service.py +++ b/tests/test_conversion_service.py @@ -740,9 +740,8 @@ class TestHashedMatchingLogic: test_db_session.add(customer) await test_db_session.flush() - hashed_customer = customer.create_hashed_customer() - test_db_session.add(hashed_customer) - await test_db_session.flush() + customer.update_hashed_fields() + reservation = Reservation( customer_id=customer.id, diff --git a/tests/test_customer_service.py b/tests/test_customer_service.py index 05376cf..5578e97 100644 --- a/tests/test_customer_service.py +++ b/tests/test_customer_service.py @@ -42,9 +42,9 @@ async def test_create_customer_creates_hashed_version(async_session: AsyncSessio assert customer.given_name == "John" # Check that hashed version was created - hashed = await service.get_hashed_customer(customer.id) + hashed = await service.get_customer(customer.id) assert hashed is not None - assert hashed.customer_id == customer.id + assert hashed.id == customer.id assert hashed.hashed_email is not None assert hashed.hashed_phone is not None assert hashed.hashed_given_name is not None @@ -66,7 +66,7 @@ async def test_update_customer_updates_hashed_version(async_session: AsyncSessio customer = await service.create_customer(customer_data) # Get initial hashed email - hashed = await service.get_hashed_customer(customer.id) + hashed = await service.get_customer(customer.id) original_hashed_email = hashed.hashed_email # Update customer email @@ -74,7 +74,7 @@ async def test_update_customer_updates_hashed_version(async_session: AsyncSessio updated_customer = await service.update_customer(customer, update_data) # Check that hashed version was updated - updated_hashed = await service.get_hashed_customer(updated_customer.id) + updated_hashed = await service.get_customer(updated_customer.id) assert updated_hashed.hashed_email != original_hashed_email @@ -95,7 +95,7 @@ async def test_get_or_create_customer_creates_new(async_session: AsyncSession): assert customer.contact_id == "new123" # Verify hashed version exists - hashed = await service.get_hashed_customer(customer.id) + hashed = await service.get_customer(customer.id) assert hashed is not None @@ -145,10 +145,13 @@ async def test_hash_existing_customers_backfills(async_session: AsyncSession): # Verify no hashed version exists result = await async_session.execute( - select(HashedCustomer).where(HashedCustomer.customer_id == customer.id) + select(Customer).where(Customer.id == customer.id) ) hashed = result.scalar_one_or_none() - assert hashed is None + assert hashed, "Customer should exist." + + assert hashed.hashed_given_name is None, "Hashed given name should be None." + assert hashed.hashed_email is None, "Hashed email should be None." # Run backfill service = CustomerService(async_session) @@ -158,11 +161,12 @@ async def test_hash_existing_customers_backfills(async_session: AsyncSession): # Verify hashed version now exists result = await async_session.execute( - select(HashedCustomer).where(HashedCustomer.customer_id == customer.id) + select(Customer).where(Customer.id == customer.id) ) hashed = result.scalar_one_or_none() - assert hashed is not None - assert hashed.hashed_email is not None + assert hashed is not None, "Customer should still exist after backfill." + assert hashed.hashed_email is not None, "Hashed email should be populated." + assert hashed.hashed_given_name is not None, "Hashed given name should be populated." @pytest.mark.asyncio @@ -201,7 +205,7 @@ async def test_hashing_normalization(async_session: AsyncSession): } customer = await service.create_customer(customer_data) - hashed = await service.get_hashed_customer(customer.id) + hashed = await service.get_customer(customer.id) # Verify hashes exist (normalization should have occurred) assert hashed.hashed_email is not None @@ -244,13 +248,17 @@ async def test_hash_existing_customers_normalizes_country_code( # Verify no hashed version exists yet result = await async_session.execute( - select(HashedCustomer).where(HashedCustomer.customer_id == customer.id) + select(Customer).where(Customer.id == customer.id) ) hashed = result.scalar_one_or_none() - assert hashed is None + assert hashed is not None, "Customer should exist." + + assert hashed.hashed_given_name is None, "Hashed given name should be None." + assert hashed.hashed_email is None, "Hashed email should be None." + assert hashed.hashed_country_code is None, "Hashed country code should be None." # Verify the customer has the invalid country code stored in the DB - assert customer.country_code == "Italy" + assert hashed.country_code == "Italy" # Run hash_existing_customers - this should normalize "Italy" to "IT" # during validation and successfully create a hashed customer @@ -263,7 +271,7 @@ async def test_hash_existing_customers_normalizes_country_code( # Verify hashed version was created await async_session.refresh(customer) result = await async_session.execute( - select(HashedCustomer).where(HashedCustomer.customer_id == customer.id) + select(Customer).where(Customer.id == customer.id) ) hashed = result.scalar_one_or_none() assert hashed is not None @@ -302,7 +310,7 @@ async def test_hash_existing_customers_normalizes_country_code( # Verify hashed version was created with correct hash result = await async_session.execute( - select(HashedCustomer).where(HashedCustomer.customer_id == customer2.id) + select(Customer).where(Customer.id == customer2.id) ) hashed = result.scalar_one_or_none() assert hashed is not None