HashedCustomer is now no longer necessary

This commit is contained in:
Jonas Linter
2025-12-03 11:10:27 +01:00
parent 34cb2131c4
commit df7d3c6543
5 changed files with 56 additions and 50 deletions

View File

@@ -17,7 +17,6 @@ from .db import (
ConversionGuest, ConversionGuest,
ConversionRoom, ConversionRoom,
Customer, Customer,
HashedCustomer,
Reservation, Reservation,
SessionMaker, SessionMaker,
) )
@@ -95,11 +94,11 @@ class ConversionService:
self.hotel_id = hotel_id self.hotel_id = hotel_id
# Cache for reservation and customer data within a single XML processing run # Cache for reservation and customer data within a single XML processing run
# Maps hotel_code -> list of (reservation, hashed_customer) tuples # Maps hotel_code -> list of (reservation, customer) tuples
# This significantly speeds up matching when processing large XML files # This significantly speeds up matching when processing large XML files
# Uses hashed data for matching to preserve privacy # Uses hashed data for matching to preserve privacy
self._reservation_cache: dict[ self._reservation_cache: dict[
str | None, list[tuple[Reservation, HashedCustomer | None]] str | None, list[tuple[Reservation, Customer | None]]
] = {} ] = {}
self._cache_initialized = False self._cache_initialized = False
@@ -1146,8 +1145,8 @@ class ConversionService:
guest_last_name: str | None, guest_last_name: str | None,
guest_email: str | None, guest_email: str | None,
session: AsyncSession | None = None, session: AsyncSession | None = None,
) -> HashedCustomer | None: ) -> Customer | None:
"""Match guest by name and email directly to HashedCustomer (no Reservation needed). """Match guest by name and email directly to Customer (no Reservation needed).
This method bypasses the Reservation table entirely and matches directly against This method bypasses the Reservation table entirely and matches directly against
hashed customer data. Used for guest-detail matching where we don't need to link hashed customer data. Used for guest-detail matching where we don't need to link
@@ -1160,23 +1159,23 @@ class ConversionService:
session: AsyncSession to use. If None, uses self.session. session: AsyncSession to use. If None, uses self.session.
Returns: Returns:
Matched HashedCustomer or None Matched Customer or None
""" """
if session is None: if session is None:
session = self.session session = self.session
# Query all hashed customers that match the guest details # Query all hashed customers that match the guest details
query = select(HashedCustomer).options(selectinload(HashedCustomer.customer)) query = select(Customer)
# Build filter conditions # Build filter conditions
conditions = [] conditions = []
if guest_email: if guest_email:
conditions.append(HashedCustomer.hashed_email == guest_email) conditions.append(Customer.hashed_email == guest_email)
if guest_first_name and guest_last_name: if guest_first_name and guest_last_name:
conditions.append( conditions.append(
(HashedCustomer.hashed_given_name == guest_first_name) (Customer.hashed_given_name == guest_first_name)
& (HashedCustomer.hashed_surname == guest_last_name) & (Customer.hashed_surname == guest_last_name)
) )
if not conditions: if not conditions:
@@ -1288,10 +1287,10 @@ class ConversionService:
async def _extract_unmatched_guests( async def _extract_unmatched_guests(
self, session: AsyncSession self, session: AsyncSession
) -> dict[str, HashedCustomer]: ) -> dict[str, Customer]:
"""Phase 3b: Extract unique guests from unmatched conversions and match them to customers. """Phase 3b: Extract unique guests from unmatched conversions and match them to customers.
Returns a mapping of guest_id -> HashedCustomer for all unique guests found in Returns a mapping of guest_id -> Customer for all unique guests found in
unmatched conversions. Only processes each guest once. unmatched conversions. Only processes each guest once.
This includes: This includes:
@@ -1303,7 +1302,7 @@ class ConversionService:
session: AsyncSession for database queries session: AsyncSession for database queries
Returns: Returns:
Dictionary mapping guest_id to matched HashedCustomer (or None if no match) Dictionary mapping guest_id to matched Customer (or None if no match)
""" """
# Find all conversions that either: # Find all conversions that either:
@@ -1336,7 +1335,7 @@ class ConversionService:
) )
# Match each unique guest to a hashed customer # Match each unique guest to a hashed customer
guest_to_hashed_customer: dict[str, HashedCustomer] = {} guest_to_hashed_customer: dict[str, Customer] = {}
for guest_id, conversion in unique_guests.items(): for guest_id, conversion in unique_guests.items():
conversion_guest = conversion.guest conversion_guest = conversion.guest
if not conversion_guest: if not conversion_guest:
@@ -1365,7 +1364,7 @@ class ConversionService:
async def _link_matched_guests_to_reservations( async def _link_matched_guests_to_reservations(
self, self,
guest_to_hashed_customer: dict[str, HashedCustomer], guest_to_customer_dict: dict[str, Customer],
session: AsyncSession, session: AsyncSession,
stats: dict[str, int], stats: dict[str, int],
) -> None: ) -> None:
@@ -1383,13 +1382,13 @@ class ConversionService:
by looking at whether they have paying conversions that predate any reservations. by looking at whether they have paying conversions that predate any reservations.
Args: Args:
guest_to_hashed_customer: Mapping from guest_id to matched HashedCustomer guest_to_customer: Mapping from guest_id to matched Customer
session: AsyncSession for database queries session: AsyncSession for database queries
stats: Shared stats dictionary to update stats: Shared stats dictionary to update
""" """
for guest_id, matched_hashed_customer in guest_to_hashed_customer.items(): for guest_id, matched_hashed_customer in guest_to_customer_dict.items():
if not matched_hashed_customer or not matched_hashed_customer.customer_id: if not matched_hashed_customer or not matched_hashed_customer.id:
continue continue
# Find all conversions from this guest that don't have a reservation # Find all conversions from this guest that don't have a reservation
@@ -1414,7 +1413,7 @@ class ConversionService:
"Phase 3c: Processing %d conversions for guest %s (customer_id=%d)", "Phase 3c: Processing %d conversions for guest %s (customer_id=%d)",
len(conversions), len(conversions),
guest_id, guest_id,
matched_hashed_customer.customer_id, matched_hashed_customer.id,
) )
# Try to link each conversion to a reservation for this customer # Try to link each conversion to a reservation for this customer
@@ -1423,7 +1422,7 @@ class ConversionService:
matched_reservation, matched_reservation,
is_attributable, is_attributable,
) = await self._check_if_attributable( ) = await self._check_if_attributable(
matched_hashed_customer.customer_id, conversion, session matched_hashed_customer.id, conversion, session
) )
if matched_reservation and is_attributable: if matched_reservation and is_attributable:
@@ -1431,7 +1430,7 @@ class ConversionService:
was_previously_matched = conversion.customer_id is not None was_previously_matched = conversion.customer_id is not None
conversion.reservation_id = matched_reservation.id conversion.reservation_id = matched_reservation.id
conversion.customer_id = matched_hashed_customer.customer_id conversion.customer_id = matched_hashed_customer.id
conversion.hashed_customer_id = matched_hashed_customer.id conversion.hashed_customer_id = matched_hashed_customer.id
conversion.directly_attributable = True conversion.directly_attributable = True
conversion.guest_matched = True conversion.guest_matched = True
@@ -1447,7 +1446,7 @@ class ConversionService:
) )
elif matched_hashed_customer and conversion.customer_id is None: elif matched_hashed_customer and conversion.customer_id is None:
# Only count new customer matches (conversions that didn't have a customer before) # Only count new customer matches (conversions that didn't have a customer before)
conversion.customer_id = matched_hashed_customer.customer_id conversion.customer_id = matched_hashed_customer.id
conversion.hashed_customer_id = matched_hashed_customer.id conversion.hashed_customer_id = matched_hashed_customer.id
conversion.directly_attributable = False conversion.directly_attributable = False
conversion.guest_matched = True conversion.guest_matched = True
@@ -1458,7 +1457,7 @@ class ConversionService:
# Look at ALL conversions from this guest to see if there are pre-dated payments # Look at ALL conversions from this guest to see if there are pre-dated payments
if conversions and conversions[0].guest: if conversions and conversions[0].guest:
await self._check_if_guest_is_regular( await self._check_if_guest_is_regular(
guest_id, matched_hashed_customer.customer_id, session guest_id, matched_hashed_customer.id, session
) )
async def _check_regularity_for_all_matched_guests( async def _check_regularity_for_all_matched_guests(
@@ -1503,15 +1502,15 @@ class ConversionService:
# Get the customer ID from the hashed_customer # Get the customer ID from the hashed_customer
hashed_customer_result = await session.execute( hashed_customer_result = await session.execute(
select(HashedCustomer).where( select(Customer).where(
HashedCustomer.id == conversion_guest.hashed_customer_id Customer.id == conversion_guest.hashed_customer_id
) )
) )
hashed_customer = hashed_customer_result.scalar_one_or_none() hashed_customer = hashed_customer_result.scalar_one_or_none()
if hashed_customer and hashed_customer.customer_id: if hashed_customer and hashed_customer.id:
await self._check_if_guest_is_regular( await self._check_if_guest_is_regular(
conversion_guest.guest_id, hashed_customer.customer_id, session conversion_guest.guest_id, hashed_customer.id, session
) )
async def _match_conversions_from_db_sequential( async def _match_conversions_from_db_sequential(

View File

@@ -179,18 +179,18 @@ class CustomerService:
# Create new customer (either no contact_id or customer doesn't exist) # Create new customer (either no contact_id or customer doesn't exist)
return await self.create_customer(customer_data, auto_commit=auto_commit) return await self.create_customer(customer_data, auto_commit=auto_commit)
async def get_hashed_customer(self, customer_id: int) -> HashedCustomer | None: async def get_customer(self, customer_id: int) -> Customer | None:
"""Get the hashed version of a customer. """Get the hashed version of a customer.
Args: Args:
customer_id: The customer ID customer_id: The customer ID
Returns: Returns:
HashedCustomer instance if found, None otherwise Customer instance if found, None otherwise
""" """
result = await self.session.execute( result = await self.session.execute(
select(HashedCustomer).where(HashedCustomer.customer_id == customer_id) select(Customer).where(Customer.id == customer_id)
) )
return result.scalar_one_or_none() return result.scalar_one_or_none()

View File

@@ -59,7 +59,7 @@ async def load_test_data_from_db():
result = [] result = []
for reservation, customer in reservations_with_customers: for reservation, customer in reservations_with_customers:
# Get hashed customer data # Get hashed customer data
hashed_customer = await customer_service.get_hashed_customer(customer.id) hashed_customer = await customer_service.get_customer(customer.id)
result.append( result.append(
{ {

View File

@@ -740,9 +740,8 @@ class TestHashedMatchingLogic:
test_db_session.add(customer) test_db_session.add(customer)
await test_db_session.flush() await test_db_session.flush()
hashed_customer = customer.create_hashed_customer() customer.update_hashed_fields()
test_db_session.add(hashed_customer)
await test_db_session.flush()
reservation = Reservation( reservation = Reservation(
customer_id=customer.id, customer_id=customer.id,

View File

@@ -42,9 +42,9 @@ async def test_create_customer_creates_hashed_version(async_session: AsyncSessio
assert customer.given_name == "John" assert customer.given_name == "John"
# Check that hashed version was created # Check that hashed version was created
hashed = await service.get_hashed_customer(customer.id) hashed = await service.get_customer(customer.id)
assert hashed is not None assert hashed is not None
assert hashed.customer_id == customer.id assert hashed.id == customer.id
assert hashed.hashed_email is not None assert hashed.hashed_email is not None
assert hashed.hashed_phone is not None assert hashed.hashed_phone is not None
assert hashed.hashed_given_name is not None assert hashed.hashed_given_name is not None
@@ -66,7 +66,7 @@ async def test_update_customer_updates_hashed_version(async_session: AsyncSessio
customer = await service.create_customer(customer_data) customer = await service.create_customer(customer_data)
# Get initial hashed email # Get initial hashed email
hashed = await service.get_hashed_customer(customer.id) hashed = await service.get_customer(customer.id)
original_hashed_email = hashed.hashed_email original_hashed_email = hashed.hashed_email
# Update customer email # Update customer email
@@ -74,7 +74,7 @@ async def test_update_customer_updates_hashed_version(async_session: AsyncSessio
updated_customer = await service.update_customer(customer, update_data) updated_customer = await service.update_customer(customer, update_data)
# Check that hashed version was updated # Check that hashed version was updated
updated_hashed = await service.get_hashed_customer(updated_customer.id) updated_hashed = await service.get_customer(updated_customer.id)
assert updated_hashed.hashed_email != original_hashed_email assert updated_hashed.hashed_email != original_hashed_email
@@ -95,7 +95,7 @@ async def test_get_or_create_customer_creates_new(async_session: AsyncSession):
assert customer.contact_id == "new123" assert customer.contact_id == "new123"
# Verify hashed version exists # Verify hashed version exists
hashed = await service.get_hashed_customer(customer.id) hashed = await service.get_customer(customer.id)
assert hashed is not None assert hashed is not None
@@ -145,10 +145,13 @@ async def test_hash_existing_customers_backfills(async_session: AsyncSession):
# Verify no hashed version exists # Verify no hashed version exists
result = await async_session.execute( result = await async_session.execute(
select(HashedCustomer).where(HashedCustomer.customer_id == customer.id) select(Customer).where(Customer.id == customer.id)
) )
hashed = result.scalar_one_or_none() hashed = result.scalar_one_or_none()
assert hashed is None assert hashed, "Customer should exist."
assert hashed.hashed_given_name is None, "Hashed given name should be None."
assert hashed.hashed_email is None, "Hashed email should be None."
# Run backfill # Run backfill
service = CustomerService(async_session) service = CustomerService(async_session)
@@ -158,11 +161,12 @@ async def test_hash_existing_customers_backfills(async_session: AsyncSession):
# Verify hashed version now exists # Verify hashed version now exists
result = await async_session.execute( result = await async_session.execute(
select(HashedCustomer).where(HashedCustomer.customer_id == customer.id) select(Customer).where(Customer.id == customer.id)
) )
hashed = result.scalar_one_or_none() hashed = result.scalar_one_or_none()
assert hashed is not None assert hashed is not None, "Customer should still exist after backfill."
assert hashed.hashed_email is not None assert hashed.hashed_email is not None, "Hashed email should be populated."
assert hashed.hashed_given_name is not None, "Hashed given name should be populated."
@pytest.mark.asyncio @pytest.mark.asyncio
@@ -201,7 +205,7 @@ async def test_hashing_normalization(async_session: AsyncSession):
} }
customer = await service.create_customer(customer_data) customer = await service.create_customer(customer_data)
hashed = await service.get_hashed_customer(customer.id) hashed = await service.get_customer(customer.id)
# Verify hashes exist (normalization should have occurred) # Verify hashes exist (normalization should have occurred)
assert hashed.hashed_email is not None assert hashed.hashed_email is not None
@@ -244,13 +248,17 @@ async def test_hash_existing_customers_normalizes_country_code(
# Verify no hashed version exists yet # Verify no hashed version exists yet
result = await async_session.execute( result = await async_session.execute(
select(HashedCustomer).where(HashedCustomer.customer_id == customer.id) select(Customer).where(Customer.id == customer.id)
) )
hashed = result.scalar_one_or_none() hashed = result.scalar_one_or_none()
assert hashed is None assert hashed is not None, "Customer should exist."
assert hashed.hashed_given_name is None, "Hashed given name should be None."
assert hashed.hashed_email is None, "Hashed email should be None."
assert hashed.hashed_country_code is None, "Hashed country code should be None."
# Verify the customer has the invalid country code stored in the DB # Verify the customer has the invalid country code stored in the DB
assert customer.country_code == "Italy" assert hashed.country_code == "Italy"
# Run hash_existing_customers - this should normalize "Italy" to "IT" # Run hash_existing_customers - this should normalize "Italy" to "IT"
# during validation and successfully create a hashed customer # during validation and successfully create a hashed customer
@@ -263,7 +271,7 @@ async def test_hash_existing_customers_normalizes_country_code(
# Verify hashed version was created # Verify hashed version was created
await async_session.refresh(customer) await async_session.refresh(customer)
result = await async_session.execute( result = await async_session.execute(
select(HashedCustomer).where(HashedCustomer.customer_id == customer.id) select(Customer).where(Customer.id == customer.id)
) )
hashed = result.scalar_one_or_none() hashed = result.scalar_one_or_none()
assert hashed is not None assert hashed is not None
@@ -302,7 +310,7 @@ async def test_hash_existing_customers_normalizes_country_code(
# Verify hashed version was created with correct hash # Verify hashed version was created with correct hash
result = await async_session.execute( result = await async_session.execute(
select(HashedCustomer).where(HashedCustomer.customer_id == customer2.id) select(Customer).where(Customer.id == customer2.id)
) )
hashed = result.scalar_one_or_none() hashed = result.scalar_one_or_none()
assert hashed is not None assert hashed is not None