merge_db_fixes_to_main #16

Merged
jonas merged 40 commits from merge_db_fixes_to_main into main 2025-12-09 11:37:21 +00:00
5 changed files with 56 additions and 50 deletions
Showing only changes of commit 1bdef3ee11 - Show all commits

View File

@@ -17,7 +17,6 @@ from .db import (
ConversionGuest,
ConversionRoom,
Customer,
HashedCustomer,
Reservation,
SessionMaker,
)
@@ -95,11 +94,11 @@ class ConversionService:
self.hotel_id = hotel_id
# Cache for reservation and customer data within a single XML processing run
# Maps hotel_code -> list of (reservation, hashed_customer) tuples
# Maps hotel_code -> list of (reservation, customer) tuples
# This significantly speeds up matching when processing large XML files
# Uses hashed data for matching to preserve privacy
self._reservation_cache: dict[
str | None, list[tuple[Reservation, HashedCustomer | None]]
str | None, list[tuple[Reservation, Customer | None]]
] = {}
self._cache_initialized = False
@@ -1146,8 +1145,8 @@ class ConversionService:
guest_last_name: str | None,
guest_email: str | None,
session: AsyncSession | None = None,
) -> HashedCustomer | None:
"""Match guest by name and email directly to HashedCustomer (no Reservation needed).
) -> Customer | None:
"""Match guest by name and email directly to Customer (no Reservation needed).
This method bypasses the Reservation table entirely and matches directly against
hashed customer data. Used for guest-detail matching where we don't need to link
@@ -1160,23 +1159,23 @@ class ConversionService:
session: AsyncSession to use. If None, uses self.session.
Returns:
Matched HashedCustomer or None
Matched Customer or None
"""
if session is None:
session = self.session
# Query all hashed customers that match the guest details
query = select(HashedCustomer).options(selectinload(HashedCustomer.customer))
query = select(Customer)
# Build filter conditions
conditions = []
if guest_email:
conditions.append(HashedCustomer.hashed_email == guest_email)
conditions.append(Customer.hashed_email == guest_email)
if guest_first_name and guest_last_name:
conditions.append(
(HashedCustomer.hashed_given_name == guest_first_name)
& (HashedCustomer.hashed_surname == guest_last_name)
(Customer.hashed_given_name == guest_first_name)
& (Customer.hashed_surname == guest_last_name)
)
if not conditions:
@@ -1288,10 +1287,10 @@ class ConversionService:
async def _extract_unmatched_guests(
self, session: AsyncSession
) -> dict[str, HashedCustomer]:
) -> dict[str, Customer]:
"""Phase 3b: Extract unique guests from unmatched conversions and match them to customers.
Returns a mapping of guest_id -> HashedCustomer for all unique guests found in
Returns a mapping of guest_id -> Customer for all unique guests found in
unmatched conversions. Only processes each guest once.
This includes:
@@ -1303,7 +1302,7 @@ class ConversionService:
session: AsyncSession for database queries
Returns:
Dictionary mapping guest_id to matched HashedCustomer (or None if no match)
Dictionary mapping guest_id to matched Customer (or None if no match)
"""
# Find all conversions that either:
@@ -1336,7 +1335,7 @@ class ConversionService:
)
# Match each unique guest to a hashed customer
guest_to_hashed_customer: dict[str, HashedCustomer] = {}
guest_to_hashed_customer: dict[str, Customer] = {}
for guest_id, conversion in unique_guests.items():
conversion_guest = conversion.guest
if not conversion_guest:
@@ -1365,7 +1364,7 @@ class ConversionService:
async def _link_matched_guests_to_reservations(
self,
guest_to_hashed_customer: dict[str, HashedCustomer],
guest_to_customer_dict: dict[str, Customer],
session: AsyncSession,
stats: dict[str, int],
) -> None:
@@ -1383,13 +1382,13 @@ class ConversionService:
by looking at whether they have paying conversions that predate any reservations.
Args:
guest_to_hashed_customer: Mapping from guest_id to matched HashedCustomer
guest_to_customer_dict: Mapping from guest_id to matched Customer
session: AsyncSession for database queries
stats: Shared stats dictionary to update
"""
for guest_id, matched_hashed_customer in guest_to_hashed_customer.items():
if not matched_hashed_customer or not matched_hashed_customer.customer_id:
for guest_id, matched_hashed_customer in guest_to_customer_dict.items():
if not matched_hashed_customer or not matched_hashed_customer.id:
continue
# Find all conversions from this guest that don't have a reservation
@@ -1414,7 +1413,7 @@ class ConversionService:
"Phase 3c: Processing %d conversions for guest %s (customer_id=%d)",
len(conversions),
guest_id,
matched_hashed_customer.customer_id,
matched_hashed_customer.id,
)
# Try to link each conversion to a reservation for this customer
@@ -1423,7 +1422,7 @@ class ConversionService:
matched_reservation,
is_attributable,
) = await self._check_if_attributable(
matched_hashed_customer.customer_id, conversion, session
matched_hashed_customer.id, conversion, session
)
if matched_reservation and is_attributable:
@@ -1431,7 +1430,7 @@ class ConversionService:
was_previously_matched = conversion.customer_id is not None
conversion.reservation_id = matched_reservation.id
conversion.customer_id = matched_hashed_customer.customer_id
conversion.customer_id = matched_hashed_customer.id
conversion.hashed_customer_id = matched_hashed_customer.id
conversion.directly_attributable = True
conversion.guest_matched = True
@@ -1447,7 +1446,7 @@ class ConversionService:
)
elif matched_hashed_customer and conversion.customer_id is None:
# Only count new customer matches (conversions that didn't have a customer before)
conversion.customer_id = matched_hashed_customer.customer_id
conversion.customer_id = matched_hashed_customer.id
conversion.hashed_customer_id = matched_hashed_customer.id
conversion.directly_attributable = False
conversion.guest_matched = True
@@ -1458,7 +1457,7 @@ class ConversionService:
# Look at ALL conversions from this guest to see if there are pre-dated payments
if conversions and conversions[0].guest:
await self._check_if_guest_is_regular(
guest_id, matched_hashed_customer.customer_id, session
guest_id, matched_hashed_customer.id, session
)
async def _check_regularity_for_all_matched_guests(
@@ -1503,15 +1502,15 @@ class ConversionService:
# Look up the Customer row referenced by the guest's hashed_customer_id
hashed_customer_result = await session.execute(
select(HashedCustomer).where(
HashedCustomer.id == conversion_guest.hashed_customer_id
select(Customer).where(
Customer.id == conversion_guest.hashed_customer_id
)
)
hashed_customer = hashed_customer_result.scalar_one_or_none()
if hashed_customer and hashed_customer.customer_id:
if hashed_customer and hashed_customer.id:
await self._check_if_guest_is_regular(
conversion_guest.guest_id, hashed_customer.customer_id, session
conversion_guest.guest_id, hashed_customer.id, session
)
async def _match_conversions_from_db_sequential(

View File

@@ -179,18 +179,18 @@ class CustomerService:
# Create new customer (either no contact_id or customer doesn't exist)
return await self.create_customer(customer_data, auto_commit=auto_commit)
async def get_hashed_customer(self, customer_id: int) -> HashedCustomer | None:
async def get_customer(self, customer_id: int) -> Customer | None:
"""Get a customer (including its hashed fields) by ID.
Args:
customer_id: The customer ID
Returns:
HashedCustomer instance if found, None otherwise
Customer instance if found, None otherwise
"""
result = await self.session.execute(
select(HashedCustomer).where(HashedCustomer.customer_id == customer_id)
select(Customer).where(Customer.id == customer_id)
)
return result.scalar_one_or_none()

View File

@@ -59,7 +59,7 @@ async def load_test_data_from_db():
result = []
for reservation, customer in reservations_with_customers:
# Get hashed customer data
hashed_customer = await customer_service.get_hashed_customer(customer.id)
hashed_customer = await customer_service.get_customer(customer.id)
result.append(
{

View File

@@ -740,9 +740,8 @@ class TestHashedMatchingLogic:
test_db_session.add(customer)
await test_db_session.flush()
hashed_customer = customer.create_hashed_customer()
test_db_session.add(hashed_customer)
await test_db_session.flush()
customer.update_hashed_fields()
reservation = Reservation(
customer_id=customer.id,

View File

@@ -42,9 +42,9 @@ async def test_create_customer_creates_hashed_version(async_session: AsyncSessio
assert customer.given_name == "John"
# Check that hashed version was created
hashed = await service.get_hashed_customer(customer.id)
hashed = await service.get_customer(customer.id)
assert hashed is not None
assert hashed.customer_id == customer.id
assert hashed.id == customer.id
assert hashed.hashed_email is not None
assert hashed.hashed_phone is not None
assert hashed.hashed_given_name is not None
@@ -66,7 +66,7 @@ async def test_update_customer_updates_hashed_version(async_session: AsyncSessio
customer = await service.create_customer(customer_data)
# Get initial hashed email
hashed = await service.get_hashed_customer(customer.id)
hashed = await service.get_customer(customer.id)
original_hashed_email = hashed.hashed_email
# Update customer email
@@ -74,7 +74,7 @@ async def test_update_customer_updates_hashed_version(async_session: AsyncSessio
updated_customer = await service.update_customer(customer, update_data)
# Check that hashed version was updated
updated_hashed = await service.get_hashed_customer(updated_customer.id)
updated_hashed = await service.get_customer(updated_customer.id)
assert updated_hashed.hashed_email != original_hashed_email
@@ -95,7 +95,7 @@ async def test_get_or_create_customer_creates_new(async_session: AsyncSession):
assert customer.contact_id == "new123"
# Verify hashed version exists
hashed = await service.get_hashed_customer(customer.id)
hashed = await service.get_customer(customer.id)
assert hashed is not None
@@ -145,10 +145,13 @@ async def test_hash_existing_customers_backfills(async_session: AsyncSession):
# Verify no hashed version exists
result = await async_session.execute(
select(HashedCustomer).where(HashedCustomer.customer_id == customer.id)
select(Customer).where(Customer.id == customer.id)
)
hashed = result.scalar_one_or_none()
assert hashed is None
assert hashed, "Customer should exist."
assert hashed.hashed_given_name is None, "Hashed given name should be None."
assert hashed.hashed_email is None, "Hashed email should be None."
# Run backfill
service = CustomerService(async_session)
@@ -158,11 +161,12 @@ async def test_hash_existing_customers_backfills(async_session: AsyncSession):
# Verify hashed version now exists
result = await async_session.execute(
select(HashedCustomer).where(HashedCustomer.customer_id == customer.id)
select(Customer).where(Customer.id == customer.id)
)
hashed = result.scalar_one_or_none()
assert hashed is not None
assert hashed.hashed_email is not None
assert hashed is not None, "Customer should still exist after backfill."
assert hashed.hashed_email is not None, "Hashed email should be populated."
assert hashed.hashed_given_name is not None, "Hashed given name should be populated."
@pytest.mark.asyncio
@@ -201,7 +205,7 @@ async def test_hashing_normalization(async_session: AsyncSession):
}
customer = await service.create_customer(customer_data)
hashed = await service.get_hashed_customer(customer.id)
hashed = await service.get_customer(customer.id)
# Verify hashes exist (normalization should have occurred)
assert hashed.hashed_email is not None
@@ -244,13 +248,17 @@ async def test_hash_existing_customers_normalizes_country_code(
# Verify no hashed version exists yet
result = await async_session.execute(
select(HashedCustomer).where(HashedCustomer.customer_id == customer.id)
select(Customer).where(Customer.id == customer.id)
)
hashed = result.scalar_one_or_none()
assert hashed is None
assert hashed is not None, "Customer should exist."
assert hashed.hashed_given_name is None, "Hashed given name should be None."
assert hashed.hashed_email is None, "Hashed email should be None."
assert hashed.hashed_country_code is None, "Hashed country code should be None."
# Verify the customer has the invalid country code stored in the DB
assert customer.country_code == "Italy"
assert hashed.country_code == "Italy"
# Run hash_existing_customers - this should normalize "Italy" to "IT"
# during validation and successfully create a hashed customer
@@ -263,7 +271,7 @@ async def test_hash_existing_customers_normalizes_country_code(
# Verify hashed version was created
await async_session.refresh(customer)
result = await async_session.execute(
select(HashedCustomer).where(HashedCustomer.customer_id == customer.id)
select(Customer).where(Customer.id == customer.id)
)
hashed = result.scalar_one_or_none()
assert hashed is not None
@@ -302,7 +310,7 @@ async def test_hash_existing_customers_normalizes_country_code(
# Verify hashed version was created with correct hash
result = await async_session.execute(
select(HashedCustomer).where(HashedCustomer.customer_id == customer2.id)
select(Customer).where(Customer.id == customer2.id)
)
hashed = result.scalar_one_or_none()
assert hashed is not None