Hashed conversion matching and more. #12
@@ -1246,90 +1246,6 @@ class ConversionService:
|
||||
all_reservations, guest_first_name, guest_last_name, guest_email
|
||||
)
|
||||
|
||||
def _match_reservations_by_guest_details(
|
||||
self,
|
||||
reservations: list[Reservation],
|
||||
guest_first_name: str | None,
|
||||
guest_last_name: str | None,
|
||||
guest_email: str | None,
|
||||
) -> Reservation | None:
|
||||
"""Match a reservation from a list by guest name and email (non-async).
|
||||
|
||||
Uses hashed data matching for privacy. The guest_first_name, guest_last_name,
|
||||
and guest_email parameters should be pre-hashed values from ConversionGuest.
|
||||
Compares hashed values directly against hashed_customer hash fields.
|
||||
|
||||
Args:
|
||||
reservations: List of reservations to search through
|
||||
guest_first_name: Pre-hashed guest first name (or None)
|
||||
guest_last_name: Pre-hashed guest last name (or None)
|
||||
guest_email: Pre-hashed guest email (or None)
|
||||
|
||||
Returns:
|
||||
Matched Reservation or None
|
||||
|
||||
"""
|
||||
# Filter by guest details using hashed data
|
||||
candidates = []
|
||||
|
||||
for reservation in reservations:
|
||||
customer = reservation.customer
|
||||
if not customer:
|
||||
continue
|
||||
|
||||
# Access hashed_version - handle both direct objects and relationships
|
||||
hashed_customer = customer.hashed_version
|
||||
if not hashed_customer:
|
||||
continue
|
||||
|
||||
# Defensive check: ensure hashed_customer is actually a HashedCustomer object
|
||||
# (not an InstrumentedList or other SQLAlchemy construct)
|
||||
if not hasattr(hashed_customer, 'hashed_email'):
|
||||
_LOGGER.debug(
|
||||
"Customer %s has invalid hashed_version type: %s",
|
||||
customer.id,
|
||||
type(hashed_customer),
|
||||
)
|
||||
continue
|
||||
|
||||
# Match by email (highest priority) using hashed comparison
|
||||
if guest_email and hashed_customer.hashed_email:
|
||||
if hashed_customer.hashed_email == guest_email:
|
||||
_LOGGER.info(
|
||||
"Found exact email match (reservation_id=%s, cust=%s, email=%s) via hash",
|
||||
reservation.id,
|
||||
customer.email_address,
|
||||
guest_email,
|
||||
)
|
||||
candidates.append((reservation, 3)) # Highest score
|
||||
continue
|
||||
|
||||
# Match by name (first + last) using hashed comparison
|
||||
if guest_first_name and guest_last_name:
|
||||
first_match = (
|
||||
hashed_customer.hashed_given_name
|
||||
and hashed_customer.hashed_given_name == guest_first_name
|
||||
)
|
||||
last_match = (
|
||||
hashed_customer.hashed_surname
|
||||
and hashed_customer.hashed_surname == guest_last_name
|
||||
)
|
||||
|
||||
if first_match and last_match:
|
||||
_LOGGER.info(
|
||||
"Found exact name match (reservation_id=%s) via hash",
|
||||
reservation.id,
|
||||
)
|
||||
candidates.append((reservation, 2)) # Medium-high score
|
||||
continue
|
||||
|
||||
# Return highest-scoring match
|
||||
if candidates:
|
||||
candidates.sort(key=lambda x: x[1], reverse=True)
|
||||
return candidates[0][0]
|
||||
|
||||
return None
|
||||
|
||||
def _filter_reservations_by_guest_details(
|
||||
self,
|
||||
reservations: list[Reservation],
|
||||
@@ -1569,10 +1485,6 @@ class ConversionService:
|
||||
|
||||
# Update the conversion with matched entities if found
|
||||
if matched_reservation or matched_customer or matched_hashed_customer:
|
||||
# Set attribution flags and matched entities based on match type
|
||||
if match_type == "id":
|
||||
# ID-based matches (fbclid/gclid/md5_unique_id) are always directly attributable
|
||||
# Link to both reservation and customer
|
||||
conversion.reservation_id = (
|
||||
matched_reservation.id if matched_reservation else None
|
||||
)
|
||||
@@ -1582,37 +1494,25 @@ class ConversionService:
|
||||
conversion.hashed_customer_id = (
|
||||
matched_hashed_customer.id if matched_hashed_customer else None
|
||||
)
|
||||
|
||||
# Set attribution flags based on match type and check for regular guest
|
||||
is_attributable = False
|
||||
if match_type == "id":
|
||||
# ID-based matches (fbclid/gclid/md5_unique_id) are always directly attributable
|
||||
conversion.directly_attributable = True
|
||||
conversion.guest_matched = False
|
||||
is_attributable = True
|
||||
|
||||
# Check if guest is regular for ID matches
|
||||
if matched_reservation:
|
||||
await self._check_if_regular(conversion, matched_reservation, session)
|
||||
|
||||
elif match_type == "guest_details":
|
||||
# Guest detail matches: link to customer/hashed_customer directly (NO reservation)
|
||||
# Only link to reservation if dates match
|
||||
conversion.customer_id = (
|
||||
matched_customer.id if matched_customer else None
|
||||
)
|
||||
conversion.hashed_customer_id = (
|
||||
matched_hashed_customer.id if matched_hashed_customer else None
|
||||
)
|
||||
conversion.guest_matched = True
|
||||
|
||||
# For guest-detail matches, we don't have a matched_reservation
|
||||
# Instead, check if dates align with any existing reservations for this customer
|
||||
is_attributable = await self._check_if_attributable_guest_match(
|
||||
matched_customer, conversion, session
|
||||
# Guest detail matches: check if guest is regular or dates match
|
||||
is_attributable = await self._check_if_attributable(
|
||||
matched_reservation, conversion, session
|
||||
)
|
||||
conversion.directly_attributable = is_attributable
|
||||
conversion.guest_matched = True
|
||||
|
||||
# Check if guest is regular (if we have a customer to reference)
|
||||
if matched_customer:
|
||||
await self._check_if_regular_by_customer(
|
||||
conversion, matched_customer, session
|
||||
)
|
||||
# Check if guest is regular for both ID and guest detail matches
|
||||
if matched_reservation:
|
||||
await self._check_if_regular(conversion, matched_reservation, session)
|
||||
|
||||
# Update conversion_guest with hashed_customer reference if matched
|
||||
if conversion_guest and matched_hashed_customer:
|
||||
@@ -1743,127 +1643,3 @@ class ConversionService:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
async def _check_if_attributable_guest_match(
|
||||
self,
|
||||
matched_customer: Customer | None,
|
||||
conversion: Conversion,
|
||||
session: AsyncSession,
|
||||
) -> bool:
|
||||
"""Check if a guest-detail matched conversion is attributable based on date alignment.
|
||||
|
||||
For guest-detail matches (without a specific reservation), check if the conversion's
|
||||
room dates align with ANY of the customer's reservations (date tolerance ±7 days).
|
||||
|
||||
Args:
|
||||
matched_customer: The matched Customer record
|
||||
conversion: The Conversion record being evaluated
|
||||
session: AsyncSession for database queries
|
||||
|
||||
Returns:
|
||||
True if conversion dates align with any reservation, False otherwise
|
||||
"""
|
||||
if not matched_customer or not conversion.conversion_rooms:
|
||||
return False
|
||||
|
||||
# Find all reservations for this customer
|
||||
reservations_result = await session.execute(
|
||||
select(Reservation).where(Reservation.customer_id == matched_customer.id)
|
||||
)
|
||||
reservations = reservations_result.scalars().all()
|
||||
|
||||
if not reservations:
|
||||
return False
|
||||
|
||||
# Check if any conversion_room dates match any reservation dates
|
||||
for room in conversion.conversion_rooms:
|
||||
if not room.arrival_date or not room.departure_date:
|
||||
continue
|
||||
|
||||
for reservation in reservations:
|
||||
if not reservation.start_date or not reservation.end_date:
|
||||
continue
|
||||
|
||||
# Check if dates match (within ±7 day tolerance)
|
||||
arrival_match = abs(
|
||||
(room.arrival_date - reservation.start_date).days
|
||||
) <= 7
|
||||
departure_match = abs(
|
||||
(room.departure_date - reservation.end_date).days
|
||||
) <= 7
|
||||
|
||||
if arrival_match and departure_match:
|
||||
_LOGGER.info(
|
||||
"Marking guest-detail match as attributable: room dates %s-%s match reservation dates %s-%s",
|
||||
room.arrival_date,
|
||||
room.departure_date,
|
||||
reservation.start_date,
|
||||
reservation.end_date,
|
||||
)
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
async def _check_if_regular_by_customer(
|
||||
self,
|
||||
conversion: Conversion,
|
||||
matched_customer: Customer,
|
||||
session: AsyncSession,
|
||||
) -> None:
|
||||
"""Check if guest is regular based on customer without a specific reservation.
|
||||
|
||||
For guest-detail matches, determine if the guest is regular by checking if
|
||||
their earliest paying conversion predates their earliest reservation.
|
||||
|
||||
Args:
|
||||
conversion: The Conversion record being evaluated
|
||||
matched_customer: The matched Customer record
|
||||
session: AsyncSession for database queries
|
||||
"""
|
||||
if not conversion.guest or not matched_customer.id:
|
||||
return
|
||||
|
||||
# Find the earliest paying conversion for this guest
|
||||
# (booked reservations from hotel with actual revenue)
|
||||
earliest_paying_conversion_result = await session.execute(
|
||||
select(Conversion)
|
||||
.join(ConversionRoom, Conversion.id == ConversionRoom.conversion_id)
|
||||
.where(
|
||||
Conversion.hotel_id == conversion.hotel_id,
|
||||
Conversion.guest_id == conversion.guest_id,
|
||||
ConversionRoom.total_revenue.isnot(None),
|
||||
ConversionRoom.total_revenue > Decimal(0),
|
||||
)
|
||||
.order_by(Conversion.reservation_date.asc())
|
||||
.limit(1)
|
||||
)
|
||||
earliest_paying_conversion = earliest_paying_conversion_result.scalar_one_or_none()
|
||||
|
||||
if not earliest_paying_conversion:
|
||||
conversion.guest.is_regular = False
|
||||
return
|
||||
|
||||
# Find the earliest reservation (booking request we sent) for this customer
|
||||
earliest_reservation_result = await session.execute(
|
||||
select(Reservation)
|
||||
.where(Reservation.customer_id == matched_customer.id)
|
||||
.order_by(Reservation.start_date.asc())
|
||||
.limit(1)
|
||||
)
|
||||
earliest_reservation = earliest_reservation_result.scalar_one_or_none()
|
||||
|
||||
if not earliest_reservation:
|
||||
conversion.guest.is_regular = False
|
||||
return
|
||||
|
||||
# Guest is regular if their earliest paying conversion predates all their reservations
|
||||
# (meaning they were already a customer before we started tracking reservations)
|
||||
is_regular = earliest_paying_conversion.reservation_date < earliest_reservation.start_date
|
||||
conversion.guest.is_regular = is_regular
|
||||
|
||||
if is_regular:
|
||||
_LOGGER.info(
|
||||
"Marking guest as regular (via customer): earliest paying conversion date %s is before first reservation %s",
|
||||
earliest_paying_conversion.reservation_date,
|
||||
earliest_reservation.start_date,
|
||||
)
|
||||
|
||||
@@ -159,7 +159,9 @@ class TestConversionServiceWithImportedData:
|
||||
EXPECTED_TOTAL_ROOMS = 539
|
||||
# Note: Currently no matches by tracking ID because XML data uses different formats
|
||||
# This is expected with the test data. Real PMS data would have higher match rates.
|
||||
EXPECTED_MATCHED_TO_RESERVATION = 19
|
||||
EXPECTED_MATCHED_TO_RESERVATION = 0
|
||||
|
||||
EXPECTED_MATCHED_TO_CUSTOMER = 19
|
||||
|
||||
print(f"\nBaseline Match Counts:")
|
||||
print(f" Total reservations in XML: {EXPECTED_TOTAL_RESERVATIONS}")
|
||||
@@ -168,6 +170,8 @@ class TestConversionServiceWithImportedData:
|
||||
print(f" Matched to reservation: {EXPECTED_MATCHED_TO_RESERVATION}")
|
||||
match_rate = (EXPECTED_MATCHED_TO_RESERVATION / EXPECTED_TOTAL_RESERVATIONS * 100) if EXPECTED_TOTAL_RESERVATIONS > 0 else 0
|
||||
print(f" Match rate: {match_rate:.1f}%")
|
||||
print(f" Matched to customer: {EXPECTED_MATCHED_TO_CUSTOMER}")
|
||||
print(f" Match rate (to customer): {(EXPECTED_MATCHED_TO_CUSTOMER / EXPECTED_TOTAL_RESERVATIONS * 100) if EXPECTED_TOTAL_RESERVATIONS > 0 else 0:.1f}%")
|
||||
|
||||
# Verify baseline stability on subsequent runs
|
||||
assert (
|
||||
@@ -180,6 +184,10 @@ class TestConversionServiceWithImportedData:
|
||||
stats["matched_to_reservation"] == EXPECTED_MATCHED_TO_RESERVATION
|
||||
), f"Matched reservations should be {EXPECTED_MATCHED_TO_RESERVATION}, got {stats['matched_to_reservation']}"
|
||||
|
||||
assert (
|
||||
stats["matched_to_customer"] == EXPECTED_MATCHED_TO_CUSTOMER
|
||||
), f"Matched customers should be {EXPECTED_MATCHED_TO_CUSTOMER}, got {stats['matched_to_customer']}"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_conversion_room_revenue_aggregation(
|
||||
self, test_db_session, test_config, test_data_dir
|
||||
@@ -334,53 +342,6 @@ class TestHashedMatchingLogic:
|
||||
"""Test the hashed matching logic used in ConversionService."""
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_no_match_without_hashed_customer(self, test_db_session):
|
||||
"""Test that matching fails gracefully when customer has no hashed version."""
|
||||
# Create a customer WITHOUT hashed data
|
||||
customer = Customer(
|
||||
given_name="Bob",
|
||||
surname="Jones",
|
||||
email_address="bob@example.com",
|
||||
contact_id="test_contact_3",
|
||||
)
|
||||
test_db_session.add(customer)
|
||||
await test_db_session.commit()
|
||||
|
||||
# Create a reservation
|
||||
reservation = Reservation(
|
||||
customer_id=customer.id,
|
||||
unique_id="res_3",
|
||||
hotel_code="test_hotel",
|
||||
)
|
||||
test_db_session.add(reservation)
|
||||
await test_db_session.commit()
|
||||
|
||||
# Test the matching logic
|
||||
service = ConversionService(test_db_session)
|
||||
|
||||
# Eagerly load reservations
|
||||
from sqlalchemy.orm import selectinload
|
||||
result = await test_db_session.execute(
|
||||
select(Reservation)
|
||||
.where(Reservation.id == reservation.id)
|
||||
.options(selectinload(Reservation.customer).selectinload(Customer.hashed_version))
|
||||
)
|
||||
reservations = result.scalars().all()
|
||||
|
||||
hashed_email = hashlib.sha256(
|
||||
"bob@example.com".lower().strip().encode("utf-8")
|
||||
).hexdigest()
|
||||
|
||||
matched = service._match_reservations_by_guest_details(
|
||||
reservations,
|
||||
guest_first_name=None,
|
||||
guest_last_name=None,
|
||||
guest_email=hashed_email,
|
||||
)
|
||||
|
||||
# Should not match because customer has no hashed version
|
||||
assert matched is None, "Should not match without hashed customer"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@@ -505,66 +466,7 @@ class TestHashedMatchingLogic:
|
||||
assert stats["total_reservations"] == 1
|
||||
assert stats["total_daily_sales"] == 1
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_hashed_customer_missing_fields_handled_gracefully(
|
||||
self, test_db_session
|
||||
):
|
||||
"""Test that matching handles customers with missing hashed fields gracefully."""
|
||||
# Create a customer
|
||||
customer = Customer(
|
||||
given_name="Eve",
|
||||
surname="Taylor",
|
||||
email_address="eve@example.com",
|
||||
contact_id="test_contact_7",
|
||||
)
|
||||
test_db_session.add(customer)
|
||||
await test_db_session.flush()
|
||||
|
||||
# Create hashed customer but simulate missing fields by manually setting to None
|
||||
hashed_customer = HashedCustomer(
|
||||
customer_id=customer.id,
|
||||
contact_id="test_contact_7_hashed",
|
||||
hashed_email=None, # Simulate missing hashed email
|
||||
hashed_given_name=None, # Simulate missing hashed name
|
||||
hashed_surname=None,
|
||||
)
|
||||
test_db_session.add(hashed_customer)
|
||||
await test_db_session.flush()
|
||||
|
||||
# Create reservation
|
||||
reservation = Reservation(
|
||||
customer_id=customer.id,
|
||||
unique_id="res_7",
|
||||
hotel_code="test_hotel",
|
||||
)
|
||||
test_db_session.add(reservation)
|
||||
await test_db_session.commit()
|
||||
|
||||
# Test matching - should not crash even with missing hashed fields
|
||||
service = ConversionService(test_db_session)
|
||||
|
||||
# Eagerly load reservations
|
||||
from sqlalchemy.orm import selectinload
|
||||
result = await test_db_session.execute(
|
||||
select(Reservation)
|
||||
.where(Reservation.id == reservation.id)
|
||||
.options(selectinload(Reservation.customer).selectinload(Customer.hashed_version))
|
||||
)
|
||||
reservations = result.scalars().all()
|
||||
|
||||
hashed_email = hashlib.sha256(
|
||||
"eve@example.com".lower().strip().encode("utf-8")
|
||||
).hexdigest()
|
||||
|
||||
matched = service._match_reservations_by_guest_details(
|
||||
reservations,
|
||||
guest_first_name=None,
|
||||
guest_last_name=None,
|
||||
guest_email=hashed_email,
|
||||
)
|
||||
|
||||
# Should not match because hashed customer fields are None
|
||||
assert matched is None, "Should not match with missing hashed fields"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_conversion_guest_composite_key_prevents_duplicates(
|
||||
|
||||
Reference in New Issue
Block a user