Hashed conversion matching and more. #12
@@ -1246,90 +1246,6 @@ class ConversionService:
|
|||||||
all_reservations, guest_first_name, guest_last_name, guest_email
|
all_reservations, guest_first_name, guest_last_name, guest_email
|
||||||
)
|
)
|
||||||
|
|
||||||
def _match_reservations_by_guest_details(
|
|
||||||
self,
|
|
||||||
reservations: list[Reservation],
|
|
||||||
guest_first_name: str | None,
|
|
||||||
guest_last_name: str | None,
|
|
||||||
guest_email: str | None,
|
|
||||||
) -> Reservation | None:
|
|
||||||
"""Match a reservation from a list by guest name and email (non-async).
|
|
||||||
|
|
||||||
Uses hashed data matching for privacy. The guest_first_name, guest_last_name,
|
|
||||||
and guest_email parameters should be pre-hashed values from ConversionGuest.
|
|
||||||
Compares hashed values directly against hashed_customer hash fields.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
reservations: List of reservations to search through
|
|
||||||
guest_first_name: Pre-hashed guest first name (or None)
|
|
||||||
guest_last_name: Pre-hashed guest last name (or None)
|
|
||||||
guest_email: Pre-hashed guest email (or None)
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Matched Reservation or None
|
|
||||||
|
|
||||||
"""
|
|
||||||
# Filter by guest details using hashed data
|
|
||||||
candidates = []
|
|
||||||
|
|
||||||
for reservation in reservations:
|
|
||||||
customer = reservation.customer
|
|
||||||
if not customer:
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Access hashed_version - handle both direct objects and relationships
|
|
||||||
hashed_customer = customer.hashed_version
|
|
||||||
if not hashed_customer:
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Defensive check: ensure hashed_customer is actually a HashedCustomer object
|
|
||||||
# (not an InstrumentedList or other SQLAlchemy construct)
|
|
||||||
if not hasattr(hashed_customer, 'hashed_email'):
|
|
||||||
_LOGGER.debug(
|
|
||||||
"Customer %s has invalid hashed_version type: %s",
|
|
||||||
customer.id,
|
|
||||||
type(hashed_customer),
|
|
||||||
)
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Match by email (highest priority) using hashed comparison
|
|
||||||
if guest_email and hashed_customer.hashed_email:
|
|
||||||
if hashed_customer.hashed_email == guest_email:
|
|
||||||
_LOGGER.info(
|
|
||||||
"Found exact email match (reservation_id=%s, cust=%s, email=%s) via hash",
|
|
||||||
reservation.id,
|
|
||||||
customer.email_address,
|
|
||||||
guest_email,
|
|
||||||
)
|
|
||||||
candidates.append((reservation, 3)) # Highest score
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Match by name (first + last) using hashed comparison
|
|
||||||
if guest_first_name and guest_last_name:
|
|
||||||
first_match = (
|
|
||||||
hashed_customer.hashed_given_name
|
|
||||||
and hashed_customer.hashed_given_name == guest_first_name
|
|
||||||
)
|
|
||||||
last_match = (
|
|
||||||
hashed_customer.hashed_surname
|
|
||||||
and hashed_customer.hashed_surname == guest_last_name
|
|
||||||
)
|
|
||||||
|
|
||||||
if first_match and last_match:
|
|
||||||
_LOGGER.info(
|
|
||||||
"Found exact name match (reservation_id=%s) via hash",
|
|
||||||
reservation.id,
|
|
||||||
)
|
|
||||||
candidates.append((reservation, 2)) # Medium-high score
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Return highest-scoring match
|
|
||||||
if candidates:
|
|
||||||
candidates.sort(key=lambda x: x[1], reverse=True)
|
|
||||||
return candidates[0][0]
|
|
||||||
|
|
||||||
return None
|
|
||||||
|
|
||||||
def _filter_reservations_by_guest_details(
|
def _filter_reservations_by_guest_details(
|
||||||
self,
|
self,
|
||||||
reservations: list[Reservation],
|
reservations: list[Reservation],
|
||||||
@@ -1569,50 +1485,34 @@ class ConversionService:
|
|||||||
|
|
||||||
# Update the conversion with matched entities if found
|
# Update the conversion with matched entities if found
|
||||||
if matched_reservation or matched_customer or matched_hashed_customer:
|
if matched_reservation or matched_customer or matched_hashed_customer:
|
||||||
# Set attribution flags and matched entities based on match type
|
conversion.reservation_id = (
|
||||||
|
matched_reservation.id if matched_reservation else None
|
||||||
|
)
|
||||||
|
conversion.customer_id = (
|
||||||
|
matched_customer.id if matched_customer else None
|
||||||
|
)
|
||||||
|
conversion.hashed_customer_id = (
|
||||||
|
matched_hashed_customer.id if matched_hashed_customer else None
|
||||||
|
)
|
||||||
|
|
||||||
|
# Set attribution flags based on match type and check for regular guest
|
||||||
|
is_attributable = False
|
||||||
if match_type == "id":
|
if match_type == "id":
|
||||||
# ID-based matches (fbclid/gclid/md5_unique_id) are always directly attributable
|
# ID-based matches (fbclid/gclid/md5_unique_id) are always directly attributable
|
||||||
# Link to both reservation and customer
|
|
||||||
conversion.reservation_id = (
|
|
||||||
matched_reservation.id if matched_reservation else None
|
|
||||||
)
|
|
||||||
conversion.customer_id = (
|
|
||||||
matched_customer.id if matched_customer else None
|
|
||||||
)
|
|
||||||
conversion.hashed_customer_id = (
|
|
||||||
matched_hashed_customer.id if matched_hashed_customer else None
|
|
||||||
)
|
|
||||||
conversion.directly_attributable = True
|
conversion.directly_attributable = True
|
||||||
conversion.guest_matched = False
|
conversion.guest_matched = False
|
||||||
is_attributable = True
|
is_attributable = True
|
||||||
|
|
||||||
# Check if guest is regular for ID matches
|
|
||||||
if matched_reservation:
|
|
||||||
await self._check_if_regular(conversion, matched_reservation, session)
|
|
||||||
|
|
||||||
elif match_type == "guest_details":
|
elif match_type == "guest_details":
|
||||||
# Guest detail matches: link to customer/hashed_customer directly (NO reservation)
|
# Guest detail matches: check if guest is regular or dates match
|
||||||
# Only link to reservation if dates match
|
is_attributable = await self._check_if_attributable(
|
||||||
conversion.customer_id = (
|
matched_reservation, conversion, session
|
||||||
matched_customer.id if matched_customer else None
|
|
||||||
)
|
|
||||||
conversion.hashed_customer_id = (
|
|
||||||
matched_hashed_customer.id if matched_hashed_customer else None
|
|
||||||
)
|
|
||||||
conversion.guest_matched = True
|
|
||||||
|
|
||||||
# For guest-detail matches, we don't have a matched_reservation
|
|
||||||
# Instead, check if dates align with any existing reservations for this customer
|
|
||||||
is_attributable = await self._check_if_attributable_guest_match(
|
|
||||||
matched_customer, conversion, session
|
|
||||||
)
|
)
|
||||||
conversion.directly_attributable = is_attributable
|
conversion.directly_attributable = is_attributable
|
||||||
|
conversion.guest_matched = True
|
||||||
|
|
||||||
# Check if guest is regular (if we have a customer to reference)
|
# Check if guest is regular for both ID and guest detail matches
|
||||||
if matched_customer:
|
if matched_reservation:
|
||||||
await self._check_if_regular_by_customer(
|
await self._check_if_regular(conversion, matched_reservation, session)
|
||||||
conversion, matched_customer, session
|
|
||||||
)
|
|
||||||
|
|
||||||
# Update conversion_guest with hashed_customer reference if matched
|
# Update conversion_guest with hashed_customer reference if matched
|
||||||
if conversion_guest and matched_hashed_customer:
|
if conversion_guest and matched_hashed_customer:
|
||||||
@@ -1743,127 +1643,3 @@ class ConversionService:
|
|||||||
return True
|
return True
|
||||||
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
async def _check_if_attributable_guest_match(
|
|
||||||
self,
|
|
||||||
matched_customer: Customer | None,
|
|
||||||
conversion: Conversion,
|
|
||||||
session: AsyncSession,
|
|
||||||
) -> bool:
|
|
||||||
"""Check if a guest-detail matched conversion is attributable based on date alignment.
|
|
||||||
|
|
||||||
For guest-detail matches (without a specific reservation), check if the conversion's
|
|
||||||
room dates align with ANY of the customer's reservations (date tolerance ±7 days).
|
|
||||||
|
|
||||||
Args:
|
|
||||||
matched_customer: The matched Customer record
|
|
||||||
conversion: The Conversion record being evaluated
|
|
||||||
session: AsyncSession for database queries
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
True if conversion dates align with any reservation, False otherwise
|
|
||||||
"""
|
|
||||||
if not matched_customer or not conversion.conversion_rooms:
|
|
||||||
return False
|
|
||||||
|
|
||||||
# Find all reservations for this customer
|
|
||||||
reservations_result = await session.execute(
|
|
||||||
select(Reservation).where(Reservation.customer_id == matched_customer.id)
|
|
||||||
)
|
|
||||||
reservations = reservations_result.scalars().all()
|
|
||||||
|
|
||||||
if not reservations:
|
|
||||||
return False
|
|
||||||
|
|
||||||
# Check if any conversion_room dates match any reservation dates
|
|
||||||
for room in conversion.conversion_rooms:
|
|
||||||
if not room.arrival_date or not room.departure_date:
|
|
||||||
continue
|
|
||||||
|
|
||||||
for reservation in reservations:
|
|
||||||
if not reservation.start_date or not reservation.end_date:
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Check if dates match (within ±7 day tolerance)
|
|
||||||
arrival_match = abs(
|
|
||||||
(room.arrival_date - reservation.start_date).days
|
|
||||||
) <= 7
|
|
||||||
departure_match = abs(
|
|
||||||
(room.departure_date - reservation.end_date).days
|
|
||||||
) <= 7
|
|
||||||
|
|
||||||
if arrival_match and departure_match:
|
|
||||||
_LOGGER.info(
|
|
||||||
"Marking guest-detail match as attributable: room dates %s-%s match reservation dates %s-%s",
|
|
||||||
room.arrival_date,
|
|
||||||
room.departure_date,
|
|
||||||
reservation.start_date,
|
|
||||||
reservation.end_date,
|
|
||||||
)
|
|
||||||
return True
|
|
||||||
|
|
||||||
return False
|
|
||||||
|
|
||||||
async def _check_if_regular_by_customer(
|
|
||||||
self,
|
|
||||||
conversion: Conversion,
|
|
||||||
matched_customer: Customer,
|
|
||||||
session: AsyncSession,
|
|
||||||
) -> None:
|
|
||||||
"""Check if guest is regular based on customer without a specific reservation.
|
|
||||||
|
|
||||||
For guest-detail matches, determine if the guest is regular by checking if
|
|
||||||
their earliest paying conversion predates their earliest reservation.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
conversion: The Conversion record being evaluated
|
|
||||||
matched_customer: The matched Customer record
|
|
||||||
session: AsyncSession for database queries
|
|
||||||
"""
|
|
||||||
if not conversion.guest or not matched_customer.id:
|
|
||||||
return
|
|
||||||
|
|
||||||
# Find the earliest paying conversion for this guest
|
|
||||||
# (booked reservations from hotel with actual revenue)
|
|
||||||
earliest_paying_conversion_result = await session.execute(
|
|
||||||
select(Conversion)
|
|
||||||
.join(ConversionRoom, Conversion.id == ConversionRoom.conversion_id)
|
|
||||||
.where(
|
|
||||||
Conversion.hotel_id == conversion.hotel_id,
|
|
||||||
Conversion.guest_id == conversion.guest_id,
|
|
||||||
ConversionRoom.total_revenue.isnot(None),
|
|
||||||
ConversionRoom.total_revenue > Decimal(0),
|
|
||||||
)
|
|
||||||
.order_by(Conversion.reservation_date.asc())
|
|
||||||
.limit(1)
|
|
||||||
)
|
|
||||||
earliest_paying_conversion = earliest_paying_conversion_result.scalar_one_or_none()
|
|
||||||
|
|
||||||
if not earliest_paying_conversion:
|
|
||||||
conversion.guest.is_regular = False
|
|
||||||
return
|
|
||||||
|
|
||||||
# Find the earliest reservation (booking request we sent) for this customer
|
|
||||||
earliest_reservation_result = await session.execute(
|
|
||||||
select(Reservation)
|
|
||||||
.where(Reservation.customer_id == matched_customer.id)
|
|
||||||
.order_by(Reservation.start_date.asc())
|
|
||||||
.limit(1)
|
|
||||||
)
|
|
||||||
earliest_reservation = earliest_reservation_result.scalar_one_or_none()
|
|
||||||
|
|
||||||
if not earliest_reservation:
|
|
||||||
conversion.guest.is_regular = False
|
|
||||||
return
|
|
||||||
|
|
||||||
# Guest is regular if their earliest paying conversion predates all their reservations
|
|
||||||
# (meaning they were already a customer before we started tracking reservations)
|
|
||||||
is_regular = earliest_paying_conversion.reservation_date < earliest_reservation.start_date
|
|
||||||
conversion.guest.is_regular = is_regular
|
|
||||||
|
|
||||||
if is_regular:
|
|
||||||
_LOGGER.info(
|
|
||||||
"Marking guest as regular (via customer): earliest paying conversion date %s is before first reservation %s",
|
|
||||||
earliest_paying_conversion.reservation_date,
|
|
||||||
earliest_reservation.start_date,
|
|
||||||
)
|
|
||||||
|
|||||||
@@ -159,7 +159,9 @@ class TestConversionServiceWithImportedData:
|
|||||||
EXPECTED_TOTAL_ROOMS = 539
|
EXPECTED_TOTAL_ROOMS = 539
|
||||||
# Note: Currently no matches by tracking ID because XML data uses different formats
|
# Note: Currently no matches by tracking ID because XML data uses different formats
|
||||||
# This is expected with the test data. Real PMS data would have higher match rates.
|
# This is expected with the test data. Real PMS data would have higher match rates.
|
||||||
EXPECTED_MATCHED_TO_RESERVATION = 19
|
EXPECTED_MATCHED_TO_RESERVATION = 0
|
||||||
|
|
||||||
|
EXPECTED_MATCHED_TO_CUSTOMER = 19
|
||||||
|
|
||||||
print(f"\nBaseline Match Counts:")
|
print(f"\nBaseline Match Counts:")
|
||||||
print(f" Total reservations in XML: {EXPECTED_TOTAL_RESERVATIONS}")
|
print(f" Total reservations in XML: {EXPECTED_TOTAL_RESERVATIONS}")
|
||||||
@@ -168,6 +170,8 @@ class TestConversionServiceWithImportedData:
|
|||||||
print(f" Matched to reservation: {EXPECTED_MATCHED_TO_RESERVATION}")
|
print(f" Matched to reservation: {EXPECTED_MATCHED_TO_RESERVATION}")
|
||||||
match_rate = (EXPECTED_MATCHED_TO_RESERVATION / EXPECTED_TOTAL_RESERVATIONS * 100) if EXPECTED_TOTAL_RESERVATIONS > 0 else 0
|
match_rate = (EXPECTED_MATCHED_TO_RESERVATION / EXPECTED_TOTAL_RESERVATIONS * 100) if EXPECTED_TOTAL_RESERVATIONS > 0 else 0
|
||||||
print(f" Match rate: {match_rate:.1f}%")
|
print(f" Match rate: {match_rate:.1f}%")
|
||||||
|
print(f" Matched to customer: {EXPECTED_MATCHED_TO_CUSTOMER}")
|
||||||
|
print(f" Match rate (to customer): {(EXPECTED_MATCHED_TO_CUSTOMER / EXPECTED_TOTAL_RESERVATIONS * 100) if EXPECTED_TOTAL_RESERVATIONS > 0 else 0:.1f}%")
|
||||||
|
|
||||||
# Verify baseline stability on subsequent runs
|
# Verify baseline stability on subsequent runs
|
||||||
assert (
|
assert (
|
||||||
@@ -180,6 +184,10 @@ class TestConversionServiceWithImportedData:
|
|||||||
stats["matched_to_reservation"] == EXPECTED_MATCHED_TO_RESERVATION
|
stats["matched_to_reservation"] == EXPECTED_MATCHED_TO_RESERVATION
|
||||||
), f"Matched reservations should be {EXPECTED_MATCHED_TO_RESERVATION}, got {stats['matched_to_reservation']}"
|
), f"Matched reservations should be {EXPECTED_MATCHED_TO_RESERVATION}, got {stats['matched_to_reservation']}"
|
||||||
|
|
||||||
|
assert (
|
||||||
|
stats["matched_to_customer"] == EXPECTED_MATCHED_TO_CUSTOMER
|
||||||
|
), f"Matched customers should be {EXPECTED_MATCHED_TO_CUSTOMER}, got {stats['matched_to_customer']}"
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_conversion_room_revenue_aggregation(
|
async def test_conversion_room_revenue_aggregation(
|
||||||
self, test_db_session, test_config, test_data_dir
|
self, test_db_session, test_config, test_data_dir
|
||||||
@@ -334,53 +342,6 @@ class TestHashedMatchingLogic:
|
|||||||
"""Test the hashed matching logic used in ConversionService."""
|
"""Test the hashed matching logic used in ConversionService."""
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_no_match_without_hashed_customer(self, test_db_session):
|
|
||||||
"""Test that matching fails gracefully when customer has no hashed version."""
|
|
||||||
# Create a customer WITHOUT hashed data
|
|
||||||
customer = Customer(
|
|
||||||
given_name="Bob",
|
|
||||||
surname="Jones",
|
|
||||||
email_address="bob@example.com",
|
|
||||||
contact_id="test_contact_3",
|
|
||||||
)
|
|
||||||
test_db_session.add(customer)
|
|
||||||
await test_db_session.commit()
|
|
||||||
|
|
||||||
# Create a reservation
|
|
||||||
reservation = Reservation(
|
|
||||||
customer_id=customer.id,
|
|
||||||
unique_id="res_3",
|
|
||||||
hotel_code="test_hotel",
|
|
||||||
)
|
|
||||||
test_db_session.add(reservation)
|
|
||||||
await test_db_session.commit()
|
|
||||||
|
|
||||||
# Test the matching logic
|
|
||||||
service = ConversionService(test_db_session)
|
|
||||||
|
|
||||||
# Eagerly load reservations
|
|
||||||
from sqlalchemy.orm import selectinload
|
|
||||||
result = await test_db_session.execute(
|
|
||||||
select(Reservation)
|
|
||||||
.where(Reservation.id == reservation.id)
|
|
||||||
.options(selectinload(Reservation.customer).selectinload(Customer.hashed_version))
|
|
||||||
)
|
|
||||||
reservations = result.scalars().all()
|
|
||||||
|
|
||||||
hashed_email = hashlib.sha256(
|
|
||||||
"bob@example.com".lower().strip().encode("utf-8")
|
|
||||||
).hexdigest()
|
|
||||||
|
|
||||||
matched = service._match_reservations_by_guest_details(
|
|
||||||
reservations,
|
|
||||||
guest_first_name=None,
|
|
||||||
guest_last_name=None,
|
|
||||||
guest_email=hashed_email,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Should not match because customer has no hashed version
|
|
||||||
assert matched is None, "Should not match without hashed customer"
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@@ -505,66 +466,7 @@ class TestHashedMatchingLogic:
|
|||||||
assert stats["total_reservations"] == 1
|
assert stats["total_reservations"] == 1
|
||||||
assert stats["total_daily_sales"] == 1
|
assert stats["total_daily_sales"] == 1
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_hashed_customer_missing_fields_handled_gracefully(
|
|
||||||
self, test_db_session
|
|
||||||
):
|
|
||||||
"""Test that matching handles customers with missing hashed fields gracefully."""
|
|
||||||
# Create a customer
|
|
||||||
customer = Customer(
|
|
||||||
given_name="Eve",
|
|
||||||
surname="Taylor",
|
|
||||||
email_address="eve@example.com",
|
|
||||||
contact_id="test_contact_7",
|
|
||||||
)
|
|
||||||
test_db_session.add(customer)
|
|
||||||
await test_db_session.flush()
|
|
||||||
|
|
||||||
# Create hashed customer but simulate missing fields by manually setting to None
|
|
||||||
hashed_customer = HashedCustomer(
|
|
||||||
customer_id=customer.id,
|
|
||||||
contact_id="test_contact_7_hashed",
|
|
||||||
hashed_email=None, # Simulate missing hashed email
|
|
||||||
hashed_given_name=None, # Simulate missing hashed name
|
|
||||||
hashed_surname=None,
|
|
||||||
)
|
|
||||||
test_db_session.add(hashed_customer)
|
|
||||||
await test_db_session.flush()
|
|
||||||
|
|
||||||
# Create reservation
|
|
||||||
reservation = Reservation(
|
|
||||||
customer_id=customer.id,
|
|
||||||
unique_id="res_7",
|
|
||||||
hotel_code="test_hotel",
|
|
||||||
)
|
|
||||||
test_db_session.add(reservation)
|
|
||||||
await test_db_session.commit()
|
|
||||||
|
|
||||||
# Test matching - should not crash even with missing hashed fields
|
|
||||||
service = ConversionService(test_db_session)
|
|
||||||
|
|
||||||
# Eagerly load reservations
|
|
||||||
from sqlalchemy.orm import selectinload
|
|
||||||
result = await test_db_session.execute(
|
|
||||||
select(Reservation)
|
|
||||||
.where(Reservation.id == reservation.id)
|
|
||||||
.options(selectinload(Reservation.customer).selectinload(Customer.hashed_version))
|
|
||||||
)
|
|
||||||
reservations = result.scalars().all()
|
|
||||||
|
|
||||||
hashed_email = hashlib.sha256(
|
|
||||||
"eve@example.com".lower().strip().encode("utf-8")
|
|
||||||
).hexdigest()
|
|
||||||
|
|
||||||
matched = service._match_reservations_by_guest_details(
|
|
||||||
reservations,
|
|
||||||
guest_first_name=None,
|
|
||||||
guest_last_name=None,
|
|
||||||
guest_email=hashed_email,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Should not match because hashed customer fields are None
|
|
||||||
assert matched is None, "Should not match with missing hashed fields"
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_conversion_guest_composite_key_prevents_duplicates(
|
async def test_conversion_guest_composite_key_prevents_duplicates(
|
||||||
|
|||||||
Reference in New Issue
Block a user