Hashed comparisions don't work unfortunatly
This commit is contained in:
@@ -7,20 +7,31 @@ This test module:
|
||||
|
||||
The test data is designed to test realistic matching scenarios:
|
||||
- Matching by advertising campaign data (fbclid/gclid)
|
||||
- Matching by guest name and email
|
||||
- Matching by guest name and email using hashed data
|
||||
- Handling unmatched conversions
|
||||
- Processing daily sales revenue data
|
||||
- Testing hashed matching logic and edge cases
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
|
||||
|
||||
from alpine_bits_python.conversion_service import ConversionService
|
||||
from alpine_bits_python.csv_import import CSVImporter
|
||||
from alpine_bits_python.db import Base, Conversion, ConversionRoom
|
||||
from alpine_bits_python.db import (
|
||||
Base,
|
||||
Conversion,
|
||||
ConversionGuest,
|
||||
ConversionRoom,
|
||||
Customer,
|
||||
HashedCustomer,
|
||||
Reservation,
|
||||
)
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
|
||||
@@ -317,5 +328,315 @@ class TestConversionServiceWithImportedData:
|
||||
assert stats["errors"] == 0
|
||||
|
||||
|
||||
class TestHashedMatchingLogic:
|
||||
"""Test the hashed matching logic used in ConversionService."""
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_no_match_without_hashed_customer(self, test_db_session):
|
||||
"""Test that matching fails gracefully when customer has no hashed version."""
|
||||
# Create a customer WITHOUT hashed data
|
||||
customer = Customer(
|
||||
given_name="Bob",
|
||||
surname="Jones",
|
||||
email_address="bob@example.com",
|
||||
contact_id="test_contact_3",
|
||||
)
|
||||
test_db_session.add(customer)
|
||||
await test_db_session.commit()
|
||||
|
||||
# Create a reservation
|
||||
reservation = Reservation(
|
||||
customer_id=customer.id,
|
||||
unique_id="res_3",
|
||||
hotel_code="test_hotel",
|
||||
)
|
||||
test_db_session.add(reservation)
|
||||
await test_db_session.commit()
|
||||
|
||||
# Test the matching logic
|
||||
service = ConversionService(test_db_session)
|
||||
|
||||
# Eagerly load reservations
|
||||
from sqlalchemy.orm import selectinload
|
||||
result = await test_db_session.execute(
|
||||
select(Reservation)
|
||||
.where(Reservation.id == reservation.id)
|
||||
.options(selectinload(Reservation.customer).selectinload(Customer.hashed_version))
|
||||
)
|
||||
reservations = result.scalars().all()
|
||||
|
||||
hashed_email = hashlib.sha256(
|
||||
"bob@example.com".lower().strip().encode("utf-8")
|
||||
).hexdigest()
|
||||
|
||||
matched = service._match_reservations_by_guest_details(
|
||||
reservations,
|
||||
guest_first_name=None,
|
||||
guest_last_name=None,
|
||||
guest_email=hashed_email,
|
||||
)
|
||||
|
||||
# Should not match because customer has no hashed version
|
||||
assert matched is None, "Should not match without hashed customer"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_conversion_guest_hashed_fields_are_populated(
|
||||
self, test_db_session
|
||||
):
|
||||
"""Test that ConversionGuest properly stores hashed versions of guest data."""
|
||||
# Create a conversion guest
|
||||
conversion_guest = ConversionGuest.create_from_conversion_data(
|
||||
hotel_id="test_hotel",
|
||||
guest_id="guest_123",
|
||||
guest_first_name="Margaret",
|
||||
guest_last_name="Brown",
|
||||
guest_email="margaret@example.com",
|
||||
guest_country_code="GB",
|
||||
guest_birth_date=None,
|
||||
now=None,
|
||||
)
|
||||
test_db_session.add(conversion_guest)
|
||||
await test_db_session.flush()
|
||||
|
||||
# Verify hashed fields are populated
|
||||
assert conversion_guest.hashed_first_name is not None
|
||||
assert conversion_guest.hashed_last_name is not None
|
||||
assert conversion_guest.hashed_email is not None
|
||||
|
||||
# Verify hashes are correct (SHA256)
|
||||
expected_hashed_first = hashlib.sha256(
|
||||
"margaret".lower().strip().encode("utf-8")
|
||||
).hexdigest()
|
||||
expected_hashed_last = hashlib.sha256(
|
||||
"brown".lower().strip().encode("utf-8")
|
||||
).hexdigest()
|
||||
expected_hashed_email = hashlib.sha256(
|
||||
"margaret@example.com".lower().strip().encode("utf-8")
|
||||
).hexdigest()
|
||||
|
||||
assert conversion_guest.hashed_first_name == expected_hashed_first
|
||||
assert conversion_guest.hashed_last_name == expected_hashed_last
|
||||
assert conversion_guest.hashed_email == expected_hashed_email
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_conversion_records_created_before_matching(
|
||||
self, test_db_session, test_config
|
||||
):
|
||||
"""Test that conversion records exist before matching occurs."""
|
||||
# Create customer and reservation for matching
|
||||
customer = Customer(
|
||||
given_name="David",
|
||||
surname="Miller",
|
||||
email_address="david@example.com",
|
||||
contact_id="test_contact_6",
|
||||
)
|
||||
test_db_session.add(customer)
|
||||
await test_db_session.flush()
|
||||
|
||||
hashed_customer = customer.create_hashed_customer()
|
||||
test_db_session.add(hashed_customer)
|
||||
await test_db_session.flush()
|
||||
|
||||
reservation = Reservation(
|
||||
customer_id=customer.id,
|
||||
unique_id="res_6",
|
||||
hotel_code="hotel_1",
|
||||
)
|
||||
test_db_session.add(reservation)
|
||||
await test_db_session.commit()
|
||||
|
||||
# Create conversion XML with matching hashed data
|
||||
xml_content = f"""<?xml version="1.0"?>
|
||||
<root>
|
||||
<reservation id="pms_123" hotelID="hotel_1" number="RES001" date="2025-01-15">
|
||||
<guest firstName="David" lastName="Miller" email="david@example.com"/>
|
||||
<roomReservations>
|
||||
<roomReservation roomNumber="101" arrival="2025-01-15" departure="2025-01-17" status="confirmed">
|
||||
<dailySales>
|
||||
<dailySale date="2025-01-15" revenueTotal="100.00"/>
|
||||
</dailySales>
|
||||
</roomReservation>
|
||||
</roomReservations>
|
||||
</reservation>
|
||||
</root>"""
|
||||
|
||||
service = ConversionService(test_db_session)
|
||||
stats = await service.process_conversion_xml(xml_content)
|
||||
|
||||
# Verify conversion was created
|
||||
result = await test_db_session.execute(
|
||||
select(Conversion).where(Conversion.pms_reservation_id == "pms_123")
|
||||
)
|
||||
conversion = result.scalar_one_or_none()
|
||||
|
||||
assert conversion is not None, "Conversion should be created"
|
||||
assert conversion.hotel_id == "hotel_1"
|
||||
assert conversion.guest_first_name == "David"
|
||||
assert conversion.guest_last_name == "Miller"
|
||||
assert conversion.guest_email == "david@example.com"
|
||||
|
||||
# Verify conversion_guest was created
|
||||
assert conversion.conversion_guest_id is not None, "ConversionGuest should be created"
|
||||
|
||||
# Verify conversion_room was created
|
||||
room_result = await test_db_session.execute(
|
||||
select(ConversionRoom).where(
|
||||
ConversionRoom.conversion_id == conversion.id
|
||||
)
|
||||
)
|
||||
rooms = room_result.scalars().all()
|
||||
assert len(rooms) > 0, "ConversionRoom should be created"
|
||||
|
||||
# Verify matching occurred (may or may not have matched depending on data)
|
||||
# The important thing is that the records exist
|
||||
assert stats["total_reservations"] == 1
|
||||
assert stats["total_daily_sales"] == 1
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_hashed_customer_missing_fields_handled_gracefully(
|
||||
self, test_db_session
|
||||
):
|
||||
"""Test that matching handles customers with missing hashed fields gracefully."""
|
||||
# Create a customer
|
||||
customer = Customer(
|
||||
given_name="Eve",
|
||||
surname="Taylor",
|
||||
email_address="eve@example.com",
|
||||
contact_id="test_contact_7",
|
||||
)
|
||||
test_db_session.add(customer)
|
||||
await test_db_session.flush()
|
||||
|
||||
# Create hashed customer but simulate missing fields by manually setting to None
|
||||
hashed_customer = HashedCustomer(
|
||||
customer_id=customer.id,
|
||||
contact_id="test_contact_7_hashed",
|
||||
hashed_email=None, # Simulate missing hashed email
|
||||
hashed_given_name=None, # Simulate missing hashed name
|
||||
hashed_surname=None,
|
||||
)
|
||||
test_db_session.add(hashed_customer)
|
||||
await test_db_session.flush()
|
||||
|
||||
# Create reservation
|
||||
reservation = Reservation(
|
||||
customer_id=customer.id,
|
||||
unique_id="res_7",
|
||||
hotel_code="test_hotel",
|
||||
)
|
||||
test_db_session.add(reservation)
|
||||
await test_db_session.commit()
|
||||
|
||||
# Test matching - should not crash even with missing hashed fields
|
||||
service = ConversionService(test_db_session)
|
||||
|
||||
# Eagerly load reservations
|
||||
from sqlalchemy.orm import selectinload
|
||||
result = await test_db_session.execute(
|
||||
select(Reservation)
|
||||
.where(Reservation.id == reservation.id)
|
||||
.options(selectinload(Reservation.customer).selectinload(Customer.hashed_version))
|
||||
)
|
||||
reservations = result.scalars().all()
|
||||
|
||||
hashed_email = hashlib.sha256(
|
||||
"eve@example.com".lower().strip().encode("utf-8")
|
||||
).hexdigest()
|
||||
|
||||
matched = service._match_reservations_by_guest_details(
|
||||
reservations,
|
||||
guest_first_name=None,
|
||||
guest_last_name=None,
|
||||
guest_email=hashed_email,
|
||||
)
|
||||
|
||||
# Should not match because hashed customer fields are None
|
||||
assert matched is None, "Should not match with missing hashed fields"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_duplicate_conversion_guests_with_same_hotel_and_guest_id(
|
||||
self, test_db_session
|
||||
):
|
||||
"""Test handling of duplicate ConversionGuest records with same (hotel_id, guest_id).
|
||||
|
||||
This test reproduces the production issue where multiple ConversionGuest records
|
||||
can be created with the same (hotel_id, guest_id) combination, causing
|
||||
scalar_one_or_none() to fail with "Multiple rows were found when one or none was required".
|
||||
|
||||
This can happen when:
|
||||
- Multiple conversions arrive for the same hotel and PMS guest within the same batch
|
||||
- The XML is processed multiple times
|
||||
- Race conditions in concurrent processing
|
||||
"""
|
||||
hotel_id = "test_hotel"
|
||||
guest_id = "guest_123"
|
||||
|
||||
# Simulate the production scenario: multiple conversion guests with same (hotel_id, guest_id)
|
||||
guest1 = ConversionGuest.create_from_conversion_data(
|
||||
hotel_id=hotel_id,
|
||||
guest_id=guest_id,
|
||||
guest_first_name="John",
|
||||
guest_last_name="Doe",
|
||||
guest_email="john@example.com",
|
||||
guest_country_code="US",
|
||||
guest_birth_date=None,
|
||||
now=None,
|
||||
)
|
||||
test_db_session.add(guest1)
|
||||
await test_db_session.flush()
|
||||
|
||||
# Create a second guest with the SAME (hotel_id, guest_id)
|
||||
# This should not happen, but can occur in production
|
||||
guest2 = ConversionGuest.create_from_conversion_data(
|
||||
hotel_id=hotel_id,
|
||||
guest_id=guest_id,
|
||||
guest_first_name="Jane", # Different first name
|
||||
guest_last_name="Doe",
|
||||
guest_email="jane@example.com",
|
||||
guest_country_code="US",
|
||||
guest_birth_date=None,
|
||||
now=None,
|
||||
)
|
||||
test_db_session.add(guest2)
|
||||
await test_db_session.commit()
|
||||
|
||||
# Now try to query for the guest by (hotel_id, guest_id)
|
||||
# This should return multiple results
|
||||
result = await test_db_session.execute(
|
||||
select(ConversionGuest).where(
|
||||
(ConversionGuest.hotel_id == hotel_id)
|
||||
& (ConversionGuest.guest_id == guest_id)
|
||||
)
|
||||
)
|
||||
guests = result.scalars().all()
|
||||
|
||||
# Verify we have duplicates (the production bug condition)
|
||||
assert len(guests) == 2, "Should have created duplicate conversion guests"
|
||||
|
||||
# Verify that scalars().first() returns one of the guests (the fixed behavior)
|
||||
result2 = await test_db_session.execute(
|
||||
select(ConversionGuest).where(
|
||||
(ConversionGuest.hotel_id == hotel_id)
|
||||
& (ConversionGuest.guest_id == guest_id)
|
||||
)
|
||||
)
|
||||
first_guest = result2.scalars().first()
|
||||
assert first_guest is not None, "Should find at least one guest with scalars().first()"
|
||||
|
||||
# The old code would have raised an error here with scalar_one_or_none()
|
||||
# when finding multiple results. Now it's fixed to use .first() instead.
|
||||
result3 = await test_db_session.execute(
|
||||
select(ConversionGuest).where(
|
||||
(ConversionGuest.hotel_id == hotel_id)
|
||||
& (ConversionGuest.guest_id == guest_id)
|
||||
)
|
||||
)
|
||||
with pytest.raises(Exception): # MultipleResultsFound from old code path
|
||||
result3.scalar_one_or_none()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
pytest.main([__file__, "-v"])
|
||||
|
||||
Reference in New Issue
Block a user