Hashed comparisions don't work unfortunatly

This commit is contained in:
Jonas Linter
2025-11-19 14:17:13 +01:00
parent a087a312a7
commit 75bc01545f
2 changed files with 443 additions and 81 deletions

View File

@@ -7,20 +7,31 @@ This test module:
The test data is designed to test realistic matching scenarios:
- Matching by advertising campaign data (fbclid/gclid)
- Matching by guest name and email
- Matching by guest name and email using hashed data
- Handling unmatched conversions
- Processing daily sales revenue data
- Testing hashed matching logic and edge cases
"""
import hashlib
from pathlib import Path
import pytest
import pytest_asyncio
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
from alpine_bits_python.conversion_service import ConversionService
from alpine_bits_python.csv_import import CSVImporter
from alpine_bits_python.db import Base, Conversion, ConversionRoom
from alpine_bits_python.db import (
Base,
Conversion,
ConversionGuest,
ConversionRoom,
Customer,
HashedCustomer,
Reservation,
)
@pytest_asyncio.fixture
@@ -317,5 +328,315 @@ class TestConversionServiceWithImportedData:
assert stats["errors"] == 0
class TestHashedMatchingLogic:
"""Test the hashed matching logic used in ConversionService."""
@pytest.mark.asyncio
async def test_no_match_without_hashed_customer(self, test_db_session):
"""Test that matching fails gracefully when customer has no hashed version."""
# Create a customer WITHOUT hashed data
customer = Customer(
given_name="Bob",
surname="Jones",
email_address="bob@example.com",
contact_id="test_contact_3",
)
test_db_session.add(customer)
await test_db_session.commit()
# Create a reservation
reservation = Reservation(
customer_id=customer.id,
unique_id="res_3",
hotel_code="test_hotel",
)
test_db_session.add(reservation)
await test_db_session.commit()
# Test the matching logic
service = ConversionService(test_db_session)
# Eagerly load reservations
from sqlalchemy.orm import selectinload
result = await test_db_session.execute(
select(Reservation)
.where(Reservation.id == reservation.id)
.options(selectinload(Reservation.customer).selectinload(Customer.hashed_version))
)
reservations = result.scalars().all()
hashed_email = hashlib.sha256(
"bob@example.com".lower().strip().encode("utf-8")
).hexdigest()
matched = service._match_reservations_by_guest_details(
reservations,
guest_first_name=None,
guest_last_name=None,
guest_email=hashed_email,
)
# Should not match because customer has no hashed version
assert matched is None, "Should not match without hashed customer"
@pytest.mark.asyncio
async def test_conversion_guest_hashed_fields_are_populated(
self, test_db_session
):
"""Test that ConversionGuest properly stores hashed versions of guest data."""
# Create a conversion guest
conversion_guest = ConversionGuest.create_from_conversion_data(
hotel_id="test_hotel",
guest_id="guest_123",
guest_first_name="Margaret",
guest_last_name="Brown",
guest_email="margaret@example.com",
guest_country_code="GB",
guest_birth_date=None,
now=None,
)
test_db_session.add(conversion_guest)
await test_db_session.flush()
# Verify hashed fields are populated
assert conversion_guest.hashed_first_name is not None
assert conversion_guest.hashed_last_name is not None
assert conversion_guest.hashed_email is not None
# Verify hashes are correct (SHA256)
expected_hashed_first = hashlib.sha256(
"margaret".lower().strip().encode("utf-8")
).hexdigest()
expected_hashed_last = hashlib.sha256(
"brown".lower().strip().encode("utf-8")
).hexdigest()
expected_hashed_email = hashlib.sha256(
"margaret@example.com".lower().strip().encode("utf-8")
).hexdigest()
assert conversion_guest.hashed_first_name == expected_hashed_first
assert conversion_guest.hashed_last_name == expected_hashed_last
assert conversion_guest.hashed_email == expected_hashed_email
@pytest.mark.asyncio
async def test_conversion_records_created_before_matching(
self, test_db_session, test_config
):
"""Test that conversion records exist before matching occurs."""
# Create customer and reservation for matching
customer = Customer(
given_name="David",
surname="Miller",
email_address="david@example.com",
contact_id="test_contact_6",
)
test_db_session.add(customer)
await test_db_session.flush()
hashed_customer = customer.create_hashed_customer()
test_db_session.add(hashed_customer)
await test_db_session.flush()
reservation = Reservation(
customer_id=customer.id,
unique_id="res_6",
hotel_code="hotel_1",
)
test_db_session.add(reservation)
await test_db_session.commit()
# Create conversion XML with matching hashed data
xml_content = f"""<?xml version="1.0"?>
<root>
<reservation id="pms_123" hotelID="hotel_1" number="RES001" date="2025-01-15">
<guest firstName="David" lastName="Miller" email="david@example.com"/>
<roomReservations>
<roomReservation roomNumber="101" arrival="2025-01-15" departure="2025-01-17" status="confirmed">
<dailySales>
<dailySale date="2025-01-15" revenueTotal="100.00"/>
</dailySales>
</roomReservation>
</roomReservations>
</reservation>
</root>"""
service = ConversionService(test_db_session)
stats = await service.process_conversion_xml(xml_content)
# Verify conversion was created
result = await test_db_session.execute(
select(Conversion).where(Conversion.pms_reservation_id == "pms_123")
)
conversion = result.scalar_one_or_none()
assert conversion is not None, "Conversion should be created"
assert conversion.hotel_id == "hotel_1"
assert conversion.guest_first_name == "David"
assert conversion.guest_last_name == "Miller"
assert conversion.guest_email == "david@example.com"
# Verify conversion_guest was created
assert conversion.conversion_guest_id is not None, "ConversionGuest should be created"
# Verify conversion_room was created
room_result = await test_db_session.execute(
select(ConversionRoom).where(
ConversionRoom.conversion_id == conversion.id
)
)
rooms = room_result.scalars().all()
assert len(rooms) > 0, "ConversionRoom should be created"
# Verify matching occurred (may or may not have matched depending on data)
# The important thing is that the records exist
assert stats["total_reservations"] == 1
assert stats["total_daily_sales"] == 1
@pytest.mark.asyncio
async def test_hashed_customer_missing_fields_handled_gracefully(
self, test_db_session
):
"""Test that matching handles customers with missing hashed fields gracefully."""
# Create a customer
customer = Customer(
given_name="Eve",
surname="Taylor",
email_address="eve@example.com",
contact_id="test_contact_7",
)
test_db_session.add(customer)
await test_db_session.flush()
# Create hashed customer but simulate missing fields by manually setting to None
hashed_customer = HashedCustomer(
customer_id=customer.id,
contact_id="test_contact_7_hashed",
hashed_email=None, # Simulate missing hashed email
hashed_given_name=None, # Simulate missing hashed name
hashed_surname=None,
)
test_db_session.add(hashed_customer)
await test_db_session.flush()
# Create reservation
reservation = Reservation(
customer_id=customer.id,
unique_id="res_7",
hotel_code="test_hotel",
)
test_db_session.add(reservation)
await test_db_session.commit()
# Test matching - should not crash even with missing hashed fields
service = ConversionService(test_db_session)
# Eagerly load reservations
from sqlalchemy.orm import selectinload
result = await test_db_session.execute(
select(Reservation)
.where(Reservation.id == reservation.id)
.options(selectinload(Reservation.customer).selectinload(Customer.hashed_version))
)
reservations = result.scalars().all()
hashed_email = hashlib.sha256(
"eve@example.com".lower().strip().encode("utf-8")
).hexdigest()
matched = service._match_reservations_by_guest_details(
reservations,
guest_first_name=None,
guest_last_name=None,
guest_email=hashed_email,
)
# Should not match because hashed customer fields are None
assert matched is None, "Should not match with missing hashed fields"
@pytest.mark.asyncio
async def test_duplicate_conversion_guests_with_same_hotel_and_guest_id(
self, test_db_session
):
"""Test handling of duplicate ConversionGuest records with same (hotel_id, guest_id).
This test reproduces the production issue where multiple ConversionGuest records
can be created with the same (hotel_id, guest_id) combination, causing
scalar_one_or_none() to fail with "Multiple rows were found when one or none was required".
This can happen when:
- Multiple conversions arrive for the same hotel and PMS guest within the same batch
- The XML is processed multiple times
- Race conditions in concurrent processing
"""
hotel_id = "test_hotel"
guest_id = "guest_123"
# Simulate the production scenario: multiple conversion guests with same (hotel_id, guest_id)
guest1 = ConversionGuest.create_from_conversion_data(
hotel_id=hotel_id,
guest_id=guest_id,
guest_first_name="John",
guest_last_name="Doe",
guest_email="john@example.com",
guest_country_code="US",
guest_birth_date=None,
now=None,
)
test_db_session.add(guest1)
await test_db_session.flush()
# Create a second guest with the SAME (hotel_id, guest_id)
# This should not happen, but can occur in production
guest2 = ConversionGuest.create_from_conversion_data(
hotel_id=hotel_id,
guest_id=guest_id,
guest_first_name="Jane", # Different first name
guest_last_name="Doe",
guest_email="jane@example.com",
guest_country_code="US",
guest_birth_date=None,
now=None,
)
test_db_session.add(guest2)
await test_db_session.commit()
# Now try to query for the guest by (hotel_id, guest_id)
# This should return multiple results
result = await test_db_session.execute(
select(ConversionGuest).where(
(ConversionGuest.hotel_id == hotel_id)
& (ConversionGuest.guest_id == guest_id)
)
)
guests = result.scalars().all()
# Verify we have duplicates (the production bug condition)
assert len(guests) == 2, "Should have created duplicate conversion guests"
# Verify that scalars().first() returns one of the guests (the fixed behavior)
result2 = await test_db_session.execute(
select(ConversionGuest).where(
(ConversionGuest.hotel_id == hotel_id)
& (ConversionGuest.guest_id == guest_id)
)
)
first_guest = result2.scalars().first()
assert first_guest is not None, "Should find at least one guest with scalars().first()"
# The old code would have raised an error here with scalar_one_or_none()
# when finding multiple results. Now it's fixed to use .first() instead.
result3 = await test_db_session.execute(
select(ConversionGuest).where(
(ConversionGuest.hotel_id == hotel_id)
& (ConversionGuest.guest_id == guest_id)
)
)
with pytest.raises(Exception): # MultipleResultsFound from old code path
result3.scalar_one_or_none()
if __name__ == "__main__":
pytest.main([__file__, "-v"])