Hashed comparisions don't work unfortunatly
This commit is contained in:
@@ -1,6 +1,7 @@
|
|||||||
"""Service for handling conversion data from hotel PMS XML files."""
|
"""Service for handling conversion data from hotel PMS XML files."""
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import hashlib
|
||||||
import xml.etree.ElementTree as ET
|
import xml.etree.ElementTree as ET
|
||||||
from datetime import UTC, datetime
|
from datetime import UTC, datetime
|
||||||
from decimal import Decimal
|
from decimal import Decimal
|
||||||
@@ -50,10 +51,11 @@ class ConversionService:
|
|||||||
self.supports_concurrent = False
|
self.supports_concurrent = False
|
||||||
|
|
||||||
# Cache for reservation and customer data within a single XML processing run
|
# Cache for reservation and customer data within a single XML processing run
|
||||||
# Maps hotel_code -> list of (reservation, customer) tuples
|
# Maps hotel_code -> list of (reservation, hashed_customer) tuples
|
||||||
# This significantly speeds up matching when processing large XML files
|
# This significantly speeds up matching when processing large XML files
|
||||||
|
# Uses hashed data for matching to preserve privacy
|
||||||
self._reservation_cache: dict[
|
self._reservation_cache: dict[
|
||||||
str | None, list[tuple[Reservation, Customer | None]]
|
str | None, list[tuple[Reservation, HashedCustomer | None]]
|
||||||
] = {}
|
] = {}
|
||||||
self._cache_initialized = False
|
self._cache_initialized = False
|
||||||
|
|
||||||
@@ -91,6 +93,11 @@ class ConversionService:
|
|||||||
If a guest with this key exists, updates it with new data.
|
If a guest with this key exists, updates it with new data.
|
||||||
If not, creates a new guest record.
|
If not, creates a new guest record.
|
||||||
|
|
||||||
|
NOTE: There is no database-level unique constraint on (hotel_id, guest_id),
|
||||||
|
so multiple ConversionGuest records can exist with the same key. This method
|
||||||
|
uses first() instead of scalar_one_or_none() to handle this gracefully and
|
||||||
|
update the most recently created record when duplicates exist.
|
||||||
|
|
||||||
Returns the ConversionGuest record, or None if no guest data provided.
|
Returns the ConversionGuest record, or None if no guest data provided.
|
||||||
"""
|
"""
|
||||||
# Don't create a ConversionGuest if we have no guest information
|
# Don't create a ConversionGuest if we have no guest information
|
||||||
@@ -104,12 +111,14 @@ class ConversionService:
|
|||||||
# Try to find existing guest by (hotel_id, guest_id)
|
# Try to find existing guest by (hotel_id, guest_id)
|
||||||
if guest_id:
|
if guest_id:
|
||||||
result = await session.execute(
|
result = await session.execute(
|
||||||
select(ConversionGuest).where(
|
select(ConversionGuest)
|
||||||
|
.where(
|
||||||
(ConversionGuest.hotel_id == hotel_id)
|
(ConversionGuest.hotel_id == hotel_id)
|
||||||
& (ConversionGuest.guest_id == guest_id)
|
& (ConversionGuest.guest_id == guest_id)
|
||||||
)
|
)
|
||||||
|
.order_by(ConversionGuest.last_seen.desc()) # Get most recently updated
|
||||||
)
|
)
|
||||||
existing_guest = result.scalar_one_or_none()
|
existing_guest = result.scalars().first()
|
||||||
|
|
||||||
if existing_guest:
|
if existing_guest:
|
||||||
# Update with new data
|
# Update with new data
|
||||||
@@ -210,16 +219,16 @@ class ConversionService:
|
|||||||
return stats
|
return stats
|
||||||
|
|
||||||
async def _load_reservation_cache(self) -> None:
|
async def _load_reservation_cache(self) -> None:
|
||||||
"""Load all reservations and customers into cache for fast matching.
|
"""Load all reservations and hashed customers into cache for fast matching.
|
||||||
|
|
||||||
This method is called once at the start of processing a large XML file.
|
This method is called once at the start of processing a large XML file.
|
||||||
It loads all reservations with their associated customers into an in-memory
|
It loads all reservations with their associated hashed customers into an in-memory
|
||||||
cache organized by hotel_code. This avoids repeated database queries during
|
cache organized by hotel_code. This avoids repeated database queries during
|
||||||
matching operations.
|
matching operations and uses hashed data for privacy-preserving matching.
|
||||||
|
|
||||||
The cache structure:
|
The cache structure:
|
||||||
- Key: hotel_code (str or None)
|
- Key: hotel_code (str or None)
|
||||||
- Value: List of (reservation, customer) tuples
|
- Value: List of (reservation, hashed_customer) tuples
|
||||||
|
|
||||||
This is especially beneficial for large XML files with many reservations
|
This is especially beneficial for large XML files with many reservations
|
||||||
where matching criteria is the same across multiple reservations.
|
where matching criteria is the same across multiple reservations.
|
||||||
@@ -241,8 +250,11 @@ class ConversionService:
|
|||||||
return
|
return
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Load all reservations with their customers in one query
|
# Load all reservations with their hashed customers in one query
|
||||||
query = select(Reservation).options(selectinload(Reservation.customer))
|
from sqlalchemy.orm import selectinload
|
||||||
|
query = select(Reservation).options(
|
||||||
|
selectinload(Reservation.customer).selectinload(Customer.hashed_version)
|
||||||
|
)
|
||||||
result = await session.execute(query)
|
result = await session.execute(query)
|
||||||
reservations = result.scalars().all()
|
reservations = result.scalars().all()
|
||||||
|
|
||||||
@@ -253,8 +265,12 @@ class ConversionService:
|
|||||||
hotel_code = reservation.hotel_code
|
hotel_code = reservation.hotel_code
|
||||||
if hotel_code not in self._reservation_cache:
|
if hotel_code not in self._reservation_cache:
|
||||||
self._reservation_cache[hotel_code] = []
|
self._reservation_cache[hotel_code] = []
|
||||||
|
# Cache the hashed customer instead of raw customer
|
||||||
|
hashed_customer = None
|
||||||
|
if reservation.customer and reservation.customer.hashed_version:
|
||||||
|
hashed_customer = reservation.customer.hashed_version
|
||||||
self._reservation_cache[hotel_code].append(
|
self._reservation_cache[hotel_code].append(
|
||||||
(reservation, reservation.customer)
|
(reservation, hashed_customer)
|
||||||
)
|
)
|
||||||
|
|
||||||
self._cache_initialized = True
|
self._cache_initialized = True
|
||||||
@@ -482,26 +498,6 @@ class ConversionService:
|
|||||||
except ValueError:
|
except ValueError:
|
||||||
_LOGGER.warning("Invalid creation time format: %s", creation_time_str)
|
_LOGGER.warning("Invalid creation time format: %s", creation_time_str)
|
||||||
|
|
||||||
# Find matching reservation, customer, and hashed_customer using advertising data and guest details
|
|
||||||
matched_reservation = None
|
|
||||||
matched_customer = None
|
|
||||||
matched_hashed_customer = None
|
|
||||||
|
|
||||||
if advertising_campagne or True:
|
|
||||||
match_result = await self._find_matching_entities(
|
|
||||||
advertising_campagne,
|
|
||||||
hotel_id,
|
|
||||||
reservation_date,
|
|
||||||
guest_first_name,
|
|
||||||
guest_last_name,
|
|
||||||
guest_email,
|
|
||||||
advertising_partner,
|
|
||||||
session,
|
|
||||||
)
|
|
||||||
matched_reservation = match_result["reservation"]
|
|
||||||
matched_customer = match_result["customer"]
|
|
||||||
matched_hashed_customer = match_result["hashed_customer"]
|
|
||||||
|
|
||||||
# Process all room reservations
|
# Process all room reservations
|
||||||
room_reservations = reservation_elem.find("roomReservations")
|
room_reservations = reservation_elem.find("roomReservations")
|
||||||
if room_reservations is None:
|
if room_reservations is None:
|
||||||
@@ -519,16 +515,7 @@ class ConversionService:
|
|||||||
existing_conversion = existing_result.scalar_one_or_none()
|
existing_conversion = existing_result.scalar_one_or_none()
|
||||||
|
|
||||||
if existing_conversion:
|
if existing_conversion:
|
||||||
# Update existing conversion
|
# Update existing conversion (matching will be done later)
|
||||||
existing_conversion.reservation_id = (
|
|
||||||
matched_reservation.id if matched_reservation else None
|
|
||||||
)
|
|
||||||
existing_conversion.customer_id = (
|
|
||||||
matched_customer.id if matched_customer else None
|
|
||||||
)
|
|
||||||
existing_conversion.hashed_customer_id = (
|
|
||||||
matched_hashed_customer.id if matched_hashed_customer else None
|
|
||||||
)
|
|
||||||
existing_conversion.reservation_number = reservation_number
|
existing_conversion.reservation_number = reservation_number
|
||||||
existing_conversion.reservation_date = reservation_date
|
existing_conversion.reservation_date = reservation_date
|
||||||
existing_conversion.creation_time = creation_time
|
existing_conversion.creation_time = creation_time
|
||||||
@@ -550,14 +537,12 @@ class ConversionService:
|
|||||||
pms_reservation_id,
|
pms_reservation_id,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
# Create new conversion entry
|
# Create new conversion entry (without matching - will be done later)
|
||||||
conversion = Conversion(
|
conversion = Conversion(
|
||||||
# Links to existing entities (nullable)
|
# Links to existing entities (nullable, will be filled in after matching)
|
||||||
reservation_id=matched_reservation.id if matched_reservation else None,
|
reservation_id=None,
|
||||||
customer_id=matched_customer.id if matched_customer else None,
|
customer_id=None,
|
||||||
hashed_customer_id=matched_hashed_customer.id
|
hashed_customer_id=None,
|
||||||
if matched_hashed_customer
|
|
||||||
else None,
|
|
||||||
# Reservation metadata
|
# Reservation metadata
|
||||||
hotel_id=hotel_id,
|
hotel_id=hotel_id,
|
||||||
pms_reservation_id=pms_reservation_id,
|
pms_reservation_id=pms_reservation_id,
|
||||||
@@ -604,16 +589,6 @@ class ConversionService:
|
|||||||
if conversion_guest:
|
if conversion_guest:
|
||||||
conversion.conversion_guest_id = conversion_guest.id
|
conversion.conversion_guest_id = conversion_guest.id
|
||||||
|
|
||||||
# Update stats for the conversion record itself
|
|
||||||
if matched_reservation:
|
|
||||||
stats["matched_to_reservation"] += 1
|
|
||||||
if matched_customer:
|
|
||||||
stats["matched_to_customer"] += 1
|
|
||||||
if matched_hashed_customer:
|
|
||||||
stats["matched_to_hashed_customer"] += 1
|
|
||||||
if not any([matched_reservation, matched_customer, matched_hashed_customer]):
|
|
||||||
stats["unmatched"] += 1
|
|
||||||
|
|
||||||
# Batch-load existing room reservations to avoid N+1 queries
|
# Batch-load existing room reservations to avoid N+1 queries
|
||||||
room_numbers = [
|
room_numbers = [
|
||||||
rm.get("roomNumber") for rm in room_reservations.findall("roomReservation")
|
rm.get("roomNumber") for rm in room_reservations.findall("roomReservation")
|
||||||
@@ -772,6 +747,55 @@ class ConversionService:
|
|||||||
num_adults,
|
num_adults,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Now that conversion, conversion_guest, and conversion_room records exist,
|
||||||
|
# perform matching using hashed guest data from conversion_guest
|
||||||
|
matched_reservation = None
|
||||||
|
matched_customer = None
|
||||||
|
matched_hashed_customer = None
|
||||||
|
|
||||||
|
if advertising_campagne or True:
|
||||||
|
# Use hashed data from conversion_guest for matching
|
||||||
|
hashed_first_name = conversion_guest.hashed_first_name if conversion_guest else None
|
||||||
|
hashed_last_name = conversion_guest.hashed_last_name if conversion_guest else None
|
||||||
|
hashed_email = conversion_guest.hashed_email if conversion_guest else None
|
||||||
|
|
||||||
|
match_result = await self._find_matching_entities(
|
||||||
|
advertising_campagne,
|
||||||
|
hotel_id,
|
||||||
|
reservation_date,
|
||||||
|
hashed_first_name,
|
||||||
|
hashed_last_name,
|
||||||
|
hashed_email,
|
||||||
|
advertising_partner,
|
||||||
|
session,
|
||||||
|
)
|
||||||
|
matched_reservation = match_result["reservation"]
|
||||||
|
matched_customer = match_result["customer"]
|
||||||
|
matched_hashed_customer = match_result["hashed_customer"]
|
||||||
|
|
||||||
|
# Update the conversion with matched entities if found
|
||||||
|
if matched_reservation or matched_customer or matched_hashed_customer:
|
||||||
|
conversion.reservation_id = (
|
||||||
|
matched_reservation.id if matched_reservation else None
|
||||||
|
)
|
||||||
|
conversion.customer_id = (
|
||||||
|
matched_customer.id if matched_customer else None
|
||||||
|
)
|
||||||
|
conversion.hashed_customer_id = (
|
||||||
|
matched_hashed_customer.id if matched_hashed_customer else None
|
||||||
|
)
|
||||||
|
conversion.updated_at = datetime.now()
|
||||||
|
|
||||||
|
# Update stats for the conversion record
|
||||||
|
if matched_reservation:
|
||||||
|
stats["matched_to_reservation"] += 1
|
||||||
|
if matched_customer:
|
||||||
|
stats["matched_to_customer"] += 1
|
||||||
|
if matched_hashed_customer:
|
||||||
|
stats["matched_to_hashed_customer"] += 1
|
||||||
|
if not any([matched_reservation, matched_customer, matched_hashed_customer]):
|
||||||
|
stats["unmatched"] += 1
|
||||||
|
|
||||||
return stats
|
return stats
|
||||||
|
|
||||||
async def _find_matching_entities(
|
async def _find_matching_entities(
|
||||||
@@ -1028,7 +1052,10 @@ class ConversionService:
|
|||||||
"Cache unavailable or empty, falling back to database query (hotel=%s)",
|
"Cache unavailable or empty, falling back to database query (hotel=%s)",
|
||||||
hotel_id,
|
hotel_id,
|
||||||
)
|
)
|
||||||
query = select(Reservation).options(selectinload(Reservation.customer))
|
from sqlalchemy.orm import selectinload
|
||||||
|
query = select(Reservation).options(
|
||||||
|
selectinload(Reservation.customer).selectinload(Customer.hashed_version)
|
||||||
|
)
|
||||||
|
|
||||||
if hotel_id:
|
if hotel_id:
|
||||||
query = query.where(Reservation.hotel_code == hotel_id)
|
query = query.where(Reservation.hotel_code == hotel_id)
|
||||||
@@ -1049,53 +1076,67 @@ class ConversionService:
|
|||||||
) -> Reservation | None:
|
) -> Reservation | None:
|
||||||
"""Match a reservation from a list by guest name and email (non-async).
|
"""Match a reservation from a list by guest name and email (non-async).
|
||||||
|
|
||||||
|
Uses hashed data matching for privacy. The guest_first_name, guest_last_name,
|
||||||
|
and guest_email parameters should be pre-hashed values from ConversionGuest.
|
||||||
|
Compares hashed values directly against hashed_customer hash fields.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
reservations: List of reservations to search through
|
reservations: List of reservations to search through
|
||||||
guest_first_name: Guest first name
|
guest_first_name: Pre-hashed guest first name (or None)
|
||||||
guest_last_name: Guest last name
|
guest_last_name: Pre-hashed guest last name (or None)
|
||||||
guest_email: Guest email
|
guest_email: Pre-hashed guest email (or None)
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Matched Reservation or None
|
Matched Reservation or None
|
||||||
|
|
||||||
"""
|
"""
|
||||||
# Filter by guest details
|
# Filter by guest details using hashed data
|
||||||
candidates = []
|
candidates = []
|
||||||
|
|
||||||
for reservation in reservations:
|
for reservation in reservations:
|
||||||
customer = reservation.customer
|
customer = reservation.customer
|
||||||
if not customer:
|
if not customer:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Match by email (highest priority)
|
# Access hashed_version - handle both direct objects and relationships
|
||||||
if guest_email:
|
hashed_customer = customer.hashed_version
|
||||||
if (
|
if not hashed_customer:
|
||||||
customer.email_address
|
continue
|
||||||
and customer.email_address.lower() == guest_email.lower()
|
|
||||||
):
|
# Defensive check: ensure hashed_customer is actually a HashedCustomer object
|
||||||
|
# (not an InstrumentedList or other SQLAlchemy construct)
|
||||||
|
if not hasattr(hashed_customer, 'hashed_email'):
|
||||||
|
_LOGGER.debug(
|
||||||
|
"Customer %s has invalid hashed_version type: %s",
|
||||||
|
customer.id,
|
||||||
|
type(hashed_customer),
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Match by email (highest priority) using hashed comparison
|
||||||
|
if guest_email and hashed_customer.hashed_email:
|
||||||
|
if hashed_customer.hashed_email == guest_email:
|
||||||
_LOGGER.info(
|
_LOGGER.info(
|
||||||
"Found exact email match for %s (reservation_id=%s)",
|
"Found exact email match (reservation_id=%s) via hash",
|
||||||
guest_email,
|
|
||||||
reservation.id,
|
reservation.id,
|
||||||
)
|
)
|
||||||
candidates.append((reservation, 3)) # Highest score
|
candidates.append((reservation, 3)) # Highest score
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Match by name (first + last)
|
# Match by name (first + last) using hashed comparison
|
||||||
if guest_first_name and guest_last_name:
|
if guest_first_name and guest_last_name:
|
||||||
first_match = (
|
first_match = (
|
||||||
customer.given_name
|
hashed_customer.hashed_given_name
|
||||||
and customer.given_name.lower() == guest_first_name.lower()
|
and hashed_customer.hashed_given_name == guest_first_name
|
||||||
)
|
)
|
||||||
last_match = (
|
last_match = (
|
||||||
customer.surname
|
hashed_customer.hashed_surname
|
||||||
and customer.surname.lower() == guest_last_name.lower()
|
and hashed_customer.hashed_surname == guest_last_name
|
||||||
)
|
)
|
||||||
|
|
||||||
if first_match and last_match:
|
if first_match and last_match:
|
||||||
_LOGGER.info(
|
_LOGGER.info(
|
||||||
"Found exact name match for %s %s (reservation_id=%s)",
|
"Found exact name match (reservation_id=%s) via hash",
|
||||||
guest_first_name,
|
|
||||||
guest_last_name,
|
|
||||||
reservation.id,
|
reservation.id,
|
||||||
)
|
)
|
||||||
candidates.append((reservation, 2)) # Medium-high score
|
candidates.append((reservation, 2)) # Medium-high score
|
||||||
|
|||||||
@@ -7,20 +7,31 @@ This test module:
|
|||||||
|
|
||||||
The test data is designed to test realistic matching scenarios:
|
The test data is designed to test realistic matching scenarios:
|
||||||
- Matching by advertising campaign data (fbclid/gclid)
|
- Matching by advertising campaign data (fbclid/gclid)
|
||||||
- Matching by guest name and email
|
- Matching by guest name and email using hashed data
|
||||||
- Handling unmatched conversions
|
- Handling unmatched conversions
|
||||||
- Processing daily sales revenue data
|
- Processing daily sales revenue data
|
||||||
|
- Testing hashed matching logic and edge cases
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import hashlib
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
import pytest_asyncio
|
import pytest_asyncio
|
||||||
|
from sqlalchemy import select
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
|
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
|
||||||
|
|
||||||
from alpine_bits_python.conversion_service import ConversionService
|
from alpine_bits_python.conversion_service import ConversionService
|
||||||
from alpine_bits_python.csv_import import CSVImporter
|
from alpine_bits_python.csv_import import CSVImporter
|
||||||
from alpine_bits_python.db import Base, Conversion, ConversionRoom
|
from alpine_bits_python.db import (
|
||||||
|
Base,
|
||||||
|
Conversion,
|
||||||
|
ConversionGuest,
|
||||||
|
ConversionRoom,
|
||||||
|
Customer,
|
||||||
|
HashedCustomer,
|
||||||
|
Reservation,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@pytest_asyncio.fixture
|
@pytest_asyncio.fixture
|
||||||
@@ -317,5 +328,315 @@ class TestConversionServiceWithImportedData:
|
|||||||
assert stats["errors"] == 0
|
assert stats["errors"] == 0
|
||||||
|
|
||||||
|
|
||||||
|
class TestHashedMatchingLogic:
|
||||||
|
"""Test the hashed matching logic used in ConversionService."""
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_no_match_without_hashed_customer(self, test_db_session):
|
||||||
|
"""Test that matching fails gracefully when customer has no hashed version."""
|
||||||
|
# Create a customer WITHOUT hashed data
|
||||||
|
customer = Customer(
|
||||||
|
given_name="Bob",
|
||||||
|
surname="Jones",
|
||||||
|
email_address="bob@example.com",
|
||||||
|
contact_id="test_contact_3",
|
||||||
|
)
|
||||||
|
test_db_session.add(customer)
|
||||||
|
await test_db_session.commit()
|
||||||
|
|
||||||
|
# Create a reservation
|
||||||
|
reservation = Reservation(
|
||||||
|
customer_id=customer.id,
|
||||||
|
unique_id="res_3",
|
||||||
|
hotel_code="test_hotel",
|
||||||
|
)
|
||||||
|
test_db_session.add(reservation)
|
||||||
|
await test_db_session.commit()
|
||||||
|
|
||||||
|
# Test the matching logic
|
||||||
|
service = ConversionService(test_db_session)
|
||||||
|
|
||||||
|
# Eagerly load reservations
|
||||||
|
from sqlalchemy.orm import selectinload
|
||||||
|
result = await test_db_session.execute(
|
||||||
|
select(Reservation)
|
||||||
|
.where(Reservation.id == reservation.id)
|
||||||
|
.options(selectinload(Reservation.customer).selectinload(Customer.hashed_version))
|
||||||
|
)
|
||||||
|
reservations = result.scalars().all()
|
||||||
|
|
||||||
|
hashed_email = hashlib.sha256(
|
||||||
|
"bob@example.com".lower().strip().encode("utf-8")
|
||||||
|
).hexdigest()
|
||||||
|
|
||||||
|
matched = service._match_reservations_by_guest_details(
|
||||||
|
reservations,
|
||||||
|
guest_first_name=None,
|
||||||
|
guest_last_name=None,
|
||||||
|
guest_email=hashed_email,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Should not match because customer has no hashed version
|
||||||
|
assert matched is None, "Should not match without hashed customer"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_conversion_guest_hashed_fields_are_populated(
|
||||||
|
self, test_db_session
|
||||||
|
):
|
||||||
|
"""Test that ConversionGuest properly stores hashed versions of guest data."""
|
||||||
|
# Create a conversion guest
|
||||||
|
conversion_guest = ConversionGuest.create_from_conversion_data(
|
||||||
|
hotel_id="test_hotel",
|
||||||
|
guest_id="guest_123",
|
||||||
|
guest_first_name="Margaret",
|
||||||
|
guest_last_name="Brown",
|
||||||
|
guest_email="margaret@example.com",
|
||||||
|
guest_country_code="GB",
|
||||||
|
guest_birth_date=None,
|
||||||
|
now=None,
|
||||||
|
)
|
||||||
|
test_db_session.add(conversion_guest)
|
||||||
|
await test_db_session.flush()
|
||||||
|
|
||||||
|
# Verify hashed fields are populated
|
||||||
|
assert conversion_guest.hashed_first_name is not None
|
||||||
|
assert conversion_guest.hashed_last_name is not None
|
||||||
|
assert conversion_guest.hashed_email is not None
|
||||||
|
|
||||||
|
# Verify hashes are correct (SHA256)
|
||||||
|
expected_hashed_first = hashlib.sha256(
|
||||||
|
"margaret".lower().strip().encode("utf-8")
|
||||||
|
).hexdigest()
|
||||||
|
expected_hashed_last = hashlib.sha256(
|
||||||
|
"brown".lower().strip().encode("utf-8")
|
||||||
|
).hexdigest()
|
||||||
|
expected_hashed_email = hashlib.sha256(
|
||||||
|
"margaret@example.com".lower().strip().encode("utf-8")
|
||||||
|
).hexdigest()
|
||||||
|
|
||||||
|
assert conversion_guest.hashed_first_name == expected_hashed_first
|
||||||
|
assert conversion_guest.hashed_last_name == expected_hashed_last
|
||||||
|
assert conversion_guest.hashed_email == expected_hashed_email
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_conversion_records_created_before_matching(
|
||||||
|
self, test_db_session, test_config
|
||||||
|
):
|
||||||
|
"""Test that conversion records exist before matching occurs."""
|
||||||
|
# Create customer and reservation for matching
|
||||||
|
customer = Customer(
|
||||||
|
given_name="David",
|
||||||
|
surname="Miller",
|
||||||
|
email_address="david@example.com",
|
||||||
|
contact_id="test_contact_6",
|
||||||
|
)
|
||||||
|
test_db_session.add(customer)
|
||||||
|
await test_db_session.flush()
|
||||||
|
|
||||||
|
hashed_customer = customer.create_hashed_customer()
|
||||||
|
test_db_session.add(hashed_customer)
|
||||||
|
await test_db_session.flush()
|
||||||
|
|
||||||
|
reservation = Reservation(
|
||||||
|
customer_id=customer.id,
|
||||||
|
unique_id="res_6",
|
||||||
|
hotel_code="hotel_1",
|
||||||
|
)
|
||||||
|
test_db_session.add(reservation)
|
||||||
|
await test_db_session.commit()
|
||||||
|
|
||||||
|
# Create conversion XML with matching hashed data
|
||||||
|
xml_content = f"""<?xml version="1.0"?>
|
||||||
|
<root>
|
||||||
|
<reservation id="pms_123" hotelID="hotel_1" number="RES001" date="2025-01-15">
|
||||||
|
<guest firstName="David" lastName="Miller" email="david@example.com"/>
|
||||||
|
<roomReservations>
|
||||||
|
<roomReservation roomNumber="101" arrival="2025-01-15" departure="2025-01-17" status="confirmed">
|
||||||
|
<dailySales>
|
||||||
|
<dailySale date="2025-01-15" revenueTotal="100.00"/>
|
||||||
|
</dailySales>
|
||||||
|
</roomReservation>
|
||||||
|
</roomReservations>
|
||||||
|
</reservation>
|
||||||
|
</root>"""
|
||||||
|
|
||||||
|
service = ConversionService(test_db_session)
|
||||||
|
stats = await service.process_conversion_xml(xml_content)
|
||||||
|
|
||||||
|
# Verify conversion was created
|
||||||
|
result = await test_db_session.execute(
|
||||||
|
select(Conversion).where(Conversion.pms_reservation_id == "pms_123")
|
||||||
|
)
|
||||||
|
conversion = result.scalar_one_or_none()
|
||||||
|
|
||||||
|
assert conversion is not None, "Conversion should be created"
|
||||||
|
assert conversion.hotel_id == "hotel_1"
|
||||||
|
assert conversion.guest_first_name == "David"
|
||||||
|
assert conversion.guest_last_name == "Miller"
|
||||||
|
assert conversion.guest_email == "david@example.com"
|
||||||
|
|
||||||
|
# Verify conversion_guest was created
|
||||||
|
assert conversion.conversion_guest_id is not None, "ConversionGuest should be created"
|
||||||
|
|
||||||
|
# Verify conversion_room was created
|
||||||
|
room_result = await test_db_session.execute(
|
||||||
|
select(ConversionRoom).where(
|
||||||
|
ConversionRoom.conversion_id == conversion.id
|
||||||
|
)
|
||||||
|
)
|
||||||
|
rooms = room_result.scalars().all()
|
||||||
|
assert len(rooms) > 0, "ConversionRoom should be created"
|
||||||
|
|
||||||
|
# Verify matching occurred (may or may not have matched depending on data)
|
||||||
|
# The important thing is that the records exist
|
||||||
|
assert stats["total_reservations"] == 1
|
||||||
|
assert stats["total_daily_sales"] == 1
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_hashed_customer_missing_fields_handled_gracefully(
|
||||||
|
self, test_db_session
|
||||||
|
):
|
||||||
|
"""Test that matching handles customers with missing hashed fields gracefully."""
|
||||||
|
# Create a customer
|
||||||
|
customer = Customer(
|
||||||
|
given_name="Eve",
|
||||||
|
surname="Taylor",
|
||||||
|
email_address="eve@example.com",
|
||||||
|
contact_id="test_contact_7",
|
||||||
|
)
|
||||||
|
test_db_session.add(customer)
|
||||||
|
await test_db_session.flush()
|
||||||
|
|
||||||
|
# Create hashed customer but simulate missing fields by manually setting to None
|
||||||
|
hashed_customer = HashedCustomer(
|
||||||
|
customer_id=customer.id,
|
||||||
|
contact_id="test_contact_7_hashed",
|
||||||
|
hashed_email=None, # Simulate missing hashed email
|
||||||
|
hashed_given_name=None, # Simulate missing hashed name
|
||||||
|
hashed_surname=None,
|
||||||
|
)
|
||||||
|
test_db_session.add(hashed_customer)
|
||||||
|
await test_db_session.flush()
|
||||||
|
|
||||||
|
# Create reservation
|
||||||
|
reservation = Reservation(
|
||||||
|
customer_id=customer.id,
|
||||||
|
unique_id="res_7",
|
||||||
|
hotel_code="test_hotel",
|
||||||
|
)
|
||||||
|
test_db_session.add(reservation)
|
||||||
|
await test_db_session.commit()
|
||||||
|
|
||||||
|
# Test matching - should not crash even with missing hashed fields
|
||||||
|
service = ConversionService(test_db_session)
|
||||||
|
|
||||||
|
# Eagerly load reservations
|
||||||
|
from sqlalchemy.orm import selectinload
|
||||||
|
result = await test_db_session.execute(
|
||||||
|
select(Reservation)
|
||||||
|
.where(Reservation.id == reservation.id)
|
||||||
|
.options(selectinload(Reservation.customer).selectinload(Customer.hashed_version))
|
||||||
|
)
|
||||||
|
reservations = result.scalars().all()
|
||||||
|
|
||||||
|
hashed_email = hashlib.sha256(
|
||||||
|
"eve@example.com".lower().strip().encode("utf-8")
|
||||||
|
).hexdigest()
|
||||||
|
|
||||||
|
matched = service._match_reservations_by_guest_details(
|
||||||
|
reservations,
|
||||||
|
guest_first_name=None,
|
||||||
|
guest_last_name=None,
|
||||||
|
guest_email=hashed_email,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Should not match because hashed customer fields are None
|
||||||
|
assert matched is None, "Should not match with missing hashed fields"
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_duplicate_conversion_guests_with_same_hotel_and_guest_id(
|
||||||
|
self, test_db_session
|
||||||
|
):
|
||||||
|
"""Test handling of duplicate ConversionGuest records with same (hotel_id, guest_id).
|
||||||
|
|
||||||
|
This test reproduces the production issue where multiple ConversionGuest records
|
||||||
|
can be created with the same (hotel_id, guest_id) combination, causing
|
||||||
|
scalar_one_or_none() to fail with "Multiple rows were found when one or none was required".
|
||||||
|
|
||||||
|
This can happen when:
|
||||||
|
- Multiple conversions arrive for the same hotel and PMS guest within the same batch
|
||||||
|
- The XML is processed multiple times
|
||||||
|
- Race conditions in concurrent processing
|
||||||
|
"""
|
||||||
|
hotel_id = "test_hotel"
|
||||||
|
guest_id = "guest_123"
|
||||||
|
|
||||||
|
# Simulate the production scenario: multiple conversion guests with same (hotel_id, guest_id)
|
||||||
|
guest1 = ConversionGuest.create_from_conversion_data(
|
||||||
|
hotel_id=hotel_id,
|
||||||
|
guest_id=guest_id,
|
||||||
|
guest_first_name="John",
|
||||||
|
guest_last_name="Doe",
|
||||||
|
guest_email="john@example.com",
|
||||||
|
guest_country_code="US",
|
||||||
|
guest_birth_date=None,
|
||||||
|
now=None,
|
||||||
|
)
|
||||||
|
test_db_session.add(guest1)
|
||||||
|
await test_db_session.flush()
|
||||||
|
|
||||||
|
# Create a second guest with the SAME (hotel_id, guest_id)
|
||||||
|
# This should not happen, but can occur in production
|
||||||
|
guest2 = ConversionGuest.create_from_conversion_data(
|
||||||
|
hotel_id=hotel_id,
|
||||||
|
guest_id=guest_id,
|
||||||
|
guest_first_name="Jane", # Different first name
|
||||||
|
guest_last_name="Doe",
|
||||||
|
guest_email="jane@example.com",
|
||||||
|
guest_country_code="US",
|
||||||
|
guest_birth_date=None,
|
||||||
|
now=None,
|
||||||
|
)
|
||||||
|
test_db_session.add(guest2)
|
||||||
|
await test_db_session.commit()
|
||||||
|
|
||||||
|
# Now try to query for the guest by (hotel_id, guest_id)
|
||||||
|
# This should return multiple results
|
||||||
|
result = await test_db_session.execute(
|
||||||
|
select(ConversionGuest).where(
|
||||||
|
(ConversionGuest.hotel_id == hotel_id)
|
||||||
|
& (ConversionGuest.guest_id == guest_id)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
guests = result.scalars().all()
|
||||||
|
|
||||||
|
# Verify we have duplicates (the production bug condition)
|
||||||
|
assert len(guests) == 2, "Should have created duplicate conversion guests"
|
||||||
|
|
||||||
|
# Verify that scalars().first() returns one of the guests (the fixed behavior)
|
||||||
|
result2 = await test_db_session.execute(
|
||||||
|
select(ConversionGuest).where(
|
||||||
|
(ConversionGuest.hotel_id == hotel_id)
|
||||||
|
& (ConversionGuest.guest_id == guest_id)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
first_guest = result2.scalars().first()
|
||||||
|
assert first_guest is not None, "Should find at least one guest with scalars().first()"
|
||||||
|
|
||||||
|
# The old code would have raised an error here with scalar_one_or_none()
|
||||||
|
# when finding multiple results. Now it's fixed to use .first() instead.
|
||||||
|
result3 = await test_db_session.execute(
|
||||||
|
select(ConversionGuest).where(
|
||||||
|
(ConversionGuest.hotel_id == hotel_id)
|
||||||
|
& (ConversionGuest.guest_id == guest_id)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
with pytest.raises(Exception): # MultipleResultsFound from old code path
|
||||||
|
result3.scalar_one_or_none()
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
pytest.main([__file__, "-v"])
|
pytest.main([__file__, "-v"])
|
||||||
|
|||||||
Reference in New Issue
Block a user