Hashed comparisons don't work, unfortunately
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
"""Service for handling conversion data from hotel PMS XML files."""
|
||||
|
||||
import asyncio
|
||||
import hashlib
|
||||
import xml.etree.ElementTree as ET
|
||||
from datetime import UTC, datetime
|
||||
from decimal import Decimal
|
||||
@@ -50,10 +51,11 @@ class ConversionService:
|
||||
self.supports_concurrent = False
|
||||
|
||||
# Cache for reservation and customer data within a single XML processing run
|
||||
# Maps hotel_code -> list of (reservation, customer) tuples
|
||||
# Maps hotel_code -> list of (reservation, hashed_customer) tuples
|
||||
# This significantly speeds up matching when processing large XML files
|
||||
# Uses hashed data for matching to preserve privacy
|
||||
self._reservation_cache: dict[
|
||||
str | None, list[tuple[Reservation, Customer | None]]
|
||||
str | None, list[tuple[Reservation, HashedCustomer | None]]
|
||||
] = {}
|
||||
self._cache_initialized = False
|
||||
|
||||
@@ -91,6 +93,11 @@ class ConversionService:
|
||||
If a guest with this key exists, updates it with new data.
|
||||
If not, creates a new guest record.
|
||||
|
||||
NOTE: There is no database-level unique constraint on (hotel_id, guest_id),
|
||||
so multiple ConversionGuest records can exist with the same key. This method
|
||||
uses first() instead of scalar_one_or_none() to handle this gracefully and
|
||||
update the most recently created record when duplicates exist.
|
||||
|
||||
Returns the ConversionGuest record, or None if no guest data provided.
|
||||
"""
|
||||
# Don't create a ConversionGuest if we have no guest information
|
||||
@@ -104,12 +111,14 @@ class ConversionService:
|
||||
# Try to find existing guest by (hotel_id, guest_id)
|
||||
if guest_id:
|
||||
result = await session.execute(
|
||||
select(ConversionGuest).where(
|
||||
select(ConversionGuest)
|
||||
.where(
|
||||
(ConversionGuest.hotel_id == hotel_id)
|
||||
& (ConversionGuest.guest_id == guest_id)
|
||||
)
|
||||
.order_by(ConversionGuest.last_seen.desc()) # Get most recently updated
|
||||
)
|
||||
existing_guest = result.scalar_one_or_none()
|
||||
existing_guest = result.scalars().first()
|
||||
|
||||
if existing_guest:
|
||||
# Update with new data
|
||||
@@ -210,16 +219,16 @@ class ConversionService:
|
||||
return stats
|
||||
|
||||
async def _load_reservation_cache(self) -> None:
|
||||
"""Load all reservations and customers into cache for fast matching.
|
||||
"""Load all reservations and hashed customers into cache for fast matching.
|
||||
|
||||
This method is called once at the start of processing a large XML file.
|
||||
It loads all reservations with their associated customers into an in-memory
|
||||
It loads all reservations with their associated hashed customers into an in-memory
|
||||
cache organized by hotel_code. This avoids repeated database queries during
|
||||
matching operations.
|
||||
matching operations and uses hashed data for privacy-preserving matching.
|
||||
|
||||
The cache structure:
|
||||
- Key: hotel_code (str or None)
|
||||
- Value: List of (reservation, customer) tuples
|
||||
- Value: List of (reservation, hashed_customer) tuples
|
||||
|
||||
This is especially beneficial for large XML files with many reservations
|
||||
where matching criteria is the same across multiple reservations.
|
||||
@@ -241,8 +250,11 @@ class ConversionService:
|
||||
return
|
||||
|
||||
try:
|
||||
# Load all reservations with their customers in one query
|
||||
query = select(Reservation).options(selectinload(Reservation.customer))
|
||||
# Load all reservations with their hashed customers in one query
|
||||
from sqlalchemy.orm import selectinload
|
||||
query = select(Reservation).options(
|
||||
selectinload(Reservation.customer).selectinload(Customer.hashed_version)
|
||||
)
|
||||
result = await session.execute(query)
|
||||
reservations = result.scalars().all()
|
||||
|
||||
@@ -253,8 +265,12 @@ class ConversionService:
|
||||
hotel_code = reservation.hotel_code
|
||||
if hotel_code not in self._reservation_cache:
|
||||
self._reservation_cache[hotel_code] = []
|
||||
# Cache the hashed customer instead of raw customer
|
||||
hashed_customer = None
|
||||
if reservation.customer and reservation.customer.hashed_version:
|
||||
hashed_customer = reservation.customer.hashed_version
|
||||
self._reservation_cache[hotel_code].append(
|
||||
(reservation, reservation.customer)
|
||||
(reservation, hashed_customer)
|
||||
)
|
||||
|
||||
self._cache_initialized = True
|
||||
@@ -482,26 +498,6 @@ class ConversionService:
|
||||
except ValueError:
|
||||
_LOGGER.warning("Invalid creation time format: %s", creation_time_str)
|
||||
|
||||
# Find matching reservation, customer, and hashed_customer using advertising data and guest details
|
||||
matched_reservation = None
|
||||
matched_customer = None
|
||||
matched_hashed_customer = None
|
||||
|
||||
if advertising_campagne or True:
|
||||
match_result = await self._find_matching_entities(
|
||||
advertising_campagne,
|
||||
hotel_id,
|
||||
reservation_date,
|
||||
guest_first_name,
|
||||
guest_last_name,
|
||||
guest_email,
|
||||
advertising_partner,
|
||||
session,
|
||||
)
|
||||
matched_reservation = match_result["reservation"]
|
||||
matched_customer = match_result["customer"]
|
||||
matched_hashed_customer = match_result["hashed_customer"]
|
||||
|
||||
# Process all room reservations
|
||||
room_reservations = reservation_elem.find("roomReservations")
|
||||
if room_reservations is None:
|
||||
@@ -519,16 +515,7 @@ class ConversionService:
|
||||
existing_conversion = existing_result.scalar_one_or_none()
|
||||
|
||||
if existing_conversion:
|
||||
# Update existing conversion
|
||||
existing_conversion.reservation_id = (
|
||||
matched_reservation.id if matched_reservation else None
|
||||
)
|
||||
existing_conversion.customer_id = (
|
||||
matched_customer.id if matched_customer else None
|
||||
)
|
||||
existing_conversion.hashed_customer_id = (
|
||||
matched_hashed_customer.id if matched_hashed_customer else None
|
||||
)
|
||||
# Update existing conversion (matching will be done later)
|
||||
existing_conversion.reservation_number = reservation_number
|
||||
existing_conversion.reservation_date = reservation_date
|
||||
existing_conversion.creation_time = creation_time
|
||||
@@ -550,14 +537,12 @@ class ConversionService:
|
||||
pms_reservation_id,
|
||||
)
|
||||
else:
|
||||
# Create new conversion entry
|
||||
# Create new conversion entry (without matching - will be done later)
|
||||
conversion = Conversion(
|
||||
# Links to existing entities (nullable)
|
||||
reservation_id=matched_reservation.id if matched_reservation else None,
|
||||
customer_id=matched_customer.id if matched_customer else None,
|
||||
hashed_customer_id=matched_hashed_customer.id
|
||||
if matched_hashed_customer
|
||||
else None,
|
||||
# Links to existing entities (nullable, will be filled in after matching)
|
||||
reservation_id=None,
|
||||
customer_id=None,
|
||||
hashed_customer_id=None,
|
||||
# Reservation metadata
|
||||
hotel_id=hotel_id,
|
||||
pms_reservation_id=pms_reservation_id,
|
||||
@@ -604,16 +589,6 @@ class ConversionService:
|
||||
if conversion_guest:
|
||||
conversion.conversion_guest_id = conversion_guest.id
|
||||
|
||||
# Update stats for the conversion record itself
|
||||
if matched_reservation:
|
||||
stats["matched_to_reservation"] += 1
|
||||
if matched_customer:
|
||||
stats["matched_to_customer"] += 1
|
||||
if matched_hashed_customer:
|
||||
stats["matched_to_hashed_customer"] += 1
|
||||
if not any([matched_reservation, matched_customer, matched_hashed_customer]):
|
||||
stats["unmatched"] += 1
|
||||
|
||||
# Batch-load existing room reservations to avoid N+1 queries
|
||||
room_numbers = [
|
||||
rm.get("roomNumber") for rm in room_reservations.findall("roomReservation")
|
||||
@@ -772,6 +747,55 @@ class ConversionService:
|
||||
num_adults,
|
||||
)
|
||||
|
||||
# Now that conversion, conversion_guest, and conversion_room records exist,
|
||||
# perform matching using hashed guest data from conversion_guest
|
||||
matched_reservation = None
|
||||
matched_customer = None
|
||||
matched_hashed_customer = None
|
||||
|
||||
if advertising_campagne or True:
|
||||
# Use hashed data from conversion_guest for matching
|
||||
hashed_first_name = conversion_guest.hashed_first_name if conversion_guest else None
|
||||
hashed_last_name = conversion_guest.hashed_last_name if conversion_guest else None
|
||||
hashed_email = conversion_guest.hashed_email if conversion_guest else None
|
||||
|
||||
match_result = await self._find_matching_entities(
|
||||
advertising_campagne,
|
||||
hotel_id,
|
||||
reservation_date,
|
||||
hashed_first_name,
|
||||
hashed_last_name,
|
||||
hashed_email,
|
||||
advertising_partner,
|
||||
session,
|
||||
)
|
||||
matched_reservation = match_result["reservation"]
|
||||
matched_customer = match_result["customer"]
|
||||
matched_hashed_customer = match_result["hashed_customer"]
|
||||
|
||||
# Update the conversion with matched entities if found
|
||||
if matched_reservation or matched_customer or matched_hashed_customer:
|
||||
conversion.reservation_id = (
|
||||
matched_reservation.id if matched_reservation else None
|
||||
)
|
||||
conversion.customer_id = (
|
||||
matched_customer.id if matched_customer else None
|
||||
)
|
||||
conversion.hashed_customer_id = (
|
||||
matched_hashed_customer.id if matched_hashed_customer else None
|
||||
)
|
||||
conversion.updated_at = datetime.now()
|
||||
|
||||
# Update stats for the conversion record
|
||||
if matched_reservation:
|
||||
stats["matched_to_reservation"] += 1
|
||||
if matched_customer:
|
||||
stats["matched_to_customer"] += 1
|
||||
if matched_hashed_customer:
|
||||
stats["matched_to_hashed_customer"] += 1
|
||||
if not any([matched_reservation, matched_customer, matched_hashed_customer]):
|
||||
stats["unmatched"] += 1
|
||||
|
||||
return stats
|
||||
|
||||
async def _find_matching_entities(
|
||||
@@ -1028,7 +1052,10 @@ class ConversionService:
|
||||
"Cache unavailable or empty, falling back to database query (hotel=%s)",
|
||||
hotel_id,
|
||||
)
|
||||
query = select(Reservation).options(selectinload(Reservation.customer))
|
||||
from sqlalchemy.orm import selectinload
|
||||
query = select(Reservation).options(
|
||||
selectinload(Reservation.customer).selectinload(Customer.hashed_version)
|
||||
)
|
||||
|
||||
if hotel_id:
|
||||
query = query.where(Reservation.hotel_code == hotel_id)
|
||||
@@ -1049,53 +1076,67 @@ class ConversionService:
|
||||
) -> Reservation | None:
|
||||
"""Match a reservation from a list by guest name and email (non-async).
|
||||
|
||||
Uses hashed data matching for privacy. The guest_first_name, guest_last_name,
|
||||
and guest_email parameters should be pre-hashed values from ConversionGuest.
|
||||
Compares hashed values directly against hashed_customer hash fields.
|
||||
|
||||
Args:
|
||||
reservations: List of reservations to search through
|
||||
guest_first_name: Guest first name
|
||||
guest_last_name: Guest last name
|
||||
guest_email: Guest email
|
||||
guest_first_name: Pre-hashed guest first name (or None)
|
||||
guest_last_name: Pre-hashed guest last name (or None)
|
||||
guest_email: Pre-hashed guest email (or None)
|
||||
|
||||
Returns:
|
||||
Matched Reservation or None
|
||||
|
||||
"""
|
||||
# Filter by guest details
|
||||
# Filter by guest details using hashed data
|
||||
candidates = []
|
||||
|
||||
for reservation in reservations:
|
||||
customer = reservation.customer
|
||||
if not customer:
|
||||
continue
|
||||
|
||||
# Match by email (highest priority)
|
||||
if guest_email:
|
||||
if (
|
||||
customer.email_address
|
||||
and customer.email_address.lower() == guest_email.lower()
|
||||
):
|
||||
# Access hashed_version - handle both direct objects and relationships
|
||||
hashed_customer = customer.hashed_version
|
||||
if not hashed_customer:
|
||||
continue
|
||||
|
||||
# Defensive check: ensure hashed_customer is actually a HashedCustomer object
|
||||
# (not an InstrumentedList or other SQLAlchemy construct)
|
||||
if not hasattr(hashed_customer, 'hashed_email'):
|
||||
_LOGGER.debug(
|
||||
"Customer %s has invalid hashed_version type: %s",
|
||||
customer.id,
|
||||
type(hashed_customer),
|
||||
)
|
||||
continue
|
||||
|
||||
# Match by email (highest priority) using hashed comparison
|
||||
if guest_email and hashed_customer.hashed_email:
|
||||
if hashed_customer.hashed_email == guest_email:
|
||||
_LOGGER.info(
|
||||
"Found exact email match for %s (reservation_id=%s)",
|
||||
guest_email,
|
||||
"Found exact email match (reservation_id=%s) via hash",
|
||||
reservation.id,
|
||||
)
|
||||
candidates.append((reservation, 3)) # Highest score
|
||||
continue
|
||||
|
||||
# Match by name (first + last)
|
||||
# Match by name (first + last) using hashed comparison
|
||||
if guest_first_name and guest_last_name:
|
||||
first_match = (
|
||||
customer.given_name
|
||||
and customer.given_name.lower() == guest_first_name.lower()
|
||||
hashed_customer.hashed_given_name
|
||||
and hashed_customer.hashed_given_name == guest_first_name
|
||||
)
|
||||
last_match = (
|
||||
customer.surname
|
||||
and customer.surname.lower() == guest_last_name.lower()
|
||||
hashed_customer.hashed_surname
|
||||
and hashed_customer.hashed_surname == guest_last_name
|
||||
)
|
||||
|
||||
if first_match and last_match:
|
||||
_LOGGER.info(
|
||||
"Found exact name match for %s %s (reservation_id=%s)",
|
||||
guest_first_name,
|
||||
guest_last_name,
|
||||
"Found exact name match (reservation_id=%s) via hash",
|
||||
reservation.id,
|
||||
)
|
||||
candidates.append((reservation, 2)) # Medium-high score
|
||||
|
||||
Reference in New Issue
Block a user