Hashed conversion matching and more. #12

Merged
jonas merged 13 commits from hashed_conversion_matching into main 2025-11-19 19:40:07 +00:00
2 changed files with 132 additions and 52 deletions
Showing only changes of commit 6c46c566ed - Show all commits

View File

@@ -515,20 +515,30 @@ class ConversionService:
existing_conversion = existing_result.scalar_one_or_none() existing_conversion = existing_result.scalar_one_or_none()
if existing_conversion: if existing_conversion:
# Update existing conversion (matching will be done later) # Update existing conversion - only update reservation metadata and advertising data
# Don't overwrite existing guest info with missing values (guest fields are only updated below when new data is provided; the matching logic uses hashed data)
# Don't clear reservation/customer links (matching logic will update if needed)
existing_conversion.reservation_number = reservation_number existing_conversion.reservation_number = reservation_number
existing_conversion.reservation_date = reservation_date existing_conversion.reservation_date = reservation_date
existing_conversion.creation_time = creation_time existing_conversion.creation_time = creation_time
existing_conversion.reservation_type = reservation_type existing_conversion.reservation_type = reservation_type
existing_conversion.booking_channel = booking_channel existing_conversion.booking_channel = booking_channel
existing_conversion.guest_first_name = guest_first_name
existing_conversion.guest_last_name = guest_last_name
existing_conversion.guest_email = guest_email
existing_conversion.guest_country_code = guest_country_code
existing_conversion.guest_birth_date = guest_birth_date
existing_conversion.advertising_medium = advertising_medium existing_conversion.advertising_medium = advertising_medium
existing_conversion.advertising_partner = advertising_partner existing_conversion.advertising_partner = advertising_partner
existing_conversion.advertising_campagne = advertising_campagne existing_conversion.advertising_campagne = advertising_campagne
# Update guest info only if new data is provided (truthy values only; None and empty strings are skipped)
if guest_first_name:
existing_conversion.guest_first_name = guest_first_name
if guest_last_name:
existing_conversion.guest_last_name = guest_last_name
if guest_email:
existing_conversion.guest_email = guest_email
if guest_country_code:
existing_conversion.guest_country_code = guest_country_code
if guest_birth_date:
existing_conversion.guest_birth_date = guest_birth_date
existing_conversion.updated_at = datetime.now() existing_conversion.updated_at = datetime.now()
conversion = existing_conversion conversion = existing_conversion
_LOGGER.info( _LOGGER.info(
@@ -748,53 +758,112 @@ class ConversionService:
) )
# Now that conversion, conversion_guest, and conversion_room records exist, # Now that conversion, conversion_guest, and conversion_room records exist,
# perform matching using hashed guest data from conversion_guest # perform matching using hashed guest data
matched_reservation = None match_stats = await self._match_conversion(
matched_customer = None conversion,
matched_hashed_customer = None guest_first_name,
guest_last_name,
guest_email,
advertising_campagne,
advertising_partner,
hotel_id,
reservation_date,
session,
)
if advertising_campagne or True: # Update stats
# Use hashed data from conversion_guest for matching stats["matched_to_reservation"] = match_stats["matched_to_reservation"]
hashed_first_name = conversion_guest.hashed_first_name if conversion_guest else None stats["matched_to_customer"] = match_stats["matched_to_customer"]
hashed_last_name = conversion_guest.hashed_last_name if conversion_guest else None stats["matched_to_hashed_customer"] = match_stats["matched_to_hashed_customer"]
hashed_email = conversion_guest.hashed_email if conversion_guest else None stats["unmatched"] = match_stats["unmatched"]
match_result = await self._find_matching_entities( return stats
advertising_campagne,
hotel_id, async def _match_conversion(
reservation_date, self,
hashed_first_name, conversion: Conversion,
hashed_last_name, guest_first_name: str | None,
hashed_email, guest_last_name: str | None,
advertising_partner, guest_email: str | None,
session, advertising_campagne: str | None,
advertising_partner: str | None,
hotel_id: str | None,
reservation_date: Any,
session: AsyncSession | None = None,
) -> dict[str, int]:
"""Match a conversion to reservations and customers using guest and advertising data.
This is the matching phase that runs AFTER conversion data has been stored.
It uses hashed guest data to match conversions to existing reservations/customers.
Args:
conversion: The Conversion record to match
guest_first_name: Guest first name (will be hashed for matching)
guest_last_name: Guest last name (will be hashed for matching)
guest_email: Guest email (will be hashed for matching)
advertising_campagne: Advertising campaign identifier
advertising_partner: Advertising partner info
hotel_id: Hotel ID for filtering matches
reservation_date: Reservation date for additional filtering
session: AsyncSession to use for database queries
Returns:
Dictionary with match statistics: matched_to_reservation, matched_to_customer,
matched_to_hashed_customer, and unmatched (all counts of 0 or 1)
"""
if session is None:
session = self.session
stats = {
"matched_to_reservation": 0,
"matched_to_customer": 0,
"matched_to_hashed_customer": 0,
"unmatched": 0,
}
# Hash guest data for matching (same hashing logic as ConversionGuest)
hashed_first_name = ConversionGuest._normalize_and_hash(guest_first_name)
hashed_last_name = ConversionGuest._normalize_and_hash(guest_last_name)
hashed_email = ConversionGuest._normalize_and_hash(guest_email)
# Find matching entities
match_result = await self._find_matching_entities(
advertising_campagne,
hotel_id,
reservation_date,
hashed_first_name,
hashed_last_name,
hashed_email,
advertising_partner,
session,
)
matched_reservation = match_result["reservation"]
matched_customer = match_result["customer"]
matched_hashed_customer = match_result["hashed_customer"]
# Update the conversion with matched entities if found
if matched_reservation or matched_customer or matched_hashed_customer:
conversion.reservation_id = (
matched_reservation.id if matched_reservation else None
) )
matched_reservation = match_result["reservation"] conversion.customer_id = (
matched_customer = match_result["customer"] matched_customer.id if matched_customer else None
matched_hashed_customer = match_result["hashed_customer"] )
conversion.hashed_customer_id = (
matched_hashed_customer.id if matched_hashed_customer else None
)
conversion.updated_at = datetime.now()
# Update the conversion with matched entities if found # Update stats
if matched_reservation or matched_customer or matched_hashed_customer:
conversion.reservation_id = (
matched_reservation.id if matched_reservation else None
)
conversion.customer_id = (
matched_customer.id if matched_customer else None
)
conversion.hashed_customer_id = (
matched_hashed_customer.id if matched_hashed_customer else None
)
conversion.updated_at = datetime.now()
# Update stats for the conversion record
if matched_reservation: if matched_reservation:
stats["matched_to_reservation"] += 1 stats["matched_to_reservation"] = 1
if matched_customer: if matched_customer:
stats["matched_to_customer"] += 1 stats["matched_to_customer"] = 1
if matched_hashed_customer: if matched_hashed_customer:
stats["matched_to_hashed_customer"] += 1 stats["matched_to_hashed_customer"] = 1
if not any([matched_reservation, matched_customer, matched_hashed_customer]): if not any([matched_reservation, matched_customer, matched_hashed_customer]):
stats["unmatched"] += 1 stats["unmatched"] = 1
return stats return stats
@@ -1028,11 +1097,20 @@ class ConversionService:
# Get reservations from cache for this hotel # Get reservations from cache for this hotel
if hotel_id and hotel_id in self._reservation_cache: if hotel_id and hotel_id in self._reservation_cache:
all_reservations = [res for res, _ in self._reservation_cache[hotel_id]] # Extract reservations AND reattach their cached hashed_customer relationships
for reservation, hashed_customer in self._reservation_cache[hotel_id]:
if reservation.customer:
# Manually set the hashed_version from cache to ensure it's available
reservation.customer.hashed_version = hashed_customer
all_reservations.append(reservation)
elif not hotel_id: elif not hotel_id:
# If no hotel_id specified, use all cached reservations # If no hotel_id specified, use all cached reservations
for reservations_list in self._reservation_cache.values(): for reservations_list in self._reservation_cache.values():
all_reservations.extend([res for res, _ in reservations_list]) for reservation, hashed_customer in reservations_list:
if reservation.customer:
# Manually set the hashed_version from cache to ensure it's available
reservation.customer.hashed_version = hashed_customer
all_reservations.append(reservation)
if all_reservations: if all_reservations:
_LOGGER.debug( _LOGGER.debug(
@@ -1117,8 +1195,10 @@ class ConversionService:
if guest_email and hashed_customer.hashed_email: if guest_email and hashed_customer.hashed_email:
if hashed_customer.hashed_email == guest_email: if hashed_customer.hashed_email == guest_email:
_LOGGER.info( _LOGGER.info(
"Found exact email match (reservation_id=%s) via hash", "Found exact email match (reservation_id=%s, cust=%s, email=%s) via hash",
reservation.id, reservation.id,
customer.email_address,
guest_email,
) )
candidates.append((reservation, 3)) # Highest score candidates.append((reservation, 3)) # Highest score
continue continue

View File

@@ -22,7 +22,7 @@ from sqlalchemy.ext.asyncio import (
async_sessionmaker, async_sessionmaker,
create_async_engine, create_async_engine,
) )
from sqlalchemy.orm import declarative_base, relationship from sqlalchemy.orm import backref, declarative_base, relationship
from .logging_config import get_logger from .logging_config import get_logger
@@ -347,7 +347,7 @@ class HashedCustomer(Base):
__tablename__ = "hashed_customers" __tablename__ = "hashed_customers"
id = Column(Integer, primary_key=True) id = Column(Integer, primary_key=True)
customer_id = Column( customer_id = Column(
Integer, ForeignKey("customers.id"), unique=True, nullable=False Integer, ForeignKey("customers.id", ondelete="SET NULL"), unique=True, nullable=True
) )
contact_id = Column(String, unique=True) # Keep unhashed for reference contact_id = Column(String, unique=True) # Keep unhashed for reference
hashed_email = Column(String(64)) # SHA256 produces 64 hex chars hashed_email = Column(String(64)) # SHA256 produces 64 hex chars
@@ -361,7 +361,7 @@ class HashedCustomer(Base):
hashed_birth_date = Column(String(64)) hashed_birth_date = Column(String(64))
created_at = Column(DateTime(timezone=True)) created_at = Column(DateTime(timezone=True))
customer = relationship("Customer", backref="hashed_version") customer = relationship("Customer", backref=backref("hashed_version", uselist=False, lazy="joined"))
class ConversionGuest(Base): class ConversionGuest(Base):
@@ -477,7 +477,7 @@ class ConversionGuest(Base):
class Reservation(Base): class Reservation(Base):
__tablename__ = "reservations" __tablename__ = "reservations"
id = Column(Integer, primary_key=True) id = Column(Integer, primary_key=True)
customer_id = Column(Integer, ForeignKey("customers.id")) customer_id = Column(Integer, ForeignKey("customers.id", ondelete="SET NULL"))
unique_id = Column(String, unique=True) unique_id = Column(String, unique=True)
md5_unique_id = Column(String(32), unique=True) # max length 32 guaranteed md5_unique_id = Column(String(32), unique=True) # max length 32 guaranteed
start_date = Column(Date) start_date = Column(Date)