More refactoring

This commit is contained in:
Jonas Linter
2025-11-19 16:25:18 +01:00
parent 8547326ffa
commit 0854352726
9 changed files with 301001 additions and 340 deletions

View File

@@ -558,7 +558,7 @@ class ConversionService:
if existing_conversion:
# Update existing conversion - only update reservation metadata and advertising data
# Don't overwrite guest info (will be handled by matching logic which uses hashed data)
# Guest info is stored in ConversionGuest table, not here
# Don't clear reservation/customer links (matching logic will update if needed)
existing_conversion.reservation_number = reservation_number
existing_conversion.reservation_date = reservation_date
@@ -568,19 +568,6 @@ class ConversionService:
existing_conversion.advertising_medium = advertising_medium
existing_conversion.advertising_partner = advertising_partner
existing_conversion.advertising_campagne = advertising_campagne
# Update guest info only if new data is provided (not None)
if guest_first_name:
existing_conversion.guest_first_name = guest_first_name
if guest_last_name:
existing_conversion.guest_last_name = guest_last_name
if guest_email:
existing_conversion.guest_email = guest_email
if guest_country_code:
existing_conversion.guest_country_code = guest_country_code
if guest_birth_date:
existing_conversion.guest_birth_date = guest_birth_date
existing_conversion.updated_at = datetime.now()
conversion = existing_conversion
_LOGGER.info(
@@ -590,6 +577,7 @@ class ConversionService:
)
else:
# Create new conversion entry (without matching - will be done later)
# Note: Guest information (first_name, last_name, email, etc.) is stored in the ConversionGuest table
conversion = Conversion(
# Links to existing entities (nullable, will be filled in after matching)
reservation_id=None,
@@ -597,19 +585,13 @@ class ConversionService:
hashed_customer_id=None,
# Reservation metadata
hotel_id=hotel_id,
guest_id=guest_id, # Links to ConversionGuest
pms_reservation_id=pms_reservation_id,
reservation_number=reservation_number,
reservation_date=reservation_date,
creation_time=creation_time,
reservation_type=reservation_type,
booking_channel=booking_channel,
# Guest information
guest_first_name=guest_first_name,
guest_last_name=guest_last_name,
guest_email=guest_email,
guest_country_code=guest_country_code,
guest_birth_date=guest_birth_date,
guest_id=guest_id,
# Advertising data
advertising_medium=advertising_medium,
advertising_partner=advertising_partner,
@@ -628,6 +610,8 @@ class ConversionService:
await session.flush()
# Create or update ConversionGuest and link it to the conversion
# The conversion is linked to ConversionGuest via composite FK (hotel_id, guest_id)
# So we just need to ensure ConversionGuest exists - the FK is already set via hotel_id + guest_id
conversion_guest = await self._get_or_create_conversion_guest(
hotel_id=hotel_id,
guest_id=guest_id,
@@ -638,8 +622,7 @@ class ConversionService:
guest_birth_date=guest_birth_date,
session=session,
)
if conversion_guest:
conversion.conversion_guest_id = conversion_guest.id
# guest_id is already set on conversion, so the composite FK relationship is established
# Batch-load existing room reservations to avoid N+1 queries
room_numbers = [
@@ -863,6 +846,7 @@ class ConversionService:
matched_reservation = match_result["reservation"]
matched_customer = match_result["customer"]
matched_hashed_customer = match_result["hashed_customer"]
match_type = match_result.get("match_type") # "id" or "guest_details"
# Update the conversion with matched entities if found
if matched_reservation or matched_customer or matched_hashed_customer:
@@ -875,6 +859,15 @@ class ConversionService:
conversion.hashed_customer_id = (
matched_hashed_customer.id if matched_hashed_customer else None
)
# Set attribution flags based on match type
if match_type == "id":
conversion.directly_attributable = True
conversion.guest_matched = False
elif match_type == "guest_details":
conversion.directly_attributable = False
conversion.guest_matched = True
conversion.updated_at = datetime.now()
# Update stats
@@ -902,22 +895,23 @@ class ConversionService:
) -> dict[str, Any]:
"""Find matching Reservation, Customer, and HashedCustomer.
Uses two strategies:
1. Advertising data matching (fbclid/gclid/utm_campaign) with guest details fallback
2. If no advertising data match, falls back to email/name-based matching
Uses two strategies with separate attribution:
1. ID-based matching (fbclid/gclid/md5_unique_id) - directly_attributable
2. Guest detail matching (email/name) - guest_matched only
Args:
advertising_campagne: Truncated tracking ID from conversion XML
hotel_id: Hotel ID for additional filtering
reservation_date: Reservation date for additional filtering
guest_first_name: Guest first name for matching
guest_last_name: Guest last name for matching
guest_email: Guest email for matching
guest_first_name: Guest first name (hashed) for matching
guest_last_name: Guest last name (hashed) for matching
guest_email: Guest email (hashed) for matching
advertising_partner: Partner info (matches utm_medium for additional filtering)
session: AsyncSession to use. If None, uses self.session.
Returns:
Dictionary with 'reservation', 'customer', and 'hashed_customer' keys
Dictionary with 'reservation', 'customer', 'hashed_customer', and 'match_type' keys.
match_type is either 'id' (high confidence) or 'guest_details' (lower confidence)
"""
if session is None:
@@ -926,9 +920,10 @@ class ConversionService:
"reservation": None,
"customer": None,
"hashed_customer": None,
"match_type": None, # "id" or "guest_details"
}
# Strategy 1: Try to match by advertising data (fbclid/gclid/utm_campaign)
# Strategy 1: Try to match by advertising data (fbclid/gclid/md5_unique_id) - ID-based, high confidence
if advertising_campagne:
matched_reservation = await self._match_by_advertising(
advertising_campagne,
@@ -942,19 +937,20 @@ class ConversionService:
if matched_reservation:
result["reservation"] = matched_reservation
result["match_type"] = "id" # Matched by ID
_LOGGER.info(
"Matched conversion by advertising data (advertisingCampagne=%s, hotel=%s)",
"Matched conversion by advertising ID data (advertisingCampagne=%s, hotel=%s)",
advertising_campagne,
hotel_id,
)
else:
_LOGGER.debug(
"No match found by advertising data (advertisingCampagne=%s), "
"falling back to email/name matching",
"No match found by advertising ID data (advertisingCampagne=%s), "
"falling back to guest details matching",
advertising_campagne,
)
# Strategy 2: If no advertising match, try email/name-based matching
# Strategy 2: If no ID-based match, try email/name-based matching - guest details, lower confidence
if not result["reservation"] and (
guest_email or guest_first_name or guest_last_name
):
@@ -964,6 +960,7 @@ class ConversionService:
if matched_reservation:
result["reservation"] = matched_reservation
result["match_type"] = "guest_details" # Matched by guest details only
_LOGGER.info(
"Matched conversion by guest details (name=%s %s, email=%s, hotel=%s)",
guest_first_name,
@@ -1484,6 +1481,7 @@ class ConversionService:
matched_reservation = match_result["reservation"]
matched_customer = match_result["customer"]
matched_hashed_customer = match_result["hashed_customer"]
match_type = match_result.get("match_type") # "id" or "guest_details"
# Update the conversion with matched entities if found
if matched_reservation or matched_customer or matched_hashed_customer:
@@ -1496,6 +1494,15 @@ class ConversionService:
conversion.hashed_customer_id = (
matched_hashed_customer.id if matched_hashed_customer else None
)
# Set attribution flags based on match type
if match_type == "id":
conversion.directly_attributable = True
conversion.guest_matched = False
elif match_type == "guest_details":
conversion.directly_attributable = False
conversion.guest_matched = True
conversion.updated_at = datetime.now()
# Update stats if provided

View File

@@ -12,6 +12,7 @@ from sqlalchemy import (
DateTime,
Double,
ForeignKey,
ForeignKeyConstraint,
Integer,
String,
)
@@ -368,19 +369,17 @@ class ConversionGuest(Base):
"""Guest information from hotel PMS conversions, with hashed fields for privacy.
Stores both unhashed (for reference during transition) and hashed (SHA256 per Meta API)
versions of guest PII. Multiple conversions can reference the same guest if they have
the same hotel_id and guest_id (PMS guest identifier).
versions of guest PII. Uses composite primary key (hotel_id, guest_id) from the PMS.
When multiple conversions for the same guest arrive with different guest info,
the most recent (by creation_time) data is kept as the canonical version.
the most recent (by last_seen) data is kept as the canonical version.
"""
__tablename__ = "conversion_guests"
id = Column(Integer, primary_key=True)
# Natural keys from PMS (composite unique constraint)
hotel_id = Column(String, nullable=False, index=True)
guest_id = Column(String, index=True) # PMS guest ID (nullable for unidentified guests)
# Natural keys from PMS - composite primary key
hotel_id = Column(String, nullable=False, primary_key=True, index=True)
guest_id = Column(String, nullable=False, primary_key=True, index=True)
# Unhashed guest information (for reference/transition period)
guest_first_name = Column(String)
@@ -396,6 +395,9 @@ class ConversionGuest(Base):
hashed_country_code = Column(String(64))
hashed_birth_date = Column(String(64))
# Guest classification
is_regular = Column(Boolean, default=False) # True if guest has many prior stays before appearing in our reservations
# Metadata
first_seen = Column(DateTime(timezone=True))
last_seen = Column(DateTime(timezone=True))
@@ -424,6 +426,7 @@ class ConversionGuest(Base):
guest_country_code: str | None,
guest_birth_date: Date | None,
now: DateTime,
is_regular: bool = False,
):
"""Create a ConversionGuest from conversion guest data."""
return cls(
@@ -441,6 +444,7 @@ class ConversionGuest(Base):
hashed_birth_date=cls._normalize_and_hash(
guest_birth_date.isoformat() if guest_birth_date else None
),
is_regular=is_regular,
first_seen=now,
last_seen=now,
)
@@ -544,6 +548,12 @@ class Conversion(Base):
The tracking data transferred by the PMS is, however, somewhat shorter.
We therefore also need to match on guest name/email and other metadata.
Attribution flags:
- directly_attributable: True if matched by ID (reservation_id is set), meaning
this conversion is directly responsible for this reservation
- guest_matched: True if matched only by guest details (customer_id/hashed_customer_id set),
meaning the same person made this request but the reservation may not be directly attributable
"""
__tablename__ = "conversions"
@@ -557,12 +567,10 @@ class Conversion(Base):
hashed_customer_id = Column(
Integer, ForeignKey("hashed_customers.id"), nullable=True, index=True
)
conversion_guest_id = Column(
Integer, ForeignKey("conversion_guests.id"), nullable=True, index=True
)
# Reservation metadata from XML
hotel_id = Column(String, index=True) # hotelID attribute
guest_id = Column(String, nullable=True, index=True) # PMS guest ID, FK to conversion_guests
pms_reservation_id = Column(String, index=True) # id attribute from reservation
reservation_number = Column(String) # number attribute
reservation_date = Column(Date) # date attribute (when reservation was made)
@@ -570,13 +578,8 @@ class Conversion(Base):
reservation_type = Column(String) # type attribute (e.g., "reservation")
booking_channel = Column(String) # bookingChannel attribute
# Guest information from reservation XML - used for matching
guest_first_name = Column(String, index=True) # firstName from guest element
guest_last_name = Column(String, index=True) # lastName from guest element
guest_email = Column(String, index=True) # email from guest element
guest_country_code = Column(String) # countryCode from guest element
guest_birth_date = Column(Date) # birthDate from guest element
guest_id = Column(String) # id from guest element
# Advertising/tracking data - used for matching to existing reservations
advertising_medium = Column(
@@ -589,10 +592,23 @@ class Conversion(Base):
String, index=True
) # advertisingCampagne (contains fbclid/gclid)
# Attribution flags - track how this conversion was matched
directly_attributable = Column(Boolean, default=False) # Matched by ID (high confidence)
guest_matched = Column(Boolean, default=False) # Matched by guest details only
# Metadata
created_at = Column(DateTime(timezone=True)) # When this record was imported
updated_at = Column(DateTime(timezone=True)) # When this record was last updated
# Composite foreign key constraint for ConversionGuest (hotel_id, guest_id)
__table_args__ = (
ForeignKeyConstraint(
["hotel_id", "guest_id"],
["conversion_guests.hotel_id", "conversion_guests.guest_id"],
ondelete="SET NULL",
),
)
# Relationships
reservation = relationship("Reservation", backref="conversions")
customer = relationship("Customer", backref="conversions")