More refactoring

This commit is contained in:
Jonas Linter
2025-11-19 16:25:18 +01:00
parent 8547326ffa
commit 0854352726
9 changed files with 301001 additions and 340 deletions

View File

@@ -12,6 +12,7 @@ from sqlalchemy import (
DateTime,
Double,
ForeignKey,
ForeignKeyConstraint,
Integer,
String,
)
@@ -368,19 +369,17 @@ class ConversionGuest(Base):
"""Guest information from hotel PMS conversions, with hashed fields for privacy.
Stores both unhashed (for reference during transition) and hashed (SHA256 per Meta API)
versions of guest PII. Multiple conversions can reference the same guest if they have
the same hotel_id and guest_id (PMS guest identifier).
versions of guest PII. Uses composite primary key (hotel_id, guest_id) from the PMS.
When multiple conversions for the same guest arrive with different guest info,
the most recent (by creation_time) data is kept as the canonical version.
the most recent (by last_seen) data is kept as the canonical version.
"""
__tablename__ = "conversion_guests"
id = Column(Integer, primary_key=True)
# Natural keys from PMS (composite unique constraint)
hotel_id = Column(String, nullable=False, index=True)
guest_id = Column(String, index=True) # PMS guest ID (nullable for unidentified guests)
# Natural keys from PMS - composite primary key
hotel_id = Column(String, nullable=False, primary_key=True, index=True)
guest_id = Column(String, nullable=False, primary_key=True, index=True)
# Unhashed guest information (for reference/transition period)
guest_first_name = Column(String)
@@ -396,6 +395,9 @@ class ConversionGuest(Base):
hashed_country_code = Column(String(64))
hashed_birth_date = Column(String(64))
# Guest classification
is_regular = Column(Boolean, default=False) # True if guest has many prior stays before appearing in our reservations
# Metadata
first_seen = Column(DateTime(timezone=True))
last_seen = Column(DateTime(timezone=True))
@@ -424,6 +426,7 @@ class ConversionGuest(Base):
guest_country_code: str | None,
guest_birth_date: Date | None,
now: DateTime,
is_regular: bool = False,
):
"""Create a ConversionGuest from conversion guest data."""
return cls(
@@ -441,6 +444,7 @@ class ConversionGuest(Base):
hashed_birth_date=cls._normalize_and_hash(
guest_birth_date.isoformat() if guest_birth_date else None
),
is_regular=is_regular,
first_seen=now,
last_seen=now,
)
@@ -544,6 +548,12 @@ class Conversion(Base):
The tracking data transferred by the PMS is, however, somewhat shorter.
We therefore also need to match on guest name/email and other metadata.
Attribution flags:
- directly_attributable: True if matched by ID (reservation_id is set), meaning
this conversion is directly responsible for this reservation
- guest_matched: True if matched only by guest details (customer_id/hashed_customer_id set),
meaning the same person made this request but the reservation may not be directly attributable
"""
__tablename__ = "conversions"
@@ -557,12 +567,10 @@ class Conversion(Base):
hashed_customer_id = Column(
Integer, ForeignKey("hashed_customers.id"), nullable=True, index=True
)
conversion_guest_id = Column(
Integer, ForeignKey("conversion_guests.id"), nullable=True, index=True
)
# Reservation metadata from XML
hotel_id = Column(String, index=True) # hotelID attribute
guest_id = Column(String, nullable=True, index=True) # PMS guest ID, FK to conversion_guests
pms_reservation_id = Column(String, index=True) # id attribute from reservation
reservation_number = Column(String) # number attribute
reservation_date = Column(Date) # date attribute (when reservation was made)
@@ -570,13 +578,8 @@ class Conversion(Base):
reservation_type = Column(String) # type attribute (e.g., "reservation")
booking_channel = Column(String) # bookingChannel attribute
# Guest information from reservation XML - used for matching
guest_first_name = Column(String, index=True) # firstName from guest element
guest_last_name = Column(String, index=True) # lastName from guest element
guest_email = Column(String, index=True) # email from guest element
guest_country_code = Column(String) # countryCode from guest element
guest_birth_date = Column(Date) # birthDate from guest element
guest_id = Column(String) # id from guest element
# Advertising/tracking data - used for matching to existing reservations
advertising_medium = Column(
@@ -589,10 +592,23 @@ class Conversion(Base):
String, index=True
) # advertisingCampagne (contains fbclid/gclid)
# Attribution flags - track how this conversion was matched
directly_attributable = Column(Boolean, default=False) # Matched by ID (high confidence)
guest_matched = Column(Boolean, default=False) # Matched by guest details only
# Metadata
created_at = Column(DateTime(timezone=True)) # When this record was imported
updated_at = Column(DateTime(timezone=True)) # When this record was last updated
# Composite foreign key constraint for ConversionGuest (hotel_id, guest_id)
__table_args__ = (
ForeignKeyConstraint(
["hotel_id", "guest_id"],
["conversion_guests.hotel_id", "conversion_guests.guest_id"],
ondelete="SET NULL",
),
)
# Relationships
reservation = relationship("Reservation", backref="conversions")
customer = relationship("Customer", backref="conversions")