merge_db_fixes_to_main #16

Merged
jonas merged 40 commits from merge_db_fixes_to_main into main 2025-12-09 11:37:21 +00:00
Showing only changes of commit a6e4bcbe1b - Show all commits

View File

@@ -631,6 +631,8 @@ class ConversionService:
selectinload(Reservation.customer), selectinload(Reservation.customer),
) )
query = query.filter(Reservation.hotel_id == self.hotel_id) if self.hotel_id else query
result = await session.execute(query) result = await session.execute(query)
reservations = result.scalars().all() reservations = result.scalars().all()
@@ -1050,22 +1052,28 @@ class ConversionService:
self, self,
advertising_campagne: str, advertising_campagne: str,
hotel_id: str | None, hotel_id: str | None,
guest_first_name: str | None, hashed_first_name: str | None,
guest_last_name: str | None, hashed_last_name: str | None,
guest_email: str | None, hashed_email: str | None,
advertising_partner: str | None, advertising_partner: str | None,
session: AsyncSession | None = None, session: AsyncSession | None = None,
raw_first_name: str | None = None,
raw_last_name: str | None = None,
raw_email: str | None = None,
) -> Reservation | None: ) -> Reservation | None:
"""Match reservation by advertising tracking data (fbclid/gclid/md5_unique_id). """Match reservation by advertising tracking data (fbclid/gclid/md5_unique_id).
Args: Args:
advertising_campagne: Tracking ID from PMS (could be truncated click_id or md5_unique_id) advertising_campagne: Tracking ID from PMS (could be truncated click_id or md5_unique_id)
hotel_id: Hotel ID for filtering hotel_id: Hotel ID for filtering
guest_first_name: Guest first name for disambiguation hashed_first_name: Guest first name (hashed) for disambiguation
guest_last_name: Guest last name for disambiguation hashed_last_name: Guest last name (hashed) for disambiguation
guest_email: Guest email for disambiguation hashed_email: Guest email (hashed) for disambiguation
advertising_partner: Partner info (matches utm_medium) advertising_partner: Partner info (matches utm_medium)
session: AsyncSession to use. If None, uses self.session. session: AsyncSession to use. If None, uses self.session.
raw_first_name: Plain guest first name (optional fallback)
raw_last_name: Plain guest last name (optional fallback)
raw_email: Plain guest email (optional fallback)
Returns: Returns:
Matched Reservation or None Matched Reservation or None
@@ -1098,25 +1106,36 @@ class ConversionService:
if not reservations: if not reservations:
return None return None
# If single match, return it # Determine if this looks like a md5_unique_id (32 hex characters) or a click_id
if len(reservations) == 1: is_md5_lookup = len(advertising_campagne or "") == 32
needs_filtering = len(reservations) > 1 or not is_md5_lookup
if not needs_filtering:
# Confident single match via md5_unique_id
return reservations[0] return reservations[0]
# If multiple matches, try to narrow down using guest details # If multiple matches or click-id matches, try to narrow down using hashed guest details
_LOGGER.debug( _LOGGER.debug(
"Multiple reservations match advertisingCampagne %s (hotel=%s): found %d matches. " (
"Attempting to narrow down using guest details.", "Ambiguous advertising match for %s (hotel=%s, candidates=%d, md5_lookup=%s). "
"Applying guest detail filtering."
),
advertising_campagne, advertising_campagne,
hotel_id, hotel_id,
len(reservations), len(reservations),
is_md5_lookup,
) )
matched_reservation = self._filter_reservations_by_guest_details( matched_reservation = self._filter_reservations_by_guest_details(
reservations, reservations,
guest_first_name, raw_first_name,
guest_last_name, raw_last_name,
guest_email, raw_email,
advertising_partner, advertising_partner,
hashed_first_name=hashed_first_name,
hashed_last_name=hashed_last_name,
hashed_email=hashed_email,
) )
if matched_reservation is None: if matched_reservation is None:
@@ -1126,9 +1145,9 @@ class ConversionService:
"(hotel=%s, guest=%s %s, email=%s). Using first match.", "(hotel=%s, guest=%s %s, email=%s). Using first match.",
advertising_campagne, advertising_campagne,
hotel_id, hotel_id,
guest_first_name, raw_first_name,
guest_last_name, raw_last_name,
guest_email, raw_email,
) )
matched_reservation = reservations[0] matched_reservation = reservations[0]
@@ -1210,18 +1229,26 @@ class ConversionService:
guest_last_name: str | None, guest_last_name: str | None,
guest_email: str | None, guest_email: str | None,
advertising_partner: str | None, advertising_partner: str | None,
*,
hashed_first_name: str | None = None,
hashed_last_name: str | None = None,
hashed_email: str | None = None,
) -> Reservation | None: ) -> Reservation | None:
"""Filter reservations using guest details to find a single match. """Filter reservations using guest details to find a single match.
First tries to match by guest name and email. If that doesn't yield a single match, Prefers hashed comparisons (exact match on hashed email or hashed name pair) and
tries matching by advertising_partner against utm_medium. falls back to plaintext comparison if hashes are unavailable. Finally tries
advertising partner vs utm_medium.
Args: Args:
reservations: List of candidate reservations reservations: List of candidate reservations
guest_first_name: Guest first name guest_first_name: Guest first name (plaintext, optional)
guest_last_name: Guest last name guest_last_name: Guest last name (plaintext, optional)
guest_email: Guest email guest_email: Guest email (plaintext, optional)
advertising_partner: Partner info (e.g., "Facebook_Mobile_Feed") advertising_partner: Partner info (e.g., "Facebook_Mobile_Feed")
hashed_first_name: Hashed first name for cross-checking
hashed_last_name: Hashed last name for cross-checking
hashed_email: Hashed email for cross-checking
Returns: Returns:
Single best-match Reservation, or None if no good match found Single best-match Reservation, or None if no good match found
@@ -1229,40 +1256,71 @@ class ConversionService:
""" """
candidates = reservations candidates = reservations
# Try to narrow down by guest name and email # Attempt hashed email match first
if hashed_email:
email_matches = [
reservation
for reservation in candidates
if reservation.customer
and reservation.customer.hashed_email
and reservation.customer.hashed_email == hashed_email
]
if len(email_matches) == 1:
_LOGGER.debug("Found unique match via hashed email")
return email_matches[0]
if email_matches:
candidates = email_matches
# Attempt hashed name match (first + last)
if hashed_first_name and hashed_last_name:
name_matches = [
reservation
for reservation in candidates
if reservation.customer
and reservation.customer.hashed_given_name == hashed_first_name
and reservation.customer.hashed_surname == hashed_last_name
]
if len(name_matches) == 1:
_LOGGER.debug("Found unique match via hashed names")
return name_matches[0]
if name_matches:
candidates = name_matches
# Fallback to plaintext comparison if provided
if guest_first_name or guest_last_name or guest_email: if guest_first_name or guest_last_name or guest_email:
# First try exact match on all available fields
for reservation in candidates: for reservation in candidates:
customer = reservation.customer customer = reservation.customer
if customer: if not customer:
name_match = True continue
email_match = True
if guest_first_name: name_match = True
name_match = name_match and ( email_match = True
customer.given_name
and customer.given_name.lower() == guest_first_name.lower()
)
if guest_last_name: if guest_first_name:
name_match = name_match and ( name_match = name_match and (
customer.surname customer.given_name
and customer.surname.lower() == guest_last_name.lower() and customer.given_name.lower() == guest_first_name.lower()
) )
if guest_email: if guest_last_name:
email_match = ( name_match = name_match and (
customer.email_address customer.surname
and customer.email_address.lower() == guest_email.lower() and customer.surname.lower() == guest_last_name.lower()
) )
if name_match and email_match: if guest_email:
_LOGGER.debug( email_match = email_match and (
"Found exact match on guest name/email for %s %s", customer.email_address
guest_first_name, and customer.email_address.lower() == guest_email.lower()
guest_last_name, )
)
return reservation if name_match and email_match:
_LOGGER.debug(
"Found exact plaintext match on guest details for %s %s",
guest_first_name,
guest_last_name,
)
return reservation
# Try to narrow down by advertising_partner matching utm_medium # Try to narrow down by advertising_partner matching utm_medium
if advertising_partner: if advertising_partner:
@@ -1470,9 +1528,9 @@ class ConversionService:
session: AsyncSession for database queries session: AsyncSession for database queries
""" """
# Collect every guest/customer pair derived from conversions. # Collect every (hotel, guest) -> customer pair derived from conversions.
result = await session.execute( result = await session.execute(
select(Conversion.guest_id, Conversion.customer_id).where( select(Conversion.hotel_id, Conversion.guest_id, Conversion.customer_id).where(
Conversion.guest_id.isnot(None), Conversion.customer_id.isnot(None) Conversion.guest_id.isnot(None), Conversion.customer_id.isnot(None)
) )
) )
@@ -1482,27 +1540,54 @@ class ConversionService:
_LOGGER.debug("Phase 3d: No matched guests to check for regularity") _LOGGER.debug("Phase 3d: No matched guests to check for regularity")
return return
# Deduplicate by guest_id to avoid recalculating when multiple conversions share the same guest. # Group by (hotel_id, guest_id) to detect conflicts.
guest_to_customer: dict[int, int] = {} guest_customer_sets: dict[tuple[str | None, int], set[int]] = {}
for guest_id, customer_id in guest_customer_rows: for hotel_id, guest_id, customer_id in guest_customer_rows:
if guest_id is None or customer_id is None: if hotel_id is None or guest_id is None or customer_id is None:
continue continue
if guest_id not in guest_to_customer: key = (hotel_id, guest_id)
guest_to_customer[guest_id] = customer_id guest_customer_sets.setdefault(key, set()).add(customer_id)
elif guest_to_customer[guest_id] != customer_id:
_LOGGER.warning( if not guest_customer_sets:
"Guest %s linked to multiple customers (%s, %s); keeping first match", _LOGGER.debug("Phase 3d: No matched guests to check for regularity")
guest_id, return
guest_to_customer[guest_id],
customer_id, duplicates = {
key: customer_ids
for key, customer_ids in guest_customer_sets.items()
if len(customer_ids) > 1
}
if duplicates:
await self._deduplicate_guest_customer_links(duplicates, session)
guest_to_customer: dict[tuple[str | None, int], int] = {}
for key, customer_ids in guest_customer_sets.items():
hotel_id, guest_id = key
# After deduplication, reload conversions for this guest to find the remaining customer (if any)
result = await session.execute(
select(Conversion.customer_id)
.where(
Conversion.hotel_id == hotel_id,
Conversion.guest_id == guest_id,
Conversion.customer_id.isnot(None),
) )
.limit(1)
)
chosen_customer = result.scalar_one_or_none()
if chosen_customer:
guest_to_customer[key] = chosen_customer
if not guest_to_customer:
_LOGGER.debug(
"Phase 3d: No guests remained linked to customers after deduplication"
)
return
_LOGGER.debug( _LOGGER.debug(
"Phase 3d: Checking regularity for %d matched guests", "Phase 3d: Checking regularity for %d matched guests", len(guest_to_customer)
len(guest_to_customer),
) )
for guest_id, customer_id in guest_to_customer.items(): for (hotel_id, guest_id), customer_id in guest_to_customer.items():
await self._check_if_guest_is_regular(guest_id, customer_id, session) await self._check_if_guest_is_regular(guest_id, customer_id, session)
async def _match_conversions_from_db_sequential( async def _match_conversions_from_db_sequential(
@@ -1705,15 +1790,21 @@ class ConversionService:
# Get conversion_guest if it exists (has the hashed data) # Get conversion_guest if it exists (has the hashed data)
conversion_guest = conversion.guest conversion_guest = conversion.guest
# Extract hashed data from conversion_guest (already hashed) # Extract hashed and raw data from conversion_guest
hashed_first_name = None hashed_first_name = None
hashed_last_name = None hashed_last_name = None
hashed_email = None hashed_email = None
raw_first_name = None
raw_last_name = None
raw_email = None
if conversion_guest: if conversion_guest:
hashed_first_name = conversion_guest.hashed_first_name hashed_first_name = conversion_guest.hashed_first_name
hashed_last_name = conversion_guest.hashed_last_name hashed_last_name = conversion_guest.hashed_last_name
hashed_email = conversion_guest.hashed_email hashed_email = conversion_guest.hashed_email
raw_first_name = conversion_guest.guest_first_name
raw_last_name = conversion_guest.guest_last_name
raw_email = conversion_guest.guest_email
# Phase 3a: Only try ID-based matching (fbclid/gclid/md5_unique_id) # Phase 3a: Only try ID-based matching (fbclid/gclid/md5_unique_id)
# Guest detail matching is deferred to Phase 3b/3c # Guest detail matching is deferred to Phase 3b/3c
@@ -1729,6 +1820,9 @@ class ConversionService:
hashed_email, hashed_email,
conversion.advertising_partner, conversion.advertising_partner,
session, session,
raw_first_name=raw_first_name,
raw_last_name=raw_last_name,
raw_email=raw_email,
) )
if matched_reservation: if matched_reservation:
@@ -1838,21 +1932,164 @@ class ConversionService:
) )
conversion_guest.is_regular = is_regular conversion_guest.is_regular = is_regular
if is_regular: async def _deduplicate_guest_customer_links(
_LOGGER.debug( self,
"Marking guest %s as regular: earliest paying conversion %s predates first reservation created at %s", duplicates: dict[tuple[str | None, int], set[int]],
guest_id, session: AsyncSession,
earliest_paying_conversion.reservation_date, ) -> None:
earliest_reservation.created_at, """Resolve guest/customer conflicts by comparing hashed details and severing bad links."""
for (hotel_id, guest_id), customer_ids in duplicates.items():
guest_result = await session.execute(
select(ConversionGuest).where(
ConversionGuest.hotel_id == hotel_id,
ConversionGuest.guest_id == guest_id,
)
) )
else: conversion_guest = guest_result.scalar_one_or_none()
_LOGGER.debug(
"Guest %s is not regular: first paying conversion %s is after/equal to first reservation created at %s", if not conversion_guest:
guest_id, _LOGGER.warning(
earliest_paying_conversion.reservation_date, "Guest %s (hotel=%s) missing when resolving duplicates; removing links to customers %s",
earliest_reservation.created_at, guest_id,
hotel_id,
sorted(customer_ids),
)
for customer_id in customer_ids:
await self._sever_guest_customer_link(
hotel_id, guest_id, customer_id, session
)
continue
preferred_customer_id = await self._choose_best_customer_for_guest(
conversion_guest, customer_ids, session
) )
if preferred_customer_id:
_LOGGER.warning(
"Guest %s (hotel=%s) linked to multiple customers %s; keeping %s based on hashed data",
guest_id,
hotel_id,
sorted(customer_ids),
preferred_customer_id,
)
else:
_LOGGER.warning(
"Guest %s (hotel=%s) linked to multiple customers %s but none matched hashed data. Removing all links.",
guest_id,
hotel_id,
sorted(customer_ids),
)
for customer_id in customer_ids:
if customer_id != preferred_customer_id:
await self._sever_guest_customer_link(
hotel_id, guest_id, customer_id, session
)
async def _choose_best_customer_for_guest(
    self,
    conversion_guest: ConversionGuest,
    candidate_customer_ids: set[int],
    session: AsyncSession,
) -> int | None:
    """Select the candidate customer whose hashed fields best match the guest.

    Each candidate earns points for every hashed field that is set on the
    guest and equal to the customer's counterpart: email 100, first name 10,
    last name 10, country code 2, birth date 1.

    Args:
        conversion_guest: Guest whose hashed attributes are compared.
        candidate_customer_ids: IDs of the customers competing for the guest.
        session: AsyncSession used to load the candidate customers.

    Returns:
        The ID of the single highest-scoring customer, or None when there are
        no candidates, nothing scored above zero, or the top score is shared.

    """
    if not candidate_customer_ids:
        return None

    query = select(Customer).where(Customer.id.in_(candidate_customer_ids))
    customers = (await session.execute(query)).scalars().all()
    if not customers:
        return None

    # (guest attribute, customer attribute, points awarded on equality)
    weighted_fields = (
        ("hashed_email", "hashed_email", 100),
        ("hashed_first_name", "hashed_given_name", 10),
        ("hashed_last_name", "hashed_surname", 10),
        ("hashed_country_code", "hashed_country_code", 2),
        ("hashed_birth_date", "hashed_birth_date", 1),
    )

    def total_points(customer: Customer) -> int:
        points = 0
        for guest_attr, customer_attr, weight in weighted_fields:
            guest_value = getattr(conversion_guest, guest_attr)
            # A field only counts when the guest actually has a value for it.
            if guest_value and getattr(customer, customer_attr) == guest_value:
                points += weight
        return points

    scored = [(total_points(customer), customer.id) for customer in customers]
    top_score = max(points for points, _ in scored)
    winners = [customer_id for points, customer_id in scored if points == top_score]

    # Refuse to pick when nothing matched or when the best score is ambiguous.
    if top_score <= 0 or len(winners) != 1:
        return None
    return winners[0]
async def _sever_guest_customer_link(
    self,
    hotel_id: str | None,
    guest_id: int,
    customer_id: int,
    session: AsyncSession,
) -> None:
    """Detach one guest's conversions from one customer.

    Every conversion matching the given hotel, guest and customer has its
    customer and reservation references cleared, its attribution and
    guest-match flags reset to False, and its ``updated_at`` refreshed.
    The changes are applied to the loaded objects; this method issues no
    commit itself. Nothing is logged when no conversion matches.

    Args:
        hotel_id: Hotel the conversions belong to.
        guest_id: Guest whose link should be removed.
        customer_id: Customer the guest is currently linked to.
        session: AsyncSession used to load the conversions.

    """
    statement = (
        select(Conversion)
        .where(
            Conversion.hotel_id == hotel_id,
            Conversion.guest_id == guest_id,
            Conversion.customer_id == customer_id,
        )
        # conversion_rooms is eager-loaded with each conversion; it is not read here.
        .options(selectinload(Conversion.conversion_rooms))
    )
    linked_conversions = (await session.execute(statement)).scalars().all()
    if not linked_conversions:
        return

    for linked in linked_conversions:
        linked.customer_id = linked.reservation_id = None
        linked.directly_attributable = linked.guest_matched = False
        linked.updated_at = datetime.now()

    _LOGGER.warning(
        "Removed %d conversion links for guest %s (hotel=%s) customer %s",
        len(linked_conversions),
        guest_id,
        hotel_id,
        customer_id,
    )
async def _check_if_attributable( async def _check_if_attributable(
self, self,
matched_customer_id: int, matched_customer_id: int,