From c4ecf3802fe3937b5316eb75e339ffde6e9370ab Mon Sep 17 00:00:00 2001 From: Jonas Linter <{email_address}> Date: Wed, 3 Dec 2025 16:12:07 +0100 Subject: [PATCH] Fine this needs more work --- src/alpine_bits_python/conversion_service.py | 242 ++++++++++++++----- 1 file changed, 182 insertions(+), 60 deletions(-) diff --git a/src/alpine_bits_python/conversion_service.py b/src/alpine_bits_python/conversion_service.py index e76db31..7b53558 100644 --- a/src/alpine_bits_python/conversion_service.py +++ b/src/alpine_bits_python/conversion_service.py @@ -1116,7 +1116,7 @@ class ConversionService: return reservations[0] # If multiple matches or click-id matches, try to narrow down using hashed guest details - _LOGGER.debug( + _LOGGER.info( ( "Ambiguous advertising match for %s (hotel=%s, candidates=%d, md5_lookup=%s). " "Applying guest detail filtering." @@ -1159,6 +1159,8 @@ class ConversionService: guest_last_name: str | None, guest_email: str | None, session: AsyncSession | None = None, + *, + conversion_guest: ConversionGuest | None = None, ) -> Customer | None: """Match guest by name and email directly to Customer (no Reservation needed). @@ -1208,15 +1210,33 @@ class ConversionService: if len(matches) == 1: return matches[0] - # If multiple matches, prefer email match over name match - for match in matches: - if guest_email and match.hashed_email == guest_email: - _LOGGER.debug( - "Multiple hashed customer matches, preferring email match" - ) - return match + best_customer: Customer | None = None + best_score = -1 + tie = False + + for candidate in matches: + candidate_score = self._score_guest_customer_match( + conversion_guest, + candidate, + hashed_first_name=guest_first_name, + hashed_last_name=guest_last_name, + hashed_email=guest_email, + ) + if candidate_score > best_score: + best_score = candidate_score + best_customer = candidate + tie = False + elif candidate_score == best_score: + tie = True + + if best_customer and best_score > 0 and not tie: + _LOGGER.debug( + "Multiple hashed customer matches; selected candidate %s via score %s", + best_customer.id, + best_score, + ) + return best_customer - # Otherwise return first match _LOGGER.warning( "Multiple hashed customer matches found for guest details, using first match" ) @@ -1400,6 +1420,7 @@ class ConversionService: conversion_guest.hashed_last_name, conversion_guest.hashed_email, session, + conversion_guest=conversion_guest, ) if matched_hashed_customer: @@ -1540,42 +1561,46 @@ class ConversionService: _LOGGER.debug("Phase 3d: No matched guests to check for regularity") return - # Group by (hotel_id, guest_id) to detect conflicts. + # Group by guest and by customer to detect conflicts in both directions. guest_customer_sets: dict[tuple[str | None, int], set[int]] = {} + customer_guest_sets: dict[int, set[tuple[str | None, int]]] = {} for hotel_id, guest_id, customer_id in guest_customer_rows: if hotel_id is None or guest_id is None or customer_id is None: continue - key = (hotel_id, guest_id) - guest_customer_sets.setdefault(key, set()).add(customer_id) + guest_key = (hotel_id, guest_id) + guest_customer_sets.setdefault(guest_key, set()).add(customer_id) + customer_guest_sets.setdefault(customer_id, set()).add(guest_key) if not guest_customer_sets: _LOGGER.debug("Phase 3d: No matched guests to check for regularity") return - duplicates = { + guest_duplicates = { key: customer_ids for key, customer_ids in guest_customer_sets.items() if len(customer_ids) > 1 } - if duplicates: - await self._deduplicate_guest_customer_links(duplicates, session) + if guest_duplicates: + await self._deduplicate_guest_customer_links(guest_duplicates, session) + customer_duplicates = { + customer_id: guest_keys + for customer_id, guest_keys in customer_guest_sets.items() + if len(guest_keys) > 1 + } + if customer_duplicates: + await self._deduplicate_customer_guest_links(customer_duplicates, session) + + refreshed = await session.execute( + select( + Conversion.hotel_id, Conversion.guest_id, Conversion.customer_id + ).where(Conversion.guest_id.isnot(None), Conversion.customer_id.isnot(None)) + ) guest_to_customer: dict[tuple[str | None, int], int] = {} - for key, customer_ids in guest_customer_sets.items(): - hotel_id, guest_id = key - # After deduplication, reload conversions for this guest to find the remaining customer (if any) - result = await session.execute( - select(Conversion.customer_id) - .where( - Conversion.hotel_id == hotel_id, - Conversion.guest_id == guest_id, - Conversion.customer_id.isnot(None), - ) - .limit(1) - ) - chosen_customer = result.scalar_one_or_none() - if chosen_customer: - guest_to_customer[key] = chosen_customer + for hotel_id, guest_id, customer_id in refreshed.all(): + if hotel_id is None or guest_id is None or customer_id is None: + continue + guest_to_customer[(hotel_id, guest_id)] = customer_id if not guest_to_customer: _LOGGER.debug( @@ -1986,6 +2011,60 @@ class ConversionService: hotel_id, guest_id, customer_id, session ) + def _score_guest_customer_match( + self, + conversion_guest: ConversionGuest | None, + customer: Customer | None, + *, + hashed_first_name: str | None = None, + hashed_last_name: str | None = None, + hashed_email: str | None = None, + ) -> int: + """Score how well a guest matches a customer using hashed data.""" + if not customer: + return -1 + + score = 0 + guest_email_hash = ( + hashed_email or (conversion_guest.hashed_email if conversion_guest else None) + ) + guest_first_hash = ( + hashed_first_name + or (conversion_guest.hashed_first_name if conversion_guest else None) + ) + guest_last_hash = ( + hashed_last_name + or (conversion_guest.hashed_last_name if conversion_guest else None) + ) + + if guest_email_hash and customer.hashed_email == guest_email_hash: + score += 200 + if guest_first_hash and guest_last_hash: + if ( + customer.hashed_given_name == guest_first_hash + and customer.hashed_surname == guest_last_hash + ): + score += 50 + elif guest_first_hash and customer.hashed_given_name == guest_first_hash: + score += 10 + elif guest_last_hash and customer.hashed_surname == guest_last_hash: + score += 10 + + if conversion_guest: + if ( + conversion_guest.hashed_country_code + and customer.hashed_country_code + == conversion_guest.hashed_country_code + ): + score += 5 + if ( + conversion_guest.hashed_birth_date + and customer.hashed_birth_date == conversion_guest.hashed_birth_date + ): + score += 2 + + return score + async def _choose_best_customer_for_guest( self, conversion_guest: ConversionGuest, @@ -2004,41 +2083,12 @@ class ConversionService: if not candidates: return None - def score_customer(customer: Customer) -> int: - score = 0 - if ( - conversion_guest.hashed_email - and customer.hashed_email == conversion_guest.hashed_email - ): - score += 100 - if ( - conversion_guest.hashed_first_name - and customer.hashed_given_name == conversion_guest.hashed_first_name - ): - score += 10 - if ( - conversion_guest.hashed_last_name - and customer.hashed_surname == conversion_guest.hashed_last_name - ): - score += 10 - if ( - conversion_guest.hashed_country_code - and customer.hashed_country_code == conversion_guest.hashed_country_code - ): - score += 2 - if ( - conversion_guest.hashed_birth_date - and customer.hashed_birth_date == conversion_guest.hashed_birth_date - ): - score += 1 - return score - best_customer_id = None best_score = -1 is_tied = False for customer in candidates: - score = score_customer(customer) + score = self._score_guest_customer_match(conversion_guest, customer) if score > best_score: best_score = score best_customer_id = customer.id @@ -2051,6 +2101,78 @@ class ConversionService: return best_customer_id + async def _deduplicate_customer_guest_links( + self, + duplicates: dict[int, set[tuple[str | None, int]]], + session: AsyncSession, + ) -> None: + """Ensure each customer is linked to at most one guest.""" + for customer_id, guest_keys in duplicates.items(): + customer_result = await session.execute( + select(Customer).where(Customer.id == customer_id) + ) + customer = customer_result.scalar_one_or_none() + + guest_records: list[tuple[str | None, int, ConversionGuest | None]] = [] + for hotel_id, guest_id in guest_keys: + guest_result = await session.execute( + select(ConversionGuest).where( + ConversionGuest.hotel_id == hotel_id, + ConversionGuest.guest_id == guest_id, + ) + ) + guest_records.append((hotel_id, guest_id, guest_result.scalar_one_or_none())) + + if not customer: + _LOGGER.warning( + "Customer %s missing while deduplicating guests; severing links %s", + customer_id, + guest_keys, + ) + for hotel_id, guest_id, _ in guest_records: + await self._sever_guest_customer_link( + hotel_id, guest_id, customer_id, session + ) + continue + + best_key: tuple[str | None, int] | None = None + best_score = -1 + is_tied = False + for hotel_id, guest_id, guest in guest_records: + score = self._score_guest_customer_match(guest, customer) + if score > best_score: + best_score = score + best_key = (hotel_id, guest_id) + is_tied = False + elif score == best_score: + is_tied = True + + if not best_key or best_score <= 0 or is_tied: + _LOGGER.warning( + "Customer %s linked to guests %s but no clear match; removing all links", + customer_id, + guest_keys, + ) + for hotel_id, guest_id, _ in guest_records: + await self._sever_guest_customer_link( + hotel_id, guest_id, customer_id, session + ) + continue + + _LOGGER.warning( + "Customer %s linked to multiple guests %s; keeping guest %s (hotel=%s, score=%s)", + customer_id, + guest_keys, + best_key[1], + best_key[0], + best_score, + ) + for hotel_id, guest_id, _ in guest_records: + if (hotel_id, guest_id) != best_key: + await self._sever_guest_customer_link( + hotel_id, guest_id, customer_id, session + ) + async def _sever_guest_customer_link( self, hotel_id: str | None,