Fine, this needs more work

This commit is contained in:
Jonas Linter
2025-12-03 16:12:07 +01:00
parent e1bbefb9a3
commit c4ecf3802f

View File

@@ -1116,7 +1116,7 @@ class ConversionService:
return reservations[0]
# If multiple matches or click-id matches, try to narrow down using hashed guest details
_LOGGER.debug(
_LOGGER.info(
(
"Ambiguous advertising match for %s (hotel=%s, candidates=%d, md5_lookup=%s). "
"Applying guest detail filtering."
@@ -1159,6 +1159,8 @@ class ConversionService:
guest_last_name: str | None,
guest_email: str | None,
session: AsyncSession | None = None,
*,
conversion_guest: ConversionGuest | None = None,
) -> Customer | None:
"""Match guest by name and email directly to Customer (no Reservation needed).
@@ -1208,15 +1210,33 @@ class ConversionService:
if len(matches) == 1:
return matches[0]
# If multiple matches, prefer email match over name match
for match in matches:
if guest_email and match.hashed_email == guest_email:
_LOGGER.debug(
"Multiple hashed customer matches, preferring email match"
)
return match
best_customer: Customer | None = None
best_score = -1
tie = False
for candidate in matches:
candidate_score = self._score_guest_customer_match(
conversion_guest,
candidate,
hashed_first_name=guest_first_name,
hashed_last_name=guest_last_name,
hashed_email=guest_email,
)
if candidate_score > best_score:
best_score = candidate_score
best_customer = candidate
tie = False
elif candidate_score == best_score:
tie = True
if best_customer and best_score > 0 and not tie:
_LOGGER.debug(
"Multiple hashed customer matches; selected candidate %s via score %s",
best_customer.id,
best_score,
)
return best_customer
# Otherwise return first match
_LOGGER.warning(
"Multiple hashed customer matches found for guest details, using first match"
)
@@ -1400,6 +1420,7 @@ class ConversionService:
conversion_guest.hashed_last_name,
conversion_guest.hashed_email,
session,
conversion_guest=conversion_guest,
)
if matched_hashed_customer:
@@ -1540,42 +1561,46 @@ class ConversionService:
_LOGGER.debug("Phase 3d: No matched guests to check for regularity")
return
# Group by (hotel_id, guest_id) to detect conflicts.
# Group by guest and by customer to detect conflicts in both directions.
guest_customer_sets: dict[tuple[str | None, int], set[int]] = {}
customer_guest_sets: dict[int, set[tuple[str | None, int]]] = {}
for hotel_id, guest_id, customer_id in guest_customer_rows:
if hotel_id is None or guest_id is None or customer_id is None:
continue
key = (hotel_id, guest_id)
guest_customer_sets.setdefault(key, set()).add(customer_id)
guest_key = (hotel_id, guest_id)
guest_customer_sets.setdefault(guest_key, set()).add(customer_id)
customer_guest_sets.setdefault(customer_id, set()).add(guest_key)
if not guest_customer_sets:
_LOGGER.debug("Phase 3d: No matched guests to check for regularity")
return
duplicates = {
guest_duplicates = {
key: customer_ids
for key, customer_ids in guest_customer_sets.items()
if len(customer_ids) > 1
}
if duplicates:
await self._deduplicate_guest_customer_links(duplicates, session)
if guest_duplicates:
await self._deduplicate_guest_customer_links(guest_duplicates, session)
customer_duplicates = {
customer_id: guest_keys
for customer_id, guest_keys in customer_guest_sets.items()
if len(guest_keys) > 1
}
if customer_duplicates:
await self._deduplicate_customer_guest_links(customer_duplicates, session)
refreshed = await session.execute(
select(
Conversion.hotel_id, Conversion.guest_id, Conversion.customer_id
).where(Conversion.guest_id.isnot(None), Conversion.customer_id.isnot(None))
)
guest_to_customer: dict[tuple[str | None, int], int] = {}
for key, customer_ids in guest_customer_sets.items():
hotel_id, guest_id = key
# After deduplication, reload conversions for this guest to find the remaining customer (if any)
result = await session.execute(
select(Conversion.customer_id)
.where(
Conversion.hotel_id == hotel_id,
Conversion.guest_id == guest_id,
Conversion.customer_id.isnot(None),
)
.limit(1)
)
chosen_customer = result.scalar_one_or_none()
if chosen_customer:
guest_to_customer[key] = chosen_customer
for hotel_id, guest_id, customer_id in refreshed.all():
if hotel_id is None or guest_id is None or customer_id is None:
continue
guest_to_customer[(hotel_id, guest_id)] = customer_id
if not guest_to_customer:
_LOGGER.debug(
@@ -1986,6 +2011,60 @@ class ConversionService:
hotel_id, guest_id, customer_id, session
)
def _score_guest_customer_match(
    self,
    conversion_guest: ConversionGuest | None,
    customer: Customer | None,
    *,
    hashed_first_name: str | None = None,
    hashed_last_name: str | None = None,
    hashed_email: str | None = None,
) -> int:
    """Rate how closely a customer's hashed fields agree with a guest's.

    An explicit ``hashed_*`` keyword argument takes precedence; when it is
    missing or empty, the matching attribute of ``conversion_guest`` is used
    instead (if a guest was supplied at all).

    Points awarded:
        * 200 - email hash equal.
        * 50  - both name hashes are known and both equal the customer's.
          When both are known but either differs, the name adds nothing.
        * 10  - exactly one name hash is known and it equals the customer's.
        * 5   - country-code hash equal (read from ``conversion_guest`` only).
        * 2   - birth-date hash equal (read from ``conversion_guest`` only).

    Returns:
        ``-1`` when ``customer`` is falsy, otherwise the sum of the points.
    """
    if not customer:
        return -1

    def resolve(explicit: str | None, guest_attribute: str) -> str | None:
        # Explicit value first, then the guest record, otherwise nothing.
        if explicit:
            return explicit
        if conversion_guest:
            return getattr(conversion_guest, guest_attribute)
        return None

    email_hash = resolve(hashed_email, "hashed_email")
    first_hash = resolve(hashed_first_name, "hashed_first_name")
    last_hash = resolve(hashed_last_name, "hashed_last_name")

    total = 0

    if email_hash and customer.hashed_email == email_hash:
        total += 200

    if first_hash and last_hash:
        # Both names known: all-or-nothing, a partial match earns no points.
        if (
            customer.hashed_given_name == first_hash
            and customer.hashed_surname == last_hash
        ):
            total += 50
    elif first_hash:
        if customer.hashed_given_name == first_hash:
            total += 10
    elif last_hash and customer.hashed_surname == last_hash:
        total += 10

    if conversion_guest:
        # Secondary attributes have no keyword override; they come from the
        # guest record and share the same attribute name on the customer.
        for field_name, bonus in (
            ("hashed_country_code", 5),
            ("hashed_birth_date", 2),
        ):
            guest_value = getattr(conversion_guest, field_name)
            if guest_value and getattr(customer, field_name) == guest_value:
                total += bonus

    return total
async def _choose_best_customer_for_guest(
self,
conversion_guest: ConversionGuest,
@@ -2004,41 +2083,12 @@ class ConversionService:
if not candidates:
return None
def score_customer(customer: Customer) -> int:
score = 0
if (
conversion_guest.hashed_email
and customer.hashed_email == conversion_guest.hashed_email
):
score += 100
if (
conversion_guest.hashed_first_name
and customer.hashed_given_name == conversion_guest.hashed_first_name
):
score += 10
if (
conversion_guest.hashed_last_name
and customer.hashed_surname == conversion_guest.hashed_last_name
):
score += 10
if (
conversion_guest.hashed_country_code
and customer.hashed_country_code == conversion_guest.hashed_country_code
):
score += 2
if (
conversion_guest.hashed_birth_date
and customer.hashed_birth_date == conversion_guest.hashed_birth_date
):
score += 1
return score
best_customer_id = None
best_score = -1
is_tied = False
for customer in candidates:
score = score_customer(customer)
score = self._score_guest_customer_match(conversion_guest, customer)
if score > best_score:
best_score = score
best_customer_id = customer.id
@@ -2051,6 +2101,78 @@ class ConversionService:
return best_customer_id
async def _deduplicate_customer_guest_links(
    self,
    duplicates: dict[int, set[tuple[str | None, int]]],
    session: AsyncSession,
) -> None:
    """Ensure each customer is linked to at most one guest.

    For every customer in ``duplicates`` the linked guests are scored with
    ``_score_guest_customer_match`` and only the single best-scoring guest
    keeps its link. Every other link is handed to
    ``_sever_guest_customer_link``. All links are severed when the customer
    row cannot be loaded, when the best score is not positive, or when the
    best score is shared by more than one guest.

    Args:
        duplicates: Maps a customer id to the ``(hotel_id, guest_id)`` keys
            currently linked to it.
        session: Async session used for the lookups and passed on to
            ``_sever_guest_customer_link``.
    """
    for customer_id, guest_keys in duplicates.items():
        customer_result = await session.execute(
            select(Customer).where(Customer.id == customer_id)
        )
        customer = customer_result.scalar_one_or_none()

        if not customer:
            _LOGGER.warning(
                "Customer %s missing while deduplicating guests; severing links %s",
                customer_id,
                guest_keys,
            )
            # Severing only needs the keys, so the per-guest SELECTs are
            # skipped entirely when there is no customer to score against.
            for hotel_id, guest_id in guest_keys:
                await self._sever_guest_customer_link(
                    hotel_id, guest_id, customer_id, session
                )
            continue

        best_key: tuple[str | None, int] | None = None
        best_score = -1
        is_tied = False
        for hotel_id, guest_id in guest_keys:
            guest_result = await session.execute(
                select(ConversionGuest).where(
                    ConversionGuest.hotel_id == hotel_id,
                    ConversionGuest.guest_id == guest_id,
                )
            )
            # A missing guest row yields None, which the scorer accepts.
            guest = guest_result.scalar_one_or_none()
            score = self._score_guest_customer_match(guest, customer)
            if score > best_score:
                best_score = score
                best_key = (hotel_id, guest_id)
                is_tied = False
            elif score == best_score:
                is_tied = True

        if not best_key or best_score <= 0 or is_tied:
            _LOGGER.warning(
                "Customer %s linked to guests %s but no clear match; removing all links",
                customer_id,
                guest_keys,
            )
            for hotel_id, guest_id in guest_keys:
                await self._sever_guest_customer_link(
                    hotel_id, guest_id, customer_id, session
                )
            continue

        _LOGGER.warning(
            "Customer %s linked to multiple guests %s; keeping guest %s (hotel=%s, score=%s)",
            customer_id,
            guest_keys,
            best_key[1],
            best_key[0],
            best_score,
        )
        for hotel_id, guest_id in guest_keys:
            if (hotel_id, guest_id) != best_key:
                await self._sever_guest_customer_link(
                    hotel_id, guest_id, customer_id, session
                )
async def _sever_guest_customer_link(
self,
hotel_id: str | None,