3 Commits

Author SHA1 Message Date
Jonas Linter
84caa3590a Experimented with fuzzy matching, but ultimately it was not a good idea: 2 false positives and nothing more 2025-11-18 19:45:37 +01:00
Jonas Linter
b4522d2e2a CSV import now works with pre-acknowledgements 2025-11-18 19:25:52 +01:00
Jonas Linter
104ac5fd6d Fixed the csv_import 2025-11-18 18:37:30 +01:00
7 changed files with 176 additions and 47 deletions

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -3,7 +3,7 @@
select sum(room.total_revenue::float)
from alpinebits.conversions as con
join alpinebits.room_reservations as room on room.conversion_id = con.id
join alpinebits.conversion_rooms as room on room.conversion_id = con.id
join alpinebits.reservations as res on res.id = con.reservation_id
@@ -21,7 +21,7 @@ select res.created_at, con.reservation_date, res.start_date, room.arrival_date,r
room.room_status
from alpinebits.conversions as con
join alpinebits.room_reservations as room on room.conversion_id = con.id
join alpinebits.conversion_rooms as room on room.conversion_id = con.id
join alpinebits.reservations as res on res.id = con.reservation_id
@@ -37,7 +37,7 @@ select res.created_at, con.reservation_date, res.start_date, room.arrival_date,r
select round(sum(room.total_revenue::numeric)::numeric, 3), con.advertising_medium
from alpinebits.conversions as con
join alpinebits.room_reservations as room on room.conversion_id = con.id
join alpinebits.conversion_rooms as room on room.conversion_id = con.id

View File

@@ -39,7 +39,7 @@ from .alpinebits_server import (
Version,
)
from .auth import generate_unique_id, validate_api_key
from .config_loader import load_config
from .config_loader import load_config, get_username_for_hotel
from .const import CONF_GOOGLE_ACCOUNT, CONF_HOTEL_ID, CONF_META_ACCOUNT, HttpStatusCode
from .conversion_service import ConversionService
from .csv_import import CSVImporter
@@ -1142,15 +1142,25 @@ async def handle_wix_form_test(
async def _process_csv_import_background(
csv_content: str,
filename: str,
hotel_code: str | None,
hotel_code: str,
session_maker: SessionMaker,
config: dict[str, Any],
log_filename: Path,
):
"""Background task to process CSV import.
"""Background task to process CSV import with automatic acknowledgement.
This runs in a separate asyncio task after the HTTP response is sent.
Handles both file saving and database processing.
All imported reservations are automatically acknowledged using the username
associated with the hotel_code from the config.
Args:
csv_content: CSV content as string
filename: Original filename
hotel_code: Hotel code (mandatory) - used to get username for acknowledgements
session_maker: SessionMaker for creating database sessions
config: Application configuration
log_filename: Path to save the CSV file
"""
try:
# First, save the CSV file (in background)
@@ -1160,27 +1170,41 @@ async def _process_csv_import_background(
# Now process the CSV import
_LOGGER.info("Starting database processing of %s", filename)
# Get username for acknowledgements from config
username = get_username_for_hotel(config, hotel_code)
# Create a new session for this background task
async with session_maker() as db_session:
db_session = await session_maker.create_session()
try:
importer = CSVImporter(db_session, config)
stats = await importer.import_csv_file(str(log_filename), hotel_code, dryrun=False)
# Import with pre-acknowledgement enabled
stats = await importer.import_csv_file(
str(log_filename),
hotel_code,
dryrun=False,
pre_acknowledge=True,
client_id=hotel_code,
username=username
)
_LOGGER.info(
"CSV import complete for %s: %s", filename, stats
)
finally:
await db_session.close()
except Exception:
_LOGGER.exception(
"Error processing CSV import in background for %s", filename
)
@api_router.put("/admin/import-csv/{filename:path}")
@api_router.put("/admin/import-csv/{hotel_code}/{filename:path}")
@limiter.limit(BURST_RATE_LIMIT)
async def import_csv_endpoint(
request: Request,
background_tasks: BackgroundTasks,
hotel_code: str,
filename: str,
hotel_code: str | None = None,
credentials: tuple = Depends(validate_basic_auth),
db_session=Depends(get_async_session),
session_maker: SessionMaker = Depends(get_session_maker),
@@ -1193,16 +1217,18 @@ async def import_csv_endpoint(
- fbclid/gclid tracking IDs
Returns immediately with 202 Accepted while processing continues in background.
All imported reservations are automatically acknowledged using the username
associated with the hotel_code in the config.
Requires basic authentication and saves CSV files to log directory.
Supports gzip compression via Content-Encoding header.
Args:
hotel_code: Hotel code (mandatory) - used to get username for acknowledgements
filename: Name for the CSV file (used for logging)
hotel_code: Optional hotel code to override CSV values
credentials: Basic auth credentials
Example: PUT /api/admin/import-csv/reservations.csv
Example: PUT /api/admin/import-csv/39054_001/reservations.csv
"""
try:

View File

@@ -329,3 +329,8 @@ class Config:
# For backward compatibility
def load_config():
    """Build a ``Config`` instance and return its ``config`` attribute."""
    loader = Config()
    return loader.config
def get_username_for_hotel(config: dict, hotel_code: str) -> str:
    """Get the username associated with a hotel_code from config.

    Scans the ``alpine_bits_auth`` list of ``config`` and returns the
    ``username`` of the first entry whose ``hotel_id`` equals ``hotel_code``.

    Args:
        config: Application configuration dict; hotel entries are read from
            its ``alpine_bits_auth`` key (treated as empty when absent).
        hotel_code: Hotel identifier to look up.

    Returns:
        The ``username`` value of the first matching entry. If that entry
        has no ``username`` key the result is ``None``.

    Raises:
        ValueError: If no entry in ``alpine_bits_auth`` matches ``hotel_code``.
    """
    for hotel in config.get("alpine_bits_auth", []):
        if hotel.get("hotel_id") == hotel_code:
            return hotel.get("username")
    # A bare next() on an exhausted generator would leak StopIteration, which
    # carries no message and turns into RuntimeError inside generators.
    raise ValueError(
        f"No alpine_bits_auth entry configured for hotel_code {hotel_code!r}"
    )

View File

@@ -2,8 +2,9 @@
import asyncio
import xml.etree.ElementTree as ET
from datetime import datetime
from datetime import datetime, date
from decimal import Decimal
from difflib import SequenceMatcher
from typing import Any
from sqlalchemy import or_, select
@@ -430,6 +431,7 @@ class ConversionService:
guest_first_name,
guest_last_name,
guest_email,
guest_birth_date,
advertising_partner,
session,
)
@@ -703,6 +705,7 @@ class ConversionService:
guest_first_name: str | None = None,
guest_last_name: str | None = None,
guest_email: str | None = None,
guest_birth_date: date | None = None,
advertising_partner: str | None = None,
session: AsyncSession | None = None,
) -> dict[str, Any]:
@@ -719,6 +722,7 @@ class ConversionService:
guest_first_name: Guest first name for matching
guest_last_name: Guest last name for matching
guest_email: Guest email for matching
guest_birth_date: Guest birth date (optional, improves matching confidence)
advertising_partner: Partner info (matches utm_medium for additional filtering)
session: AsyncSession to use. If None, uses self.session.
@@ -765,7 +769,7 @@ class ConversionService:
guest_email or guest_first_name or guest_last_name
):
matched_reservation = await self._match_by_guest_details(
hotel_id, guest_first_name, guest_last_name, guest_email, session
hotel_id, guest_first_name, guest_last_name, guest_email, guest_birth_date, session
)
if matched_reservation:
@@ -896,9 +900,10 @@ class ConversionService:
guest_first_name: str | None,
guest_last_name: str | None,
guest_email: str | None,
guest_birth_date: date | None = None,
session: AsyncSession | None = None,
) -> Reservation | None:
"""Match reservation by guest name and email using cached data.
"""Match reservation by guest name, email, and birth date using cached data.
This method uses the reservation cache populated at the start of XML processing.
If cache is not available, falls back to database queries.
@@ -908,6 +913,7 @@ class ConversionService:
guest_first_name: Guest first name
guest_last_name: Guest last name
guest_email: Guest email
guest_birth_date: Guest birth date (optional, improves matching confidence)
session: AsyncSession to use. If None, uses self.session.
Returns:
@@ -956,7 +962,7 @@ class ConversionService:
all_reservations = db_result.scalars().all()
return self._match_reservations_by_guest_details(
all_reservations, guest_first_name, guest_last_name, guest_email
all_reservations, guest_first_name, guest_last_name, guest_email, guest_birth_date
)
def _match_reservations_by_guest_details(
@@ -965,65 +971,148 @@ class ConversionService:
guest_first_name: str | None,
guest_last_name: str | None,
guest_email: str | None,
guest_birth_date: date | None = None,
) -> Reservation | None:
"""Match a reservation from a list by guest name and email (non-async).
"""Match a reservation from a list by guest name, email, and birth date (non-async).
Uses a scoring system to find the best match:
- Exact email match: 100 points (highest confidence)
- Exact name + exact birth date: 90 points (very high confidence)
- Fuzzy name + exact birth date: 75-85 points (medium-high confidence)
- Exact name only: 50 points (lower confidence, could be common names)
- Fuzzy name only: 50-60 points (requires high similarity, works without birth date)
Args:
reservations: List of reservations to search through
guest_first_name: Guest first name
guest_last_name: Guest last name
guest_email: Guest email
guest_birth_date: Guest birth date (optional, improves confidence)
Returns:
Matched Reservation or None
Matched Reservation with highest score (only if score >= 50), or None
"""
# Filter by guest details
FUZZY_THRESHOLD = 0.75 # Minimum similarity ratio for fuzzy matching
MIN_SCORE = 50 # Minimum score to consider a match valid
candidates = []
for reservation in reservations:
customer = reservation.customer
if not customer:
continue
# Match by email (highest priority)
if guest_email:
if (
customer.email_address
and customer.email_address.lower() == guest_email.lower()
):
score = 0
match_details = []
# Strategy 1: Match by email (highest priority)
if guest_email and customer.email_address:
if customer.email_address.lower() == guest_email.lower():
score = 100
match_details.append("exact_email")
_LOGGER.info(
"Found exact email match for %s (reservation_id=%s)",
"Found exact email match for %s (reservation_id=%s, score=%d)",
guest_email,
reservation.id,
score,
)
candidates.append((reservation, 3)) # Highest score
candidates.append((reservation, score))
continue
# Match by name (first + last)
# Strategy 2: Match by name and birth date
if guest_first_name and guest_last_name:
first_match = (
# Exact name matching
first_exact = (
customer.given_name
and customer.given_name.lower() == guest_first_name.lower()
)
last_match = (
last_exact = (
customer.surname
and customer.surname.lower() == guest_last_name.lower()
)
if first_match and last_match:
# Fuzzy name matching
first_fuzzy_ratio = 0.0
last_fuzzy_ratio = 0.0
if customer.given_name:
first_fuzzy_ratio = SequenceMatcher(
None,
customer.given_name.lower(),
guest_first_name.lower(),
).ratio()
if customer.surname:
last_fuzzy_ratio = SequenceMatcher(
None,
customer.surname.lower(),
guest_last_name.lower(),
).ratio()
# Birth date matching
birth_date_match = (
guest_birth_date
and customer.birth_date
and customer.birth_date == str(guest_birth_date)
)
# Calculate score based on name matching
if first_exact and last_exact:
if birth_date_match:
score = 90 # Exact name + exact birth date
match_details.append("exact_name_exact_birthdate")
else:
score = 50 # Exact name only
match_details.append("exact_name_only")
_LOGGER.info(
"Found exact name match for %s %s (reservation_id=%s)",
"Found exact name match for %s %s (reservation_id=%s, score=%d, match_details=%s)",
guest_first_name,
guest_last_name,
reservation.id,
score,
match_details,
)
candidates.append((reservation, 2)) # Medium-high score
continue
elif (
first_fuzzy_ratio >= FUZZY_THRESHOLD
and last_fuzzy_ratio >= FUZZY_THRESHOLD
):
# Both first and last names are fuzzy matched
fuzzy_score = min(first_fuzzy_ratio, last_fuzzy_ratio)
if birth_date_match:
# Fuzzy name + exact birth date (high confidence)
score = int(75 + (fuzzy_score - FUZZY_THRESHOLD) * 40)
match_details.append(
f"fuzzy_name_exact_birthdate(ratio={fuzzy_score:.2f})"
)
else:
# Fuzzy name only (lower confidence, but still usable)
# Scale from 50-60 based on similarity ratio above threshold
score = int(50 + (fuzzy_score - FUZZY_THRESHOLD) * 40)
match_details.append(f"fuzzy_name_only(ratio={fuzzy_score:.2f})")
_LOGGER.info(
"Found fuzzy name match for %s %s (reservation_id=%s, score=%d, match_details=%s)",
guest_first_name,
guest_last_name,
reservation.id,
score,
match_details,
)
# Only add candidates that meet minimum score threshold
if score >= MIN_SCORE:
candidates.append((reservation, score))
# Return highest-scoring match
if candidates:
candidates.sort(key=lambda x: x[1], reverse=True)
return candidates[0][0]
best_match = candidates[0][0]
best_score = candidates[0][1]
_LOGGER.debug(
"Selected best match (reservation_id=%s) with score=%d out of %d candidates",
best_match.id,
best_score,
len(candidates),
)
return best_match
return None

View File

@@ -116,6 +116,21 @@ class CSVImporter:
self.customer_service = CustomerService(db_session)
self.reservation_service = ReservationService(db_session)
def _get_hotel_info(self, hotel_code: str) -> tuple[str, str]:
"""Get hotel name from config by hotel_code.
Args:
hotel_code: Hotel code to look up
Returns:
Tuple of (hotel_code, hotel_name) from config
"""
for hotel in self.config.get("alpine_bits_auth", []):
if hotel.get("hotel_id") == hotel_code:
return hotel_code, hotel.get("hotel_name", "")
# Fallback to default if not found
return hotel_code, self.config.get("default_hotel_name", "Frangart Inn")
async def find_duplicate_reservation(
self,
first_name: str,
@@ -184,13 +199,13 @@ class CSVImporter:
return None
async def import_csv_file(
self, csv_file_path: str, hotel_code: Optional[str] = None, dryrun: bool = False, pre_acknowledge: bool = False, client_id: Optional[str] = None, username: Optional[str] = None
self, csv_file_path: str, hotel_code: str, dryrun: bool = False, pre_acknowledge: bool = False, client_id: Optional[str] = None, username: Optional[str] = None
) -> dict[str, Any]:
"""Import reservations from a CSV file.
Args:
csv_file_path: Path to CSV file
hotel_code: Optional hotel code to override CSV values
hotel_code: Hotel code (mandatory) - used to look up hotel name from config
dryrun: If True, parse and print first 10 rows as JSON without importing
pre_acknowledge: If True, pre-acknowledges all imported reservations
client_id: Client ID for pre-acknowledgement (required if pre_acknowledge=True)
@@ -432,16 +447,8 @@ class CSVImporter:
else:
submission_id = f"csv_import_{row_num}_{datetime.now().isoformat()}"
# Determine hotel code and name
final_hotel_code = (
hotel_code
or str(row.get("hotel_id", "")).strip()
or self.config.get("default_hotel_code", "123")
)
final_hotel_name = (
str(row.get("hotel_name", "")).strip()
or self.config.get("default_hotel_name", "Frangart Inn")
)
# Determine hotel code and name (from config)
final_hotel_code, final_hotel_name = self._get_hotel_info(hotel_code)
# Parse room type fields if available
room_type_code = str(row.get("room_type_code", "")).strip() or None