Importing mailbox leads now works

This commit is contained in:
Jonas Linter
2025-11-19 09:55:54 +01:00
parent 57dac8514c
commit e8cdc75421
8 changed files with 111063 additions and 32 deletions

View File

@@ -1,9 +1,11 @@
"""CSV import functionality for landing page forms.
"""CSV import functionality for landing page forms and email lead exports.
Handles importing CSV data from landing_page_form.csv and creating/updating
reservations and customers in the database.
Handles importing CSV data from landing_page_form.csv and email lead exports
(from extract_leads.py) and creating/updating reservations and customers in
the database. Supports both the German landing page form column names and the
lowercase column names used by the email lead export.
Supported CSV columns:
Supported CSV columns (German - Landing Page Form):
- Zeit der Einreichung: Submission timestamp
- Angebot auswählen: Room offer
- Anreisedatum: Check-in date (YYYY-MM-DD or DD.MM.YYYY)
@@ -24,6 +26,24 @@ Supported CSV columns:
- hotelid: Hotel ID
- hotelname: Hotel name
Supported CSV columns (Email Lead Export - lowercase English/German names):
- name: First name (required)
- lastname: Last name (required)
- mail: Email address
- tel: Phone number
- anreise: Check-in date (YYYY-MM-DD or DD.MM.YYYY)
- abreise: Check-out date (YYYY-MM-DD or DD.MM.YYYY)
- erwachsene: Number of adults
- kinder: Number of children
- kind_ages: Child ages as comma-separated string (e.g., "3,6,10")
- apartments: Apartment preferences
- verpflegung: Meal plan preference
- sprache: Language preference
- device: Device information
- anrede: Title/salutation
- land: Country
- privacy: Privacy consent
Duplicate detection uses: name + email + dates + fbclid/gclid combination
"""
@@ -57,6 +77,7 @@ class CSVImporter:
# Column rename mapping for CSV import
COLUMN_RENAME_MAP = {
# German column names (from landing page form CSV)
"Zeit der Einreichung": "submission_timestamp",
"Angebot auswählen": "room_offer",
"Anreisedatum": "check_in_date",
@@ -82,6 +103,31 @@ class CSVImporter:
"Phone": "phone",
"Message": "message",
"Einwilligung Marketing": "newsletter_opt_in",
"Kinder": "children",
# Lowercase column names from the leads export CSV (a mix of English and German terms)
"name": "first_name",
"lastname": "last_name",
"mail": "email",
"tel": "phone",
"anreise": "check_in_date",
"abreise": "check_out_date",
"erwachsene": "num_adults",
"kinder": "num_children",
"kind_ages": "kind_ages_csv", # Special handling - comma-separated ages
"apartments": "room_offer",
"verpflegung": "meal_plan",
"sprache": "language",
"device": "device",
"anrede": "salutation",
"land": "country",
"privacy": "privacy_consent",
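# German alternate names for leads export columns
# NOTE(review): "Kinder" also appears earlier in this dict mapped to "children";
# in a dict literal the later entry wins, so confirm "num_children" is the intended target.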
"Erwachsene": "num_adults",
"Kinder": "num_children",
# Standard tracking columns
"utm_Source": "utm_source",
"utm_Medium": "utm_medium",
"utm_Campaign": "utm_campaign",
@@ -98,7 +144,6 @@ class CSVImporter:
"hotelname": "hotel_name",
"roomtypecode": "room_type_code",
"roomclassificationcode": "room_classification_code",
"Kinder": "children",
# Handle unnamed columns - these get default names like "Unnamed: 0"
# The age columns appear to be in positions 6-15 (0-indexed) based on dry run output
# We'll handle these via positional renaming in import_csv_file
@@ -271,8 +316,9 @@ class CSVImporter:
# Handle positional renaming for child age columns
# After "num_children" (column 5, 0-indexed), the next 10 columns are child ages
# and columns after that are duplicates (child_1_age_duplicate, child_2_age_duplicate)
# BUT only if we don't already have kind_ages_csv (from leads export format)
col_list = list(df.columns)
if "num_children" in col_list:
if "num_children" in col_list and "kind_ages_csv" not in col_list:
num_children_idx = col_list.index("num_children")
# The 10 columns after num_children are child ages (1-10)
for i in range(1, 11):
@@ -390,40 +436,81 @@ class CSVImporter:
# Extract children ages from columns (including duplicates)
children_ages = []
# Try to extract ages from renamed columns first
# Check primary child age columns (1-10)
for i in range(1, 11):
age_key = f"child_{i}_age"
age_val = row.get(age_key, "")
if age_val != "" and age_val is not None:
try:
# Handle both int and float values (e.g., 3, 3.0)
age = int(float(age_val))
if 0 <= age <= 17:
children_ages.append(age)
except (ValueError, TypeError):
pass
# Check if we have kind_ages_csv (from leads export format)
kind_ages_csv = str(row.get("kind_ages_csv", "")).strip()
if kind_ages_csv and kind_ages_csv.lower() != "nan":
# Parse comma-separated ages
try:
ages_list = [int(age.strip()) for age in kind_ages_csv.split(",") if age.strip()]
# Separate valid children (0-17) from young adults (18+)
# Ages 18 and above are counted as adults, not children
valid_children = [age for age in ages_list if 0 <= age <= 17]
young_adults = [age for age in ages_list if age >= 18]
# Check for duplicate child age columns (e.g., child_1_age_duplicate, child_2_age_duplicate)
for i in range(1, 3): # Only two duplicate columns are known (the "1.1" and "2.1" headers), covering child 1 and child 2
age_key = f"child_{i}_age_duplicate"
age_val = row.get(age_key, "")
if age_val != "" and age_val is not None:
try:
# Handle both int and float values (e.g., 3, 3.0)
age = int(float(age_val))
if 0 <= age <= 17:
children_ages.append(age)
except (ValueError, TypeError):
pass
children_ages = valid_children
# If we found 18+ year olds, adjust num_children and num_adults accordingly
if young_adults:
num_children = len(valid_children)
num_adults += len(young_adults)
_LOGGER.debug(
f"Row {row_num}: Found {len(young_adults)} young adults (18+). "
f"Adjusted num_children to {num_children}, num_adults to {num_adults}"
)
except (ValueError, TypeError):
pass
# If no kind_ages_csv, try to extract ages from individual columns
if not children_ages:
young_adults = []
# Try to extract ages from renamed columns first
# Check primary child age columns (1-10)
for i in range(1, 11):
age_key = f"child_{i}_age"
age_val = row.get(age_key, "")
if age_val != "" and age_val is not None:
try:
# Handle both int and float values (e.g., 3, 3.0)
age = int(float(age_val))
if 0 <= age <= 17:
children_ages.append(age)
elif age >= 18:
young_adults.append(age)
except (ValueError, TypeError):
pass
# Check for duplicate child age columns (e.g., child_1_age_duplicate, child_2_age_duplicate)
for i in range(1, 3): # Only two duplicate columns are known (the "1.1" and "2.1" headers), covering child 1 and child 2
age_key = f"child_{i}_age_duplicate"
age_val = row.get(age_key, "")
if age_val != "" and age_val is not None:
try:
# Handle both int and float values (e.g., 3, 3.0)
age = int(float(age_val))
if 0 <= age <= 17:
children_ages.append(age)
elif age >= 18:
young_adults.append(age)
except (ValueError, TypeError):
pass
# Adjust num_children and num_adults if we found 18+ year olds
if young_adults:
num_children = len(children_ages)
num_adults += len(young_adults)
_LOGGER.debug(
f"Row {row_num}: Found {len(young_adults)} young adults (18+) in individual columns. "
f"Adjusted num_children to {num_children}, num_adults to {num_adults}"
)
# Debug: log extraction details
_LOGGER.debug(
"Row %d: num_children=%d, extracted %d ages: %s",
"Row %d: num_children=%d, extracted %d ages: %s, kind_ages_csv=%s",
row_num,
num_children,
len(children_ages),
children_ages,
kind_ages_csv,
)
# If we extracted ages but num_children reports a different count,