Importing mailbox leads now works

2025-11-19 09:55:54 +01:00
parent 57dac8514c
commit e8cdc75421
8 changed files with 111063 additions and 32 deletions
--- a/src/alpine_bits_python/csv_import.py
+++ b/src/alpine_bits_python/csv_import.py
@@ -1,9 +1,11 @@
-"""CSV import functionality for landing page forms.
+"""CSV import functionality for landing page forms and email lead exports.

-Handles importing CSV data from landing_page_form.csv and creating/updating
-reservations and customers in the database.
+Handles importing CSV data from landing_page_form.csv and email lead exports
+(from extract_leads.py) and creating/updating reservations and customers in
+the database. Supports both German (landing page form) and English (email lead
+export) column names.

-Supported CSV columns:
+Supported CSV columns (German - Landing Page Form):
 - Zeit der Einreichung: Submission timestamp
 - Angebot auswählen: Room offer
 - Anreisedatum: Check-in date (YYYY-MM-DD or DD.MM.YYYY)
@@ -24,6 +26,24 @@ Supported CSV columns:
 - hotelid: Hotel ID
 - hotelname: Hotel name

+Supported CSV columns (English - Email Lead Export):
+- name: First name (required)
+- lastname: Last name (required)
+- mail: Email address
+- tel: Phone number
+- anreise: Check-in date (YYYY-MM-DD or DD.MM.YYYY)
+- abreise: Check-out date (YYYY-MM-DD or DD.MM.YYYY)
+- erwachsene: Number of adults
+- kinder: Number of children
+- kind_ages: Child ages as comma-separated string (e.g., "3,6,10")
+- apartments: Apartment preferences
+- verpflegung: Meal plan preference
+- sprache: Language preference
+- device: Device information
+- anrede: Title/salutation
+- land: Country
+- privacy: Privacy consent
+
 Duplicate detection uses: name + email + dates + fbclid/gclid combination
 """

@@ -57,6 +77,7 @@ class CSVImporter:

    # Column rename mapping for CSV import
    COLUMN_RENAME_MAP = {
+        # German column names (from landing page form CSV)
        "Zeit der Einreichung": "submission_timestamp",
        "Angebot auswählen": "room_offer",
        "Anreisedatum": "check_in_date",
@@ -82,6 +103,31 @@ class CSVImporter:
        "Phone": "phone",
        "Message": "message",
        "Einwilligung Marketing": "newsletter_opt_in",
+        "Kinder": "children",
+
+        # English column names (from leads export CSV)
+        "name": "first_name",
+        "lastname": "last_name",
+        "mail": "email",
+        "tel": "phone",
+        "anreise": "check_in_date",
+        "abreise": "check_out_date",
+        "erwachsene": "num_adults",
+        "kinder": "num_children",
+        "kind_ages": "kind_ages_csv",  # Special handling - comma-separated ages
+        "apartments": "room_offer",
+        "verpflegung": "meal_plan",
+        "sprache": "language",
+        "device": "device",
+        "anrede": "salutation",
+        "land": "country",
+        "privacy": "privacy_consent",
+
+        # German alternate names for leads export columns; NOTE: this "Kinder" key overrides the earlier "Kinder": "children" entry (last duplicate wins)
+        "Erwachsene": "num_adults",
+        "Kinder": "num_children",
+
+        # Standard tracking columns
        "utm_Source": "utm_source",
        "utm_Medium": "utm_medium",
        "utm_Campaign": "utm_campaign",
@@ -98,7 +144,6 @@ class CSVImporter:
        "hotelname": "hotel_name",
        "roomtypecode": "room_type_code",
        "roomclassificationcode": "room_classification_code",
-        "Kinder": "children",
        # Handle unnamed columns - these get default names like "Unnamed: 0"
        # The age columns appear to be in positions 6-15 (0-indexed) based on dry run output
        # We'll handle these via positional renaming in import_csv_file
@@ -271,8 +316,9 @@ class CSVImporter:
            # Handle positional renaming for child age columns
            # After "num_children" (column 5, 0-indexed), the next 10 columns are child ages
            # and columns after that are duplicates (child_1_age_duplicate, child_2_age_duplicate)
+            # BUT only if we don't already have kind_ages_csv (from leads export format)
            col_list = list(df.columns)
-            if "num_children" in col_list:
+            if "num_children" in col_list and "kind_ages_csv" not in col_list:
                num_children_idx = col_list.index("num_children")
                # The 10 columns after num_children are child ages (1-10)
                for i in range(1, 11):
@@ -390,40 +436,81 @@ class CSVImporter:
                # Extract children ages from columns (including duplicates)
                children_ages = []

-                # Try to extract ages from renamed columns first
-                # Check primary child age columns (1-10)
-                for i in range(1, 11):
-                    age_key = f"child_{i}_age"
-                    age_val = row.get(age_key, "")
-                    if age_val != "" and age_val is not None:
-                        try:
-                            # Handle both int and float values (e.g., 3, 3.0)
-                            age = int(float(age_val))
-                            if 0 <= age <= 17:
-                                children_ages.append(age)
-                        except (ValueError, TypeError):
-                            pass
+                # Check if we have kind_ages_csv (from leads export format)
+                kind_ages_csv = str(row.get("kind_ages_csv", "")).strip()
+                if kind_ages_csv and kind_ages_csv.lower() != "nan":
+                    # Parse comma-separated ages
+                    try:
+                        ages_list = [int(age.strip()) for age in kind_ages_csv.split(",") if age.strip()]
+                        # Separate valid children (0-17) from young adults (18+)
+                        # Anyone aged 18 or older is counted as an adult, not a child
+                        valid_children = [age for age in ages_list if 0 <= age <= 17]
+                        young_adults = [age for age in ages_list if age >= 18]

-                # Check for duplicate child age columns (e.g., child_1_age_duplicate, child_2_age_duplicate)
-                for i in range(1, 3):  # Only 1.1 and 2.1 duplicates mentioned
-                    age_key = f"child_{i}_age_duplicate"
-                    age_val = row.get(age_key, "")
-                    if age_val != "" and age_val is not None:
-                        try:
-                            # Handle both int and float values (e.g., 3, 3.0)
-                            age = int(float(age_val))
-                            if 0 <= age <= 17:
-                                children_ages.append(age)
-                        except (ValueError, TypeError): 
-                            pass
+                        children_ages = valid_children
+
+                        # If we found 18+ year olds, adjust num_children and num_adults accordingly
+                        if young_adults:
+                            num_children = len(valid_children)
+                            num_adults += len(young_adults)
+                            _LOGGER.debug(
+                                f"Row {row_num}: Found {len(young_adults)} young adults (18+). "
+                                f"Adjusted num_children to {num_children}, num_adults to {num_adults}"
+                            )
+                    except (ValueError, TypeError):
+                        pass
+
+                # If kind_ages_csv yielded no child ages (missing, unparsable, or only 18+), fall back to the individual age columns
+                if not children_ages:
+                    young_adults = []
+                    # Try to extract ages from renamed columns first
+                    # Check primary child age columns (1-10)
+                    for i in range(1, 11):
+                        age_key = f"child_{i}_age"
+                        age_val = row.get(age_key, "")
+                        if age_val != "" and age_val is not None:
+                            try:
+                                # Handle both int and float values (e.g., 3, 3.0)
+                                age = int(float(age_val))
+                                if 0 <= age <= 17:
+                                    children_ages.append(age)
+                                elif age >= 18:
+                                    young_adults.append(age)
+                            except (ValueError, TypeError):
+                                pass
+
+                    # Check for duplicate child age columns (e.g., child_1_age_duplicate, child_2_age_duplicate)
+                    for i in range(1, 3):  # Only 1.1 and 2.1 duplicates mentioned
+                        age_key = f"child_{i}_age_duplicate"
+                        age_val = row.get(age_key, "")
+                        if age_val != "" and age_val is not None:
+                            try:
+                                # Handle both int and float values (e.g., 3, 3.0)
+                                age = int(float(age_val))
+                                if 0 <= age <= 17:
+                                    children_ages.append(age)
+                                elif age >= 18:
+                                    young_adults.append(age)
+                            except (ValueError, TypeError):
+                                pass
+
+                    # Adjust num_children and num_adults if we found 18+ year olds
+                    if young_adults:
+                        num_children = len(children_ages)
+                        num_adults += len(young_adults)
+                        _LOGGER.debug(
+                            f"Row {row_num}: Found {len(young_adults)} young adults (18+) in individual columns. "
+                            f"Adjusted num_children to {num_children}, num_adults to {num_adults}"
+                        )

                # Debug: log extraction details
                _LOGGER.debug(
-                    "Row %d: num_children=%d, extracted %d ages: %s",
+                    "Row %d: num_children=%d, extracted %d ages: %s, kind_ages_csv=%s",
                    row_num,
                    num_children,
                    len(children_ages),
                    children_ages,
+                    kind_ages_csv,
                )

                # If we extracted ages but num_children says there are different number,