"""Pydantic models for data validation in AlpineBits. These models provide validation for data before it's passed to: - SQLAlchemy database models - AlpineBits XML generation - API endpoints Separating validation (Pydantic) from persistence (SQLAlchemy) and from XML generation (xsdata) follows clean architecture principles. """ import hashlib import json from datetime import UTC, date, datetime from enum import Enum from typing import Any from pydantic import BaseModel, EmailStr, Field, field_validator, model_validator from .const import WebhookStatus # Generalized integer validator for reuse across models def convert_to_int(field_name: str, v: Any) -> int: """Convert a value to integer, handling string inputs. Args: field_name: Name of the field being validated (for error messages) v: Value to convert (can be int, str, or None) Returns: Integer value Raises: ValueError: If value is None or cannot be converted to int """ if v is None: msg = f"{field_name} cannot be None" raise ValueError(msg) if isinstance(v, int): return v if isinstance(v, str): try: return int(v) except ValueError as e: msg = f"{field_name} must be a valid integer, got: {v}" raise ValueError(msg) from e msg = f"{field_name} must be int or str, got: {type(v)}" raise ValueError(msg) # Country name to ISO 3166-1 alpha-2 code mapping COUNTRY_NAME_TO_CODE = { # English names "germany": "DE", "italy": "IT", "austria": "AT", "switzerland": "CH", "france": "FR", "netherlands": "NL", "belgium": "BE", "spain": "ES", "portugal": "PT", "united kingdom": "GB", "uk": "GB", "czech republic": "CZ", "poland": "PL", "hungary": "HU", "croatia": "HR", "slovenia": "SI", # German names "deutschland": "DE", "italien": "IT", "österreich": "AT", "schweiz": "CH", "frankreich": "FR", "niederlande": "NL", "belgien": "BE", "spanien": "ES", "vereinigtes königreich": "GB", "tschechien": "CZ", "polen": "PL", "ungarn": "HU", "kroatien": "HR", "slowenien": "SI", # Italian names "germania": "DE", "italia": "IT", "svizzera": "CH", "francia": "FR", "paesi bassi": "NL", "belgio": "BE", "spagna": "ES", "portogallo": "PT", "regno unito": "GB", "repubblica ceca": "CZ", "polonia": "PL", "ungheria": "HU", "croazia": "HR", } # phonetechtype enum 1,3,5 voice, fax, mobile class PhoneTechType(Enum): VOICE = "1" FAX = "3" MOBILE = "5" class PhoneNumber(BaseModel): """Phone number with optional type.""" number: str = Field(..., min_length=1, max_length=50, pattern=r"^\+?[0-9\s\-()]+$") tech_type: str | None = Field(None, pattern="^[135]$") # 1=voice, 3=fax, 5=mobile @field_validator("number") @classmethod def clean_phone_number(cls, v: str) -> str: """Remove extra spaces from phone number.""" return " ".join(v.split()) class ReservationData(BaseModel): """Validated reservation data.""" unique_id: str = Field(..., min_length=1, max_length=200) md5_unique_id: str | None = Field(None, min_length=1, max_length=32) start_date: date end_date: date created_at: datetime = Field(default_factory=datetime.now) num_adults: int = Field(..., ge=1) num_children: int = Field(0, ge=0, le=10) children_ages: list[int] = Field(default_factory=list) hotel_id: str = Field(..., min_length=1, max_length=50) hotel_name: str | None = Field(None, max_length=200) offer: str | None = Field(None, max_length=500) user_comment: str | None = Field(None, max_length=2000) fbclid: str | None = Field(None, max_length=300) gclid: str | None = Field(None, max_length=300) # Advertising account IDs (populated conditionally based on fbclid/gclid) meta_account_id: str | None = Field(None, max_length=200) google_account_id: str | None = Field(None, max_length=200) utm_source: str | None = Field(None, max_length=150) utm_medium: str | None = Field(None, max_length=150) utm_campaign: str | None = Field(None, max_length=150) utm_term: str | None = Field(None, max_length=150) utm_content: str | None = Field(None, max_length=150) # RoomTypes fields (optional) room_type_code: str | None = Field(None, min_length=1, max_length=8) room_classification_code: str | None = Field(None, pattern=r"[0-9]+") room_type: str | None = Field(None, pattern=r"^[1-5]$") @model_validator(mode="after") def ensure_md5(self) -> "ReservationData": """Ensure md5_unique_id is set after model validation. Using a model_validator in 'after' mode lets us access all fields via the instance and set md5_unique_id in-place when it wasn't provided. """ if not getattr(self, "md5_unique_id", None) and getattr( self, "unique_id", None ): self.md5_unique_id = hashlib.md5(self.unique_id.encode("utf-8")).hexdigest() return self @model_validator(mode="after") def validate_children_ages(self) -> "ReservationData": """Ensure children_ages matches num_children.""" if len(self.children_ages) != self.num_children: raise ValueError( f"Number of children ages ({len(self.children_ages)}) " f"must match num_children ({self.num_children})" ) for age in self.children_ages: if age < 0 or age > 17: raise ValueError(f"Child age {age} must be between 0 and 17") return self class CustomerData(BaseModel): """Validated customer data for creating reservations and guests.""" given_name: str = Field(..., min_length=1, max_length=100) surname: str = Field(..., min_length=1, max_length=100) name_prefix: str | None = Field(None, max_length=20) name_title: str | None = Field(None, max_length=20) phone_numbers: list[tuple[str, None | PhoneTechType]] = Field(default_factory=list) email_address: EmailStr | None = None email_newsletter: bool | None = None address_line: str | None = Field(None, max_length=255) city_name: str | None = Field(None, max_length=100) postal_code: str | None = Field(None, max_length=20) country_code: str | None = Field(None, min_length=2, max_length=2) address_catalog: bool | None = None gender: str | None = Field(None, pattern="^(Male|Female|Unknown)$") birth_date: str | None = Field(None, pattern=r"^\d{4}-\d{2}-\d{2}$") # ISO format language: str | None = Field(None, min_length=2, max_length=2, pattern="^[a-z]{2}$") @field_validator("given_name", "surname") @classmethod def name_must_not_be_empty(cls, v: str) -> str: """Ensure names are not just whitespace.""" if not v.strip(): raise ValueError("Name cannot be empty or whitespace") return v.strip() @field_validator("country_code", mode="before") @classmethod def normalize_country_code(cls, v: str | None) -> str | None: """Normalize country input to ISO 3166-1 alpha-2 code. Handles: - Country names in English, German, and Italian - Already valid 2-letter codes (case-insensitive) - None/empty values Runs in 'before' mode to normalize before other validations. This ensures that old data saved incorrectly in the database is transformed into the correct format when retrieved, and that new data is always normalized regardless of the source. Args: v: Country name or code (case-insensitive) Returns: 2-letter ISO country code (uppercase) or None if input is None/empty """ if not v: return None # Convert to string and strip whitespace country_input = str(v).strip() if not country_input: return None # If already 2 letters, assume it's a country code (ISO 3166-1 alpha-2) iso_country_code_length = 2 if len(country_input) == iso_country_code_length and country_input.isalpha(): return country_input.upper() # Try to match as country name (case-insensitive) country_lower = country_input.lower() return COUNTRY_NAME_TO_CODE.get(country_lower, country_input) @field_validator("language") @classmethod def normalize_language(cls, v: str | None) -> str | None: """Normalize language code to lowercase.""" return v.lower() if v else None model_config = {"from_attributes": True} # Allow creation from ORM models class HotelReservationIdData(BaseModel): """Validated hotel reservation ID data.""" res_id_type: str = Field(..., pattern=r"^[0-9]+$") # Must be numeric string res_id_value: str | None = Field(None, min_length=1, max_length=64) res_id_source: str | None = Field(None, min_length=1, max_length=64) res_id_source_context: str | None = Field(None, min_length=1, max_length=64) @field_validator( "res_id_value", "res_id_source", "res_id_source_context", mode="before" ) @classmethod def trim_and_truncate(cls, v: str | None) -> str | None: """Trim whitespace and truncate to max length if needed. Runs BEFORE field validation to ensure values are cleaned and truncated before max_length constraints are checked. """ if not v: return None # Convert to string if needed v = str(v) # Strip whitespace v = v.strip() # Convert empty strings to None if not v: return None # Truncate to 64 characters if needed if len(v) > 64: v = v[:64] return v model_config = {"from_attributes": True} class CommentListItemData(BaseModel): """Validated comment list item.""" value: str = Field(..., min_length=1, max_length=1000) list_item: str = Field(..., pattern=r"^[0-9]+$") # Numeric identifier language: str = Field(..., min_length=2, max_length=2, pattern=r"^[a-z]{2}$") @field_validator("language") @classmethod def normalize_language(cls, v: str) -> str: """Normalize language to lowercase.""" return v.lower() model_config = {"from_attributes": True} class CommentData(BaseModel): """Validated comment data.""" name: str # Should be validated against CommentName2 enum text: str | None = Field(None, max_length=4000) list_items: list[CommentListItemData] = Field(default_factory=list) @field_validator("list_items") @classmethod def validate_list_items( cls, v: list[CommentListItemData] ) -> list[CommentListItemData]: """Ensure list items have unique identifiers.""" if v: item_ids = [item.list_item for item in v] if len(item_ids) != len(set(item_ids)): raise ValueError("List items must have unique identifiers") return v model_config = {"from_attributes": True} class CommentsData(BaseModel): """Validated comments collection.""" comments: list[CommentData] = Field(default_factory=list, max_length=3) @field_validator("comments") @classmethod def validate_comment_count(cls, v: list[CommentData]) -> list[CommentData]: """Ensure maximum 3 comments.""" if len(v) > 3: raise ValueError("Maximum 3 comments allowed") return v model_config = {"from_attributes": True} class HotelData(BaseModel): """Validated hotel configuration data.""" hotel_id: str = Field(..., min_length=1, max_length=50) hotel_name: str = Field(..., min_length=1, max_length=200) username: str = Field(..., min_length=1, max_length=100) password_hash: str = Field(..., min_length=1, max_length=200) meta_account_id: str | None = Field(None, max_length=50) google_account_id: str | None = Field(None, max_length=50) push_endpoint_url: str | None = Field(None, max_length=500) push_endpoint_token: str | None = Field(None, max_length=200) push_endpoint_username: str | None = Field(None, max_length=100) created_at: datetime = Field(default_factory=lambda: datetime.now()) updated_at: datetime = Field(default_factory=lambda: datetime.now()) is_active: bool = Field(default=True) @field_validator("hotel_id", "hotel_name", "username") @classmethod def strip_whitespace(cls, v: str) -> str: """Remove leading/trailing whitespace.""" return v.strip() model_config = {"from_attributes": True} class WebhookEndpointData(BaseModel): """Validated webhook endpoint configuration data.""" hotel_id: str = Field(..., min_length=1, max_length=50) webhook_secret: str = Field(..., min_length=1, max_length=64) webhook_type: str = Field(..., min_length=1, max_length=50) description: str | None = Field(None, max_length=200) is_enabled: bool = Field(default=True) created_at: datetime = Field(default_factory=lambda: datetime.now()) @field_validator("hotel_id", "webhook_secret", "webhook_type") @classmethod def strip_whitespace(cls, v: str) -> str: """Remove leading/trailing whitespace.""" return v.strip() model_config = {"from_attributes": True} class WebhookRequestData(BaseModel): """Validated webhook request data. This model handles the special case where: - payload_json is required for creation (to calculate payload_hash) - payload_json becomes optional after processing (can be purged for privacy/storage) - payload_hash is auto-calculated from payload_json when provided """ # Required fields payload_json: dict[str, Any] | None = Field( ..., description="Webhook payload (required for creation, nullable after purge)" ) # Auto-calculated from payload_json payload_hash: str | None = Field( None, min_length=64, max_length=64, description="SHA256 hash of canonical JSON payload (auto-calculated)", ) # Optional foreign keys webhook_endpoint_id: int | None = Field(None, gt=0) hotel_id: str | None = Field(None, max_length=50) # Processing tracking status: WebhookStatus = Field(default=WebhookStatus.PENDING) processing_started_at: datetime | None = None processing_completed_at: datetime | None = None # Retry handling retry_count: int = Field(default=0, ge=0) last_error: str | None = Field(None, max_length=2000) # Payload metadata purged_at: datetime | None = None # Request metadata created_at: datetime = Field(default_factory=lambda: datetime.now()) source_ip: str | None = Field(None, max_length=45) user_agent: str | None = Field(None, max_length=500) # Result tracking created_customer_id: int | None = Field(None, gt=0) created_reservation_id: int | None = Field(None, gt=0) @model_validator(mode="after") def calculate_payload_hash(self) -> "WebhookRequestData": """Auto-calculate payload_hash from payload_json if not provided. Uses the same hashing algorithm as api.py: - Canonical JSON with sorted keys - UTF-8 encoding - SHA256 hash This runs after all field validation, so we can access the validated payload_json. """ # Only calculate if payload_json is provided and payload_hash is not set if self.payload_json is not None and self.payload_hash is None: # Create canonical JSON string (sorted keys for consistency) payload_json_str = json.dumps(self.payload_json, sort_keys=True) # Calculate SHA256 hash self.payload_hash = hashlib.sha256( payload_json_str.encode("utf-8") ).hexdigest() return self @model_validator(mode="after") def validate_payload_hash_requirements(self) -> "WebhookRequestData": """Ensure payload_hash is present (either provided or calculated). This validator runs after calculate_payload_hash, so payload_hash should be set if payload_json was provided. """ if self.payload_hash is None: raise ValueError( "payload_hash is required. It can be auto-calculated from payload_json " "or explicitly provided." ) return self @field_validator("status", mode="before") @classmethod def normalize_status(cls, v: str | WebhookStatus) -> WebhookStatus: """Normalize status to WebhookStatus enum.""" if isinstance(v, WebhookStatus): return v if isinstance(v, str): return WebhookStatus(v) raise ValueError(f"Invalid webhook status: {v}") model_config = {"from_attributes": True} # Example usage in a service layer class ConversionGuestData(BaseModel): """Validated conversion guest data from PMS XML. Handles validation and hashing for guest records extracted from hotel PMS conversion XML files. """ hotel_id: str = Field(..., min_length=1, max_length=50) guest_id: int = Field(..., gt=0) guest_first_name: str | None = Field(None, max_length=100) guest_last_name: str | None = Field(None, max_length=100) guest_email: str | None = Field(None, max_length=200) guest_country_code: str | None = Field(None, max_length=10) guest_birth_date: date | None = None # Auto-calculated hashed fields hashed_first_name: str | None = Field(None, max_length=64) hashed_last_name: str | None = Field(None, max_length=64) hashed_email: str | None = Field(None, max_length=64) hashed_country_code: str | None = Field(None, max_length=64) hashed_birth_date: str | None = Field(None, max_length=64) # Timestamps first_seen: datetime = Field(default_factory=lambda: datetime.now(UTC)) last_seen: datetime = Field(default_factory=lambda: datetime.now(UTC)) @staticmethod def _normalize_and_hash(value: str | None) -> str | None: """Normalize and hash a value for privacy-preserving matching. Uses the same logic as ConversionGuest._normalize_and_hash. """ if value is None or value == "": return None # Normalize: lowercase, strip whitespace normalized = value.lower().strip() if not normalized: return None # Hash with SHA256 return hashlib.sha256(normalized.encode("utf-8")).hexdigest() @model_validator(mode="after") def calculate_hashes(self) -> "ConversionGuestData": """Auto-calculate hashed fields from plain text fields.""" if self.hashed_first_name is None: self.hashed_first_name = self._normalize_and_hash(self.guest_first_name) if self.hashed_last_name is None: self.hashed_last_name = self._normalize_and_hash(self.guest_last_name) if self.hashed_email is None: self.hashed_email = self._normalize_and_hash(self.guest_email) if self.hashed_country_code is None: self.hashed_country_code = self._normalize_and_hash(self.guest_country_code) if self.hashed_birth_date is None and self.guest_birth_date is not None: self.hashed_birth_date = self._normalize_and_hash( self.guest_birth_date.isoformat() ) return self @field_validator("guest_id", mode="before") @classmethod def convert_guest_id_to_int(cls, v: Any) -> int: """Convert guest_id to integer (handles string input from XML).""" return convert_to_int("guest_id", v) model_config = {"from_attributes": True} class ConversionData(BaseModel): """Validated conversion data from PMS XML. Handles validation for conversion records extracted from hotel PMS conversion XML files. This model ensures proper type conversion and validation before creating a Conversion database entry. """ # Foreign key references (nullable - matched after creation) reservation_id: int | None = Field(None, gt=0) customer_id: int | None = Field(None, gt=0) # Required reservation metadata from PMS hotel_id: str = Field(..., min_length=1, max_length=50) pms_reservation_id: int = Field(..., gt=0) guest_id: int | None = Field(None, gt=0) # Optional reservation metadata reservation_number: str | None = Field(None, max_length=100) reservation_date: date | None = None creation_time: datetime | None = None reservation_type: str | None = Field(None, max_length=50) booking_channel: str | None = Field(None, max_length=100) # Advertising/tracking data (used for matching) advertising_medium: str | None = Field(None, max_length=200) advertising_partner: str | None = Field(None, max_length=200) advertising_campagne: str | None = Field(None, max_length=500) # Attribution flags directly_attributable: bool = Field(default=False) guest_matched: bool = Field(default=False) # Timestamps (auto-managed) created_at: datetime = Field(default_factory=lambda: datetime.now(UTC)) updated_at: datetime = Field(default_factory=lambda: datetime.now(UTC)) @field_validator( "pms_reservation_id", "guest_id", "reservation_id", "customer_id", mode="before" ) @classmethod def convert_int_fields(cls, v: Any) -> int | None: """Convert integer fields from string to int (handles XML input).""" if v is None or v == "": return None # Get the field name from the validation context if available # For now, use a generic name since we handle multiple fields return convert_to_int("field", v) @field_validator("hotel_id", "reservation_number", "reservation_type", "booking_channel", "advertising_medium", "advertising_partner", "advertising_campagne", mode="before") @classmethod def strip_string_fields(cls, v: str | None) -> str | None: """Strip whitespace from string fields.""" if v is None: return None stripped = str(v).strip() return stripped if stripped else None model_config = {"from_attributes": True}