diff --git a/alembic/versions/2025_12_03_1044-0fbeb40dbb2c_merge_hashed_customers_into_customers.py b/alembic/versions/2025_12_03_1044-0fbeb40dbb2c_merge_hashed_customers_into_customers.py
new file mode 100644
index 0000000..a9f2382
--- /dev/null
+++ b/alembic/versions/2025_12_03_1044-0fbeb40dbb2c_merge_hashed_customers_into_customers.py
@@ -0,0 +1,117 @@
+"""merge_hashed_customers_into_customers
+
+Revision ID: 0fbeb40dbb2c
+Revises: 694d52a883c3
+Create Date: 2025-12-03 10:44:32.243220
+
+"""
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision: str = '0fbeb40dbb2c'
+down_revision: Union[str, Sequence[str], None] = '694d52a883c3'
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    """Merge hashed_customers into customers and re-point dependent tables."""
+    # ### commands auto generated by Alembic - please adjust! ###
+    # Add hashed columns to customers table
+    op.add_column('customers', sa.Column('hashed_email', sa.String(length=64), nullable=True))
+    op.add_column('customers', sa.Column('hashed_phone', sa.String(length=64), nullable=True))
+    op.add_column('customers', sa.Column('hashed_given_name', sa.String(length=64), nullable=True))
+    op.add_column('customers', sa.Column('hashed_surname', sa.String(length=64), nullable=True))
+    op.add_column('customers', sa.Column('hashed_city', sa.String(length=64), nullable=True))
+    op.add_column('customers', sa.Column('hashed_postal_code', sa.String(length=64), nullable=True))
+    op.add_column('customers', sa.Column('hashed_country_code', sa.String(length=64), nullable=True))
+    op.add_column('customers', sa.Column('hashed_gender', sa.String(length=64), nullable=True))
+    op.add_column('customers', sa.Column('hashed_birth_date', sa.String(length=64), nullable=True))
+    op.add_column('customers', sa.Column('created_at', sa.DateTime(timezone=True), nullable=True))
+
+    # Migrate data from hashed_customers to customers
+    op.execute('''
+        UPDATE customers c
+        SET
+            hashed_email = hc.hashed_email,
+            hashed_phone = hc.hashed_phone,
+            hashed_given_name = hc.hashed_given_name,
+            hashed_surname = hc.hashed_surname,
+            hashed_city = hc.hashed_city,
+            hashed_postal_code = hc.hashed_postal_code,
+            hashed_country_code = hc.hashed_country_code,
+            hashed_gender = hc.hashed_gender,
+            hashed_birth_date = hc.hashed_birth_date,
+            created_at = COALESCE(c.created_at, hc.created_at)
+        FROM hashed_customers hc
+        WHERE c.id = hc.customer_id
+    ''')
+
+    # Update reservations to point to customers instead of hashed_customers
+    # First, update reservations.customer_id from reservations.hashed_customer_id
+    op.execute('''
+        UPDATE reservations r
+        SET customer_id = hc.customer_id
+        FROM hashed_customers hc
+        WHERE r.hashed_customer_id = hc.id
+        AND r.customer_id IS NULL
+    ''')
+
+    # Update conversions to point to customers instead of hashed_customers
+    op.execute('''
+        UPDATE conversions c
+        SET customer_id = hc.customer_id
+        FROM hashed_customers hc
+        WHERE c.hashed_customer_id = hc.id
+        AND c.customer_id IS NULL
+    ''')
+
+    # Clear conversion_guests.hashed_customer_id: the links are discarded,
+    # not re-pointed at customers. NOTE(review): confirm this is intended.
+    op.execute('''
+        UPDATE conversion_guests cg
+        SET hashed_customer_id = NULL
+        WHERE hashed_customer_id IS NOT NULL
+    ''')
+
+    # Now safe to drop the FK and column from reservations
+    op.drop_constraint(op.f('reservations_hashed_customer_id_fkey'), 'reservations', type_='foreignkey')
+    op.drop_column('reservations', 'hashed_customer_id')
+
+    # Note: We're keeping the hashed_customers table for now since conversion_service.py still uses it
+    # It can be dropped in a future migration after updating the application code
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    """Re-create reservations.hashed_customer_id and drop the merged columns."""
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.add_column('reservations', sa.Column('hashed_customer_id', sa.INTEGER(), autoincrement=False, nullable=True))
+    op.create_foreign_key(op.f('reservations_hashed_customer_id_fkey'), 'reservations', 'hashed_customers', ['hashed_customer_id'], ['id'], ondelete='CASCADE')
+
+    # Re-link reservations to hashed_customers; upgrade() dropped this column,
+    # so without the backfill every restored value would stay NULL.
+    op.execute('''
+        UPDATE reservations r
+        SET hashed_customer_id = hc.id
+        FROM hashed_customers hc
+        WHERE r.customer_id = hc.customer_id
+    ''')
+
+    # NOTE: conversion_guests.hashed_customer_id was set to NULL by upgrade()
+    # and cannot be recovered here.
+    op.drop_column('customers', 'created_at')
+    op.drop_column('customers', 'hashed_birth_date')
+    op.drop_column('customers', 'hashed_gender')
+    op.drop_column('customers', 'hashed_country_code')
+    op.drop_column('customers', 'hashed_postal_code')
+    op.drop_column('customers', 'hashed_city')
+    op.drop_column('customers', 'hashed_surname')
+    op.drop_column('customers', 'hashed_given_name')
+    op.drop_column('customers', 'hashed_phone')
+    op.drop_column('customers', 'hashed_email')
+    # ### end Alembic commands ###
diff --git a/src/alpine_bits_python/customer_service.py b/src/alpine_bits_python/customer_service.py
index 5fa8d4c..ff73a83 100644
--- a/src/alpine_bits_python/customer_service.py
+++ b/src/alpine_bits_python/customer_service.py
@@ -53,13 +53,14 @@ class CustomerService:
         if "phone" in customer_data:
             customer.phone = customer_data["phone"]
 
-        self.session.add(customer)
-        await self.session.flush()  # Flush to get the customer.id
+        # Set creation timestamp
+        customer.created_at = datetime.now(UTC)
 
-        # Create hashed version
-        hashed_customer = customer.create_hashed_customer()
-        hashed_customer.created_at = datetime.now(UTC)
-        self.session.add(hashed_customer)
+        # Update hashed fields
+        customer.update_hashed_fields()
+
+        self.session.add(customer)
+        await self.session.flush()  # Populate customer.id without a commit
 
         if auto_commit:
             await self.session.commit()
@@ -130,29 +131,8 @@ class CustomerService:
         if "phone" in update_data:
             customer.phone = update_data["phone"]
 
-        # Update or create hashed version
-        result = await self.session.execute(
-            select(HashedCustomer).where(HashedCustomer.customer_id == customer.id)
-        )
-        hashed_customer = result.scalar_one_or_none()
-
-        if hashed_customer:
-            # Update existing hashed customer
-            new_hashed = customer.create_hashed_customer()
-            hashed_customer.hashed_email = new_hashed.hashed_email
-            hashed_customer.hashed_phone = new_hashed.hashed_phone
-            hashed_customer.hashed_given_name = new_hashed.hashed_given_name
-            hashed_customer.hashed_surname = new_hashed.hashed_surname
-            hashed_customer.hashed_city = new_hashed.hashed_city
-            hashed_customer.hashed_postal_code = new_hashed.hashed_postal_code
-            hashed_customer.hashed_country_code = new_hashed.hashed_country_code
-            hashed_customer.hashed_gender = new_hashed.hashed_gender
-            hashed_customer.hashed_birth_date = new_hashed.hashed_birth_date
-        else:
-            # Create new hashed customer if it doesn't exist
-            hashed_customer = customer.create_hashed_customer()
-            hashed_customer.created_at = datetime.now(UTC)
-            self.session.add(hashed_customer)
+        # Update hashed fields
+        customer.update_hashed_fields()
 
         if auto_commit:
             await self.session.commit()
@@ -216,10 +196,11 @@ class CustomerService:
         return result.scalar_one_or_none()
 
     async def hash_existing_customers(self) -> int:
-        """Hash all existing customers that don't have a hashed version yet.
+        """Hash all existing customers that don't have hashed fields populated yet.
 
         This is useful for backfilling hashed data for customers created
-        before the hashing system was implemented.
+        before the hashing system was implemented, or after migrating from
+        the separate hashed_customers table.
 
         Also validates and sanitizes customer data (e.g., normalizes country
         codes to uppercase). Customers with invalid data that cannot be fixed
@@ -229,62 +210,64 @@ class CustomerService:
             Number of customers that were hashed
 
         """
-        # Get all customers
-        result = await self.session.execute(select(Customer))
+        # Get all customers without hashed data
+        result = await self.session.execute(
+            select(Customer).where(Customer.hashed_email.is_(None))
+        )
         customers = result.scalars().all()
 
         hashed_count = 0
         skipped_count = 0
 
         for customer in customers:
-            # Check if this customer already has a hashed version
-            existing_hashed = await self.get_hashed_customer(customer.id)
-            if not existing_hashed:
-                # Validate and sanitize customer data before hashing
-                customer_dict = {
-                    "given_name": customer.given_name,
-                    "surname": customer.surname,
-                    "name_prefix": customer.name_prefix,
-                    "email_address": customer.email_address,
-                    "phone": customer.phone,
-                    "email_newsletter": customer.email_newsletter,
-                    "address_line": customer.address_line,
-                    "city_name": customer.city_name,
-                    "postal_code": customer.postal_code,
-                    "country_code": customer.country_code,
-                    "gender": customer.gender,
-                    "birth_date": customer.birth_date,
-                    "language": customer.language,
-                    "address_catalog": customer.address_catalog,
-                    "name_title": customer.name_title,
-                }
+            # Validate and sanitize customer data before hashing
+            customer_dict = {
+                "given_name": customer.given_name,
+                "surname": customer.surname,
+                "name_prefix": customer.name_prefix,
+                "email_address": customer.email_address,
+                "phone": customer.phone,
+                "email_newsletter": customer.email_newsletter,
+                "address_line": customer.address_line,
+                "city_name": customer.city_name,
+                "postal_code": customer.postal_code,
+                "country_code": customer.country_code,
+                "gender": customer.gender,
+                "birth_date": customer.birth_date,
+                "language": customer.language,
+                "address_catalog": customer.address_catalog,
+                "name_title": customer.name_title,
+            }
 
-                try:
-                    # Validate through Pydantic (normalizes country code)
-                    validated = CustomerData(**customer_dict)
+            try:
+                # Validate through Pydantic (normalizes country code)
+                validated = CustomerData(**customer_dict)
 
-                    # Update customer with sanitized data
-                    # Exclude 'phone_numbers' as Customer model uses 'phone' field
-                    for key, value in validated.model_dump(
-                        exclude_none=True, exclude={"phone_numbers"}
-                    ).items():
-                        if hasattr(customer, key):
-                            setattr(customer, key, value)
+                # Update customer with sanitized data
+                # Exclude 'phone_numbers' as Customer model uses 'phone' field
+                for key, value in validated.model_dump(
+                    exclude_none=True, exclude={"phone_numbers"}
+                ).items():
+                    if hasattr(customer, key):
+                        setattr(customer, key, value)
 
-                    # Create hashed version with sanitized data
-                    hashed_customer = customer.create_hashed_customer()
-                    hashed_customer.created_at = datetime.now(UTC)
-                    self.session.add(hashed_customer)
-                    hashed_count += 1
+                # Update hashed fields with sanitized data
+                customer.update_hashed_fields()
 
-                except ValidationError as e:
-                    # Skip customers with invalid data and log
-                    skipped_count += 1
-                    _LOGGER.warning(
-                        "Skipping customer ID %s due to validation error: %s",
-                        customer.id,
-                        e,
-                    )
+                # Set created_at if not already set
+                if customer.created_at is None:
+                    customer.created_at = datetime.now(UTC)
+
+                hashed_count += 1
+
+            except ValidationError as e:
+                # Skip customers with invalid data and log
+                skipped_count += 1
+                _LOGGER.warning(
+                    "Skipping customer ID %s due to validation error: %s",
+                    customer.id,
+                    e,
+                )
 
         if hashed_count > 0:
             await self.session.commit()
diff --git a/src/alpine_bits_python/db.py b/src/alpine_bits_python/db.py
index f367e8c..303e4bc 100644
--- a/src/alpine_bits_python/db.py
+++ b/src/alpine_bits_python/db.py
@@ -311,6 +311,20 @@ class Customer(Base):
     language = Column(String)
     address_catalog = Column(Boolean)  # Added for XML
     name_title = Column(String)  # Added for XML
+
+    # Hashed fields for Meta Conversion API (SHA256)
+    hashed_email = Column(String(64))
+    hashed_phone = Column(String(64))
+    hashed_given_name = Column(String(64))
+    hashed_surname = Column(String(64))
+    hashed_city = Column(String(64))
+    hashed_postal_code = Column(String(64))
+    hashed_country_code = Column(String(64))
+    hashed_gender = Column(String(64))
+    hashed_birth_date = Column(String(64))
+
+    created_at = Column(DateTime(timezone=True))
+
     reservations = relationship("Reservation", back_populates="customer")
 
     def __repr__(self):
@@ -335,21 +349,17 @@ class Customer(Base):
         # SHA256 hash
         return hashlib.sha256(normalized.encode("utf-8")).hexdigest()
 
-    def create_hashed_customer(self):
-        """Create a HashedCustomer instance from this Customer."""
-        return HashedCustomer(
-            customer_id=self.id,
-            contact_id=self.contact_id,
-            hashed_email=self._normalize_and_hash(self.email_address),
-            hashed_phone=self._normalize_and_hash(self.phone),
-            hashed_given_name=self._normalize_and_hash(self.given_name),
-            hashed_surname=self._normalize_and_hash(self.surname),
-            hashed_city=self._normalize_and_hash(self.city_name),
-            hashed_postal_code=self._normalize_and_hash(self.postal_code),
-            hashed_country_code=self._normalize_and_hash(self.country_code),
-            hashed_gender=self._normalize_and_hash(self.gender),
-            hashed_birth_date=self._normalize_and_hash(self.birth_date),
-        )
+    def update_hashed_fields(self):
+        """Update the hashed fields based on current plaintext values."""
+        self.hashed_email = self._normalize_and_hash(self.email_address)
+        self.hashed_phone = self._normalize_and_hash(self.phone)
+        self.hashed_given_name = self._normalize_and_hash(self.given_name)
+        self.hashed_surname = self._normalize_and_hash(self.surname)
+        self.hashed_city = self._normalize_and_hash(self.city_name)
+        self.hashed_postal_code = self._normalize_and_hash(self.postal_code)
+        self.hashed_country_code = self._normalize_and_hash(self.country_code)
+        self.hashed_gender = self._normalize_and_hash(self.gender)
+        self.hashed_birth_date = self._normalize_and_hash(self.birth_date)
 
 
 class HashedCustomer(Base):
@@ -523,9 +533,6 @@ class Reservation(Base):
     __tablename__ = "reservations"
     id = Column(Integer, primary_key=True)
     customer_id = Column(Integer, ForeignKey("customers.id", ondelete="SET NULL"))
-    hashed_customer_id = Column(
-        Integer, ForeignKey("hashed_customers.id", ondelete="CASCADE")
-    )
     unique_id = Column(String, unique=True)
     md5_unique_id = Column(String(32), unique=True)  # max length 32 guaranteed
     start_date = Column(Date)
@@ -555,7 +562,6 @@ class Reservation(Base):
     room_classification_code = Column(String)
     room_type = Column(String)
     customer = relationship("Customer", back_populates="reservations")
-    hashed_customer = relationship("HashedCustomer", backref="reservations")
 
 
 # Table for tracking acknowledged requests by client