Migration to single customer table works but conversion_service still needs updating

This commit is contained in:
Jonas Linter
2025-12-03 10:51:18 +01:00
parent b572f660a7
commit 3193ceac63
3 changed files with 188 additions and 96 deletions

View File

@@ -0,0 +1,104 @@
"""merge_hashed_customers_into_customers
Revision ID: 0fbeb40dbb2c
Revises: 694d52a883c3
Create Date: 2025-12-03 10:44:32.243220
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
# `revision` is this migration's own ID; `down_revision` is the migration it
# applies on top of (the "Revises" entry in the module docstring).
revision: str = '0fbeb40dbb2c'
down_revision: Union[str, Sequence[str], None] = '694d52a883c3'
# This migration declares no branch labels and no extra dependencies.
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Merge the hashed customer data into the ``customers`` table.

    The steps run in this order:

    1. Add the nullable ``hashed_*`` columns and ``created_at`` to
       ``customers``.
    2. Copy the hashes from ``hashed_customers`` onto the customer row
       referenced by ``hashed_customers.customer_id``.
    3. Fill ``customer_id`` on ``reservations`` and ``conversions`` rows that
       lack one, resolving it through their ``hashed_customer_id``.
    4. Clear ``conversion_guests.hashed_customer_id``.
    5. Drop ``reservations.hashed_customer_id`` together with its foreign key.

    The ``hashed_customers`` table itself is kept; it is meant to be dropped
    by a later migration once conversion_service.py no longer uses it.
    """
    # Step 1: every hash column has the same shape (64-char string, nullable).
    hashed_column_names = (
        'hashed_email',
        'hashed_phone',
        'hashed_given_name',
        'hashed_surname',
        'hashed_city',
        'hashed_postal_code',
        'hashed_country_code',
        'hashed_gender',
        'hashed_birth_date',
    )
    for column_name in hashed_column_names:
        op.add_column(
            'customers',
            sa.Column(column_name, sa.String(length=64), nullable=True),
        )
    op.add_column(
        'customers',
        sa.Column('created_at', sa.DateTime(timezone=True), nullable=True),
    )

    # Step 2: customers without a hashed_customers row keep NULL hashes.
    copy_hashes_to_customers = '''
        UPDATE customers c
        SET
        hashed_email = hc.hashed_email,
        hashed_phone = hc.hashed_phone,
        hashed_given_name = hc.hashed_given_name,
        hashed_surname = hc.hashed_surname,
        hashed_city = hc.hashed_city,
        hashed_postal_code = hc.hashed_postal_code,
        hashed_country_code = hc.hashed_country_code,
        hashed_gender = hc.hashed_gender,
        hashed_birth_date = hc.hashed_birth_date,
        created_at = COALESCE(c.created_at, hc.created_at)
        FROM hashed_customers hc
        WHERE c.id = hc.customer_id
    '''

    # Step 3a: only reservations that have no customer_id yet are touched.
    link_reservations_to_customers = '''
        UPDATE reservations r
        SET customer_id = hc.customer_id
        FROM hashed_customers hc
        WHERE r.hashed_customer_id = hc.id
        AND r.customer_id IS NULL
    '''

    # Step 3b: same resolution for conversions; existing customer_id values
    # are left alone.
    link_conversions_to_customers = '''
        UPDATE conversions c
        SET customer_id = hc.customer_id
        FROM hashed_customers hc
        WHERE c.hashed_customer_id = hc.id
        AND c.customer_id IS NULL
    '''

    # Step 4: conversion_guests references are cleared, not re-pointed.
    clear_conversion_guest_links = '''
        UPDATE conversion_guests cg
        SET hashed_customer_id = NULL
        WHERE hashed_customer_id IS NOT NULL
    '''

    data_statements = (
        copy_hashes_to_customers,
        link_reservations_to_customers,
        link_conversions_to_customers,
        clear_conversion_guest_links,
    )
    for statement in data_statements:
        op.execute(statement)

    # Step 5: the data has been carried over, so the old reservations link
    # can go. The constraint must be dropped before its column.
    op.drop_constraint(
        op.f('reservations_hashed_customer_id_fkey'),
        'reservations',
        type_='foreignkey',
    )
    op.drop_column('reservations', 'hashed_customer_id')
def downgrade() -> None:
    """Restore ``reservations.hashed_customer_id`` and drop the merged columns.

    The column and its foreign key are re-created, and the column is
    backfilled from ``hashed_customers`` (which the upgrade leaves in place)
    by matching ``reservations.customer_id`` to
    ``hashed_customers.customer_id``. Without the backfill every reservation
    would come back with a NULL link even though the data to restore it is
    still available.

    Not reversed by this downgrade:

    * ``conversion_guests.hashed_customer_id`` values cleared by the upgrade
      cannot be recovered.
    * ``customer_id`` values the upgrade filled in on ``reservations`` and
      ``conversions`` are left as they are.

    NOTE(review): the backfill assumes at most one ``hashed_customers`` row
    per customer; confirm against the table's constraints.
    """
    op.add_column(
        'reservations',
        sa.Column(
            'hashed_customer_id',
            sa.INTEGER(),
            autoincrement=False,
            nullable=True,
        ),
    )

    # Re-link reservations before the FK is re-created. Reservations whose
    # customer has no hashed_customers row stay NULL.
    op.execute('''
        UPDATE reservations r
        SET hashed_customer_id = hc.id
        FROM hashed_customers hc
        WHERE r.customer_id = hc.customer_id
    ''')

    op.create_foreign_key(
        op.f('reservations_hashed_customer_id_fkey'),
        'reservations',
        'hashed_customers',
        ['hashed_customer_id'],
        ['id'],
        ondelete='CASCADE',
    )

    # Drop the columns added by upgrade(), in reverse order of creation.
    added_customer_columns = (
        'created_at',
        'hashed_birth_date',
        'hashed_gender',
        'hashed_country_code',
        'hashed_postal_code',
        'hashed_city',
        'hashed_surname',
        'hashed_given_name',
        'hashed_phone',
        'hashed_email',
    )
    for column_name in added_customer_columns:
        op.drop_column('customers', column_name)

View File

@@ -53,13 +53,13 @@ class CustomerService:
if "phone" in customer_data: if "phone" in customer_data:
customer.phone = customer_data["phone"] customer.phone = customer_data["phone"]
self.session.add(customer) # Set creation timestamp
await self.session.flush() # Flush to get the customer.id customer.created_at = datetime.now(UTC)
# Create hashed version # Update hashed fields
hashed_customer = customer.create_hashed_customer() customer.update_hashed_fields()
hashed_customer.created_at = datetime.now(UTC)
self.session.add(hashed_customer) self.session.add(customer)
if auto_commit: if auto_commit:
await self.session.commit() await self.session.commit()
@@ -130,29 +130,8 @@ class CustomerService:
if "phone" in update_data: if "phone" in update_data:
customer.phone = update_data["phone"] customer.phone = update_data["phone"]
# Update or create hashed version # Update hashed fields
result = await self.session.execute( customer.update_hashed_fields()
select(HashedCustomer).where(HashedCustomer.customer_id == customer.id)
)
hashed_customer = result.scalar_one_or_none()
if hashed_customer:
# Update existing hashed customer
new_hashed = customer.create_hashed_customer()
hashed_customer.hashed_email = new_hashed.hashed_email
hashed_customer.hashed_phone = new_hashed.hashed_phone
hashed_customer.hashed_given_name = new_hashed.hashed_given_name
hashed_customer.hashed_surname = new_hashed.hashed_surname
hashed_customer.hashed_city = new_hashed.hashed_city
hashed_customer.hashed_postal_code = new_hashed.hashed_postal_code
hashed_customer.hashed_country_code = new_hashed.hashed_country_code
hashed_customer.hashed_gender = new_hashed.hashed_gender
hashed_customer.hashed_birth_date = new_hashed.hashed_birth_date
else:
# Create new hashed customer if it doesn't exist
hashed_customer = customer.create_hashed_customer()
hashed_customer.created_at = datetime.now(UTC)
self.session.add(hashed_customer)
if auto_commit: if auto_commit:
await self.session.commit() await self.session.commit()
@@ -216,10 +195,11 @@ class CustomerService:
return result.scalar_one_or_none() return result.scalar_one_or_none()
async def hash_existing_customers(self) -> int: async def hash_existing_customers(self) -> int:
"""Hash all existing customers that don't have a hashed version yet. """Hash all existing customers that don't have hashed fields populated yet.
This is useful for backfilling hashed data for customers created This is useful for backfilling hashed data for customers created
before the hashing system was implemented. before the hashing system was implemented, or after migrating from
the separate hashed_customers table.
Also validates and sanitizes customer data (e.g., normalizes country Also validates and sanitizes customer data (e.g., normalizes country
codes to uppercase). Customers with invalid data that cannot be fixed codes to uppercase). Customers with invalid data that cannot be fixed
@@ -229,62 +209,64 @@ class CustomerService:
Number of customers that were hashed Number of customers that were hashed
""" """
# Get all customers # Get all customers without hashed data
result = await self.session.execute(select(Customer)) result = await self.session.execute(
select(Customer).where(Customer.hashed_email.is_(None))
)
customers = result.scalars().all() customers = result.scalars().all()
hashed_count = 0 hashed_count = 0
skipped_count = 0 skipped_count = 0
for customer in customers: for customer in customers:
# Check if this customer already has a hashed version # Validate and sanitize customer data before hashing
existing_hashed = await self.get_hashed_customer(customer.id) customer_dict = {
if not existing_hashed: "given_name": customer.given_name,
# Validate and sanitize customer data before hashing "surname": customer.surname,
customer_dict = { "name_prefix": customer.name_prefix,
"given_name": customer.given_name, "email_address": customer.email_address,
"surname": customer.surname, "phone": customer.phone,
"name_prefix": customer.name_prefix, "email_newsletter": customer.email_newsletter,
"email_address": customer.email_address, "address_line": customer.address_line,
"phone": customer.phone, "city_name": customer.city_name,
"email_newsletter": customer.email_newsletter, "postal_code": customer.postal_code,
"address_line": customer.address_line, "country_code": customer.country_code,
"city_name": customer.city_name, "gender": customer.gender,
"postal_code": customer.postal_code, "birth_date": customer.birth_date,
"country_code": customer.country_code, "language": customer.language,
"gender": customer.gender, "address_catalog": customer.address_catalog,
"birth_date": customer.birth_date, "name_title": customer.name_title,
"language": customer.language, }
"address_catalog": customer.address_catalog,
"name_title": customer.name_title,
}
try: try:
# Validate through Pydantic (normalizes country code) # Validate through Pydantic (normalizes country code)
validated = CustomerData(**customer_dict) validated = CustomerData(**customer_dict)
# Update customer with sanitized data # Update customer with sanitized data
# Exclude 'phone_numbers' as Customer model uses 'phone' field # Exclude 'phone_numbers' as Customer model uses 'phone' field
for key, value in validated.model_dump( for key, value in validated.model_dump(
exclude_none=True, exclude={"phone_numbers"} exclude_none=True, exclude={"phone_numbers"}
).items(): ).items():
if hasattr(customer, key): if hasattr(customer, key):
setattr(customer, key, value) setattr(customer, key, value)
# Create hashed version with sanitized data # Update hashed fields with sanitized data
hashed_customer = customer.create_hashed_customer() customer.update_hashed_fields()
hashed_customer.created_at = datetime.now(UTC)
self.session.add(hashed_customer)
hashed_count += 1
except ValidationError as e: # Set created_at if not already set
# Skip customers with invalid data and log if not customer.created_at:
skipped_count += 1 customer.created_at = datetime.now(UTC)
_LOGGER.warning(
"Skipping customer ID %s due to validation error: %s", hashed_count += 1
customer.id,
e, except ValidationError as e:
) # Skip customers with invalid data and log
skipped_count += 1
_LOGGER.warning(
"Skipping customer ID %s due to validation error: %s",
customer.id,
e,
)
if hashed_count > 0: if hashed_count > 0:
await self.session.commit() await self.session.commit()

View File

@@ -311,6 +311,20 @@ class Customer(Base):
language = Column(String) language = Column(String)
address_catalog = Column(Boolean) # Added for XML address_catalog = Column(Boolean) # Added for XML
name_title = Column(String) # Added for XML name_title = Column(String) # Added for XML
# Hashed fields for Meta Conversion API (SHA256)
hashed_email = Column(String(64))
hashed_phone = Column(String(64))
hashed_given_name = Column(String(64))
hashed_surname = Column(String(64))
hashed_city = Column(String(64))
hashed_postal_code = Column(String(64))
hashed_country_code = Column(String(64))
hashed_gender = Column(String(64))
hashed_birth_date = Column(String(64))
created_at = Column(DateTime(timezone=True))
reservations = relationship("Reservation", back_populates="customer") reservations = relationship("Reservation", back_populates="customer")
def __repr__(self): def __repr__(self):
@@ -335,21 +349,17 @@ class Customer(Base):
# SHA256 hash # SHA256 hash
return hashlib.sha256(normalized.encode("utf-8")).hexdigest() return hashlib.sha256(normalized.encode("utf-8")).hexdigest()
def create_hashed_customer(self): def update_hashed_fields(self):
"""Create a HashedCustomer instance from this Customer.""" """Update the hashed fields based on current plaintext values."""
return HashedCustomer( self.hashed_email = self._normalize_and_hash(self.email_address)
customer_id=self.id, self.hashed_phone = self._normalize_and_hash(self.phone)
contact_id=self.contact_id, self.hashed_given_name = self._normalize_and_hash(self.given_name)
hashed_email=self._normalize_and_hash(self.email_address), self.hashed_surname = self._normalize_and_hash(self.surname)
hashed_phone=self._normalize_and_hash(self.phone), self.hashed_city = self._normalize_and_hash(self.city_name)
hashed_given_name=self._normalize_and_hash(self.given_name), self.hashed_postal_code = self._normalize_and_hash(self.postal_code)
hashed_surname=self._normalize_and_hash(self.surname), self.hashed_country_code = self._normalize_and_hash(self.country_code)
hashed_city=self._normalize_and_hash(self.city_name), self.hashed_gender = self._normalize_and_hash(self.gender)
hashed_postal_code=self._normalize_and_hash(self.postal_code), self.hashed_birth_date = self._normalize_and_hash(self.birth_date)
hashed_country_code=self._normalize_and_hash(self.country_code),
hashed_gender=self._normalize_and_hash(self.gender),
hashed_birth_date=self._normalize_and_hash(self.birth_date),
)
class HashedCustomer(Base): class HashedCustomer(Base):
@@ -523,9 +533,6 @@ class Reservation(Base):
__tablename__ = "reservations" __tablename__ = "reservations"
id = Column(Integer, primary_key=True) id = Column(Integer, primary_key=True)
customer_id = Column(Integer, ForeignKey("customers.id", ondelete="SET NULL")) customer_id = Column(Integer, ForeignKey("customers.id", ondelete="SET NULL"))
hashed_customer_id = Column(
Integer, ForeignKey("hashed_customers.id", ondelete="CASCADE")
)
unique_id = Column(String, unique=True) unique_id = Column(String, unique=True)
md5_unique_id = Column(String(32), unique=True) # max length 32 guaranteed md5_unique_id = Column(String(32), unique=True) # max length 32 guaranteed
start_date = Column(Date) start_date = Column(Date)
@@ -555,7 +562,6 @@ class Reservation(Base):
room_classification_code = Column(String) room_classification_code = Column(String)
room_type = Column(String) room_type = Column(String)
customer = relationship("Customer", back_populates="reservations") customer = relationship("Customer", back_populates="reservations")
hashed_customer = relationship("HashedCustomer", backref="reservations")
# Table for tracking acknowledged requests by client # Table for tracking acknowledged requests by client