Migration to single customer table works but conversion_service still needs updating

This commit is contained in:
Jonas Linter
2025-12-03 10:51:18 +01:00
parent 08f85d1b26
commit 34cb2131c4
3 changed files with 188 additions and 96 deletions

View File

@@ -0,0 +1,104 @@
"""merge_hashed_customers_into_customers
Revision ID: 0fbeb40dbb2c
Revises: 694d52a883c3
Create Date: 2025-12-03 10:44:32.243220
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = '0fbeb40dbb2c'
down_revision: Union[str, Sequence[str], None] = '694d52a883c3'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
"""Upgrade schema."""
# ### commands auto generated by Alembic - please adjust! ###
# Add hashed columns to customers table
op.add_column('customers', sa.Column('hashed_email', sa.String(length=64), nullable=True))
op.add_column('customers', sa.Column('hashed_phone', sa.String(length=64), nullable=True))
op.add_column('customers', sa.Column('hashed_given_name', sa.String(length=64), nullable=True))
op.add_column('customers', sa.Column('hashed_surname', sa.String(length=64), nullable=True))
op.add_column('customers', sa.Column('hashed_city', sa.String(length=64), nullable=True))
op.add_column('customers', sa.Column('hashed_postal_code', sa.String(length=64), nullable=True))
op.add_column('customers', sa.Column('hashed_country_code', sa.String(length=64), nullable=True))
op.add_column('customers', sa.Column('hashed_gender', sa.String(length=64), nullable=True))
op.add_column('customers', sa.Column('hashed_birth_date', sa.String(length=64), nullable=True))
op.add_column('customers', sa.Column('created_at', sa.DateTime(timezone=True), nullable=True))
# Migrate data from hashed_customers to customers
op.execute('''
UPDATE customers c
SET
hashed_email = hc.hashed_email,
hashed_phone = hc.hashed_phone,
hashed_given_name = hc.hashed_given_name,
hashed_surname = hc.hashed_surname,
hashed_city = hc.hashed_city,
hashed_postal_code = hc.hashed_postal_code,
hashed_country_code = hc.hashed_country_code,
hashed_gender = hc.hashed_gender,
hashed_birth_date = hc.hashed_birth_date,
created_at = COALESCE(c.created_at, hc.created_at)
FROM hashed_customers hc
WHERE c.id = hc.customer_id
''')
# Update reservations to point to customers instead of hashed_customers
# First, update reservations.customer_id from reservations.hashed_customer_id
op.execute('''
UPDATE reservations r
SET customer_id = hc.customer_id
FROM hashed_customers hc
WHERE r.hashed_customer_id = hc.id
AND r.customer_id IS NULL
''')
# Update conversions to point to customers instead of hashed_customers
op.execute('''
UPDATE conversions c
SET customer_id = hc.customer_id
FROM hashed_customers hc
WHERE c.hashed_customer_id = hc.id
AND c.customer_id IS NULL
''')
# Update conversion_guests to point to customers instead of hashed_customers
op.execute('''
UPDATE conversion_guests cg
SET hashed_customer_id = NULL
WHERE hashed_customer_id IS NOT NULL
''')
# Now safe to drop the FK and column from reservations
op.drop_constraint(op.f('reservations_hashed_customer_id_fkey'), 'reservations', type_='foreignkey')
op.drop_column('reservations', 'hashed_customer_id')
# Note: We're keeping the hashed_customers table for now since conversion_service.py still uses it
# It can be dropped in a future migration after updating the application code
# ### end Alembic commands ###
def downgrade() -> None:
"""Downgrade schema."""
# ### commands auto generated by Alembic - please adjust! ###
op.add_column('reservations', sa.Column('hashed_customer_id', sa.INTEGER(), autoincrement=False, nullable=True))
op.create_foreign_key(op.f('reservations_hashed_customer_id_fkey'), 'reservations', 'hashed_customers', ['hashed_customer_id'], ['id'], ondelete='CASCADE')
op.drop_column('customers', 'created_at')
op.drop_column('customers', 'hashed_birth_date')
op.drop_column('customers', 'hashed_gender')
op.drop_column('customers', 'hashed_country_code')
op.drop_column('customers', 'hashed_postal_code')
op.drop_column('customers', 'hashed_city')
op.drop_column('customers', 'hashed_surname')
op.drop_column('customers', 'hashed_given_name')
op.drop_column('customers', 'hashed_phone')
op.drop_column('customers', 'hashed_email')
# ### end Alembic commands ###

View File

@@ -53,13 +53,13 @@ class CustomerService:
if "phone" in customer_data:
customer.phone = customer_data["phone"]
self.session.add(customer)
await self.session.flush() # Flush to get the customer.id
# Set creation timestamp
customer.created_at = datetime.now(UTC)
# Create hashed version
hashed_customer = customer.create_hashed_customer()
hashed_customer.created_at = datetime.now(UTC)
self.session.add(hashed_customer)
# Update hashed fields
customer.update_hashed_fields()
self.session.add(customer)
if auto_commit:
await self.session.commit()
@@ -130,29 +130,8 @@ class CustomerService:
if "phone" in update_data:
customer.phone = update_data["phone"]
# Update or create hashed version
result = await self.session.execute(
select(HashedCustomer).where(HashedCustomer.customer_id == customer.id)
)
hashed_customer = result.scalar_one_or_none()
if hashed_customer:
# Update existing hashed customer
new_hashed = customer.create_hashed_customer()
hashed_customer.hashed_email = new_hashed.hashed_email
hashed_customer.hashed_phone = new_hashed.hashed_phone
hashed_customer.hashed_given_name = new_hashed.hashed_given_name
hashed_customer.hashed_surname = new_hashed.hashed_surname
hashed_customer.hashed_city = new_hashed.hashed_city
hashed_customer.hashed_postal_code = new_hashed.hashed_postal_code
hashed_customer.hashed_country_code = new_hashed.hashed_country_code
hashed_customer.hashed_gender = new_hashed.hashed_gender
hashed_customer.hashed_birth_date = new_hashed.hashed_birth_date
else:
# Create new hashed customer if it doesn't exist
hashed_customer = customer.create_hashed_customer()
hashed_customer.created_at = datetime.now(UTC)
self.session.add(hashed_customer)
# Update hashed fields
customer.update_hashed_fields()
if auto_commit:
await self.session.commit()
@@ -216,10 +195,11 @@ class CustomerService:
return result.scalar_one_or_none()
async def hash_existing_customers(self) -> int:
"""Hash all existing customers that don't have a hashed version yet.
"""Hash all existing customers that don't have hashed fields populated yet.
This is useful for backfilling hashed data for customers created
before the hashing system was implemented.
before the hashing system was implemented, or after migrating from
the separate hashed_customers table.
Also validates and sanitizes customer data (e.g., normalizes country
codes to uppercase). Customers with invalid data that cannot be fixed
@@ -229,62 +209,64 @@ class CustomerService:
Number of customers that were hashed
"""
# Get all customers
result = await self.session.execute(select(Customer))
# Get all customers without hashed data
result = await self.session.execute(
select(Customer).where(Customer.hashed_email.is_(None))
)
customers = result.scalars().all()
hashed_count = 0
skipped_count = 0
for customer in customers:
# Check if this customer already has a hashed version
existing_hashed = await self.get_hashed_customer(customer.id)
if not existing_hashed:
# Validate and sanitize customer data before hashing
customer_dict = {
"given_name": customer.given_name,
"surname": customer.surname,
"name_prefix": customer.name_prefix,
"email_address": customer.email_address,
"phone": customer.phone,
"email_newsletter": customer.email_newsletter,
"address_line": customer.address_line,
"city_name": customer.city_name,
"postal_code": customer.postal_code,
"country_code": customer.country_code,
"gender": customer.gender,
"birth_date": customer.birth_date,
"language": customer.language,
"address_catalog": customer.address_catalog,
"name_title": customer.name_title,
}
# Validate and sanitize customer data before hashing
customer_dict = {
"given_name": customer.given_name,
"surname": customer.surname,
"name_prefix": customer.name_prefix,
"email_address": customer.email_address,
"phone": customer.phone,
"email_newsletter": customer.email_newsletter,
"address_line": customer.address_line,
"city_name": customer.city_name,
"postal_code": customer.postal_code,
"country_code": customer.country_code,
"gender": customer.gender,
"birth_date": customer.birth_date,
"language": customer.language,
"address_catalog": customer.address_catalog,
"name_title": customer.name_title,
}
try:
# Validate through Pydantic (normalizes country code)
validated = CustomerData(**customer_dict)
try:
# Validate through Pydantic (normalizes country code)
validated = CustomerData(**customer_dict)
# Update customer with sanitized data
# Exclude 'phone_numbers' as Customer model uses 'phone' field
for key, value in validated.model_dump(
exclude_none=True, exclude={"phone_numbers"}
).items():
if hasattr(customer, key):
setattr(customer, key, value)
# Update customer with sanitized data
# Exclude 'phone_numbers' as Customer model uses 'phone' field
for key, value in validated.model_dump(
exclude_none=True, exclude={"phone_numbers"}
).items():
if hasattr(customer, key):
setattr(customer, key, value)
# Create hashed version with sanitized data
hashed_customer = customer.create_hashed_customer()
hashed_customer.created_at = datetime.now(UTC)
self.session.add(hashed_customer)
hashed_count += 1
# Update hashed fields with sanitized data
customer.update_hashed_fields()
except ValidationError as e:
# Skip customers with invalid data and log
skipped_count += 1
_LOGGER.warning(
"Skipping customer ID %s due to validation error: %s",
customer.id,
e,
)
# Set created_at if not already set
if not customer.created_at:
customer.created_at = datetime.now(UTC)
hashed_count += 1
except ValidationError as e:
# Skip customers with invalid data and log
skipped_count += 1
_LOGGER.warning(
"Skipping customer ID %s due to validation error: %s",
customer.id,
e,
)
if hashed_count > 0:
await self.session.commit()

View File

@@ -311,6 +311,20 @@ class Customer(Base):
language = Column(String)
address_catalog = Column(Boolean) # Added for XML
name_title = Column(String) # Added for XML
# Hashed fields for Meta Conversion API (SHA256)
hashed_email = Column(String(64))
hashed_phone = Column(String(64))
hashed_given_name = Column(String(64))
hashed_surname = Column(String(64))
hashed_city = Column(String(64))
hashed_postal_code = Column(String(64))
hashed_country_code = Column(String(64))
hashed_gender = Column(String(64))
hashed_birth_date = Column(String(64))
created_at = Column(DateTime(timezone=True))
reservations = relationship("Reservation", back_populates="customer")
def __repr__(self):
@@ -335,21 +349,17 @@ class Customer(Base):
# SHA256 hash
return hashlib.sha256(normalized.encode("utf-8")).hexdigest()
def create_hashed_customer(self):
"""Create a HashedCustomer instance from this Customer."""
return HashedCustomer(
customer_id=self.id,
contact_id=self.contact_id,
hashed_email=self._normalize_and_hash(self.email_address),
hashed_phone=self._normalize_and_hash(self.phone),
hashed_given_name=self._normalize_and_hash(self.given_name),
hashed_surname=self._normalize_and_hash(self.surname),
hashed_city=self._normalize_and_hash(self.city_name),
hashed_postal_code=self._normalize_and_hash(self.postal_code),
hashed_country_code=self._normalize_and_hash(self.country_code),
hashed_gender=self._normalize_and_hash(self.gender),
hashed_birth_date=self._normalize_and_hash(self.birth_date),
)
def update_hashed_fields(self):
"""Update the hashed fields based on current plaintext values."""
self.hashed_email = self._normalize_and_hash(self.email_address)
self.hashed_phone = self._normalize_and_hash(self.phone)
self.hashed_given_name = self._normalize_and_hash(self.given_name)
self.hashed_surname = self._normalize_and_hash(self.surname)
self.hashed_city = self._normalize_and_hash(self.city_name)
self.hashed_postal_code = self._normalize_and_hash(self.postal_code)
self.hashed_country_code = self._normalize_and_hash(self.country_code)
self.hashed_gender = self._normalize_and_hash(self.gender)
self.hashed_birth_date = self._normalize_and_hash(self.birth_date)
class HashedCustomer(Base):
@@ -523,9 +533,6 @@ class Reservation(Base):
__tablename__ = "reservations"
id = Column(Integer, primary_key=True)
customer_id = Column(Integer, ForeignKey("customers.id", ondelete="SET NULL"))
hashed_customer_id = Column(
Integer, ForeignKey("hashed_customers.id", ondelete="CASCADE")
)
unique_id = Column(String, unique=True)
md5_unique_id = Column(String(32), unique=True) # max length 32 guaranteed
start_date = Column(Date)
@@ -555,7 +562,6 @@ class Reservation(Base):
room_classification_code = Column(String)
room_type = Column(String)
customer = relationship("Customer", back_populates="reservations")
hashed_customer = relationship("HashedCustomer", backref="reservations")
# Table for tracking acknowledged requests by client