Migration to guest_table for conversion works
This commit is contained in:
@@ -0,0 +1,168 @@
|
||||
"""Add ConversionGuest table and link conversions
|
||||
|
||||
Revision ID: 70b2579d1d96
|
||||
Revises: b33fd7a2da6c
|
||||
Create Date: 2025-11-19 11:56:46.532881
|
||||
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
import hashlib
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# Alembic revision identifiers: this revision's ID and the revision it follows.
revision: str = "70b2579d1d96"
down_revision: Union[str, Sequence[str], None] = "b33fd7a2da6c"
# No branch label and no cross-branch dependency for this revision.
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def normalize_and_hash(value):
    """Return the SHA-256 hex digest of a normalized value for ConversionGuest hashed fields.

    The value is converted with ``str()``, lower-cased and stripped of
    surrounding whitespace before being UTF-8 encoded and hashed.

    Returns ``None`` when ``value`` is falsy (``None``, ``""``, ``0``, ...).
    """
    if value:
        canonical = str(value).lower().strip()
        digest = hashlib.sha256(canonical.encode("utf-8"))
        return digest.hexdigest()
    return None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Upgrade schema: create ``conversion_guests`` and link ``conversions`` to it.

    Steps, in order:

    1. Create the ``conversion_guests`` table and its indexes.
    2. Add the nullable, indexed ``conversions.conversion_guest_id`` column
       with a foreign key to ``conversion_guests.id``.
    3. Back-fill one ``conversion_guests`` row per ``(hotel_id, guest_id)``
       from the existing conversions that carry any guest data, taking the
       guest fields from the most recent conversion.
    4. Point every conversion at the guest row with the same
       ``(hotel_id, guest_id)``.

    The raw SQL relies on ``NULLS LAST``, ``IS NOT DISTINCT FROM`` and
    ``UPDATE ... FROM`` (presumably PostgreSQL -- confirm before running
    against another backend).
    """
    # --- Schema changes -------------------------------------------------
    op.create_table(
        'conversion_guests',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('hotel_id', sa.String(), nullable=False),
        sa.Column('guest_id', sa.String(), nullable=True),
        sa.Column('guest_first_name', sa.String(), nullable=True),
        sa.Column('guest_last_name', sa.String(), nullable=True),
        sa.Column('guest_email', sa.String(), nullable=True),
        sa.Column('guest_country_code', sa.String(), nullable=True),
        sa.Column('guest_birth_date', sa.Date(), nullable=True),
        # Hashed columns hold 64-character SHA-256 hex digests.
        sa.Column('hashed_first_name', sa.String(length=64), nullable=True),
        sa.Column('hashed_last_name', sa.String(length=64), nullable=True),
        sa.Column('hashed_email', sa.String(length=64), nullable=True),
        sa.Column('hashed_country_code', sa.String(length=64), nullable=True),
        sa.Column('hashed_birth_date', sa.String(length=64), nullable=True),
        sa.Column('first_seen', sa.DateTime(timezone=True), nullable=True),
        sa.Column('last_seen', sa.DateTime(timezone=True), nullable=True),
        sa.PrimaryKeyConstraint('id'),
    )
    op.create_index(op.f('ix_conversion_guests_guest_id'), 'conversion_guests', ['guest_id'], unique=False)
    op.create_index(op.f('ix_conversion_guests_hashed_email'), 'conversion_guests', ['hashed_email'], unique=False)
    op.create_index(op.f('ix_conversion_guests_hashed_first_name'), 'conversion_guests', ['hashed_first_name'], unique=False)
    op.create_index(op.f('ix_conversion_guests_hashed_last_name'), 'conversion_guests', ['hashed_last_name'], unique=False)
    op.create_index(op.f('ix_conversion_guests_hotel_id'), 'conversion_guests', ['hotel_id'], unique=False)
    op.add_column('conversions', sa.Column('conversion_guest_id', sa.Integer(), nullable=True))
    op.create_index(op.f('ix_conversions_conversion_guest_id'), 'conversions', ['conversion_guest_id'], unique=False)
    # Name the foreign key explicitly so it can be dropped by name later.
    # The name equals PostgreSQL's default for an unnamed FK
    # (<table>_<column>_fkey), so the resulting schema is unchanged there.
    op.create_foreign_key(
        op.f('conversions_conversion_guest_id_fkey'),
        'conversions',
        'conversion_guests',
        ['conversion_guest_id'],
        ['id'],
    )

    # --- Data migration -------------------------------------------------
    connection = op.get_bind()

    # One row per (hotel_id, guest_id): guest fields come from the most
    # recent conversion (rn = 1); first_seen is the earliest creation_time in
    # the group and last_seen the latest. Window partitions treat NULLs as
    # equal, so all conversions of a hotel with a NULL guest_id collapse into
    # a single guest row. The rn filter runs in SQL so only the rows that are
    # actually inserted are transferred to Python.
    latest_per_guest = connection.execute(sa.text("""
        SELECT
            ranked.hotel_id,
            ranked.guest_id,
            ranked.guest_first_name,
            ranked.guest_last_name,
            ranked.guest_email,
            ranked.guest_country_code,
            ranked.guest_birth_date,
            ranked.first_seen,
            ranked.creation_time AS last_seen
        FROM (
            SELECT
                c.hotel_id,
                c.guest_id,
                c.guest_first_name,
                c.guest_last_name,
                c.guest_email,
                c.guest_country_code,
                c.guest_birth_date,
                c.creation_time,
                MIN(c.creation_time) OVER (
                    PARTITION BY c.hotel_id, c.guest_id
                ) AS first_seen,
                ROW_NUMBER() OVER (
                    PARTITION BY c.hotel_id, c.guest_id
                    ORDER BY c.creation_time DESC NULLS LAST
                ) AS rn
            FROM conversions c
            WHERE c.guest_first_name IS NOT NULL
               OR c.guest_last_name IS NOT NULL
               OR c.guest_email IS NOT NULL
               OR c.guest_country_code IS NOT NULL
               OR c.guest_birth_date IS NOT NULL
        ) ranked
        WHERE ranked.rn = 1
    """))

    guest_rows = []
    for row in latest_per_guest:
        birth_date = row.guest_birth_date
        guest_rows.append({
            'hotel_id': row.hotel_id,
            'guest_id': row.guest_id,
            'guest_first_name': row.guest_first_name,
            'guest_last_name': row.guest_last_name,
            'guest_email': row.guest_email,
            'guest_country_code': row.guest_country_code,
            'guest_birth_date': birth_date,
            'hashed_first_name': normalize_and_hash(row.guest_first_name),
            'hashed_last_name': normalize_and_hash(row.guest_last_name),
            'hashed_email': normalize_and_hash(row.guest_email),
            'hashed_country_code': normalize_and_hash(row.guest_country_code),
            # Dates are hashed in their ISO-8601 text form.
            'hashed_birth_date': normalize_and_hash(
                birth_date.isoformat() if birth_date else None
            ),
            'first_seen': row.first_seen,
            'last_seen': row.last_seen,
        })

    if guest_rows:
        # A list of parameter dicts makes this a single executemany call.
        connection.execute(
            sa.text("""
                INSERT INTO conversion_guests
                    (hotel_id, guest_id, guest_first_name, guest_last_name, guest_email,
                     guest_country_code, guest_birth_date, hashed_first_name, hashed_last_name,
                     hashed_email, hashed_country_code, hashed_birth_date, first_seen, last_seen)
                VALUES
                    (:hotel_id, :guest_id, :guest_first_name, :guest_last_name, :guest_email,
                     :guest_country_code, :guest_birth_date, :hashed_first_name, :hashed_last_name,
                     :hashed_email, :hashed_country_code, :hashed_birth_date, :first_seen, :last_seen)
            """),
            guest_rows,
        )

    # Link conversions to conversion_guests on (hotel_id, guest_id).
    # IS NOT DISTINCT FROM makes NULL guest_ids compare equal, matching the
    # grouping used for the back-fill above.
    connection.execute(sa.text("""
        UPDATE conversions c
        SET conversion_guest_id = cg.id
        FROM conversion_guests cg
        WHERE c.hotel_id = cg.hotel_id
          AND c.guest_id IS NOT DISTINCT FROM cg.guest_id
    """))
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Downgrade schema: unlink ``conversions`` and drop ``conversion_guests``.

    Removes the ``conversions.conversion_guest_id`` foreign key, index and
    column, then drops the ``conversion_guests`` indexes and table. All rows
    in ``conversion_guests`` are discarded with the table.
    """
    # DROP CONSTRAINT cannot be emitted without a constraint name, so passing
    # None here fails. The upgrade created the foreign key without an explicit
    # name, in which case PostgreSQL assigns <table>_<column>_fkey.
    # NOTE(review): confirm the name if the target database is not PostgreSQL
    # or a metadata naming convention is configured.
    op.drop_constraint(
        op.f('conversions_conversion_guest_id_fkey'),
        'conversions',
        type_='foreignkey',
    )
    op.drop_index(op.f('ix_conversions_conversion_guest_id'), table_name='conversions')
    op.drop_column('conversions', 'conversion_guest_id')

    # Drop the guest table's indexes in reverse creation order, then the table.
    op.drop_index(op.f('ix_conversion_guests_hotel_id'), table_name='conversion_guests')
    op.drop_index(op.f('ix_conversion_guests_hashed_last_name'), table_name='conversion_guests')
    op.drop_index(op.f('ix_conversion_guests_hashed_first_name'), table_name='conversion_guests')
    op.drop_index(op.f('ix_conversion_guests_hashed_email'), table_name='conversion_guests')
    op.drop_index(op.f('ix_conversion_guests_guest_id'), table_name='conversion_guests')
    op.drop_table('conversion_guests')
|
||||
Reference in New Issue
Block a user