More refactoring

This commit is contained in:
Jonas Linter
2025-11-19 16:25:18 +01:00
parent 1396135208
commit d81ba79256
9 changed files with 301001 additions and 340 deletions

View File

@@ -23,6 +23,7 @@ def upgrade() -> None:
# Drop existing tables to start with a clean slate
# Drop conversion_rooms first due to foreign key dependency
op.execute("DROP TABLE IF EXISTS conversion_rooms CASCADE")
op.execute("DROP TABLE IF EXISTS conversion_guests CASCADE")
op.execute("DROP TABLE IF EXISTS conversions CASCADE")
print("dropped existing conversion tables")

View File

@@ -1,66 +0,0 @@
"""Added birth_date, storing revenue as number
Revision ID: b33fd7a2da6c
Revises: 630b0c367dcb
Create Date: 2025-11-18 14:41:17.567595
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = 'b33fd7a2da6c'
down_revision: Union[str, Sequence[str], None] = '630b0c367dcb'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
"""Upgrade schema."""
# ### commands auto generated by Alembic - please adjust! ###
# Convert VARCHAR to Double with explicit CAST for PostgreSQL compatibility
# PostgreSQL requires USING clause for type conversion
connection = op.get_bind()
if connection.dialect.name == 'postgresql':
op.execute(
"ALTER TABLE conversion_rooms "
"ALTER COLUMN total_revenue TYPE DOUBLE PRECISION "
"USING total_revenue::DOUBLE PRECISION"
)
else:
# For SQLite and other databases, use standard alter_column
op.alter_column('conversion_rooms', 'total_revenue',
existing_type=sa.VARCHAR(),
type_=sa.Double(),
existing_nullable=True)
op.add_column('conversions', sa.Column('guest_birth_date', sa.Date(), nullable=True))
op.add_column('conversions', sa.Column('guest_id', sa.String(), nullable=True))
# ### end Alembic commands ###
def downgrade() -> None:
"""Downgrade schema."""
# ### commands auto generated by Alembic - please adjust! ###
op.drop_column('conversions', 'guest_id')
op.drop_column('conversions', 'guest_birth_date')
# Convert Double back to VARCHAR with explicit CAST for PostgreSQL compatibility
connection = op.get_bind()
if connection.dialect.name == 'postgresql':
op.execute(
"ALTER TABLE conversion_rooms "
"ALTER COLUMN total_revenue TYPE VARCHAR "
"USING total_revenue::VARCHAR"
)
else:
# For SQLite and other databases, use standard alter_column
op.alter_column('conversion_rooms', 'total_revenue',
existing_type=sa.Double(),
type_=sa.VARCHAR(),
existing_nullable=True)
# ### end Alembic commands ###
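
The dialect branching above can also be collapsed into a single call using Alembic's PostgreSQL-specific postgresql_using argument, which renders the USING clause only for that dialect. A minimal sketch, assuming the documented behavior of postgresql_using; not part of this commit:

# Sketch only: one alter_column covering both branches of the if/else above.
op.alter_column(
    "conversion_rooms",
    "total_revenue",
    existing_type=sa.VARCHAR(),
    type_=sa.Double(),
    existing_nullable=True,
    postgresql_using="total_revenue::double precision",
)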

View File

@@ -0,0 +1,284 @@
"""Update conversions schema with new attribution fields and composite key for guests.
Revision ID: a2b3c4d5e6f7
Revises: 630b0c367dcb
Create Date: 2025-11-19 00:00:00.000000
"""
from collections.abc import Sequence
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.
revision: str = "a2b3c4d5e6f7"
down_revision: str | Sequence[str] | None = "630b0c367dcb"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
def upgrade() -> None:
"""Upgrade schema."""
# Drop existing conversion tables to migrate to new schema
# Drop conversion_rooms first due to foreign key dependency
op.execute("DROP TABLE IF EXISTS conversion_rooms CASCADE")
op.execute("DROP TABLE IF EXISTS conversions CASCADE")
op.execute("DROP TABLE IF EXISTS conversion_guests CASCADE")
# Create conversion_guests table with composite primary key (hotel_id, guest_id)
op.create_table(
"conversion_guests",
sa.Column("hotel_id", sa.String(), nullable=False, primary_key=True),
sa.Column("guest_id", sa.String(), nullable=False, primary_key=True),
sa.Column("guest_first_name", sa.String(), nullable=True),
sa.Column("guest_last_name", sa.String(), nullable=True),
sa.Column("guest_email", sa.String(), nullable=True),
sa.Column("guest_country_code", sa.String(), nullable=True),
sa.Column("guest_birth_date", sa.Date(), nullable=True),
sa.Column("hashed_first_name", sa.String(64), nullable=True),
sa.Column("hashed_last_name", sa.String(64), nullable=True),
sa.Column("hashed_email", sa.String(64), nullable=True),
sa.Column("hashed_country_code", sa.String(64), nullable=True),
sa.Column("hashed_birth_date", sa.String(64), nullable=True),
sa.Column("is_regular", sa.Boolean(), default=False, nullable=False),
sa.Column("first_seen", sa.DateTime(timezone=True), nullable=True),
sa.Column("last_seen", sa.DateTime(timezone=True), nullable=True),
sa.PrimaryKeyConstraint("hotel_id", "guest_id"),
)
op.create_index(
op.f("ix_conversion_guests_hotel_id"),
"conversion_guests",
["hotel_id"],
unique=False,
)
op.create_index(
op.f("ix_conversion_guests_guest_id"),
"conversion_guests",
["guest_id"],
unique=False,
)
op.create_index(
op.f("ix_conversion_guests_hashed_first_name"),
"conversion_guests",
["hashed_first_name"],
unique=False,
)
op.create_index(
op.f("ix_conversion_guests_hashed_last_name"),
"conversion_guests",
["hashed_last_name"],
unique=False,
)
op.create_index(
op.f("ix_conversion_guests_hashed_email"),
"conversion_guests",
["hashed_email"],
unique=False,
)
# Create conversions table with new schema
op.create_table(
"conversions",
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("reservation_id", sa.Integer(), nullable=True),
sa.Column("customer_id", sa.Integer(), nullable=True),
sa.Column("hashed_customer_id", sa.Integer(), nullable=True),
sa.Column("hotel_id", sa.String(), nullable=True),
sa.Column("guest_id", sa.String(), nullable=True),
sa.Column("pms_reservation_id", sa.String(), nullable=True),
sa.Column("reservation_number", sa.String(), nullable=True),
sa.Column("reservation_date", sa.Date(), nullable=True),
sa.Column("creation_time", sa.DateTime(timezone=True), nullable=True),
sa.Column("reservation_type", sa.String(), nullable=True),
sa.Column("booking_channel", sa.String(), nullable=True),
sa.Column("advertising_medium", sa.String(), nullable=True),
sa.Column("advertising_partner", sa.String(), nullable=True),
sa.Column("advertising_campagne", sa.String(), nullable=True),
sa.Column("directly_attributable", sa.Boolean(), default=False, nullable=False),
sa.Column("guest_matched", sa.Boolean(), default=False, nullable=False),
sa.Column("created_at", sa.DateTime(timezone=True), nullable=True),
sa.Column("updated_at", sa.DateTime(timezone=True), nullable=True),
sa.ForeignKeyConstraint(["reservation_id"], ["reservations.id"]),
sa.ForeignKeyConstraint(["customer_id"], ["customers.id"]),
sa.ForeignKeyConstraint(["hashed_customer_id"], ["hashed_customers.id"]),
sa.ForeignKeyConstraint(
["hotel_id", "guest_id"],
["conversion_guests.hotel_id", "conversion_guests.guest_id"],
ondelete="SET NULL",
),
sa.PrimaryKeyConstraint("id"),
)
op.create_index(
op.f("ix_conversions_advertising_campagne"),
"conversions",
["advertising_campagne"],
unique=False,
)
op.create_index(
op.f("ix_conversions_advertising_medium"),
"conversions",
["advertising_medium"],
unique=False,
)
op.create_index(
op.f("ix_conversions_advertising_partner"),
"conversions",
["advertising_partner"],
unique=False,
)
op.create_index(
op.f("ix_conversions_customer_id"),
"conversions",
["customer_id"],
unique=False,
)
op.create_index(
op.f("ix_conversions_hashed_customer_id"),
"conversions",
["hashed_customer_id"],
unique=False,
)
op.create_index(
op.f("ix_conversions_hotel_id"),
"conversions",
["hotel_id"],
unique=False,
)
op.create_index(
op.f("ix_conversions_guest_id"),
"conversions",
["guest_id"],
unique=False,
)
op.create_index(
op.f("ix_conversions_pms_reservation_id"),
"conversions",
["pms_reservation_id"],
unique=False,
)
op.create_index(
op.f("ix_conversions_reservation_id"),
"conversions",
["reservation_id"],
unique=False,
)
# Create conversion_rooms table
op.create_table(
"conversion_rooms",
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("conversion_id", sa.Integer(), nullable=False),
sa.Column("pms_hotel_reservation_id", sa.String(), nullable=True),
sa.Column("arrival_date", sa.Date(), nullable=True),
sa.Column("departure_date", sa.Date(), nullable=True),
sa.Column("room_status", sa.String(), nullable=True),
sa.Column("room_type", sa.String(), nullable=True),
sa.Column("room_number", sa.String(), nullable=True),
sa.Column("num_adults", sa.Integer(), nullable=True),
sa.Column("rate_plan_code", sa.String(), nullable=True),
sa.Column("connected_room_type", sa.String(), nullable=True),
sa.Column("daily_sales", sa.JSON(), nullable=True),
sa.Column("total_revenue", sa.Double(), nullable=True),
sa.Column("created_at", sa.DateTime(timezone=True), nullable=True),
sa.Column("updated_at", sa.DateTime(timezone=True), nullable=True),
sa.ForeignKeyConstraint(["conversion_id"], ["conversions.id"]),
sa.PrimaryKeyConstraint("id"),
)
op.create_index(
op.f("ix_conversion_rooms_arrival_date"),
"conversion_rooms",
["arrival_date"],
unique=False,
)
op.create_index(
op.f("ix_conversion_rooms_conversion_id"),
"conversion_rooms",
["conversion_id"],
unique=False,
)
op.create_index(
op.f("ix_conversion_rooms_departure_date"),
"conversion_rooms",
["departure_date"],
unique=False,
)
op.create_index(
op.f("ix_conversion_rooms_pms_hotel_reservation_id"),
"conversion_rooms",
["pms_hotel_reservation_id"],
unique=False,
)
op.create_index(
op.f("ix_conversion_rooms_room_number"),
"conversion_rooms",
["room_number"],
unique=False,
)
def downgrade() -> None:
"""Downgrade schema."""
op.drop_index(
op.f("ix_conversion_rooms_room_number"), table_name="conversion_rooms"
)
op.drop_index(
op.f("ix_conversion_rooms_pms_hotel_reservation_id"),
table_name="conversion_rooms",
)
op.drop_index(
op.f("ix_conversion_rooms_departure_date"), table_name="conversion_rooms"
)
op.drop_index(
op.f("ix_conversion_rooms_conversion_id"), table_name="conversion_rooms"
)
op.drop_index(
op.f("ix_conversion_rooms_arrival_date"), table_name="conversion_rooms"
)
op.drop_table("conversion_rooms")
op.drop_index(
op.f("ix_conversions_reservation_id"), table_name="conversions"
)
op.drop_index(
op.f("ix_conversions_pms_reservation_id"), table_name="conversions"
)
op.drop_index(
op.f("ix_conversions_guest_id"), table_name="conversions"
)
op.drop_index(
op.f("ix_conversions_hotel_id"), table_name="conversions"
)
op.drop_index(
op.f("ix_conversions_hashed_customer_id"), table_name="conversions"
)
op.drop_index(
op.f("ix_conversions_customer_id"), table_name="conversions"
)
op.drop_index(
op.f("ix_conversions_advertising_partner"), table_name="conversions"
)
op.drop_index(
op.f("ix_conversions_advertising_medium"), table_name="conversions"
)
op.drop_index(
op.f("ix_conversions_advertising_campagne"), table_name="conversions"
)
op.drop_table("conversions")
op.drop_index(
op.f("ix_conversion_guests_hashed_email"), table_name="conversion_guests"
)
op.drop_index(
op.f("ix_conversion_guests_hashed_last_name"), table_name="conversion_guests"
)
op.drop_index(
op.f("ix_conversion_guests_hashed_first_name"), table_name="conversion_guests"
)
op.drop_index(
op.f("ix_conversion_guests_guest_id"), table_name="conversion_guests"
)
op.drop_index(
op.f("ix_conversion_guests_hotel_id"), table_name="conversion_guests"
)
op.drop_table("conversion_guests")

View File

@@ -1,168 +0,0 @@
"""Add ConversionGuest table and link conversions
Revision ID: 70b2579d1d96
Revises: b33fd7a2da6c
Create Date: 2025-11-19 11:56:46.532881
"""
from typing import Sequence, Union
import hashlib
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = '70b2579d1d96'
down_revision: Union[str, Sequence[str], None] = 'b33fd7a2da6c'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def normalize_and_hash(value):
"""Normalize and hash a value for ConversionGuest hashed fields."""
if not value:
return None
normalized = str(value).lower().strip()
return hashlib.sha256(normalized.encode("utf-8")).hexdigest()
def upgrade() -> None:
"""Upgrade schema."""
# ### commands auto generated by Alembic - please adjust! ###
op.create_table('conversion_guests',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('hotel_id', sa.String(), nullable=False),
sa.Column('guest_id', sa.String(), nullable=True),
sa.Column('guest_first_name', sa.String(), nullable=True),
sa.Column('guest_last_name', sa.String(), nullable=True),
sa.Column('guest_email', sa.String(), nullable=True),
sa.Column('guest_country_code', sa.String(), nullable=True),
sa.Column('guest_birth_date', sa.Date(), nullable=True),
sa.Column('hashed_first_name', sa.String(length=64), nullable=True),
sa.Column('hashed_last_name', sa.String(length=64), nullable=True),
sa.Column('hashed_email', sa.String(length=64), nullable=True),
sa.Column('hashed_country_code', sa.String(length=64), nullable=True),
sa.Column('hashed_birth_date', sa.String(length=64), nullable=True),
sa.Column('first_seen', sa.DateTime(timezone=True), nullable=True),
sa.Column('last_seen', sa.DateTime(timezone=True), nullable=True),
sa.PrimaryKeyConstraint('id')
)
op.create_index(op.f('ix_conversion_guests_guest_id'), 'conversion_guests', ['guest_id'], unique=False)
op.create_index(op.f('ix_conversion_guests_hashed_email'), 'conversion_guests', ['hashed_email'], unique=False)
op.create_index(op.f('ix_conversion_guests_hashed_first_name'), 'conversion_guests', ['hashed_first_name'], unique=False)
op.create_index(op.f('ix_conversion_guests_hashed_last_name'), 'conversion_guests', ['hashed_last_name'], unique=False)
op.create_index(op.f('ix_conversion_guests_hotel_id'), 'conversion_guests', ['hotel_id'], unique=False)
op.add_column('conversions', sa.Column('conversion_guest_id', sa.Integer(), nullable=True))
op.create_index(op.f('ix_conversions_conversion_guest_id'), 'conversions', ['conversion_guest_id'], unique=False)
op.create_foreign_key(None, 'conversions', 'conversion_guests', ['conversion_guest_id'], ['id'])
# ### end Alembic commands ###
# Data migration: Migrate existing conversion guest data to ConversionGuest table
connection = op.get_bind()
# Get all conversions grouped by (hotel_id, guest_id), picking the most recent by creation_time
# For guests with NULL guest_id, group by hotel_id only
result = connection.execute(sa.text("""
SELECT
c.hotel_id,
c.guest_id,
c.guest_first_name,
c.guest_last_name,
c.guest_email,
c.guest_country_code,
c.guest_birth_date,
c.creation_time,
ROW_NUMBER() OVER (
PARTITION BY c.hotel_id, c.guest_id
ORDER BY c.creation_time DESC NULLS LAST
) as rn
FROM conversions c
WHERE c.guest_first_name IS NOT NULL
OR c.guest_last_name IS NOT NULL
OR c.guest_email IS NOT NULL
OR c.guest_country_code IS NOT NULL
OR c.guest_birth_date IS NOT NULL
"""))
conversion_guests = {} # Map of (hotel_id, guest_id) -> guest data
for row in result:
hotel_id = row.hotel_id
guest_id = row.guest_id
# Only process the most recent record for each guest
if row.rn != 1:
continue
key = (hotel_id, guest_id)
if key not in conversion_guests:
conversion_guests[key] = {
'hotel_id': hotel_id,
'guest_id': guest_id,
'guest_first_name': row.guest_first_name,
'guest_last_name': row.guest_last_name,
'guest_email': row.guest_email,
'guest_country_code': row.guest_country_code,
'guest_birth_date': row.guest_birth_date,
'first_seen': row.creation_time,
'last_seen': row.creation_time,
}
# Insert conversion guests
if conversion_guests:
for guest_data in conversion_guests.values():
insert_stmt = sa.text("""
INSERT INTO conversion_guests
(hotel_id, guest_id, guest_first_name, guest_last_name, guest_email,
guest_country_code, guest_birth_date, hashed_first_name, hashed_last_name,
hashed_email, hashed_country_code, hashed_birth_date, first_seen, last_seen)
VALUES
(:hotel_id, :guest_id, :guest_first_name, :guest_last_name, :guest_email,
:guest_country_code, :guest_birth_date, :hashed_first_name, :hashed_last_name,
:hashed_email, :hashed_country_code, :hashed_birth_date, :first_seen, :last_seen)
""")
connection.execute(insert_stmt, {
'hotel_id': guest_data['hotel_id'],
'guest_id': guest_data['guest_id'],
'guest_first_name': guest_data['guest_first_name'],
'guest_last_name': guest_data['guest_last_name'],
'guest_email': guest_data['guest_email'],
'guest_country_code': guest_data['guest_country_code'],
'guest_birth_date': guest_data['guest_birth_date'],
'hashed_first_name': normalize_and_hash(guest_data['guest_first_name']),
'hashed_last_name': normalize_and_hash(guest_data['guest_last_name']),
'hashed_email': normalize_and_hash(guest_data['guest_email']),
'hashed_country_code': normalize_and_hash(guest_data['guest_country_code']),
'hashed_birth_date': normalize_and_hash(
guest_data['guest_birth_date'].isoformat() if guest_data['guest_birth_date'] else None
),
'first_seen': guest_data['first_seen'],
'last_seen': guest_data['last_seen'],
})
# Link conversions to conversion_guests based on (hotel_id, guest_id)
update_stmt = sa.text("""
UPDATE conversions c
SET conversion_guest_id = cg.id
FROM conversion_guests cg
WHERE c.hotel_id = cg.hotel_id
AND c.guest_id IS NOT DISTINCT FROM cg.guest_id
""")
connection.execute(update_stmt)
def downgrade() -> None:
"""Downgrade schema."""
# ### commands auto generated by Alembic - please adjust! ###
op.drop_constraint(None, 'conversions', type_='foreignkey')
op.drop_index(op.f('ix_conversions_conversion_guest_id'), table_name='conversions')
op.drop_column('conversions', 'conversion_guest_id')
op.drop_index(op.f('ix_conversion_guests_hotel_id'), table_name='conversion_guests')
op.drop_index(op.f('ix_conversion_guests_hashed_last_name'), table_name='conversion_guests')
op.drop_index(op.f('ix_conversion_guests_hashed_first_name'), table_name='conversion_guests')
op.drop_index(op.f('ix_conversion_guests_hashed_email'), table_name='conversion_guests')
op.drop_index(op.f('ix_conversion_guests_guest_id'), table_name='conversion_guests')
op.drop_table('conversion_guests')
# ### end Alembic commands ###
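
One detail of this deleted data migration worth keeping in mind, since the new schema carries the same hashed_* columns: normalize_and_hash lowercases and strips its input before hashing, so values differing only in case or surrounding whitespace collapse to the same digest, and falsy values map to None. A quick sketch of that behavior:

# Quick sketch; the function body is copied verbatim from the file above.
import hashlib

def normalize_and_hash(value):
    if not value:
        return None
    normalized = str(value).lower().strip()
    return hashlib.sha256(normalized.encode("utf-8")).hexdigest()

assert normalize_and_hash(" John ") == normalize_and_hash("john")
assert normalize_and_hash("") is None
assert normalize_and_hash(None) is None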

View File

@@ -13,7 +13,7 @@ import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = '08fe946414d8'
-down_revision: Union[str, Sequence[str], None] = '70b2579d1d96'
+down_revision: Union[str, Sequence[str], None] = 'a2b3c4d5e6f7'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
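
With b33fd7a2da6c and 70b2579d1d96 deleted, repointing down_revision here keeps the migration history linear: 630b0c367dcb -> a2b3c4d5e6f7 -> 08fe946414d8.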