Migration to guest_table for conversion works

This commit is contained in:
Jonas Linter
2025-11-19 12:05:38 +01:00
parent 55c4b0b9de
commit a087a312a7
4 changed files with 43096 additions and 1 deletions

View File

@@ -0,0 +1,168 @@
"""Add ConversionGuest table and link conversions
Revision ID: 70b2579d1d96
Revises: b33fd7a2da6c
Create Date: 2025-11-19 11:56:46.532881
"""
from typing import Sequence, Union
import hashlib
from alembic import op
import sqlalchemy as sa
# Revision identifiers, used by Alembic to order migrations.
# `revision` is this migration's id; `down_revision` is the parent it applies on
# top of (matches the "Revises:" line in the module docstring).
revision: str = '70b2579d1d96'
down_revision: Union[str, Sequence[str], None] = 'b33fd7a2da6c'
# No branch labels and no cross-branch dependencies are declared for this revision.
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def normalize_and_hash(value):
    """Normalize and hash a value for the ConversionGuest hashed columns.

    The value is converted to ``str``, lower-cased and stripped of surrounding
    whitespace, then hashed with SHA-256.

    Args:
        value: Raw guest attribute (string or anything ``str()``-able), or None.

    Returns:
        The 64-character hex digest, or ``None`` when the value is falsy or
        consists only of whitespace.
    """
    if not value:
        return None
    normalized = str(value).strip().lower()
    # A whitespace-only value carries no information; hashing it would store
    # the SHA-256 of the empty string and make all such guests "match".
    if not normalized:
        return None
    return hashlib.sha256(normalized.encode("utf-8")).hexdigest()
def upgrade() -> None:
    """Upgrade schema and backfill ``conversion_guests`` from ``conversions``.

    Steps:
      1. Create the ``conversion_guests`` table and its indexes.
      2. Add ``conversions.conversion_guest_id`` with an index and a foreign key.
      3. Backfill one guest row per ``(hotel_id, guest_id)`` using the most
         recent conversion (by ``creation_time``) that carries any guest data.
      4. Link every conversion to its guest row via ``(hotel_id, guest_id)``.

    The raw SQL uses PostgreSQL syntax (``NULLS LAST``, ``UPDATE ... FROM``,
    ``IS NOT DISTINCT FROM``).
    """
    # ### commands auto generated by Alembic - please adjust! ###
    op.create_table(
        'conversion_guests',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('hotel_id', sa.String(), nullable=False),
        sa.Column('guest_id', sa.String(), nullable=True),
        sa.Column('guest_first_name', sa.String(), nullable=True),
        sa.Column('guest_last_name', sa.String(), nullable=True),
        sa.Column('guest_email', sa.String(), nullable=True),
        sa.Column('guest_country_code', sa.String(), nullable=True),
        sa.Column('guest_birth_date', sa.Date(), nullable=True),
        sa.Column('hashed_first_name', sa.String(length=64), nullable=True),
        sa.Column('hashed_last_name', sa.String(length=64), nullable=True),
        sa.Column('hashed_email', sa.String(length=64), nullable=True),
        sa.Column('hashed_country_code', sa.String(length=64), nullable=True),
        sa.Column('hashed_birth_date', sa.String(length=64), nullable=True),
        sa.Column('first_seen', sa.DateTime(timezone=True), nullable=True),
        sa.Column('last_seen', sa.DateTime(timezone=True), nullable=True),
        sa.PrimaryKeyConstraint('id'),
    )
    op.create_index(op.f('ix_conversion_guests_guest_id'), 'conversion_guests', ['guest_id'], unique=False)
    op.create_index(op.f('ix_conversion_guests_hashed_email'), 'conversion_guests', ['hashed_email'], unique=False)
    op.create_index(op.f('ix_conversion_guests_hashed_first_name'), 'conversion_guests', ['hashed_first_name'], unique=False)
    op.create_index(op.f('ix_conversion_guests_hashed_last_name'), 'conversion_guests', ['hashed_last_name'], unique=False)
    op.create_index(op.f('ix_conversion_guests_hotel_id'), 'conversion_guests', ['hotel_id'], unique=False)
    op.add_column('conversions', sa.Column('conversion_guest_id', sa.Integer(), nullable=True))
    op.create_index(op.f('ix_conversions_conversion_guest_id'), 'conversions', ['conversion_guest_id'], unique=False)
    # Name the foreign key explicitly: an unnamed constraint cannot be dropped
    # by name in downgrade(). The name equals what PostgreSQL would generate
    # by default for an unnamed FK on this column.
    op.create_foreign_key(
        'conversions_conversion_guest_id_fkey',
        'conversions',
        'conversion_guests',
        ['conversion_guest_id'],
        ['id'],
    )
    # ### end Alembic commands ###

    # Data migration: copy existing guest data from conversions into
    # conversion_guests.
    connection = op.get_bind()

    # One row per (hotel_id, guest_id): the most recent conversion supplies the
    # guest attributes and last_seen; first_seen is the earliest creation_time
    # in the same group. Rows with NULL guest_id fall into a single partition
    # per hotel.
    # NOTE(review): that merges all guests without a PMS guest id of one hotel
    # into a single record, while the runtime service creates a new record per
    # conversion in that case -- confirm this is intended.
    # Conversions without a hotel_id are skipped because
    # conversion_guests.hotel_id is NOT NULL.
    latest_per_guest = connection.execute(sa.text("""
        SELECT
            ranked.hotel_id,
            ranked.guest_id,
            ranked.guest_first_name,
            ranked.guest_last_name,
            ranked.guest_email,
            ranked.guest_country_code,
            ranked.guest_birth_date,
            ranked.first_seen,
            ranked.last_seen
        FROM (
            SELECT
                c.hotel_id,
                c.guest_id,
                c.guest_first_name,
                c.guest_last_name,
                c.guest_email,
                c.guest_country_code,
                c.guest_birth_date,
                c.creation_time AS last_seen,
                MIN(c.creation_time) OVER (
                    PARTITION BY c.hotel_id, c.guest_id
                ) AS first_seen,
                ROW_NUMBER() OVER (
                    PARTITION BY c.hotel_id, c.guest_id
                    ORDER BY c.creation_time DESC NULLS LAST
                ) AS rn
            FROM conversions c
            WHERE c.hotel_id IS NOT NULL
              AND (
                    c.guest_first_name IS NOT NULL
                 OR c.guest_last_name IS NOT NULL
                 OR c.guest_email IS NOT NULL
                 OR c.guest_country_code IS NOT NULL
                 OR c.guest_birth_date IS NOT NULL
              )
        ) ranked
        WHERE ranked.rn = 1
    """))

    guest_rows = []
    for row in latest_per_guest:
        birth_date = row.guest_birth_date
        guest_rows.append({
            'hotel_id': row.hotel_id,
            'guest_id': row.guest_id,
            'guest_first_name': row.guest_first_name,
            'guest_last_name': row.guest_last_name,
            'guest_email': row.guest_email,
            'guest_country_code': row.guest_country_code,
            'guest_birth_date': birth_date,
            'hashed_first_name': normalize_and_hash(row.guest_first_name),
            'hashed_last_name': normalize_and_hash(row.guest_last_name),
            'hashed_email': normalize_and_hash(row.guest_email),
            'hashed_country_code': normalize_and_hash(row.guest_country_code),
            'hashed_birth_date': normalize_and_hash(
                birth_date.isoformat() if birth_date else None
            ),
            'first_seen': row.first_seen,
            'last_seen': row.last_seen,
        })

    if guest_rows:
        # Build the statement once and send all rows in a single executemany.
        insert_stmt = sa.text("""
            INSERT INTO conversion_guests
            (hotel_id, guest_id, guest_first_name, guest_last_name, guest_email,
            guest_country_code, guest_birth_date, hashed_first_name, hashed_last_name,
            hashed_email, hashed_country_code, hashed_birth_date, first_seen, last_seen)
            VALUES
            (:hotel_id, :guest_id, :guest_first_name, :guest_last_name, :guest_email,
            :guest_country_code, :guest_birth_date, :hashed_first_name, :hashed_last_name,
            :hashed_email, :hashed_country_code, :hashed_birth_date, :first_seen, :last_seen)
        """)
        connection.execute(insert_stmt, guest_rows)

    # Link conversions to conversion_guests based on (hotel_id, guest_id).
    # IS NOT DISTINCT FROM makes NULL guest_id values compare equal.
    connection.execute(sa.text("""
        UPDATE conversions c
        SET conversion_guest_id = cg.id
        FROM conversion_guests cg
        WHERE c.hotel_id = cg.hotel_id
        AND c.guest_id IS NOT DISTINCT FROM cg.guest_id
    """))
def downgrade() -> None:
    """Downgrade schema: unlink conversions and drop ``conversion_guests``.

    Removes the foreign key, index and ``conversion_guest_id`` column from
    ``conversions``, then drops the ``conversion_guests`` indexes and table.
    All backfilled guest rows are lost.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    # drop_constraint() needs a concrete name; passing None cannot emit
    # DROP CONSTRAINT. This is the name PostgreSQL assigns by default to an
    # unnamed FK on conversions.conversion_guest_id.
    # NOTE(review): if the project metadata configures a naming convention,
    # confirm the generated name matches.
    op.drop_constraint('conversions_conversion_guest_id_fkey', 'conversions', type_='foreignkey')
    op.drop_index(op.f('ix_conversions_conversion_guest_id'), table_name='conversions')
    op.drop_column('conversions', 'conversion_guest_id')
    op.drop_index(op.f('ix_conversion_guests_hotel_id'), table_name='conversion_guests')
    op.drop_index(op.f('ix_conversion_guests_hashed_last_name'), table_name='conversion_guests')
    op.drop_index(op.f('ix_conversion_guests_hashed_first_name'), table_name='conversion_guests')
    op.drop_index(op.f('ix_conversion_guests_hashed_email'), table_name='conversion_guests')
    op.drop_index(op.f('ix_conversion_guests_guest_id'), table_name='conversion_guests')
    op.drop_table('conversion_guests')
    # ### end Alembic commands ###

File diff suppressed because it is too large Load Diff

View File

@@ -2,7 +2,7 @@
import asyncio
import xml.etree.ElementTree as ET
from datetime import datetime
from datetime import UTC, datetime
from decimal import Decimal
from typing import Any
@@ -12,6 +12,7 @@ from sqlalchemy.orm import selectinload
from .db import (
Conversion,
ConversionGuest,
ConversionRoom,
Customer,
HashedCustomer,
@@ -73,6 +74,70 @@ class ConversionService:
f"session must be AsyncSession or SessionMaker, got {type(session)}"
)
async def _get_or_create_conversion_guest(
self,
hotel_id: str,
guest_id: str | None,
guest_first_name: str | None,
guest_last_name: str | None,
guest_email: str | None,
guest_country_code: str | None,
guest_birth_date,
session: AsyncSession,
) -> ConversionGuest | None:
"""Get or create a ConversionGuest record for the given guest data.
Uses (hotel_id, guest_id) as the natural key to identify a guest.
If a guest with this key exists, updates it with new data.
If not, creates a new guest record.
Returns the ConversionGuest record, or None if no guest data provided.
"""
# Don't create a ConversionGuest if we have no guest information
if not any(
[guest_first_name, guest_last_name, guest_email, guest_country_code, guest_birth_date]
):
return None
now = datetime.now(UTC)
# Try to find existing guest by (hotel_id, guest_id)
if guest_id:
result = await session.execute(
select(ConversionGuest).where(
(ConversionGuest.hotel_id == hotel_id)
& (ConversionGuest.guest_id == guest_id)
)
)
existing_guest = result.scalar_one_or_none()
if existing_guest:
# Update with new data
existing_guest.update_from_conversion_data(
guest_first_name,
guest_last_name,
guest_email,
guest_country_code,
guest_birth_date,
now,
)
return existing_guest
# Create new ConversionGuest
new_guest = ConversionGuest.create_from_conversion_data(
hotel_id=hotel_id,
guest_id=guest_id,
guest_first_name=guest_first_name,
guest_last_name=guest_last_name,
guest_email=guest_email,
guest_country_code=guest_country_code,
guest_birth_date=guest_birth_date,
now=now,
)
session.add(new_guest)
await session.flush() # Ensure the guest has an ID
return new_guest
async def process_conversion_xml(self, xml_content: str) -> dict[str, Any]:
"""Parse conversion XML and save daily sales data to database.
@@ -525,6 +590,20 @@ class ConversionService:
# Flush to ensure conversion has an ID before creating room reservations
await session.flush()
# Create or update ConversionGuest and link it to the conversion
conversion_guest = await self._get_or_create_conversion_guest(
hotel_id=hotel_id,
guest_id=guest_id,
guest_first_name=guest_first_name,
guest_last_name=guest_last_name,
guest_email=guest_email,
guest_country_code=guest_country_code,
guest_birth_date=guest_birth_date,
session=session,
)
if conversion_guest:
conversion.conversion_guest_id = conversion_guest.id
# Update stats for the conversion record itself
if matched_reservation:
stats["matched_to_reservation"] += 1

View File

@@ -364,6 +364,116 @@ class HashedCustomer(Base):
customer = relationship("Customer", backref="hashed_version")
class ConversionGuest(Base):
    """Guest information from hotel PMS conversions, with hashed fields for privacy.

    Stores both unhashed (for reference during transition) and hashed (SHA256 per
    Meta API) versions of guest PII. Multiple conversions can reference the same
    guest if they have the same hotel_id and guest_id (PMS guest identifier).

    When newer guest data arrives for an existing guest, non-empty incoming
    values replace the stored ones and ``last_seen`` is advanced; empty incoming
    values never erase stored data (see ``update_from_conversion_data``).
    """

    __tablename__ = "conversion_guests"

    id = Column(Integer, primary_key=True)

    # Natural key from the PMS: (hotel_id, guest_id).
    # NOTE: no unique constraint is declared on this pair; uniqueness is only
    # maintained by the code that looks a guest up before inserting.
    hotel_id = Column(String, nullable=False, index=True)
    guest_id = Column(String, index=True)  # PMS guest ID (nullable for unidentified guests)

    # Unhashed guest information (for reference/transition period)
    guest_first_name = Column(String)
    guest_last_name = Column(String)
    guest_email = Column(String)
    guest_country_code = Column(String)
    guest_birth_date = Column(Date)

    # Hashed guest information (SHA256 hex digest, 64 chars)
    hashed_first_name = Column(String(64), index=True)
    hashed_last_name = Column(String(64), index=True)
    hashed_email = Column(String(64), index=True)
    hashed_country_code = Column(String(64))
    hashed_birth_date = Column(String(64))

    # Metadata: when this guest record was created / last refreshed
    first_seen = Column(DateTime(timezone=True))
    last_seen = Column(DateTime(timezone=True))

    # Relationships
    conversions = relationship("Conversion", back_populates="guest")

    @staticmethod
    def _normalize_and_hash(value):
        """Normalize (lowercase, strip) and SHA256-hash a value.

        Returns the hex digest, or None when the value is falsy or contains
        only whitespace, so that blank inputs never produce the hash of the
        empty string.
        """
        if not value:
            return None
        normalized = str(value).strip().lower()
        if not normalized:
            return None
        return hashlib.sha256(normalized.encode("utf-8")).hexdigest()

    @classmethod
    def create_from_conversion_data(
        cls,
        hotel_id: str,
        guest_id: str | None,
        guest_first_name: str | None,
        guest_last_name: str | None,
        guest_email: str | None,
        guest_country_code: str | None,
        guest_birth_date: Date | None,
        now: DateTime,
    ):
        """Create a ConversionGuest from conversion guest data.

        Raw values are stored as given; each hashed column holds the normalized
        SHA256 of its raw value (None when the raw value is empty). The birth
        date is hashed from its ``isoformat()`` string. ``first_seen`` and
        ``last_seen`` are both set to ``now``.
        """
        birth_date_text = guest_birth_date.isoformat() if guest_birth_date else None
        return cls(
            hotel_id=hotel_id,
            guest_id=guest_id,
            guest_first_name=guest_first_name,
            guest_last_name=guest_last_name,
            guest_email=guest_email,
            guest_country_code=guest_country_code,
            guest_birth_date=guest_birth_date,
            hashed_first_name=cls._normalize_and_hash(guest_first_name),
            hashed_last_name=cls._normalize_and_hash(guest_last_name),
            hashed_email=cls._normalize_and_hash(guest_email),
            hashed_country_code=cls._normalize_and_hash(guest_country_code),
            hashed_birth_date=cls._normalize_and_hash(birth_date_text),
            first_seen=now,
            last_seen=now,
        )

    def update_from_conversion_data(
        self,
        guest_first_name: str | None,
        guest_last_name: str | None,
        guest_email: str | None,
        guest_country_code: str | None,
        guest_birth_date: Date | None,
        now: DateTime,
    ):
        """Update ConversionGuest with newer guest data, preferring non-null values.

        A field (raw value plus its hash) is only overwritten when the incoming
        value is non-empty after normalization, so missing or blank input never
        erases previously stored data. ``last_seen`` is always set to ``now``.
        """
        # (raw attribute, hashed attribute, new raw value, text to hash)
        candidates = (
            ("guest_first_name", "hashed_first_name", guest_first_name, guest_first_name),
            ("guest_last_name", "hashed_last_name", guest_last_name, guest_last_name),
            ("guest_email", "hashed_email", guest_email, guest_email),
            (
                "guest_country_code",
                "hashed_country_code",
                guest_country_code,
                guest_country_code,
            ),
            (
                "guest_birth_date",
                "hashed_birth_date",
                guest_birth_date,
                guest_birth_date.isoformat() if guest_birth_date else None,
            ),
        )
        for raw_attr, hashed_attr, raw_value, hash_input in candidates:
            digest = self._normalize_and_hash(hash_input)
            if digest is None:
                # Nothing usable supplied for this field; keep the stored value.
                continue
            setattr(self, raw_attr, raw_value)
            setattr(self, hashed_attr, digest)
        self.last_seen = now
class Reservation(Base):
__tablename__ = "reservations"
id = Column(Integer, primary_key=True)
@@ -445,6 +555,9 @@ class Conversion(Base):
hashed_customer_id = Column(
Integer, ForeignKey("hashed_customers.id"), nullable=True, index=True
)
conversion_guest_id = Column(
Integer, ForeignKey("conversion_guests.id"), nullable=True, index=True
)
# Reservation metadata from XML
hotel_id = Column(String, index=True) # hotelID attribute
@@ -482,6 +595,7 @@ class Conversion(Base):
reservation = relationship("Reservation", backref="conversions")
customer = relationship("Customer", backref="conversions")
hashed_customer = relationship("HashedCustomer", backref="conversions")
guest = relationship("ConversionGuest", back_populates="conversions")
conversion_rooms = relationship(
"ConversionRoom", back_populates="conversion", cascade="all, delete-orphan"
)