diff --git a/pyproject.toml b/pyproject.toml index c4c348f..0a5d0f8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,6 +15,12 @@ dependencies = [ "sqlalchemy[asyncio]>=2.0.44", ] +[project.optional-dependencies] +test = [ + "pytest>=8.0.0", + "pytest-asyncio>=0.25.0", +] + [project.scripts] meta-auth = "meta_api_grabber.auth:main" meta-scheduled = "meta_api_grabber.scheduled_grabber:main" diff --git a/src/meta_api_grabber/scheduled_grabber.py b/src/meta_api_grabber/scheduled_grabber.py index 557e3f4..de4fcf6 100644 --- a/src/meta_api_grabber/scheduled_grabber.py +++ b/src/meta_api_grabber/scheduled_grabber.py @@ -26,6 +26,23 @@ from .view_manager import ViewManager logger = logging.getLogger(__name__) +common_fields = [ + AdsInsights.Field.impressions, + AdsInsights.Field.clicks, + AdsInsights.Field.spend, + AdsInsights.Field.cpc, + AdsInsights.Field.cpm, + AdsInsights.Field.ctr, + AdsInsights.Field.cpp, + AdsInsights.Field.reach, + AdsInsights.Field.frequency, + AdsInsights.Field.actions, + AdsInsights.Field.cost_per_action_type, + AdsInsights.Field.date_start, + AdsInsights.Field.date_stop, + ] + + class ScheduledInsightsGrabber: """ Scheduled grabber for Meta ad insights with TimescaleDB storage. @@ -410,6 +427,120 @@ class ScheduledInsightsGrabber: print(f" {count} ad sets cached for {account_id}") + async def _master_grab_insights( + self, + account_id: str, + fields: list, + level: str, + db_insert_func, + date_preset: Optional[str] = None, + start_date: Optional[date] = None, + end_date: Optional[date] = None, + breakdowns: Optional[list] = None, + limit: Optional[int] = None, + cache_metadata: bool = False, + required_fields: Optional[dict] = None, + extra_data_processor=None, + ) -> tuple[int, Optional[date]]: + """ + Master method to grab and store insights at any level. + + Args: + account_id: Ad account ID + fields: List of AdsInsights fields to retrieve + level: Insights level ("account", "campaign", "adset", etc.) 
+ db_insert_func: Database insert function to call for each insight + date_preset: Meta date preset (e.g., "today", "yesterday"). Use either this or start_date/end_date + start_date: Start date for custom date range (optional) + end_date: End date for custom date range (optional) + breakdowns: List of breakdown fields (optional) + limit: Maximum number of results (optional) + cache_metadata: Whether to cache metadata (for campaign/adset levels) + required_fields: Dict of field_name -> label for validation before insert + extra_data_processor: Optional callable to process/add extra data to insight_dict + + Returns: + Tuple of (count of records stored, date_start from insights) + """ + # Build params + params = {"level": level} + + if date_preset: + params["date_preset"] = date_preset + date_preset_for_db = date_preset + else: + # Use time_range for custom date ranges + params["time_range"] = { + "since": start_date.isoformat(), + "until": end_date.isoformat(), + } + params["time_increment"] = 1 # Daily breakdown + date_preset_for_db = "custom" + + if breakdowns: + params["breakdowns"] = breakdowns + + if limit: + params["limit"] = limit + + # Fetch insights from Meta API + ad_account = AdAccount(account_id) + try: + insights = await self._rate_limited_request( + ad_account.get_insights, + fields=fields, + params=params, + ) + except FacebookRequestError as e: + error_code = e.api_error_code() + if error_code in [190, 102]: # Invalid OAuth token errors + raise ValueError(f"Access token is invalid (error {error_code}): {e.api_error_message()}") + raise + + # Get account timezone from database + account_timezone = await self._get_account_timezone(account_id) + + # Store insights + count = 0 + date_start_value = None + + for insight in insights: + insight_dict = dict(insight) + + # Extract date_start if available (for return value) + date_start_str = insight_dict.get("date_start") + if date_start_str and date_start_value is None: + date_start_value = 
date.fromisoformat(date_start_str)
+
+            # Check required fields before processing (skip rows missing any)
+            if required_fields:
+                skip = False
+                for field_name in required_fields:
+                    if not insight_dict.get(field_name):
+                        skip = True
+                        break
+                if skip:
+                    continue
+
+            # Call extra processor if provided
+            if extra_data_processor:
+                extra_data_processor(insight_dict)
+
+            # Compute appropriate timestamp based on date_start and account timezone
+            timestamp = self._compute_timestamp(date_start_str, account_timezone)
+
+            # BUGFIX: forward per-level entity IDs (required_fields maps insight
+            # key -> insert kwarg: campaign_id/adset_id/country) and pass
+            # cache_metadata only when set (not every insert function accepts it).
+            extra_kwargs = {kw: insight_dict[f] for f, kw in (required_fields or {}).items()}
+            if cache_metadata:
+                extra_kwargs["cache_metadata"] = True
+            await db_insert_func(time=timestamp, account_id=account_id, data=insight_dict,
+                                 date_preset=date_preset_for_db, **extra_kwargs)
+            count += 1
+
+        return count, date_start_value
+
     async def grab_account_insights(self, account_id: str, date_preset: str = "today") -> Optional[date]:
         """
         Grab and store account-level insights.
@@ -437,50 +568,13 @@ class ScheduledInsightsGrabber:
             AdsInsights.Field.date_stop,
         ]
 
-        params = {
-            "date_preset": date_preset,
-            "level": "account",
-        }
-
-        ad_account = AdAccount(account_id)
-        try:
-            insights = await self._rate_limited_request(
-                ad_account.get_insights,
-                fields=fields,
-                params=params,
-            )
-        except FacebookRequestError as e:
-            # Check if it's a token error
-            error_code = e.api_error_code()
-            if error_code in [190, 102]:  # Invalid OAuth token errors
-                raise ValueError(f"Access token is invalid (error {error_code}): {e.api_error_message()}")
-            raise
-
-        # Get account timezone from database
-        account_timezone = await self._get_account_timezone(account_id)
-
-        # Store insights
-        count = 0
-        date_start_value = None
-
-        for insight in insights:
-            insight_dict = dict(insight)
-
-            # Extract date_start if available
-            date_start_str = insight_dict.get("date_start")
-            if date_start_str and date_start_value is None:
-                date_start_value = date.fromisoformat(date_start_str)
-
-            # Compute appropriate timestamp based on date_start and account timezone
-
timestamp = self._compute_timestamp(date_start_str, account_timezone) - - await self.db.insert_account_insights( - time=timestamp, - account_id=account_id, - data=insight_dict, - date_preset=date_preset, - ) - count += 1 + count, date_start_value = await self._master_grab_insights( + account_id=account_id, + fields=fields, + level="account", + db_insert_func=self.db.insert_account_insights, + date_preset=date_preset, + ) print(f" Account insights stored for {account_id} ({count} records, date: {date_start_value})") return date_start_value @@ -494,64 +588,23 @@ class ScheduledInsightsGrabber: date_preset: Meta date preset limit: Maximum number of campaigns """ - fields = [ + + fields = common_fields + [ AdsInsights.Field.campaign_id, AdsInsights.Field.campaign_name, - AdsInsights.Field.impressions, - AdsInsights.Field.clicks, - AdsInsights.Field.spend, - AdsInsights.Field.ctr, - AdsInsights.Field.cpc, - AdsInsights.Field.cpm, - AdsInsights.Field.reach, - AdsInsights.Field.actions, - AdsInsights.Field.date_start, - AdsInsights.Field.date_stop, ] - params = { - "date_preset": date_preset, - "level": "campaign", - "limit": limit, - } - ad_account = AdAccount(account_id) - try: - insights = await self._rate_limited_request( - ad_account.get_insights, - fields=fields, - params=params, - ) - except FacebookRequestError as e: - error_code = e.api_error_code() - if error_code in [190, 102]: - raise ValueError(f"Access token is invalid (error {error_code}): {e.api_error_message()}") - raise - - # Get account timezone from database - account_timezone = await self._get_account_timezone(account_id) - - # Store insights (metadata is automatically cached from insights data) - count = 0 - for insight in insights: - campaign_id = insight.get('campaign_id') - if campaign_id: - insight_dict = dict(insight) - - # Compute appropriate timestamp based on date_preset and account timezone - date_start_str = insight_dict.get("date_start") - timestamp = 
self._compute_timestamp(date_start_str, account_timezone) - - # Insert insights - metadata is automatically cached from the insights data - await self.db.insert_campaign_insights( - time=timestamp, - campaign_id=campaign_id, - account_id=account_id, - data=insight_dict, - date_preset=date_preset, - cache_metadata=True, # Automatically cache campaign name from insights - ) - count += 1 + count, _ = await self._master_grab_insights( + account_id=account_id, + fields=fields, + level="campaign", + db_insert_func=self.db.insert_campaign_insights, + date_preset=date_preset, + limit=limit, + cache_metadata=True, + required_fields={"campaign_id": "campaign_id"}, + ) print(f" Campaign insights stored for {account_id} ({count} records)") @@ -580,51 +633,16 @@ class ScheduledInsightsGrabber: AdsInsights.Field.date_stop, ] - params = { - "date_preset": date_preset, - "level": "adset", - "limit": limit, - } - - ad_account = AdAccount(account_id) - try: - insights = await self._rate_limited_request( - ad_account.get_insights, - fields=fields, - params=params, - ) - except FacebookRequestError as e: - error_code = e.api_error_code() - if error_code in [190, 102]: - raise ValueError(f"Access token is invalid (error {error_code}): {e.api_error_message()}") - raise - - # Get account timezone from database - account_timezone = await self._get_account_timezone(account_id) - - # Store insights (metadata is automatically cached from insights data) - count = 0 - for insight in insights: - adset_id = insight.get('adset_id') - campaign_id = insight.get('campaign_id') - if adset_id and campaign_id: - insight_dict = dict(insight) - - # Compute appropriate timestamp based on date_preset and account timezone - date_start_str = insight_dict.get("date_start") - timestamp = self._compute_timestamp(date_start_str, account_timezone) - - # Insert insights - metadata is automatically cached from the insights data - await self.db.insert_adset_insights( - time=timestamp, - adset_id=adset_id, - 
campaign_id=campaign_id, - account_id=account_id, - data=insight_dict, - date_preset=date_preset, - cache_metadata=True, # Automatically cache adset/campaign from insights - ) - count += 1 + count, _ = await self._master_grab_insights( + account_id=account_id, + fields=fields, + level="adset", + db_insert_func=self.db.insert_adset_insights, + date_preset=date_preset, + limit=limit, + cache_metadata=True, + required_fields={"adset_id": "adset_id", "campaign_id": "campaign_id"}, + ) print(f" Ad set insights stored for {account_id} ({count} records)") @@ -637,66 +655,21 @@ class ScheduledInsightsGrabber: date_preset: Meta date preset limit: Maximum number of campaigns """ - fields = [ + fields = common_fields + [ AdsInsights.Field.campaign_id, AdsInsights.Field.campaign_name, - AdsInsights.Field.impressions, - AdsInsights.Field.clicks, - AdsInsights.Field.spend, - AdsInsights.Field.ctr, - AdsInsights.Field.cpc, - AdsInsights.Field.cpm, - AdsInsights.Field.reach, - AdsInsights.Field.actions, - AdsInsights.Field.date_start, - AdsInsights.Field.date_stop, ] - params = { - "date_preset": date_preset, - "level": "campaign", - "breakdowns": [AdsInsights.Breakdowns.country], - "limit": limit, - } - - ad_account = AdAccount(account_id) - try: - insights = await self._rate_limited_request( - ad_account.get_insights, - fields=fields, - params=params, - ) - except FacebookRequestError as e: - error_code = e.api_error_code() - if error_code in [190, 102]: - raise ValueError(f"Access token is invalid (error {error_code}): {e.api_error_message()}") - raise - - # Get account timezone from database - account_timezone = await self._get_account_timezone(account_id) - - # Store insights (metadata is automatically cached from insights data) - count = 0 - for insight in insights: - campaign_id = insight.get('campaign_id') - country = insight.get('country') - if campaign_id and country: - insight_dict = dict(insight) - - # Compute appropriate timestamp based on date_preset and account 
timezone - date_start_str = insight_dict.get("date_start") - timestamp = self._compute_timestamp(date_start_str, account_timezone) - - # Insert insights - metadata is automatically cached from the insights data - await self.db.insert_campaign_insights_by_country( - time=timestamp, - campaign_id=campaign_id, - account_id=account_id, - country=country, - data=insight_dict, - date_preset=date_preset, - ) - count += 1 + count, _ = await self._master_grab_insights( + account_id=account_id, + fields=fields, + level="campaign", + db_insert_func=self.db.insert_campaign_insights_by_country, + date_preset=date_preset, + breakdowns=[AdsInsights.Breakdowns.country], + limit=limit, + required_fields={"campaign_id": "campaign_id", "country": "country"}, + ) print(f" Campaign insights by country stored for {account_id} ({count} records)") @@ -733,48 +706,14 @@ class ScheduledInsightsGrabber: AdsInsights.Field.date_stop, ] - # Use time_range instead of date_preset for custom date ranges - params = { - "time_range": { - "since": start_date.isoformat(), - "until": end_date.isoformat(), - }, - "level": "account", - "time_increment": 1, # Daily breakdown - } - - ad_account = AdAccount(account_id) - try: - insights = await self._rate_limited_request( - ad_account.get_insights, - fields=fields, - params=params, - ) - except FacebookRequestError as e: - error_code = e.api_error_code() - if error_code in [190, 102]: - raise ValueError(f"Access token is invalid (error {error_code}): {e.api_error_message()}") - raise - - # Get account timezone from database - account_timezone = await self._get_account_timezone(account_id) - - # Store insights - count = 0 - for insight in insights: - insight_dict = dict(insight) - - # Compute appropriate timestamp based on date_start and account timezone - date_start_str = insight_dict.get("date_start") - timestamp = self._compute_timestamp(date_start_str, account_timezone) - - await self.db.insert_account_insights( - time=timestamp, - account_id=account_id, 
- data=insight_dict, - date_preset="custom", # Indicate this was a custom date range - ) - count += 1 + count, _ = await self._master_grab_insights( + account_id=account_id, + fields=fields, + level="account", + db_insert_func=self.db.insert_account_insights, + start_date=start_date, + end_date=end_date, + ) return count @@ -812,51 +751,17 @@ class ScheduledInsightsGrabber: AdsInsights.Field.date_stop, ] - params = { - "time_range": { - "since": start_date.isoformat(), - "until": end_date.isoformat(), - }, - "level": "campaign", - "time_increment": 1, # Daily breakdown - "limit": limit, - } - - ad_account = AdAccount(account_id) - try: - insights = await self._rate_limited_request( - ad_account.get_insights, - fields=fields, - params=params, - ) - except FacebookRequestError as e: - error_code = e.api_error_code() - if error_code in [190, 102]: - raise ValueError(f"Access token is invalid (error {error_code}): {e.api_error_message()}") - raise - - # Get account timezone from database - account_timezone = await self._get_account_timezone(account_id) - - # Store insights - count = 0 - for insight in insights: - campaign_id = insight.get('campaign_id') - if campaign_id: - insight_dict = dict(insight) - - date_start_str = insight_dict.get("date_start") - timestamp = self._compute_timestamp(date_start_str, account_timezone) - - await self.db.insert_campaign_insights( - time=timestamp, - campaign_id=campaign_id, - account_id=account_id, - data=insight_dict, - date_preset="custom", - cache_metadata=True, - ) - count += 1 + count, _ = await self._master_grab_insights( + account_id=account_id, + fields=fields, + level="campaign", + db_insert_func=self.db.insert_campaign_insights, + start_date=start_date, + end_date=end_date, + limit=limit, + cache_metadata=True, + required_fields={"campaign_id": "campaign_id"}, + ) return count @@ -895,53 +800,17 @@ class ScheduledInsightsGrabber: AdsInsights.Field.date_stop, ] - params = { - "time_range": { - "since": 
start_date.isoformat(), - "until": end_date.isoformat(), - }, - "level": "adset", - "time_increment": 1, # Daily breakdown - "limit": limit, - } - - ad_account = AdAccount(account_id) - try: - insights = await self._rate_limited_request( - ad_account.get_insights, - fields=fields, - params=params, - ) - except FacebookRequestError as e: - error_code = e.api_error_code() - if error_code in [190, 102]: - raise ValueError(f"Access token is invalid (error {error_code}): {e.api_error_message()}") - raise - - # Get account timezone from database - account_timezone = await self._get_account_timezone(account_id) - - # Store insights - count = 0 - for insight in insights: - adset_id = insight.get('adset_id') - campaign_id = insight.get('campaign_id') - if adset_id and campaign_id: - insight_dict = dict(insight) - - date_start_str = insight_dict.get("date_start") - timestamp = self._compute_timestamp(date_start_str, account_timezone) - - await self.db.insert_adset_insights( - time=timestamp, - adset_id=adset_id, - campaign_id=campaign_id, - account_id=account_id, - data=insight_dict, - date_preset="custom", - cache_metadata=True, - ) - count += 1 + count, _ = await self._master_grab_insights( + account_id=account_id, + fields=fields, + level="adset", + db_insert_func=self.db.insert_adset_insights, + start_date=start_date, + end_date=end_date, + limit=limit, + cache_metadata=True, + required_fields={"adset_id": "adset_id", "campaign_id": "campaign_id"}, + ) return count diff --git a/src/meta_api_grabber/view_manager.py b/src/meta_api_grabber/view_manager.py index 1aec1dc..6ef3f86 100644 --- a/src/meta_api_grabber/view_manager.py +++ b/src/meta_api_grabber/view_manager.py @@ -92,6 +92,7 @@ class ViewManager: "adset_insights_flattened", "account_insights_flattened", "campaign_insights_flattened", + "campaign_insights_by_country_flattened", ] async with self.pool.acquire() as conn: diff --git a/tests/README.md b/tests/README.md new file mode 100644 index 0000000..7ef383a --- 
/dev/null +++ b/tests/README.md @@ -0,0 +1,58 @@ +# Tests + +This directory contains tests for the meta_api_grabber project. + +## Running Tests + +Install test dependencies: +```bash +uv sync --extra test +``` + +Run all tests: +```bash +uv run pytest +``` + +Run specific test file: +```bash +uv run pytest tests/test_field_schema_validation.py -v +``` + +Run with coverage: +```bash +uv run pytest --cov=meta_api_grabber +``` + +## Test Files + +### `test_field_schema_validation.py` +Validates that all fields requested by the grab_* methods in `scheduled_grabber.py` exist in the database schema. This ensures: +- Field compatibility between Meta API and database +- Early detection of schema mismatches +- Consistency across all insight levels (account, campaign, adset, country) + +**Why this test is important:** When new fields are added to the Meta API field lists, this test quickly alerts you if the corresponding database columns need to be added. + +## Writing Tests + +Use markers to categorize tests: +```python +@pytest.mark.unit +def test_something(): + pass + +@pytest.mark.integration +async def test_database_connection(): + pass +``` + +Run only unit tests: +```bash +uv run pytest -m unit +``` + +Run everything except integration tests: +```bash +uv run pytest -m "not integration" +``` diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..b213d57 --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1 @@ +"""Tests for meta_api_grabber package.""" diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..f98deef --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,13 @@ +"""Pytest configuration and fixtures.""" + +import pytest + + +def pytest_configure(config): + """Configure pytest.""" + config.addinivalue_line( + "markers", "integration: marks tests as integration tests (deselect with '-m \"not integration\"')" + ) + config.addinivalue_line( + "markers", "unit: marks tests as unit tests" + ) diff --git 
a/tests/test_field_schema_validation.py b/tests/test_field_schema_validation.py new file mode 100644 index 0000000..d13d914 --- /dev/null +++ b/tests/test_field_schema_validation.py @@ -0,0 +1,245 @@ +""" +Test that validates all fields requested by grab_* methods exist in the database schema. + +This test ensures that whenever new fields are added to the Meta API field lists, +the corresponding database columns exist. It catches schema mismatches early. +""" + +import pytest +from facebook_business.adobjects.adsinsights import AdsInsights + + +# Database schema field mappings +# Maps API field names to database column names +FIELD_MAPPINGS = { + # Core metrics + "impressions": "impressions", + "clicks": "clicks", + "spend": "spend", + "reach": "reach", + "frequency": "frequency", + + # Calculated metrics + "ctr": "ctr", + "cpc": "cpc", + "cpm": "cpm", + "cpp": "cpp", + + # Actions and costs + "actions": "actions", + "cost_per_action_type": "cost_per_action_type", + + # Date/time fields + "date_start": "date_start", + "date_stop": "date_stop", + + # ID fields (not stored in insights tables, but referenced) + "campaign_id": "referenced_in_campaigns", + "campaign_name": "referenced_in_campaigns", + "adset_id": "referenced_in_adsets", + "adset_name": "referenced_in_adsets", + "country": "country", +} + +# Table schemas +TABLE_SCHEMAS = { + "account_insights": { + "impressions", "clicks", "spend", "reach", "frequency", + "ctr", "cpc", "cpm", "cpp", "actions", "cost_per_action_type", + "date_start", "date_stop", "date_preset", "fetched_at" + }, + "campaign_insights": { + "impressions", "clicks", "spend", "reach", + "ctr", "cpc", "cpm", "actions", + "date_start", "date_stop", "date_preset", "fetched_at", + "campaign_id", "account_id" + }, + "adset_insights": { + "impressions", "clicks", "spend", "reach", + "ctr", "cpc", "cpm", "actions", + "date_start", "date_stop", "date_preset", "fetched_at", + "adset_id", "campaign_id", "account_id" + }, + 
"campaign_insights_by_country": { + "impressions", "clicks", "spend", "reach", + "ctr", "cpc", "cpm", "actions", + "date_start", "date_stop", "date_preset", "fetched_at", + "campaign_id", "account_id", "country" + } +} + + +def get_field_value(field_obj) -> str: + """Extract field name from AdsInsights.Field object.""" + # AdsInsights.Field attributes are simple string values + return str(field_obj) + + +class TestFieldSchemaValidation: + """Validate that all API field requests have corresponding database columns.""" + + def test_account_insights_fields(self): + """Test that account insights fields exist in schema.""" + fields = [ + AdsInsights.Field.impressions, + AdsInsights.Field.clicks, + AdsInsights.Field.spend, + AdsInsights.Field.cpc, + AdsInsights.Field.cpm, + AdsInsights.Field.ctr, + AdsInsights.Field.cpp, + AdsInsights.Field.reach, + AdsInsights.Field.frequency, + AdsInsights.Field.actions, + AdsInsights.Field.cost_per_action_type, + AdsInsights.Field.date_start, + AdsInsights.Field.date_stop, + ] + + schema_fields = TABLE_SCHEMAS["account_insights"] + for field in fields: + field_name = get_field_value(field) + assert field_name in FIELD_MAPPINGS, f"Field '{field_name}' not in FIELD_MAPPINGS" + db_column = FIELD_MAPPINGS[field_name] + + # Skip reference checks for ID fields + if "referenced_in" not in db_column: + assert db_column in schema_fields, \ + f"Account insights field '{field_name}' (DB: '{db_column}') not in schema" + + def test_campaign_insights_fields(self): + """Test that campaign insights fields exist in schema.""" + fields = [ + AdsInsights.Field.campaign_id, + AdsInsights.Field.campaign_name, + AdsInsights.Field.impressions, + AdsInsights.Field.clicks, + AdsInsights.Field.spend, + AdsInsights.Field.ctr, + AdsInsights.Field.cpc, + AdsInsights.Field.cpm, + AdsInsights.Field.reach, + AdsInsights.Field.actions, + AdsInsights.Field.date_start, + AdsInsights.Field.date_stop, + ] + + schema_fields = TABLE_SCHEMAS["campaign_insights"] + for field 
in fields: + field_name = get_field_value(field) + assert field_name in FIELD_MAPPINGS, f"Field '{field_name}' not in FIELD_MAPPINGS" + db_column = FIELD_MAPPINGS[field_name] + + # Skip reference checks for ID/name fields + if "referenced_in" not in db_column: + assert db_column in schema_fields, \ + f"Campaign insights field '{field_name}' (DB: '{db_column}') not in schema" + + def test_adset_insights_fields(self): + """Test that adset insights fields exist in schema.""" + fields = [ + AdsInsights.Field.adset_id, + AdsInsights.Field.adset_name, + AdsInsights.Field.campaign_id, + AdsInsights.Field.impressions, + AdsInsights.Field.clicks, + AdsInsights.Field.spend, + AdsInsights.Field.ctr, + AdsInsights.Field.cpc, + AdsInsights.Field.cpm, + AdsInsights.Field.reach, + AdsInsights.Field.actions, + AdsInsights.Field.date_start, + AdsInsights.Field.date_stop, + ] + + schema_fields = TABLE_SCHEMAS["adset_insights"] + for field in fields: + field_name = get_field_value(field) + assert field_name in FIELD_MAPPINGS, f"Field '{field_name}' not in FIELD_MAPPINGS" + db_column = FIELD_MAPPINGS[field_name] + + # Skip reference checks for ID/name fields + if "referenced_in" not in db_column: + assert db_column in schema_fields, \ + f"Adset insights field '{field_name}' (DB: '{db_column}') not in schema" + + def test_campaign_insights_by_country_fields(self): + """Test that campaign insights by country fields exist in schema.""" + fields = [ + AdsInsights.Field.campaign_id, + AdsInsights.Field.campaign_name, + AdsInsights.Field.impressions, + AdsInsights.Field.clicks, + AdsInsights.Field.spend, + AdsInsights.Field.ctr, + AdsInsights.Field.cpc, + AdsInsights.Field.cpm, + AdsInsights.Field.reach, + AdsInsights.Field.actions, + AdsInsights.Field.date_start, + AdsInsights.Field.date_stop, + ] + + schema_fields = TABLE_SCHEMAS["campaign_insights_by_country"] + for field in fields: + field_name = get_field_value(field) + assert field_name in FIELD_MAPPINGS, f"Field '{field_name}' not in 
FIELD_MAPPINGS" + db_column = FIELD_MAPPINGS[field_name] + + # Skip reference checks for ID/name fields + if "referenced_in" not in db_column: + assert db_column in schema_fields, \ + f"Campaign by country insights field '{field_name}' (DB: '{db_column}') not in schema" + + # Country breakdown field + assert "country" in schema_fields, "Country field missing in campaign_insights_by_country schema" + + def test_common_fields_consistency(self): + """Test that common_fields are consistent across all methods.""" + from meta_api_grabber.scheduled_grabber import common_fields + + # Verify common_fields is defined and contains expected metrics + expected_metrics = { + "impressions", "clicks", "spend", "cpc", "cpm", "ctr", "cpp", + "reach", "frequency", "actions", "cost_per_action_type", + "date_start", "date_stop" + } + + common_field_names = {get_field_value(f) for f in common_fields} + + for metric in expected_metrics: + assert metric in common_field_names, \ + f"Common metric '{metric}' not found in common_fields" + + def test_all_table_schemas_valid(self): + """Test that all table schemas are properly defined.""" + required_tables = { + "account_insights", + "campaign_insights", + "adset_insights", + "campaign_insights_by_country" + } + + for table in required_tables: + assert table in TABLE_SCHEMAS, f"Table '{table}' not defined in TABLE_SCHEMAS" + assert len(TABLE_SCHEMAS[table]) > 0, f"Table '{table}' has no fields defined" + + +class TestSchemaDocumentation: + """Document the expected schema structure for reference.""" + + def test_schema_documentation(self): + """Print out the schema for verification purposes.""" + print("\n" + "="*80) + print("DATABASE SCHEMA DOCUMENTATION") + print("="*80) + + for table, fields in TABLE_SCHEMAS.items(): + print(f"\nTable: {table}") + print(f"Columns: {sorted(fields)}") + print(f"Total columns: {len(fields)}") + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/uv.lock b/uv.lock index e919d6a..ec5d39f 
100644 --- a/uv.lock +++ b/uv.lock @@ -193,6 +193,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0a/4c/925909008ed5a988ccbb72dcc897407e5d6d3bd72410d69e051fc0c14647/charset_normalizer-3.4.4-py3-none-any.whl", hash = "sha256:7a32c560861a02ff789ad905a2fe94e3f840803362c84fecf1851cb4cf3dc37f", size = 53402, upload-time = "2025-10-14T04:42:31.76Z" }, ] +[[package]] +name = "colorama" +version = "0.4.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" }, +] + [[package]] name = "curlify" version = "3.0.0" @@ -382,6 +391,8 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ee/43/3cecdc0349359e1a527cbf2e3e28e5f8f06d3343aaf82ca13437a9aa290f/greenlet-3.2.4-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:23768528f2911bcd7e475210822ffb5254ed10d71f4028387e5a99b4c6699671", size = 610497, upload-time = "2025-08-07T13:18:31.636Z" }, { url = "https://files.pythonhosted.org/packages/b8/19/06b6cf5d604e2c382a6f31cafafd6f33d5dea706f4db7bdab184bad2b21d/greenlet-3.2.4-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:00fadb3fedccc447f517ee0d3fd8fe49eae949e1cd0f6a611818f4f6fb7dc83b", size = 1121662, upload-time = "2025-08-07T13:42:41.117Z" }, { url = "https://files.pythonhosted.org/packages/a2/15/0d5e4e1a66fab130d98168fe984c509249c833c1a3c16806b90f253ce7b9/greenlet-3.2.4-cp313-cp313-musllinux_1_1_x86_64.whl", hash = 
"sha256:d25c5091190f2dc0eaa3f950252122edbbadbb682aa7b1ef2f8af0f8c0afefae", size = 1149210, upload-time = "2025-08-07T13:18:24.072Z" }, + { url = "https://files.pythonhosted.org/packages/1c/53/f9c440463b3057485b8594d7a638bed53ba531165ef0ca0e6c364b5cc807/greenlet-3.2.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6e343822feb58ac4d0a1211bd9399de2b3a04963ddeec21530fc426cc121f19b", size = 1564759, upload-time = "2025-11-04T12:42:19.395Z" }, + { url = "https://files.pythonhosted.org/packages/47/e4/3bb4240abdd0a8d23f4f88adec746a3099f0d86bfedb623f063b2e3b4df0/greenlet-3.2.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ca7f6f1f2649b89ce02f6f229d7c19f680a6238af656f61e0115b24857917929", size = 1634288, upload-time = "2025-11-04T12:42:21.174Z" }, { url = "https://files.pythonhosted.org/packages/0b/55/2321e43595e6801e105fcfdee02b34c0f996eb71e6ddffca6b10b7e1d771/greenlet-3.2.4-cp313-cp313-win_amd64.whl", hash = "sha256:554b03b6e73aaabec3745364d6239e9e012d64c68ccd0b8430c64ccc14939a8b", size = 299685, upload-time = "2025-08-07T13:24:38.824Z" }, { url = "https://files.pythonhosted.org/packages/22/5c/85273fd7cc388285632b0498dbbab97596e04b154933dfe0f3e68156c68c/greenlet-3.2.4-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:49a30d5fda2507ae77be16479bdb62a660fa51b1eb4928b524975b3bde77b3c0", size = 273586, upload-time = "2025-08-07T13:16:08.004Z" }, { url = "https://files.pythonhosted.org/packages/d1/75/10aeeaa3da9332c2e761e4c50d4c3556c21113ee3f0afa2cf5769946f7a3/greenlet-3.2.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:299fd615cd8fc86267b47597123e3f43ad79c9d8a22bebdce535e53550763e2f", size = 686346, upload-time = "2025-08-07T13:42:59.944Z" }, @@ -389,6 +400,8 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/dc/8b/29aae55436521f1d6f8ff4e12fb676f3400de7fcf27fccd1d4d17fd8fecd/greenlet-3.2.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = 
"sha256:b4a1870c51720687af7fa3e7cda6d08d801dae660f75a76f3845b642b4da6ee1", size = 694659, upload-time = "2025-08-07T13:53:17.759Z" }, { url = "https://files.pythonhosted.org/packages/92/2e/ea25914b1ebfde93b6fc4ff46d6864564fba59024e928bdc7de475affc25/greenlet-3.2.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:061dc4cf2c34852b052a8620d40f36324554bc192be474b9e9770e8c042fd735", size = 695355, upload-time = "2025-08-07T13:18:34.517Z" }, { url = "https://files.pythonhosted.org/packages/72/60/fc56c62046ec17f6b0d3060564562c64c862948c9d4bc8aa807cf5bd74f4/greenlet-3.2.4-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:44358b9bf66c8576a9f57a590d5f5d6e72fa4228b763d0e43fee6d3b06d3a337", size = 657512, upload-time = "2025-08-07T13:18:33.969Z" }, + { url = "https://files.pythonhosted.org/packages/23/6e/74407aed965a4ab6ddd93a7ded3180b730d281c77b765788419484cdfeef/greenlet-3.2.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:2917bdf657f5859fbf3386b12d68ede4cf1f04c90c3a6bc1f013dd68a22e2269", size = 1612508, upload-time = "2025-11-04T12:42:23.427Z" }, + { url = "https://files.pythonhosted.org/packages/0d/da/343cd760ab2f92bac1845ca07ee3faea9fe52bee65f7bcb19f16ad7de08b/greenlet-3.2.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:015d48959d4add5d6c9f6c5210ee3803a830dce46356e3bc326d6776bde54681", size = 1680760, upload-time = "2025-11-04T12:42:25.341Z" }, { url = "https://files.pythonhosted.org/packages/e3/a5/6ddab2b4c112be95601c13428db1d8b6608a8b6039816f2ba09c346c08fc/greenlet-3.2.4-cp314-cp314-win_amd64.whl", hash = "sha256:e37ab26028f12dbb0ff65f29a8d3d44a765c61e729647bf2ddfbbed621726f01", size = 303425, upload-time = "2025-08-07T13:32:27.59Z" }, ] @@ -446,6 +459,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 
71008, upload-time = "2025-10-12T14:55:18.883Z" }, ] +[[package]] +name = "iniconfig" +version = "2.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503, upload-time = "2025-10-18T21:55:43.219Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" }, +] + [[package]] name = "mako" version = "1.3.10" @@ -525,6 +547,12 @@ dependencies = [ { name = "sqlalchemy", extra = ["asyncio"] }, ] +[package.optional-dependencies] +test = [ + { name = "pytest" }, + { name = "pytest-asyncio" }, +] + [package.metadata] requires-dist = [ { name = "aiohttp", specifier = ">=3.13.1" }, @@ -532,10 +560,13 @@ requires-dist = [ { name = "asyncpg", specifier = ">=0.30.0" }, { name = "facebook-business", specifier = ">=23.0.3" }, { name = "google-ads", specifier = ">=28.3.0" }, + { name = "pytest", marker = "extra == 'test'", specifier = ">=8.0.0" }, + { name = "pytest-asyncio", marker = "extra == 'test'", specifier = ">=0.25.0" }, { name = "python-dotenv", specifier = ">=1.1.1" }, { name = "requests-oauthlib", specifier = ">=2.0.0" }, { name = "sqlalchemy", extras = ["asyncio"], specifier = ">=2.0.44" }, ] +provides-extras = ["test"] [[package]] name = "multidict" @@ -627,6 +658,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/be/9c/92789c596b8df838baa98fa71844d84283302f7604ed565dafe5a6b5041a/oauthlib-3.3.1-py3-none-any.whl", hash = "sha256:88119c938d2b8fb88561af5f6ee0eec8cc8d552b7bb1f712743136eb7523b7a1", size = 160065, upload-time = "2025-06-19T22:48:06.508Z" }, ] +[[package]] +name 
= "packaging" +version = "25.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a1/d4/1fc4078c65507b51b96ca8f8c3ba19e6a61c8253c72794544580a7b6c24d/packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f", size = 165727, upload-time = "2025-04-19T11:48:59.673Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" }, +] + +[[package]] +name = "pluggy" +version = "1.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, +] + [[package]] name = "propcache" version = "0.4.1" @@ -753,6 +802,43 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b1/ec/1fb891d8a2660716aadb2143235481d15ed1cbfe3ad669194690b0604492/pycountry-24.6.1-py3-none-any.whl", hash = "sha256:f1a4fb391cd7214f8eefd39556d740adcc233c778a27f8942c8dca351d6ce06f", size = 6335189, upload-time = "2024-06-01T04:11:49.711Z" }, ] +[[package]] +name = "pygments" +version = "2.19.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631, upload-time = "2025-06-21T13:39:12.283Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" }, +] + +[[package]] +name = "pytest" +version = "8.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "iniconfig" }, + { name = "packaging" }, + { name = "pluggy" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a3/5c/00a0e072241553e1a7496d638deababa67c5058571567b92a7eaa258397c/pytest-8.4.2.tar.gz", hash = "sha256:86c0d0b93306b961d58d62a4db4879f27fe25513d4b969df351abdddb3c30e01", size = 1519618, upload-time = "2025-09-04T14:34:22.711Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a8/a4/20da314d277121d6534b3a980b29035dcd51e6744bd79075a6ce8fa4eb8d/pytest-8.4.2-py3-none-any.whl", hash = "sha256:872f880de3fc3a5bdc88a11b39c9710c3497a547cfa9320bc3c5e62fbf272e79", size = 365750, upload-time = "2025-09-04T14:34:20.226Z" }, +] + +[[package]] +name = "pytest-asyncio" +version = "1.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/42/86/9e3c5f48f7b7b638b216e4b9e645f54d199d7abbbab7a64a13b4e12ba10f/pytest_asyncio-1.2.0.tar.gz", hash = "sha256:c609a64a2a8768462d0c99811ddb8bd2583c33fd33cf7f21af1c142e824ffb57", size = 50119, upload-time = "2025-09-12T07:33:53.816Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/04/93/2fa34714b7a4ae72f2f8dad66ba17dd9a2c793220719e736dda28b7aec27/pytest_asyncio-1.2.0-py3-none-any.whl", hash = "sha256:8e17ae5e46d8e7efe51ab6494dd2010f4ca8dae51652aa3c8d55acf50bfb2e99", size = 15095, upload-time = "2025-09-12T07:33:52.639Z" }, +] + [[package]] name = "python-dotenv" version = "1.1.1"