Simplified insight grabbers

This commit is contained in:
Jonas Linter
2025-11-10 11:27:54 +01:00
parent 03ae7ea61a
commit 511f381ff2
8 changed files with 611 additions and 332 deletions

58
tests/README.md Normal file
View File

@@ -0,0 +1,58 @@
# Tests
This directory contains tests for the meta_api_grabber project.
## Running Tests
Install test dependencies:
```bash
uv sync --extra test
```
Run all tests:
```bash
uv run pytest
```
Run a specific test file:
```bash
uv run pytest tests/test_field_schema_validation.py -v
```
Run with coverage:
```bash
uv run pytest --cov=meta_api_grabber
```
## Test Files
### `test_field_schema_validation.py`
Validates that all fields requested by the grab_* methods in `scheduled_grabber.py` exist in the database schema. This ensures:
- Field compatibility between Meta API and database
- Early detection of schema mismatches
- Consistency across all insight levels (account, campaign, adset, country)
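**Why this test is important:** When new fields are added to the Meta API field lists, this test quickly alerts you if the corresponding database columns need to be added. Note that the test module keeps its own copies of the requested field lists and table schemas, so update them in the test file whenever `scheduled_grabber.py` or the database schema changes.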
## Writing Tests
Use markers to categorize tests:
```python
@pytest.mark.unit
def test_something():
    pass

@pytest.mark.integration
async def test_database_connection():
    pass
```
Run only unit tests:
```bash
uv run pytest -m unit
```
Run everything except integration tests:
```bash
uv run pytest -m "not integration"
```

1
tests/__init__.py Normal file
View File

@@ -0,0 +1 @@
"""Tests for meta_api_grabber package."""

13
tests/conftest.py Normal file
View File

@@ -0,0 +1,13 @@
"""Pytest configuration and fixtures."""
import pytest
def pytest_configure(config):
    """Register the custom markers used by this test suite.

    Adds the ``integration`` and ``unit`` marker descriptions to the
    ``markers`` ini setting via ``config.addinivalue_line``.
    """
    marker_descriptions = (
        "integration: marks tests as integration tests (deselect with '-m \"not integration\"')",
        "unit: marks tests as unit tests",
    )
    for description in marker_descriptions:
        config.addinivalue_line("markers", description)

View File

@@ -0,0 +1,245 @@
"""
Test that validates all fields requested by grab_* methods exist in the database schema.
This test ensures that whenever new fields are added to the Meta API field lists,
the corresponding database columns exist. It catches schema mismatches early.
"""
import pytest
from facebook_business.adobjects.adsinsights import AdsInsights
# Database schema field mappings
# Maps API field names to database column names.

# API fields that are stored under a column of exactly the same name:
# core metrics, calculated metrics, actions/costs and date fields.
_SAME_NAME_FIELDS = (
    "impressions",
    "clicks",
    "spend",
    "reach",
    "frequency",
    "ctr",
    "cpc",
    "cpm",
    "cpp",
    "actions",
    "cost_per_action_type",
    "date_start",
    "date_stop",
)

FIELD_MAPPINGS = {
    **{api_name: api_name for api_name in _SAME_NAME_FIELDS},
    # ID/name fields are mapped to a "referenced_in_*" placeholder rather
    # than a column name; the tests skip the column check for these.
    "campaign_id": "referenced_in_campaigns",
    "campaign_name": "referenced_in_campaigns",
    "adset_id": "referenced_in_adsets",
    "adset_name": "referenced_in_adsets",
    "country": "country",
}

# Table schemas
# Columns that every insights table listed below has in common.
_SHARED_COLUMNS = frozenset(
    {
        "impressions",
        "clicks",
        "spend",
        "reach",
        "ctr",
        "cpc",
        "cpm",
        "actions",
        "date_start",
        "date_stop",
        "date_preset",
        "fetched_at",
    }
)

# Each value is a plain ``set`` of column names for that table.
TABLE_SCHEMAS = {
    "account_insights": set(
        _SHARED_COLUMNS | {"frequency", "cpp", "cost_per_action_type"}
    ),
    "campaign_insights": set(
        _SHARED_COLUMNS | {"campaign_id", "account_id"}
    ),
    "adset_insights": set(
        _SHARED_COLUMNS | {"adset_id", "campaign_id", "account_id"}
    ),
    "campaign_insights_by_country": set(
        _SHARED_COLUMNS | {"campaign_id", "account_id", "country"}
    ),
}
def get_field_value(field_obj) -> str:
    """Return the string form of an ``AdsInsights.Field`` attribute.

    The value is converted with ``str`` so callers always receive a ``str``
    that can be looked up in the mapping and schema tables.
    """
    # !s applies str() to the value, the same conversion as a direct str() call.
    return f"{field_obj!s}"
class TestFieldSchemaValidation:
    """Validate that all API field requests have corresponding database columns.

    Each ``test_*_fields`` method lists the ``AdsInsights.Field`` values
    requested for one insight level and checks them against the module-level
    ``FIELD_MAPPINGS`` and ``TABLE_SCHEMAS`` tables. The shared check lives in
    ``_assert_fields_in_schema`` so all levels are validated the same way.
    """

    def _assert_fields_in_schema(self, fields, table, label):
        """Assert every field maps to a column of ``table``.

        Args:
            fields: Iterable of ``AdsInsights.Field`` values to validate.
            table: Key into ``TABLE_SCHEMAS`` naming the table to check.
            label: Human-readable prefix used in the failure message.

        Raises:
            AssertionError: If a field is missing from ``FIELD_MAPPINGS`` or
                its mapped column is missing from the table schema.
        """
        schema_fields = TABLE_SCHEMAS[table]
        for field in fields:
            field_name = get_field_value(field)
            assert field_name in FIELD_MAPPINGS, (
                f"Field '{field_name}' not in FIELD_MAPPINGS"
            )
            db_column = FIELD_MAPPINGS[field_name]
            # Fields mapped to a "referenced_in_*" placeholder (ID/name
            # fields) are not checked against the table's columns.
            if "referenced_in" in db_column:
                continue
            assert db_column in schema_fields, (
                f"{label} field '{field_name}' (DB: '{db_column}') not in schema"
            )

    def test_account_insights_fields(self):
        """Test that account insights fields exist in schema."""
        fields = [
            AdsInsights.Field.impressions,
            AdsInsights.Field.clicks,
            AdsInsights.Field.spend,
            AdsInsights.Field.cpc,
            AdsInsights.Field.cpm,
            AdsInsights.Field.ctr,
            AdsInsights.Field.cpp,
            AdsInsights.Field.reach,
            AdsInsights.Field.frequency,
            AdsInsights.Field.actions,
            AdsInsights.Field.cost_per_action_type,
            AdsInsights.Field.date_start,
            AdsInsights.Field.date_stop,
        ]
        self._assert_fields_in_schema(fields, "account_insights", "Account insights")

    def test_campaign_insights_fields(self):
        """Test that campaign insights fields exist in schema."""
        fields = [
            AdsInsights.Field.campaign_id,
            AdsInsights.Field.campaign_name,
            AdsInsights.Field.impressions,
            AdsInsights.Field.clicks,
            AdsInsights.Field.spend,
            AdsInsights.Field.ctr,
            AdsInsights.Field.cpc,
            AdsInsights.Field.cpm,
            AdsInsights.Field.reach,
            AdsInsights.Field.actions,
            AdsInsights.Field.date_start,
            AdsInsights.Field.date_stop,
        ]
        self._assert_fields_in_schema(fields, "campaign_insights", "Campaign insights")

    def test_adset_insights_fields(self):
        """Test that adset insights fields exist in schema."""
        fields = [
            AdsInsights.Field.adset_id,
            AdsInsights.Field.adset_name,
            AdsInsights.Field.campaign_id,
            AdsInsights.Field.impressions,
            AdsInsights.Field.clicks,
            AdsInsights.Field.spend,
            AdsInsights.Field.ctr,
            AdsInsights.Field.cpc,
            AdsInsights.Field.cpm,
            AdsInsights.Field.reach,
            AdsInsights.Field.actions,
            AdsInsights.Field.date_start,
            AdsInsights.Field.date_stop,
        ]
        self._assert_fields_in_schema(fields, "adset_insights", "Adset insights")

    def test_campaign_insights_by_country_fields(self):
        """Test that campaign insights by country fields exist in schema."""
        fields = [
            AdsInsights.Field.campaign_id,
            AdsInsights.Field.campaign_name,
            AdsInsights.Field.impressions,
            AdsInsights.Field.clicks,
            AdsInsights.Field.spend,
            AdsInsights.Field.ctr,
            AdsInsights.Field.cpc,
            AdsInsights.Field.cpm,
            AdsInsights.Field.reach,
            AdsInsights.Field.actions,
            AdsInsights.Field.date_start,
            AdsInsights.Field.date_stop,
        ]
        table = "campaign_insights_by_country"
        self._assert_fields_in_schema(fields, table, "Campaign by country insights")
        # Country is not part of the field list above, so its column is
        # checked explicitly.
        assert "country" in TABLE_SCHEMAS[table], (
            "Country field missing in campaign_insights_by_country schema"
        )

    def test_common_fields_consistency(self):
        """Test that common_fields are consistent across all methods."""
        # Imported inside the test so a failure to import the grabber module
        # only affects this test, not collection of the whole file.
        from meta_api_grabber.scheduled_grabber import common_fields

        # Verify common_fields is defined and contains expected metrics
        expected_metrics = {
            "impressions", "clicks", "spend", "cpc", "cpm", "ctr", "cpp",
            "reach", "frequency", "actions", "cost_per_action_type",
            "date_start", "date_stop",
        }
        common_field_names = {get_field_value(f) for f in common_fields}
        # Sorted so the first reported missing metric is deterministic.
        for metric in sorted(expected_metrics):
            assert metric in common_field_names, (
                f"Common metric '{metric}' not found in common_fields"
            )

    def test_all_table_schemas_valid(self):
        """Test that all table schemas are properly defined."""
        required_tables = (
            "account_insights",
            "campaign_insights",
            "adset_insights",
            "campaign_insights_by_country",
        )
        for table in required_tables:
            assert table in TABLE_SCHEMAS, f"Table '{table}' not defined in TABLE_SCHEMAS"
            assert TABLE_SCHEMAS[table], f"Table '{table}' has no fields defined"
class TestSchemaDocumentation:
    """Emit the expected schema layout so it can be read in the test output."""

    def test_schema_documentation(self):
        """Print each table in TABLE_SCHEMAS with its sorted columns and column count."""
        rule = "=" * 80
        print("\n" + rule)
        print("DATABASE SCHEMA DOCUMENTATION")
        print(rule)
        for table_name in TABLE_SCHEMAS:
            columns = TABLE_SCHEMAS[table_name]
            print(f"\nTable: {table_name}")
            print(f"Columns: {sorted(columns)}")
            print(f"Total columns: {len(columns)}")
# Allow running this test module directly as a script, in verbose mode.
if __name__ == "__main__":
    cli_args = [__file__, "-v"]
    pytest.main(cli_args)