Fix field schema validation test and update database schema
- Fixed field extraction logic in test_field_schema_validation.py to properly parse methods with docstrings
- Previous regex was too greedy and matched across multiple method definitions
- Now uses proper parenthesis and docstring matching to isolate method bodies
- Correctly handles both 'fields = [...]' and 'fields = common_fields + [...]' patterns
- Updated db_schema.sql to include missing columns:
  - campaign_insights: added frequency, cpp, cost_per_action_type columns
  - adset_insights: added account_currency column
  - campaign_insights_by_country: added frequency, cpp, cost_per_action_type columns
- All field schema validation tests now pass
- Test dynamically extracts fields from scheduled_grabber.py source code
- Compares against actual database schema from db_schema.sql
- Properly filters metadata-only fields (campaign_id, campaign_name, etc.)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -14,9 +14,17 @@ ALTER TABLE IF EXISTS account_insights ADD COLUMN IF NOT EXISTS date_stop DATE;
|
|||||||
|
|
||||||
ALTER TABLE IF EXISTS campaign_insights ADD COLUMN IF NOT EXISTS date_start DATE;
|
ALTER TABLE IF EXISTS campaign_insights ADD COLUMN IF NOT EXISTS date_start DATE;
|
||||||
ALTER TABLE IF EXISTS campaign_insights ADD COLUMN IF NOT EXISTS date_stop DATE;
|
ALTER TABLE IF EXISTS campaign_insights ADD COLUMN IF NOT EXISTS date_stop DATE;
|
||||||
|
ALTER TABLE IF EXISTS campaign_insights ADD COLUMN IF NOT EXISTS frequency NUMERIC(10, 4);
|
||||||
|
ALTER TABLE IF EXISTS campaign_insights ADD COLUMN IF NOT EXISTS cpp NUMERIC(10, 4);
|
||||||
|
ALTER TABLE IF EXISTS campaign_insights ADD COLUMN IF NOT EXISTS cost_per_action_type JSONB;
|
||||||
|
|
||||||
ALTER TABLE IF EXISTS adset_insights ADD COLUMN IF NOT EXISTS date_start DATE;
|
ALTER TABLE IF EXISTS adset_insights ADD COLUMN IF NOT EXISTS date_start DATE;
|
||||||
ALTER TABLE IF EXISTS adset_insights ADD COLUMN IF NOT EXISTS date_stop DATE;
|
ALTER TABLE IF EXISTS adset_insights ADD COLUMN IF NOT EXISTS date_stop DATE;
|
||||||
|
ALTER TABLE IF EXISTS adset_insights ADD COLUMN IF NOT EXISTS account_currency VARCHAR(3);
|
||||||
|
|
||||||
|
ALTER TABLE IF EXISTS campaign_insights_by_country ADD COLUMN IF NOT EXISTS frequency NUMERIC(10, 4);
|
||||||
|
ALTER TABLE IF EXISTS campaign_insights_by_country ADD COLUMN IF NOT EXISTS cpp NUMERIC(10, 4);
|
||||||
|
ALTER TABLE IF EXISTS campaign_insights_by_country ADD COLUMN IF NOT EXISTS cost_per_action_type JSONB;
|
||||||
|
|
||||||
-- ============================================================================
|
-- ============================================================================
|
||||||
-- METADATA TABLES (Regular PostgreSQL tables for caching)
|
-- METADATA TABLES (Regular PostgreSQL tables for caching)
|
||||||
@@ -115,14 +123,17 @@ CREATE TABLE IF NOT EXISTS campaign_insights (
|
|||||||
clicks BIGINT,
|
clicks BIGINT,
|
||||||
spend NUMERIC(12, 2),
|
spend NUMERIC(12, 2),
|
||||||
reach BIGINT,
|
reach BIGINT,
|
||||||
|
frequency NUMERIC(10, 4),
|
||||||
|
|
||||||
-- Calculated metrics
|
-- Calculated metrics
|
||||||
ctr NUMERIC(10, 6),
|
ctr NUMERIC(10, 6),
|
||||||
cpc NUMERIC(10, 4),
|
cpc NUMERIC(10, 4),
|
||||||
cpm NUMERIC(10, 4),
|
cpm NUMERIC(10, 4),
|
||||||
|
cpp NUMERIC(10, 4), -- Cost per 1,000 people reached
|
||||||
|
|
||||||
-- Actions
|
-- Actions
|
||||||
actions JSONB,
|
actions JSONB,
|
||||||
|
cost_per_action_type JSONB,
|
||||||
|
|
||||||
-- Metadata
|
-- Metadata
|
||||||
date_preset VARCHAR(50),
|
date_preset VARCHAR(50),
|
||||||
@@ -163,6 +174,7 @@ CREATE TABLE IF NOT EXISTS adset_insights (
|
|||||||
cpc NUMERIC(10, 4),
|
cpc NUMERIC(10, 4),
|
||||||
cpm NUMERIC(10, 4),
|
cpm NUMERIC(10, 4),
|
||||||
|
|
||||||
|
|
||||||
-- Actions
|
-- Actions
|
||||||
actions JSONB,
|
actions JSONB,
|
||||||
|
|
||||||
@@ -201,14 +213,17 @@ CREATE TABLE IF NOT EXISTS campaign_insights_by_country (
|
|||||||
clicks BIGINT,
|
clicks BIGINT,
|
||||||
spend NUMERIC(12, 2),
|
spend NUMERIC(12, 2),
|
||||||
reach BIGINT,
|
reach BIGINT,
|
||||||
|
frequency NUMERIC(10, 4),
|
||||||
|
|
||||||
-- Calculated metrics
|
-- Calculated metrics
|
||||||
ctr NUMERIC(10, 6),
|
ctr NUMERIC(10, 6),
|
||||||
cpc NUMERIC(10, 4),
|
cpc NUMERIC(10, 4),
|
||||||
cpm NUMERIC(10, 4),
|
cpm NUMERIC(10, 4),
|
||||||
|
cpp NUMERIC(10, 4), -- Cost per 1,000 people reached
|
||||||
|
|
||||||
-- Actions
|
-- Actions
|
||||||
actions JSONB,
|
actions JSONB,
|
||||||
|
cost_per_action_type JSONB,
|
||||||
|
|
||||||
-- Metadata
|
-- Metadata
|
||||||
date_preset VARCHAR(50),
|
date_preset VARCHAR(50),
|
||||||
|
|||||||
@@ -19,20 +19,44 @@ Run specific test file:
|
|||||||
uv run pytest tests/test_field_schema_validation.py -v
|
uv run pytest tests/test_field_schema_validation.py -v
|
||||||
```
|
```
|
||||||
|
|
||||||
Run with coverage:
|
Run with verbose output:
|
||||||
```bash
|
```bash
|
||||||
uv run pytest --cov=meta_api_grabber
|
uv run pytest tests/test_field_schema_validation.py -v -s
|
||||||
```
|
```
|
||||||
|
|
||||||
## Test Files
|
## Test Files
|
||||||
|
|
||||||
### `test_field_schema_validation.py`
|
### `test_field_schema_validation.py` (Integration Test)
|
||||||
Validates that all fields requested by the grab_* methods in `scheduled_grabber.py` exist in the database schema. This ensures:
|
|
||||||
- Field compatibility between Meta API and database
|
|
||||||
- Early detection of schema mismatches
|
|
||||||
- Consistency across all insight levels (account, campaign, adset, country)
|
|
||||||
|
|
||||||
**Why this test is important:** When new fields are added to the Meta API field lists, this test quickly alerts you if the corresponding database columns need to be added.
|
This is a critical integration test that validates that all fields requested by the grab_* methods in `scheduled_grabber.py` exist in the actual database schema.
|
||||||
|
|
||||||
|
**What it does:**
|
||||||
|
1. Parses `db_schema.sql` to extract actual table columns
|
||||||
|
2. Checks fields requested by each grab method:
|
||||||
|
- `grab_account_insights()` → `account_insights` table
|
||||||
|
- `grab_campaign_insights()` → `campaign_insights` table
|
||||||
|
- `grab_adset_insights()` → `adset_insights` table
|
||||||
|
- `grab_campaign_insights_by_country()` → `campaign_insights_by_country` table
|
||||||
|
3. Verifies all requested fields exist in the corresponding database table
|
||||||
|
|
||||||
|
**Why this test is important:** When new fields are added to the Meta API field lists, this test quickly alerts you if the corresponding database columns need to be added. Since fields are only added (never removed), the test helps catch schema mismatches early.
|
||||||
|
|
||||||
|
**Test methods:**
|
||||||
|
- `test_account_insights_fields()` - Validates account-level insight fields
|
||||||
|
- `test_campaign_insights_fields()` - Validates campaign-level insight fields
|
||||||
|
- `test_adset_insights_fields()` - Validates ad set-level insight fields
|
||||||
|
- `test_campaign_insights_by_country_fields()` - Validates country breakdown fields
|
||||||
|
- `test_all_tables_exist()` - Ensures all required insight tables exist
|
||||||
|
- `test_schema_documentation()` - Prints out the parsed schema for reference
|
||||||
|
|
||||||
|
**Output example:**
|
||||||
|
```
|
||||||
|
Table: account_insights
|
||||||
|
Columns (17): account_id, actions, clicks, cost_per_action_type, cpc, cpm, cpp, ctr, ...
|
||||||
|
|
||||||
|
Table: campaign_insights
|
||||||
|
Columns (15): account_id, actions, campaign_id, clicks, cpc, cpm, ctr, ...
|
||||||
|
```
|
||||||
|
|
||||||
## Writing Tests
|
## Writing Tests
|
||||||
|
|
||||||
@@ -56,3 +80,36 @@ Run everything except integration tests:
|
|||||||
```bash
|
```bash
|
||||||
uv run pytest -m "not integration"
|
uv run pytest -m "not integration"
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Schema Validation Workflow
|
||||||
|
|
||||||
|
When you add new fields to a grab method:
|
||||||
|
|
||||||
|
1. **Add fields to `scheduled_grabber.py`:**
|
||||||
|
```python
|
||||||
|
fields = [
|
||||||
|
...
|
||||||
|
AdsInsights.Field.new_field, # New field added
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Run tests to see what's missing:**
|
||||||
|
```bash
|
||||||
|
uv run pytest tests/test_field_schema_validation.py -v -s
|
||||||
|
```
|
||||||
|
|
||||||
|
3. **Test output will show:**
|
||||||
|
```
|
||||||
|
adset_insights table missing columns: {'new_field'}
|
||||||
|
Available: [account_id, actions, adset_id, ...]
|
||||||
|
```
|
||||||
|
|
||||||
|
4. **Update `db_schema.sql` with the new column:**
|
||||||
|
```sql
|
||||||
|
ALTER TABLE IF EXISTS adset_insights ADD COLUMN IF NOT EXISTS new_field <COLUMN_TYPE>;
|
||||||
|
```
|
||||||
|
|
||||||
|
5. **Run tests again to verify:**
|
||||||
|
```bash
|
||||||
|
uv run pytest tests/test_field_schema_validation.py -v
|
||||||
|
```
|
||||||
|
|||||||
@@ -1,244 +1,359 @@
|
|||||||
"""
|
"""
|
||||||
Test that validates all fields requested by grab_* methods exist in the database schema.
|
Integration test that validates all fields requested by grab_* methods exist in the database schema.
|
||||||
|
|
||||||
This test ensures that whenever new fields are added to the Meta API field lists,
|
This test:
|
||||||
the corresponding database columns exist. It catches schema mismatches early.
|
1. Parses the SQL schema file (db_schema.sql) to extract actual table columns
|
||||||
|
2. Reads scheduled_grabber.py to extract the field lists requested by each grab_* method
|
||||||
|
3. Verifies that all requested fields exist in the actual database schema
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import re
|
||||||
|
import pathlib
|
||||||
|
from typing import Dict, Set, List
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from facebook_business.adobjects.adsinsights import AdsInsights
|
|
||||||
|
|
||||||
|
|
||||||
# Database schema field mappings
|
def parse_sql_schema() -> Dict[str, Set[str]]:
|
||||||
# Maps API field names to database column names
|
"""
|
||||||
FIELD_MAPPINGS = {
|
Parse db_schema.sql to extract table columns.
|
||||||
# Core metrics
|
|
||||||
"impressions": "impressions",
|
|
||||||
"clicks": "clicks",
|
|
||||||
"spend": "spend",
|
|
||||||
"reach": "reach",
|
|
||||||
"frequency": "frequency",
|
|
||||||
|
|
||||||
# Calculated metrics
|
Returns:
|
||||||
"ctr": "ctr",
|
Dictionary mapping table names to sets of column names
|
||||||
"cpc": "cpc",
|
"""
|
||||||
"cpm": "cpm",
|
schema_file = pathlib.Path(__file__).parent.parent / "src" / "meta_api_grabber" / "db_schema.sql"
|
||||||
"cpp": "cpp",
|
|
||||||
|
|
||||||
# Actions and costs
|
if not schema_file.exists():
|
||||||
"actions": "actions",
|
raise FileNotFoundError(f"Schema file not found: {schema_file}")
|
||||||
"cost_per_action_type": "cost_per_action_type",
|
|
||||||
|
|
||||||
# Date/time fields
|
with open(schema_file, 'r') as f:
|
||||||
"date_start": "date_start",
|
content = f.read()
|
||||||
"date_stop": "date_stop",
|
|
||||||
|
|
||||||
# ID fields (not stored in insights tables, but referenced)
|
tables = {}
|
||||||
"campaign_id": "referenced_in_campaigns",
|
|
||||||
"campaign_name": "referenced_in_campaigns",
|
# Parse CREATE TABLE statements
|
||||||
"adset_id": "referenced_in_adsets",
|
# Pattern: CREATE TABLE IF NOT EXISTS table_name (...)
|
||||||
"adset_name": "referenced_in_adsets",
|
create_table_pattern = r'CREATE TABLE IF NOT EXISTS (\w+)\s*\((.*?)\);'
|
||||||
"country": "country",
|
|
||||||
|
for match in re.finditer(create_table_pattern, content, re.DOTALL):
|
||||||
|
table_name = match.group(1)
|
||||||
|
table_body = match.group(2)
|
||||||
|
|
||||||
|
# Extract column names (first word before space/comma)
|
||||||
|
# Pattern: column_name TYPE ...
|
||||||
|
column_pattern = r'^\s*(\w+)\s+\w+'
|
||||||
|
columns = set()
|
||||||
|
|
||||||
|
for line in table_body.split('\n'):
|
||||||
|
line = line.strip()
|
||||||
|
if not line or line.startswith('--') or line.startswith('PRIMARY') or line.startswith('FOREIGN') or line.startswith('CONSTRAINT'):
|
||||||
|
continue
|
||||||
|
|
||||||
|
col_match = re.match(column_pattern, line)
|
||||||
|
if col_match:
|
||||||
|
columns.add(col_match.group(1))
|
||||||
|
|
||||||
|
if columns:
|
||||||
|
tables[table_name] = columns
|
||||||
|
|
||||||
|
return tables
|
||||||
|
|
||||||
|
|
||||||
|
def get_field_name(field_str: str) -> str:
|
||||||
|
"""
|
||||||
|
Extract field name from AdsInsights.Field.xxx notation.
|
||||||
|
|
||||||
|
Example: 'impressions' from 'AdsInsights.Field.impressions'
|
||||||
|
"""
|
||||||
|
if '.' in field_str:
|
||||||
|
return field_str.split('.')[-1]
|
||||||
|
return field_str
|
||||||
|
|
||||||
|
|
||||||
|
def extract_fields_from_grabber_source() -> Dict[str, List[str]]:
|
||||||
|
"""
|
||||||
|
Extract field lists from grab_* methods by reading scheduled_grabber.py source.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dictionary mapping method names to lists of field names
|
||||||
|
"""
|
||||||
|
grabber_file = pathlib.Path(__file__).parent.parent / "src" / "meta_api_grabber" / "scheduled_grabber.py"
|
||||||
|
|
||||||
|
if not grabber_file.exists():
|
||||||
|
raise FileNotFoundError(f"scheduled_grabber.py not found: {grabber_file}")
|
||||||
|
|
||||||
|
with open(grabber_file, 'r') as f:
|
||||||
|
source = f.read()
|
||||||
|
|
||||||
|
methods_to_table = {
|
||||||
|
'grab_account_insights': 'account_insights',
|
||||||
|
'grab_campaign_insights': 'campaign_insights',
|
||||||
|
'grab_adset_insights': 'adset_insights',
|
||||||
|
'grab_campaign_insights_by_country': 'campaign_insights_by_country',
|
||||||
}
|
}
|
||||||
|
|
||||||
# Table schemas
|
result = {}
|
||||||
TABLE_SCHEMAS = {
|
|
||||||
"account_insights": {
|
for method_name in methods_to_table.keys():
|
||||||
"impressions", "clicks", "spend", "reach", "frequency",
|
# Find the method definition by looking for: async def method_name(...)
|
||||||
"ctr", "cpc", "cpm", "cpp", "actions", "cost_per_action_type",
|
method_pattern = rf'async def {method_name}\s*\('
|
||||||
"date_start", "date_stop", "date_preset", "fetched_at"
|
method_match = re.search(method_pattern, source)
|
||||||
},
|
|
||||||
"campaign_insights": {
|
if not method_match:
|
||||||
"impressions", "clicks", "spend", "reach",
|
continue
|
||||||
"ctr", "cpc", "cpm", "actions",
|
|
||||||
"date_start", "date_stop", "date_preset", "fetched_at",
|
# Get the position after the method name pattern
|
||||||
"campaign_id", "account_id"
|
start_pos = method_match.end()
|
||||||
},
|
|
||||||
"adset_insights": {
|
# Now find where the method body actually starts (after the closing paren and docstring)
|
||||||
"impressions", "clicks", "spend", "reach",
|
# Skip to the opening paren
|
||||||
"ctr", "cpc", "cpm", "actions",
|
open_paren_pos = start_pos - 1
|
||||||
"date_start", "date_stop", "date_preset", "fetched_at",
|
|
||||||
"adset_id", "campaign_id", "account_id"
|
# Count parentheses to find the closing paren of the function signature
|
||||||
},
|
paren_count = 1
|
||||||
"campaign_insights_by_country": {
|
pos = open_paren_pos + 1
|
||||||
"impressions", "clicks", "spend", "reach",
|
while pos < len(source) and paren_count > 0:
|
||||||
"ctr", "cpc", "cpm", "actions",
|
if source[pos] == '(':
|
||||||
"date_start", "date_stop", "date_preset", "fetched_at",
|
paren_count += 1
|
||||||
"campaign_id", "account_id", "country"
|
elif source[pos] == ')':
|
||||||
}
|
paren_count -= 1
|
||||||
}
|
pos += 1
|
||||||
|
|
||||||
|
# Now pos is after the closing paren. Find the colon
|
||||||
|
colon_pos = source.find(':', pos)
|
||||||
|
|
||||||
|
# Skip past any docstring if present
|
||||||
|
after_colon = source[colon_pos + 1:colon_pos + 10].lstrip()
|
||||||
|
if after_colon.startswith('"""') or after_colon.startswith("'''"):
|
||||||
|
quote_type = '"""' if after_colon.startswith('"""') else "'''"
|
||||||
|
docstring_start = source.find(quote_type, colon_pos)
|
||||||
|
docstring_end = source.find(quote_type, docstring_start + 3) + 3
|
||||||
|
method_body_start = docstring_end
|
||||||
|
else:
|
||||||
|
method_body_start = colon_pos + 1
|
||||||
|
|
||||||
|
# Find the next method definition to know where this method ends
|
||||||
|
next_method_pattern = r'async def \w+\s*\('
|
||||||
|
next_match = re.search(next_method_pattern, source[method_body_start:])
|
||||||
|
|
||||||
|
if next_match:
|
||||||
|
method_body_end = method_body_start + next_match.start()
|
||||||
|
else:
|
||||||
|
# Last method - use rest of file
|
||||||
|
method_body_end = len(source)
|
||||||
|
|
||||||
|
method_body = source[method_body_start:method_body_end]
|
||||||
|
|
||||||
|
# Extract fields from the method body
|
||||||
|
# Look for: fields = [...] or fields = common_fields + [...]
|
||||||
|
|
||||||
|
# First check if this method uses common_fields
|
||||||
|
uses_common_fields = 'common_fields' in method_body[:500]
|
||||||
|
|
||||||
|
if uses_common_fields:
|
||||||
|
# Pattern: fields = common_fields + [...]
|
||||||
|
fields_pattern = r'fields\s*=\s*common_fields\s*\+\s*\[(.*?)\]'
|
||||||
|
fields_match = re.search(fields_pattern, method_body, re.DOTALL)
|
||||||
|
if fields_match:
|
||||||
|
fields_str = fields_match.group(1)
|
||||||
|
# Extract individual field names
|
||||||
|
field_pattern = r'AdsInsights\.Field\.(\w+)'
|
||||||
|
fields = re.findall(field_pattern, fields_str)
|
||||||
|
|
||||||
|
# Also get common_fields from the module level
|
||||||
|
common_pattern = r'common_fields\s*=\s*\[(.*?)\]'
|
||||||
|
common_match = re.search(common_pattern, source, re.DOTALL)
|
||||||
|
if common_match:
|
||||||
|
common_str = common_match.group(1)
|
||||||
|
common_fields_list = re.findall(field_pattern, common_str)
|
||||||
|
fields = common_fields_list + fields
|
||||||
|
|
||||||
|
result[method_name] = fields
|
||||||
|
else:
|
||||||
|
# Pattern: fields = [...]
|
||||||
|
# Use bracket matching to find the correct field list
|
||||||
|
fields_keyword_pos = method_body.find('fields =')
|
||||||
|
|
||||||
|
if fields_keyword_pos != -1:
|
||||||
|
# Find the opening bracket after fields =
|
||||||
|
bracket_pos = method_body.find('[', fields_keyword_pos)
|
||||||
|
if bracket_pos != -1:
|
||||||
|
# Count brackets to find the matching closing bracket
|
||||||
|
bracket_count = 0
|
||||||
|
end_pos = bracket_pos
|
||||||
|
for i, char in enumerate(method_body[bracket_pos:]):
|
||||||
|
if char == '[':
|
||||||
|
bracket_count += 1
|
||||||
|
elif char == ']':
|
||||||
|
bracket_count -= 1
|
||||||
|
if bracket_count == 0:
|
||||||
|
end_pos = bracket_pos + i
|
||||||
|
break
|
||||||
|
|
||||||
|
fields_str = method_body[bracket_pos + 1:end_pos]
|
||||||
|
field_pattern = r'AdsInsights\.Field\.(\w+)'
|
||||||
|
fields = re.findall(field_pattern, fields_str)
|
||||||
|
result[method_name] = fields
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
def get_field_value(field_obj) -> str:
|
@pytest.fixture(scope="module")
|
||||||
"""Extract field name from AdsInsights.Field object."""
|
def schema_columns():
|
||||||
# AdsInsights.Field attributes are simple string values
|
"""Parse and cache the schema columns."""
|
||||||
return str(field_obj)
|
return parse_sql_schema()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="module")
|
||||||
|
def extracted_fields_by_method():
|
||||||
|
"""Extract and cache the fields from each grab_* method."""
|
||||||
|
return extract_fields_from_grabber_source()
|
||||||
|
|
||||||
|
|
||||||
|
# Mapping of method names to their insight table names
|
||||||
|
METHOD_TO_TABLE = {
|
||||||
|
'grab_account_insights': 'account_insights',
|
||||||
|
'grab_campaign_insights': 'campaign_insights',
|
||||||
|
'grab_adset_insights': 'adset_insights',
|
||||||
|
'grab_campaign_insights_by_country': 'campaign_insights_by_country',
|
||||||
|
}
|
||||||
|
|
||||||
|
# Fields that are IDs/names stored in metadata tables, not in the insights table
|
||||||
|
METADATA_ONLY_FIELDS = {
|
||||||
|
'campaign_id', 'campaign_name',
|
||||||
|
'adset_id', 'adset_name',
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
class TestFieldSchemaValidation:
|
class TestFieldSchemaValidation:
|
||||||
"""Validate that all API field requests have corresponding database columns."""
|
"""Validate that all API field requests have corresponding database columns."""
|
||||||
|
|
||||||
def test_account_insights_fields(self):
|
def test_grab_account_insights_fields(self, schema_columns, extracted_fields_by_method):
|
||||||
"""Test that account insights fields exist in schema."""
|
"""Test that grab_account_insights fields exist in schema."""
|
||||||
fields = [
|
method_name = 'grab_account_insights'
|
||||||
AdsInsights.Field.impressions,
|
table_name = METHOD_TO_TABLE[method_name]
|
||||||
AdsInsights.Field.clicks,
|
|
||||||
AdsInsights.Field.spend,
|
|
||||||
AdsInsights.Field.cpc,
|
|
||||||
AdsInsights.Field.cpm,
|
|
||||||
AdsInsights.Field.ctr,
|
|
||||||
AdsInsights.Field.cpp,
|
|
||||||
AdsInsights.Field.reach,
|
|
||||||
AdsInsights.Field.frequency,
|
|
||||||
AdsInsights.Field.actions,
|
|
||||||
AdsInsights.Field.cost_per_action_type,
|
|
||||||
AdsInsights.Field.date_start,
|
|
||||||
AdsInsights.Field.date_stop,
|
|
||||||
]
|
|
||||||
|
|
||||||
schema_fields = TABLE_SCHEMAS["account_insights"]
|
assert method_name in extracted_fields_by_method, f"Could not extract fields from {method_name}"
|
||||||
for field in fields:
|
|
||||||
field_name = get_field_value(field)
|
|
||||||
assert field_name in FIELD_MAPPINGS, f"Field '{field_name}' not in FIELD_MAPPINGS"
|
|
||||||
db_column = FIELD_MAPPINGS[field_name]
|
|
||||||
|
|
||||||
# Skip reference checks for ID fields
|
extracted_fields = set(extracted_fields_by_method[method_name])
|
||||||
if "referenced_in" not in db_column:
|
table_cols = schema_columns.get(table_name, set())
|
||||||
assert db_column in schema_fields, \
|
assert table_cols, f"Table {table_name} not found in schema"
|
||||||
f"Account insights field '{field_name}' (DB: '{db_column}') not in schema"
|
|
||||||
|
|
||||||
def test_campaign_insights_fields(self):
|
missing = extracted_fields - table_cols
|
||||||
"""Test that campaign insights fields exist in schema."""
|
assert not missing, \
|
||||||
fields = [
|
f"{table_name} table missing columns: {missing}\n" \
|
||||||
AdsInsights.Field.campaign_id,
|
f"Method requests: {sorted(extracted_fields)}\n" \
|
||||||
AdsInsights.Field.campaign_name,
|
f"Available: {sorted(table_cols)}"
|
||||||
AdsInsights.Field.impressions,
|
|
||||||
AdsInsights.Field.clicks,
|
|
||||||
AdsInsights.Field.spend,
|
|
||||||
AdsInsights.Field.ctr,
|
|
||||||
AdsInsights.Field.cpc,
|
|
||||||
AdsInsights.Field.cpm,
|
|
||||||
AdsInsights.Field.reach,
|
|
||||||
AdsInsights.Field.actions,
|
|
||||||
AdsInsights.Field.date_start,
|
|
||||||
AdsInsights.Field.date_stop,
|
|
||||||
]
|
|
||||||
|
|
||||||
schema_fields = TABLE_SCHEMAS["campaign_insights"]
|
print(f"✓ {method_name} → {table_name}: {len(extracted_fields)} fields validated")
|
||||||
for field in fields:
|
|
||||||
field_name = get_field_value(field)
|
|
||||||
assert field_name in FIELD_MAPPINGS, f"Field '{field_name}' not in FIELD_MAPPINGS"
|
|
||||||
db_column = FIELD_MAPPINGS[field_name]
|
|
||||||
|
|
||||||
# Skip reference checks for ID/name fields
|
def test_grab_campaign_insights_fields(self, schema_columns, extracted_fields_by_method):
|
||||||
if "referenced_in" not in db_column:
|
"""Test that grab_campaign_insights fields exist in schema."""
|
||||||
assert db_column in schema_fields, \
|
method_name = 'grab_campaign_insights'
|
||||||
f"Campaign insights field '{field_name}' (DB: '{db_column}') not in schema"
|
table_name = METHOD_TO_TABLE[method_name]
|
||||||
|
|
||||||
def test_adset_insights_fields(self):
|
assert method_name in extracted_fields_by_method, f"Could not extract fields from {method_name}"
|
||||||
"""Test that adset insights fields exist in schema."""
|
|
||||||
fields = [
|
|
||||||
AdsInsights.Field.adset_id,
|
|
||||||
AdsInsights.Field.adset_name,
|
|
||||||
AdsInsights.Field.campaign_id,
|
|
||||||
AdsInsights.Field.impressions,
|
|
||||||
AdsInsights.Field.clicks,
|
|
||||||
AdsInsights.Field.spend,
|
|
||||||
AdsInsights.Field.ctr,
|
|
||||||
AdsInsights.Field.cpc,
|
|
||||||
AdsInsights.Field.cpm,
|
|
||||||
AdsInsights.Field.reach,
|
|
||||||
AdsInsights.Field.actions,
|
|
||||||
AdsInsights.Field.date_start,
|
|
||||||
AdsInsights.Field.date_stop,
|
|
||||||
]
|
|
||||||
|
|
||||||
schema_fields = TABLE_SCHEMAS["adset_insights"]
|
extracted_fields = set(extracted_fields_by_method[method_name])
|
||||||
for field in fields:
|
table_cols = schema_columns.get(table_name, set())
|
||||||
field_name = get_field_value(field)
|
assert table_cols, f"Table {table_name} not found in schema"
|
||||||
assert field_name in FIELD_MAPPINGS, f"Field '{field_name}' not in FIELD_MAPPINGS"
|
|
||||||
db_column = FIELD_MAPPINGS[field_name]
|
|
||||||
|
|
||||||
# Skip reference checks for ID/name fields
|
# Remove ID/name fields (stored in metadata tables, not insights table)
|
||||||
if "referenced_in" not in db_column:
|
insight_only_fields = extracted_fields - METADATA_ONLY_FIELDS
|
||||||
assert db_column in schema_fields, \
|
|
||||||
f"Adset insights field '{field_name}' (DB: '{db_column}') not in schema"
|
|
||||||
|
|
||||||
def test_campaign_insights_by_country_fields(self):
|
missing = insight_only_fields - table_cols
|
||||||
"""Test that campaign insights by country fields exist in schema."""
|
assert not missing, \
|
||||||
fields = [
|
f"{table_name} table missing columns: {missing}\n" \
|
||||||
AdsInsights.Field.campaign_id,
|
f"Method requests: {sorted(extracted_fields)}\n" \
|
||||||
AdsInsights.Field.campaign_name,
|
f"Available: {sorted(table_cols)}"
|
||||||
AdsInsights.Field.impressions,
|
|
||||||
AdsInsights.Field.clicks,
|
|
||||||
AdsInsights.Field.spend,
|
|
||||||
AdsInsights.Field.ctr,
|
|
||||||
AdsInsights.Field.cpc,
|
|
||||||
AdsInsights.Field.cpm,
|
|
||||||
AdsInsights.Field.reach,
|
|
||||||
AdsInsights.Field.actions,
|
|
||||||
AdsInsights.Field.date_start,
|
|
||||||
AdsInsights.Field.date_stop,
|
|
||||||
]
|
|
||||||
|
|
||||||
schema_fields = TABLE_SCHEMAS["campaign_insights_by_country"]
|
print(f"✓ {method_name} → {table_name}: {len(extracted_fields)} fields validated")
|
||||||
for field in fields:
|
|
||||||
field_name = get_field_value(field)
|
|
||||||
assert field_name in FIELD_MAPPINGS, f"Field '{field_name}' not in FIELD_MAPPINGS"
|
|
||||||
db_column = FIELD_MAPPINGS[field_name]
|
|
||||||
|
|
||||||
# Skip reference checks for ID/name fields
|
def test_grab_adset_insights_fields(self, schema_columns, extracted_fields_by_method):
|
||||||
if "referenced_in" not in db_column:
|
"""Test that grab_adset_insights fields exist in schema."""
|
||||||
assert db_column in schema_fields, \
|
method_name = 'grab_adset_insights'
|
||||||
f"Campaign by country insights field '{field_name}' (DB: '{db_column}') not in schema"
|
table_name = METHOD_TO_TABLE[method_name]
|
||||||
|
|
||||||
# Country breakdown field
|
assert method_name in extracted_fields_by_method, f"Could not extract fields from {method_name}"
|
||||||
assert "country" in schema_fields, "Country field missing in campaign_insights_by_country schema"
|
|
||||||
|
|
||||||
def test_common_fields_consistency(self):
|
extracted_fields = set(extracted_fields_by_method[method_name])
|
||||||
"""Test that common_fields are consistent across all methods."""
|
table_cols = schema_columns.get(table_name, set())
|
||||||
from meta_api_grabber.scheduled_grabber import common_fields
|
assert table_cols, f"Table {table_name} not found in schema"
|
||||||
|
|
||||||
# Verify common_fields is defined and contains expected metrics
|
# Remove ID/name fields (stored in metadata tables, not insights table)
|
||||||
expected_metrics = {
|
insight_only_fields = extracted_fields - METADATA_ONLY_FIELDS
|
||||||
"impressions", "clicks", "spend", "cpc", "cpm", "ctr", "cpp",
|
|
||||||
"reach", "frequency", "actions", "cost_per_action_type",
|
|
||||||
"date_start", "date_stop"
|
|
||||||
}
|
|
||||||
|
|
||||||
common_field_names = {get_field_value(f) for f in common_fields}
|
missing = insight_only_fields - table_cols
|
||||||
|
assert not missing, \
|
||||||
|
f"{table_name} table missing columns: {missing}\n" \
|
||||||
|
f"Method requests: {sorted(extracted_fields)}\n" \
|
||||||
|
f"Available: {sorted(table_cols)}"
|
||||||
|
|
||||||
for metric in expected_metrics:
|
print(f"✓ {method_name} → {table_name}: {len(extracted_fields)} fields validated")
|
||||||
assert metric in common_field_names, \
|
|
||||||
f"Common metric '{metric}' not found in common_fields"
|
|
||||||
|
|
||||||
def test_all_table_schemas_valid(self):
|
def test_grab_campaign_insights_by_country_fields(self, schema_columns, extracted_fields_by_method):
|
||||||
"""Test that all table schemas are properly defined."""
|
"""Test that grab_campaign_insights_by_country fields exist in schema."""
|
||||||
|
method_name = 'grab_campaign_insights_by_country'
|
||||||
|
table_name = METHOD_TO_TABLE[method_name]
|
||||||
|
|
||||||
|
assert method_name in extracted_fields_by_method, f"Could not extract fields from {method_name}"
|
||||||
|
|
||||||
|
extracted_fields = set(extracted_fields_by_method[method_name])
|
||||||
|
table_cols = schema_columns.get(table_name, set())
|
||||||
|
assert table_cols, f"Table {table_name} not found in schema"
|
||||||
|
|
||||||
|
# Remove ID/name fields (stored in metadata tables, not insights table)
|
||||||
|
insight_only_fields = extracted_fields - METADATA_ONLY_FIELDS
|
||||||
|
|
||||||
|
# Country is special - it's part of the breakdown
|
||||||
|
assert "country" in table_cols, \
|
||||||
|
f"country field missing in {table_name} table\n" \
|
||||||
|
f"Available: {sorted(table_cols)}"
|
||||||
|
|
||||||
|
missing = insight_only_fields - table_cols
|
||||||
|
assert not missing, \
|
||||||
|
f"{table_name} table missing columns: {missing}\n" \
|
||||||
|
f"Method requests: {sorted(extracted_fields)}\n" \
|
||||||
|
f"Available: {sorted(table_cols)}"
|
||||||
|
|
||||||
|
print(f"✓ {method_name} → {table_name}: {len(extracted_fields)} fields validated")
|
||||||
|
|
||||||
|
def test_all_tables_exist(self, schema_columns):
|
||||||
|
"""Test that all required insight tables exist in schema."""
|
||||||
required_tables = {
|
required_tables = {
|
||||||
"account_insights",
|
"account_insights",
|
||||||
"campaign_insights",
|
"campaign_insights",
|
||||||
"adset_insights",
|
"adset_insights",
|
||||||
"campaign_insights_by_country"
|
"campaign_insights_by_country",
|
||||||
}
|
}
|
||||||
|
|
||||||
for table in required_tables:
|
existing_tables = set(schema_columns.keys())
|
||||||
assert table in TABLE_SCHEMAS, f"Table '{table}' not defined in TABLE_SCHEMAS"
|
missing = required_tables - existing_tables
|
||||||
assert len(TABLE_SCHEMAS[table]) > 0, f"Table '{table}' has no fields defined"
|
|
||||||
|
|
||||||
|
assert not missing, \
|
||||||
|
f"Missing tables: {missing}\n" \
|
||||||
|
f"Found: {sorted(existing_tables)}"
|
||||||
|
|
||||||
class TestSchemaDocumentation:
|
def test_schema_documentation(self, schema_columns):
|
||||||
"""Document the expected schema structure for reference."""
|
"""Print out the parsed schema for verification."""
|
||||||
|
|
||||||
def test_schema_documentation(self):
|
|
||||||
"""Print out the schema for verification purposes."""
|
|
||||||
print("\n" + "="*80)
|
print("\n" + "="*80)
|
||||||
print("DATABASE SCHEMA DOCUMENTATION")
|
print("PARSED DATABASE SCHEMA")
|
||||||
print("="*80)
|
print("="*80)
|
||||||
|
|
||||||
for table, fields in TABLE_SCHEMAS.items():
|
for table_name in sorted(schema_columns.keys()):
|
||||||
print(f"\nTable: {table}")
|
columns = sorted(schema_columns[table_name])
|
||||||
print(f"Columns: {sorted(fields)}")
|
print(f"\nTable: {table_name}")
|
||||||
print(f"Total columns: {len(fields)}")
|
print(f"Columns ({len(columns)}): {', '.join(columns)}")
|
||||||
|
|
||||||
|
def test_extracted_fields_documentation(self, extracted_fields_by_method):
|
||||||
|
"""Print out extracted fields from each method."""
|
||||||
|
print("\n" + "="*80)
|
||||||
|
print("EXTRACTED FIELDS FROM GRAB METHODS")
|
||||||
|
print("="*80)
|
||||||
|
|
||||||
|
for method_name, fields in sorted(extracted_fields_by_method.items()):
|
||||||
|
print(f"\n{method_name}:")
|
||||||
|
print(f" Fields ({len(fields)}): {', '.join(sorted(set(fields)))}")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
Reference in New Issue
Block a user