Compare commits

..

12 Commits

Author SHA1 Message Date
a1d9a19d04 Updated view queries 2025-11-12 12:47:31 +00:00
Jonas Linter
c5fa92c4ec Fixed stuff 2025-11-10 13:07:08 +01:00
Jonas Linter
a92c5b699f But no account_currency 2025-11-10 11:49:00 +01:00
Jonas Linter
5f83ecd7ee Fix field schema validation test and update database schema
- Fixed field extraction logic in test_field_schema_validation.py to properly parse methods with docstrings
  - Previous regex was too greedy and matched across multiple method definitions
  - Now uses proper parenthesis and docstring matching to isolate method bodies
  - Correctly handles both 'fields = [...]' and 'fields = common_fields + [...]' patterns

- Updated db_schema.sql to include missing columns:
  - campaign_insights: added frequency, cpp, cost_per_action_type columns
  - adset_insights: added account_currency column
  - campaign_insights_by_country: added frequency, cpp, cost_per_action_type columns

- All field schema validation tests now pass
  - Test dynamically extracts fields from scheduled_grabber.py source code
  - Compares against actual database schema from db_schema.sql
  - Properly filters metadata-only fields (campaign_id, campaign_name, etc.)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-10 11:48:17 +01:00
Jonas Linter
511f381ff2 Simplified insight grabbers 2025-11-10 11:27:54 +01:00
Jonas Linter
03ae7ea61a No need to include the field. Its auto included from the breakdown 2025-11-10 11:06:59 +01:00
Jonas Linter
987553ef74 Again. 2025-11-10 10:50:44 +01:00
Jonas Linter
8f4753ff20 Should work now 2025-11-10 10:43:04 +01:00
Jonas Linter
3ae4ce0d83 Added country level statistics on the campaign level 2025-11-10 10:26:49 +01:00
e577945e75 Probably fixed initial view setup. Needs create drop then recreate 2025-11-05 21:05:36 +00:00
68223f664a Managing views in meta_api_grabber now 2025-11-05 15:43:01 +00:00
750ff0d4ff Archive view creation code 2025-11-05 15:26:08 +00:00
24 changed files with 1667 additions and 295 deletions

View File

@@ -1,2 +1,4 @@
Uv managed python project that grabs data from the meta api and saves it in a timescaledb database.
Always use uv run to execute python related stuff

View File

@@ -15,6 +15,12 @@ dependencies = [
"sqlalchemy[asyncio]>=2.0.44",
]
[project.optional-dependencies]
test = [
"pytest>=8.0.0",
"pytest-asyncio>=0.25.0",
]
[project.scripts]
meta-auth = "meta_api_grabber.auth:main"
meta-scheduled = "meta_api_grabber.scheduled_grabber:main"

View File

@@ -310,7 +310,6 @@ class TimescaleDBClient:
account_id: str,
data: Dict[str, Any],
date_preset: str = "today",
cache_metadata: bool = True,
):
"""
Insert campaign-level insights data.
@@ -321,10 +320,9 @@ class TimescaleDBClient:
account_id: Ad account ID
data: Insights data dictionary from Meta API
date_preset: Date preset used
cache_metadata: If True, automatically cache campaign metadata from insights data
"""
# Cache campaign metadata if requested and available in the insights data
if cache_metadata and data.get("campaign_name"):
# Auto-cache campaign metadata if available in the insights data
if data.get("campaign_name"):
await self.upsert_campaign(
campaign_id=campaign_id,
account_id=account_id,
@@ -389,7 +387,6 @@ class TimescaleDBClient:
account_id: str,
data: Dict[str, Any],
date_preset: str = "today",
cache_metadata: bool = True,
):
"""
Insert ad set level insights data.
@@ -401,21 +398,18 @@ class TimescaleDBClient:
account_id: Ad account ID
data: Insights data dictionary from Meta API
date_preset: Date preset used
cache_metadata: If True, automatically cache adset/campaign metadata from insights data
"""
# Cache metadata if requested and available in the insights data
if cache_metadata:
# Cache adset metadata if available
# Note: Campaign should already exist from cache_campaigns_metadata or grab_campaign_insights
# If it doesn't exist, the foreign key constraint will fail with a clear error
# This is intentional - we should never silently create campaigns with 'Unknown' names
if data.get("adset_name"):
await self.upsert_adset(
adset_id=adset_id,
campaign_id=campaign_id,
adset_name=data["adset_name"],
status=None, # Not available in insights response
)
# Auto-cache adset metadata if available in the insights data
# Note: Campaign should already exist from cache_campaigns_metadata or grab_campaign_insights
# If it doesn't exist, the foreign key constraint will fail with a clear error
# This is intentional - we should never silently create campaigns with 'Unknown' names
if data.get("adset_name"):
await self.upsert_adset(
adset_id=adset_id,
campaign_id=campaign_id,
adset_name=data["adset_name"],
status=None, # Not available in insights response
)
query = """
INSERT INTO adset_insights (
@@ -465,6 +459,74 @@ class TimescaleDBClient:
ctr, cpc, cpm, actions, date_preset, date_start, date_stop
)
async def insert_campaign_insights_by_country(
self,
time: datetime,
campaign_id: str,
account_id: str,
country: str,
data: Dict[str, Any],
date_preset: str = "today",
):
"""
Insert campaign-level insights data broken down by country.
Args:
time: Timestamp for the data point
campaign_id: Campaign ID
account_id: Ad account ID
country: ISO 2-letter country code
data: Insights data dictionary from Meta API
date_preset: Date preset used
"""
query = """
INSERT INTO campaign_insights_by_country (
time, campaign_id, account_id, country, impressions, clicks, spend, reach,
ctr, cpc, cpm, actions, date_preset, date_start, date_stop, fetched_at
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, NOW())
ON CONFLICT (time, campaign_id, country)
DO UPDATE SET
impressions = EXCLUDED.impressions,
clicks = EXCLUDED.clicks,
spend = EXCLUDED.spend,
reach = EXCLUDED.reach,
ctr = EXCLUDED.ctr,
cpc = EXCLUDED.cpc,
cpm = EXCLUDED.cpm,
actions = EXCLUDED.actions,
date_preset = EXCLUDED.date_preset,
date_start = EXCLUDED.date_start,
date_stop = EXCLUDED.date_stop,
fetched_at = NOW()
"""
impressions = int(data.get("impressions", 0)) if data.get("impressions") else None
clicks = int(data.get("clicks", 0)) if data.get("clicks") else None
spend = float(data.get("spend", 0)) if data.get("spend") else None
reach = int(data.get("reach", 0)) if data.get("reach") else None
ctr = float(data.get("ctr", 0)) if data.get("ctr") else None
cpc = float(data.get("cpc", 0)) if data.get("cpc") else None
cpm = float(data.get("cpm", 0)) if data.get("cpm") else None
# Extract date range from Meta API response and convert to date objects
from datetime import date as Date
date_start = None
date_stop = None
if data.get("date_start"):
date_start = Date.fromisoformat(data["date_start"])
if data.get("date_stop"):
date_stop = Date.fromisoformat(data["date_stop"])
import json
actions = json.dumps(data.get("actions", [])) if data.get("actions") else None
async with self.pool.acquire() as conn:
await conn.execute(
query,
time, campaign_id, account_id, country, impressions, clicks, spend, reach,
ctr, cpc, cpm, actions, date_preset, date_start, date_stop
)
# ========================================================================
# QUERY HELPERS
# ========================================================================

View File

@@ -14,10 +14,17 @@ ALTER TABLE IF EXISTS account_insights ADD COLUMN IF NOT EXISTS date_stop DATE;
ALTER TABLE IF EXISTS campaign_insights ADD COLUMN IF NOT EXISTS date_start DATE;
ALTER TABLE IF EXISTS campaign_insights ADD COLUMN IF NOT EXISTS date_stop DATE;
ALTER TABLE IF EXISTS campaign_insights ADD COLUMN IF NOT EXISTS frequency NUMERIC(10, 4);
ALTER TABLE IF EXISTS campaign_insights ADD COLUMN IF NOT EXISTS cpp NUMERIC(10, 4);
ALTER TABLE IF EXISTS campaign_insights ADD COLUMN IF NOT EXISTS cost_per_action_type JSONB;
ALTER TABLE IF EXISTS adset_insights ADD COLUMN IF NOT EXISTS date_start DATE;
ALTER TABLE IF EXISTS adset_insights ADD COLUMN IF NOT EXISTS date_stop DATE;
ALTER TABLE IF EXISTS campaign_insights_by_country ADD COLUMN IF NOT EXISTS frequency NUMERIC(10, 4);
ALTER TABLE IF EXISTS campaign_insights_by_country ADD COLUMN IF NOT EXISTS cpp NUMERIC(10, 4);
ALTER TABLE IF EXISTS campaign_insights_by_country ADD COLUMN IF NOT EXISTS cost_per_action_type JSONB;
-- ============================================================================
-- METADATA TABLES (Regular PostgreSQL tables for caching)
-- ============================================================================
@@ -115,14 +122,17 @@ CREATE TABLE IF NOT EXISTS campaign_insights (
clicks BIGINT,
spend NUMERIC(12, 2),
reach BIGINT,
frequency NUMERIC(10, 4),
-- Calculated metrics
ctr NUMERIC(10, 6),
cpc NUMERIC(10, 4),
cpm NUMERIC(10, 4),
cpp NUMERIC(10, 4), -- Cost per reach
-- Actions
actions JSONB,
cost_per_action_type JSONB,
-- Metadata
date_preset VARCHAR(50),
@@ -163,6 +173,7 @@ CREATE TABLE IF NOT EXISTS adset_insights (
cpc NUMERIC(10, 4),
cpm NUMERIC(10, 4),
-- Actions
actions JSONB,
@@ -189,6 +200,57 @@ CREATE INDEX IF NOT EXISTS idx_adset_insights_account_time
ON adset_insights (account_id, time DESC);
-- Campaign-level insights by country (time-series data)
CREATE TABLE IF NOT EXISTS campaign_insights_by_country (
time TIMESTAMPTZ NOT NULL,
campaign_id VARCHAR(50) NOT NULL REFERENCES campaigns(campaign_id),
account_id VARCHAR(50) NOT NULL REFERENCES ad_accounts(account_id),
country VARCHAR(2) NOT NULL, -- ISO 2-letter country code
-- Core metrics
impressions BIGINT,
clicks BIGINT,
spend NUMERIC(12, 2),
reach BIGINT,
frequency NUMERIC(10, 4),
-- Calculated metrics
ctr NUMERIC(10, 6),
cpc NUMERIC(10, 4),
cpm NUMERIC(10, 4),
cpp NUMERIC(10, 4), -- Cost per reach
-- Actions
actions JSONB,
cost_per_action_type JSONB,
-- Metadata
date_preset VARCHAR(50),
date_start DATE, -- Actual start date of the data range from Meta API
date_stop DATE, -- Actual end date of the data range from Meta API
fetched_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
PRIMARY KEY (time, campaign_id, country)
);
-- Convert to hypertable
SELECT create_hypertable('campaign_insights_by_country', 'time',
if_not_exists => TRUE,
chunk_time_interval => INTERVAL '1 day'
);
CREATE INDEX IF NOT EXISTS idx_campaign_insights_by_country_campaign_time
ON campaign_insights_by_country (campaign_id, time DESC);
CREATE INDEX IF NOT EXISTS idx_campaign_insights_by_country_account_time
ON campaign_insights_by_country (account_id, time DESC);
CREATE INDEX IF NOT EXISTS idx_campaign_insights_by_country_country
ON campaign_insights_by_country (country, time DESC);
-- Compression policy for campaign_insights_by_country
SELECT add_compression_policy('campaign_insights_by_country', INTERVAL '7 days', if_not_exists => TRUE);
-- ============================================================================
-- CONTINUOUS AGGREGATES (Pre-computed rollups for dashboards)
-- ============================================================================

View File

@@ -20,11 +20,29 @@ from facebook_business.exceptions import FacebookRequestError
from .database import TimescaleDBClient
from .rate_limiter import MetaRateLimiter
from .token_manager import MetaTokenManager
from .view_manager import ViewManager
# Set up logger
logger = logging.getLogger(__name__)
common_fields = [
AdsInsights.Field.impressions,
AdsInsights.Field.clicks,
AdsInsights.Field.spend,
AdsInsights.Field.cpc,
AdsInsights.Field.cpm,
AdsInsights.Field.ctr,
AdsInsights.Field.cpp,
AdsInsights.Field.reach,
AdsInsights.Field.frequency,
AdsInsights.Field.actions,
AdsInsights.Field.cost_per_action_type,
AdsInsights.Field.date_start,
AdsInsights.Field.date_stop,
]
class ScheduledInsightsGrabber:
"""
Scheduled grabber for Meta ad insights with TimescaleDB storage.
@@ -84,6 +102,9 @@ class ScheduledInsightsGrabber:
# Database client
self.db: Optional[TimescaleDBClient] = None
# View manager for materialized views
self.view_manager: Optional[ViewManager] = None
# Rate limiter with backoff (Meta best practices)
self.rate_limiter = MetaRateLimiter(
base_delay=2.0, # 2 seconds base delay
@@ -406,6 +427,131 @@ class ScheduledInsightsGrabber:
print(f" {count} ad sets cached for {account_id}")
async def _master_grab_insights(
self,
account_id: str,
fields: list,
level: str,
db_insert_func,
date_preset: Optional[str] = None,
start_date: Optional[date] = None,
end_date: Optional[date] = None,
breakdowns: Optional[list] = None,
limit: Optional[int] = None,
required_fields: Optional[dict] = None,
extra_data_processor=None,
) -> tuple[int, Optional[date]]:
"""
Master method to grab and store insights at any level.
Args:
account_id: Ad account ID
fields: List of AdsInsights fields to retrieve
level: Insights level ("account", "campaign", "adset", etc.)
db_insert_func: Database insert function to call for each insight
date_preset: Meta date preset (e.g., "today", "yesterday"). Use either this or start_date/end_date
start_date: Start date for custom date range (optional)
end_date: End date for custom date range (optional)
breakdowns: List of breakdown fields (optional)
limit: Maximum number of results (optional)
required_fields: Dict of field_name -> label for validation before insert
extra_data_processor: Optional callable to process/add extra data to insight_dict
Returns:
Tuple of (count of records stored, date_start from insights)
"""
# Build params
params = {"level": level}
if date_preset:
params["date_preset"] = date_preset
date_preset_for_db = date_preset
else:
# Use time_range for custom date ranges
params["time_range"] = {
"since": start_date.isoformat(),
"until": end_date.isoformat(),
}
params["time_increment"] = 1 # Daily breakdown
date_preset_for_db = "custom"
if breakdowns:
params["breakdowns"] = breakdowns
if limit:
params["limit"] = limit
# Fetch insights from Meta API
ad_account = AdAccount(account_id)
try:
insights = await self._rate_limited_request(
ad_account.get_insights,
fields=fields,
params=params,
)
except FacebookRequestError as e:
error_code = e.api_error_code()
if error_code in [190, 102]: # Invalid OAuth token errors
raise ValueError(f"Access token is invalid (error {error_code}): {e.api_error_message()}")
raise
# Get account timezone from database
account_timezone = await self._get_account_timezone(account_id)
# Store insights
count = 0
date_start_value = None
for insight in insights:
insight_dict = dict(insight)
# Extract date_start if available (for return value)
date_start_str = insight_dict.get("date_start")
if date_start_str and date_start_value is None:
date_start_value = date.fromisoformat(date_start_str)
# Check required fields before processing
if required_fields:
skip = False
for field_name, field_label in required_fields.items():
if not insight_dict.get(field_name):
skip = True
break
if skip:
continue
# Call extra processor if provided
if extra_data_processor:
extra_data_processor(insight_dict)
# Compute appropriate timestamp based on date_start and account timezone
timestamp = self._compute_timestamp(date_start_str, account_timezone)
# Build kwargs for insert function based on level
kwargs = {
"time": timestamp,
"account_id": account_id,
"data": insight_dict,
"date_preset": date_preset_for_db,
}
# Add level-specific parameters
if level == "campaign":
kwargs["campaign_id"] = insight_dict.get("campaign_id")
elif level == "adset":
kwargs["adset_id"] = insight_dict.get("adset_id")
kwargs["campaign_id"] = insight_dict.get("campaign_id")
# Add country for breakdown queries
if "country" in insight_dict:
kwargs["country"] = insight_dict.get("country")
# Call the appropriate database insert function with level-specific parameters
await db_insert_func(**kwargs)
count += 1
return count, date_start_value
async def grab_account_insights(self, account_id: str, date_preset: str = "today") -> Optional[date]:
"""
Grab and store account-level insights.
@@ -433,50 +579,13 @@ class ScheduledInsightsGrabber:
AdsInsights.Field.date_stop,
]
params = {
"date_preset": date_preset,
"level": "account",
}
ad_account = AdAccount(account_id)
try:
insights = await self._rate_limited_request(
ad_account.get_insights,
fields=fields,
params=params,
)
except FacebookRequestError as e:
# Check if it's a token error
error_code = e.api_error_code()
if error_code in [190, 102]: # Invalid OAuth token errors
raise ValueError(f"Access token is invalid (error {error_code}): {e.api_error_message()}")
raise
# Get account timezone from database
account_timezone = await self._get_account_timezone(account_id)
# Store insights
count = 0
date_start_value = None
for insight in insights:
insight_dict = dict(insight)
# Extract date_start if available
date_start_str = insight_dict.get("date_start")
if date_start_str and date_start_value is None:
date_start_value = date.fromisoformat(date_start_str)
# Compute appropriate timestamp based on date_start and account timezone
timestamp = self._compute_timestamp(date_start_str, account_timezone)
await self.db.insert_account_insights(
time=timestamp,
account_id=account_id,
data=insight_dict,
date_preset=date_preset,
)
count += 1
count, date_start_value = await self._master_grab_insights(
account_id=account_id,
fields=fields,
level="account",
db_insert_func=self.db.insert_account_insights,
date_preset=date_preset,
)
print(f" Account insights stored for {account_id} ({count} records, date: {date_start_value})")
return date_start_value
@@ -490,64 +599,22 @@ class ScheduledInsightsGrabber:
date_preset: Meta date preset
limit: Maximum number of campaigns
"""
fields = [
fields = common_fields + [
AdsInsights.Field.campaign_id,
AdsInsights.Field.campaign_name,
AdsInsights.Field.impressions,
AdsInsights.Field.clicks,
AdsInsights.Field.spend,
AdsInsights.Field.ctr,
AdsInsights.Field.cpc,
AdsInsights.Field.cpm,
AdsInsights.Field.reach,
AdsInsights.Field.actions,
AdsInsights.Field.date_start,
AdsInsights.Field.date_stop,
]
params = {
"date_preset": date_preset,
"level": "campaign",
"limit": limit,
}
ad_account = AdAccount(account_id)
try:
insights = await self._rate_limited_request(
ad_account.get_insights,
fields=fields,
params=params,
)
except FacebookRequestError as e:
error_code = e.api_error_code()
if error_code in [190, 102]:
raise ValueError(f"Access token is invalid (error {error_code}): {e.api_error_message()}")
raise
# Get account timezone from database
account_timezone = await self._get_account_timezone(account_id)
# Store insights (metadata is automatically cached from insights data)
count = 0
for insight in insights:
campaign_id = insight.get('campaign_id')
if campaign_id:
insight_dict = dict(insight)
# Compute appropriate timestamp based on date_preset and account timezone
date_start_str = insight_dict.get("date_start")
timestamp = self._compute_timestamp(date_start_str, account_timezone)
# Insert insights - metadata is automatically cached from the insights data
await self.db.insert_campaign_insights(
time=timestamp,
campaign_id=campaign_id,
account_id=account_id,
data=insight_dict,
date_preset=date_preset,
cache_metadata=True, # Automatically cache campaign name from insights
)
count += 1
count, _ = await self._master_grab_insights(
account_id=account_id,
fields=fields,
level="campaign",
db_insert_func=self.db.insert_campaign_insights,
date_preset=date_preset,
limit=limit,
required_fields={"campaign_id": "campaign_id"},
)
print(f" Campaign insights stored for {account_id} ({count} records)")
@@ -576,54 +643,45 @@ class ScheduledInsightsGrabber:
AdsInsights.Field.date_stop,
]
params = {
"date_preset": date_preset,
"level": "adset",
"limit": limit,
}
ad_account = AdAccount(account_id)
try:
insights = await self._rate_limited_request(
ad_account.get_insights,
fields=fields,
params=params,
)
except FacebookRequestError as e:
error_code = e.api_error_code()
if error_code in [190, 102]:
raise ValueError(f"Access token is invalid (error {error_code}): {e.api_error_message()}")
raise
# Get account timezone from database
account_timezone = await self._get_account_timezone(account_id)
# Store insights (metadata is automatically cached from insights data)
count = 0
for insight in insights:
adset_id = insight.get('adset_id')
campaign_id = insight.get('campaign_id')
if adset_id and campaign_id:
insight_dict = dict(insight)
# Compute appropriate timestamp based on date_preset and account timezone
date_start_str = insight_dict.get("date_start")
timestamp = self._compute_timestamp(date_start_str, account_timezone)
# Insert insights - metadata is automatically cached from the insights data
await self.db.insert_adset_insights(
time=timestamp,
adset_id=adset_id,
campaign_id=campaign_id,
account_id=account_id,
data=insight_dict,
date_preset=date_preset,
cache_metadata=True, # Automatically cache adset/campaign from insights
)
count += 1
count, _ = await self._master_grab_insights(
account_id=account_id,
fields=fields,
level="adset",
db_insert_func=self.db.insert_adset_insights,
date_preset=date_preset,
limit=limit,
required_fields={"adset_id": "adset_id", "campaign_id": "campaign_id"},
)
print(f" Ad set insights stored for {account_id} ({count} records)")
async def grab_campaign_insights_by_country(self, account_id: str, date_preset: str = "today", limit: int = 50):
"""
Grab and store campaign-level insights broken down by country.
Args:
account_id: Ad account ID
date_preset: Meta date preset
limit: Maximum number of campaigns
"""
fields = common_fields + [
AdsInsights.Field.campaign_id,
AdsInsights.Field.campaign_name,
]
count, _ = await self._master_grab_insights(
account_id=account_id,
fields=fields,
level="campaign",
db_insert_func=self.db.insert_campaign_insights_by_country,
date_preset=date_preset,
breakdowns=[AdsInsights.Breakdowns.country],
limit=limit,
required_fields={"campaign_id": "campaign_id", "country": "country"},
)
print(f" Campaign insights by country stored for {account_id} ({count} records)")
async def grab_account_insights_for_date_range(
self,
account_id: str,
@@ -657,48 +715,14 @@ class ScheduledInsightsGrabber:
AdsInsights.Field.date_stop,
]
# Use time_range instead of date_preset for custom date ranges
params = {
"time_range": {
"since": start_date.isoformat(),
"until": end_date.isoformat(),
},
"level": "account",
"time_increment": 1, # Daily breakdown
}
ad_account = AdAccount(account_id)
try:
insights = await self._rate_limited_request(
ad_account.get_insights,
fields=fields,
params=params,
)
except FacebookRequestError as e:
error_code = e.api_error_code()
if error_code in [190, 102]:
raise ValueError(f"Access token is invalid (error {error_code}): {e.api_error_message()}")
raise
# Get account timezone from database
account_timezone = await self._get_account_timezone(account_id)
# Store insights
count = 0
for insight in insights:
insight_dict = dict(insight)
# Compute appropriate timestamp based on date_start and account timezone
date_start_str = insight_dict.get("date_start")
timestamp = self._compute_timestamp(date_start_str, account_timezone)
await self.db.insert_account_insights(
time=timestamp,
account_id=account_id,
data=insight_dict,
date_preset="custom", # Indicate this was a custom date range
)
count += 1
count, _ = await self._master_grab_insights(
account_id=account_id,
fields=fields,
level="account",
db_insert_func=self.db.insert_account_insights,
start_date=start_date,
end_date=end_date,
)
return count
@@ -736,51 +760,16 @@ class ScheduledInsightsGrabber:
AdsInsights.Field.date_stop,
]
params = {
"time_range": {
"since": start_date.isoformat(),
"until": end_date.isoformat(),
},
"level": "campaign",
"time_increment": 1, # Daily breakdown
"limit": limit,
}
ad_account = AdAccount(account_id)
try:
insights = await self._rate_limited_request(
ad_account.get_insights,
fields=fields,
params=params,
)
except FacebookRequestError as e:
error_code = e.api_error_code()
if error_code in [190, 102]:
raise ValueError(f"Access token is invalid (error {error_code}): {e.api_error_message()}")
raise
# Get account timezone from database
account_timezone = await self._get_account_timezone(account_id)
# Store insights
count = 0
for insight in insights:
campaign_id = insight.get('campaign_id')
if campaign_id:
insight_dict = dict(insight)
date_start_str = insight_dict.get("date_start")
timestamp = self._compute_timestamp(date_start_str, account_timezone)
await self.db.insert_campaign_insights(
time=timestamp,
campaign_id=campaign_id,
account_id=account_id,
data=insight_dict,
date_preset="custom",
cache_metadata=True,
)
count += 1
count, _ = await self._master_grab_insights(
account_id=account_id,
fields=fields,
level="campaign",
db_insert_func=self.db.insert_campaign_insights,
start_date=start_date,
end_date=end_date,
limit=limit,
required_fields={"campaign_id": "campaign_id"},
)
return count
@@ -819,53 +808,16 @@ class ScheduledInsightsGrabber:
AdsInsights.Field.date_stop,
]
params = {
"time_range": {
"since": start_date.isoformat(),
"until": end_date.isoformat(),
},
"level": "adset",
"time_increment": 1, # Daily breakdown
"limit": limit,
}
ad_account = AdAccount(account_id)
try:
insights = await self._rate_limited_request(
ad_account.get_insights,
fields=fields,
params=params,
)
except FacebookRequestError as e:
error_code = e.api_error_code()
if error_code in [190, 102]:
raise ValueError(f"Access token is invalid (error {error_code}): {e.api_error_message()}")
raise
# Get account timezone from database
account_timezone = await self._get_account_timezone(account_id)
# Store insights
count = 0
for insight in insights:
adset_id = insight.get('adset_id')
campaign_id = insight.get('campaign_id')
if adset_id and campaign_id:
insight_dict = dict(insight)
date_start_str = insight_dict.get("date_start")
timestamp = self._compute_timestamp(date_start_str, account_timezone)
await self.db.insert_adset_insights(
time=timestamp,
adset_id=adset_id,
campaign_id=campaign_id,
account_id=account_id,
data=insight_dict,
date_preset="custom",
cache_metadata=True,
)
count += 1
count, _ = await self._master_grab_insights(
account_id=account_id,
fields=fields,
level="adset",
db_insert_func=self.db.insert_adset_insights,
start_date=start_date,
end_date=end_date,
limit=limit,
required_fields={"adset_id": "adset_id", "campaign_id": "campaign_id"},
)
return count
@@ -1032,6 +984,7 @@ class ScheduledInsightsGrabber:
print("Grabbing today's insights...")
date_start = await self.grab_account_insights(account_id, date_preset="today")
await self.grab_campaign_insights(account_id, date_preset="today", limit=50)
await self.grab_campaign_insights_by_country(account_id, date_preset="today", limit=50)
await self.grab_adset_insights(account_id, date_preset="today", limit=50)
# Track today's date from first account
@@ -1084,6 +1037,7 @@ class ScheduledInsightsGrabber:
print("Grabbing yesterday's insights...")
await self.grab_account_insights(account_id, date_preset="yesterday")
await self.grab_campaign_insights(account_id, date_preset="yesterday", limit=50)
await self.grab_campaign_insights_by_country(account_id, date_preset="yesterday", limit=50)
await self.grab_adset_insights(account_id, date_preset="yesterday", limit=50)
print(f"✓ Completed yesterday's data for {account_id}")
@@ -1135,6 +1089,17 @@ class ScheduledInsightsGrabber:
print("\n" + "-" * 60)
self.rate_limiter.print_stats()
# Refresh materialized views after new data has been inserted
if self.view_manager:
print("\n" + "-" * 60)
print("Refreshing materialized views...")
try:
await self.view_manager.refresh_all_views()
print("✓ Materialized views refreshed successfully")
except Exception as e:
print(f"⚠️ Warning: Failed to refresh materialized views: {e}")
# Don't fail the entire cycle, just log the warning
print("\n" + "="*60)
print("COLLECTION CYCLE COMPLETE")
print("="*60 + "\n")
@@ -1164,6 +1129,10 @@ class ScheduledInsightsGrabber:
# Initialize database schema (idempotent - safe to run multiple times)
await self.db.initialize_schema()
# Initialize view manager and create/ensure views exist
self.view_manager = ViewManager(self.db.pool)
await self.view_manager.initialize_views()
# Load all accessible ad accounts
await self.load_ad_accounts()

View File

@@ -0,0 +1,118 @@
"""
View manager for TimescaleDB materialized views.
Handles creation, updates, and refresh of materialized views for flattened insights data.
Views are loaded from individual SQL files in the views directory.
"""
import logging
import pathlib
from typing import List, Optional
import asyncpg
logger = logging.getLogger(__name__)
class ViewManager:
"""Manages materialized views for insights data flattening."""
def __init__(self, pool: asyncpg.Pool):
"""
Initialize view manager with a database connection pool.
Args:
pool: asyncpg connection pool
"""
self.pool = pool
self.views_dir = pathlib.Path(__file__).parent / "views"
async def initialize_views(self) -> None:
"""
Initialize all materialized views at startup.
Loads and executes SQL files from the views directory in alphabetical order.
Creates views if they don't exist; the operation is idempotent.
"""
logger.info("Initializing materialized views...")
if not self.views_dir.exists():
logger.warning(f"Views directory not found at {self.views_dir}")
return
# Get all .sql files in alphabetical order for consistent execution
view_files = sorted(self.views_dir.glob("*.sql"))
if not view_files:
logger.warning(f"No SQL files found in {self.views_dir}")
return
async with self.pool.acquire() as conn:
for view_file in view_files:
logger.debug(f"Loading view file: {view_file.name}")
await self._execute_view_file(conn, view_file)
logger.info("✓ Materialized views initialized successfully")
async def _execute_view_file(self, conn: asyncpg.Connection, view_file: pathlib.Path) -> None:
"""
Execute SQL statements from a view file.
Args:
conn: asyncpg connection
view_file: Path to SQL file
"""
with open(view_file, 'r') as f:
view_sql = f.read()
statements = [s.strip() for s in view_sql.split(';') if s.strip()]
for i, stmt in enumerate(statements, 1):
if not stmt:
continue
try:
await conn.execute(stmt)
logger.debug(f"{view_file.name}: Executed statement {i}")
except Exception as e:
error_msg = str(e).lower()
if "does not exist" in error_msg:
# Could be a missing dependent view or table, log it
logger.debug(f"{view_file.name}: View or table does not exist (statement {i})")
else:
# Log other errors but don't fail - could be incompatible schema changes
logger.warning(f"{view_file.name}: Error in statement {i}: {e}")
async def refresh_views(self, view_names: Optional[List[str]] = None) -> None:
"""
Refresh specified materialized views.
Args:
view_names: List of view names to refresh. If None, refreshes all views.
"""
if view_names is None:
view_names = [
"adset_insights_flattened",
"account_insights_flattened",
"campaign_insights_flattened",
"campaign_insights_by_country_flattened",
#"campaign_insights_by_device_flattened",
#"campaign_insights_by_gender_flattened",
]
async with self.pool.acquire() as conn:
for view_name in view_names:
try:
# Use CONCURRENTLY to avoid locking
await conn.execute(
f"REFRESH MATERIALIZED VIEW CONCURRENTLY {view_name};"
)
logger.debug(f"Refreshed materialized view: {view_name}")
except Exception as e:
error_msg = str(e).lower()
# View might not exist if not initialized, that's okay
if "does not exist" in error_msg:
logger.debug(f"View does not exist, skipping refresh: {view_name}")
else:
logger.warning(f"Error refreshing view {view_name}: {e}")
async def refresh_all_views(self) -> None:
"""Refresh all materialized views."""
await self.refresh_views()

View File

@@ -0,0 +1,25 @@
# View Permissions Setup
The scheduled grabber needs the `meta_user` to have permissions to create/drop/modify views.
## One-time Setup (run as superuser or database owner)
```sql
-- Give meta_user the ability to create views in the public schema
GRANT CREATE ON SCHEMA public TO meta_user;
-- Alternative: Make meta_user the owner of all views (if they already exist)
-- ALTER MATERIALIZED VIEW account_insights_flattened OWNER TO meta_user;
-- ALTER MATERIALIZED VIEW campaign_insights_flattened OWNER TO meta_user;
-- ALTER MATERIALIZED VIEW adset_insights_flattened OWNER TO meta_user;
```
Run these commands once as a superuser/database owner, then the scheduled grabber can manage views normally.
## Why This Is Needed
PostgreSQL materialized views are owned by the role that creates them, and only the owner (or a superuser) can drop or replace them. Since the scheduled grabber recreates views on startup (to apply schema changes), it needs permission to:
- `DROP MATERIALIZED VIEW` - remove old views
- `CREATE MATERIALIZED VIEW` - create new views
Without proper schema permissions, the `meta_user` cannot perform these operations.
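Once the grant above is in place, the path that actually exercises it is the `ViewManager` startup initialization added in this change. A minimal sketch of wiring that up on its own, connecting as `meta_user` (the DSN and the `setup_views` helper are illustrative, not part of the repo):
```python
import asyncio

import asyncpg

from meta_api_grabber.view_manager import ViewManager


async def setup_views(dsn: str) -> None:
    # Connects as meta_user; initialize_views() runs the DROP/CREATE statements
    # from the bundled .sql view files (see view_manager.py earlier in this diff).
    pool = await asyncpg.create_pool(dsn)
    try:
        await ViewManager(pool).initialize_views()
    finally:
        await pool.close()


# asyncio.run(setup_views("postgresql://meta_user:<password>@localhost:5432/meta"))  # illustrative DSN
```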

View File

@@ -0,0 +1,44 @@
DROP MATERIALIZED VIEW IF EXISTS account_insights_flattened CASCADE;
CREATE MATERIALIZED VIEW account_insights_flattened AS
SELECT
time,
account_id,
impressions,
clicks,
spend,
reach,
frequency,
ctr,
cpc,
cpm,
cpp,
date_preset,
date_start,
date_stop,
fetched_at,
(SELECT (value->>'value')::numeric
FROM jsonb_array_elements(actions)
WHERE value->>'action_type' = 'link_click') AS link_click,
(SELECT (value->>'value')::numeric
FROM jsonb_array_elements(actions)
WHERE value->>'action_type' = 'landing_page_view') AS landing_page_view,
(SELECT (value->>'value')::numeric
FROM jsonb_array_elements(actions)
WHERE value->>'action_type' = 'lead') AS lead,
(SELECT (value->>'value')::numeric
FROM jsonb_array_elements(cost_per_action_type)
WHERE value->>'action_type' = 'link_click') AS cost_per_link_click,
(SELECT (value->>'value')::numeric
FROM jsonb_array_elements(cost_per_action_type)
WHERE value->>'action_type' = 'landing_page_view') AS cost_per_landing_page_view,
(SELECT (value->>'value')::numeric
FROM jsonb_array_elements(cost_per_action_type)
WHERE value->>'action_type' = 'lead') AS cost_per_lead
FROM account_insights;
CREATE INDEX idx_account_insights_flat_date ON account_insights_flattened(date_start, date_stop);
CREATE UNIQUE INDEX idx_account_insights_flat_unique ON account_insights_flattened(time, account_id);
REFRESH MATERIALIZED VIEW CONCURRENTLY account_insights_flattened;

View File

@@ -0,0 +1,20 @@
--- account insights by device
DROP VIEW IF EXISTS account_insights_by_device CASCADE;
CREATE VIEW account_insights_by_device AS
SELECT
time,
account_id,
device_platform,
SUM(impressions) AS impressions,
SUM(clicks) AS clicks,
SUM(spend) AS spend,
SUM(link_click) AS link_click,
SUM(landing_page_view) AS landing_page_view,
SUM(lead) AS lead
FROM campaign_insights_by_device_flattened
GROUP BY time, account_id, device_platform;

View File

@@ -0,0 +1,53 @@
DROP VIEW IF EXISTS account_insights_by_gender CASCADE;
CREATE VIEW account_insights_by_gender AS
SELECT
time,
account_id,
gender,
SUM(impressions) AS impressions,
SUM(clicks) AS clicks,
SUM(spend) AS spend,
SUM(link_click) AS link_click,
SUM(landing_page_view) AS landing_page_view,
SUM(lead) AS lead
FROM campaign_insights_by_gender
GROUP BY time, account_id, gender;
DROP VIEW IF EXISTS account_insights_by_age CASCADE;
CREATE VIEW account_insights_by_age AS
SELECT
time,
account_id,
age,
SUM(impressions) AS impressions,
SUM(clicks) AS clicks,
SUM(spend) AS spend,
SUM(link_click) AS link_click,
SUM(landing_page_view) AS landing_page_view,
SUM(lead) AS lead
FROM campaign_insights_by_age
GROUP BY time, account_id, age;
DROP VIEW IF EXISTS account_insights_by_gender_and_age CASCADE;
CREATE VIEW account_insights_by_gender_and_age AS
SELECT
time,
account_id,
gender,
age,
SUM(impressions) AS impressions,
SUM(clicks) AS clicks,
SUM(spend) AS spend,
SUM(link_click) AS link_click,
SUM(landing_page_view) AS landing_page_view,
SUM(lead) AS lead
FROM campaign_insights_by_gender_and_age
GROUP BY time, account_id, age, gender;

View File

@@ -0,0 +1,55 @@
DROP VIEW IF EXISTS g_account_insights CASCADE;
CREATE VIEW g_account_insights AS
SELECT
time,
account_id,
clicks,
impressions,
interactions,
cost_micros,
cost_micros / 1000000.0 as cost,
leads,
engagements,
customer_currency_code,
account_name,
-- CTR (Click-Through Rate)
(clicks::numeric / impressions_nz) * 100 as ctr,
-- CPM (Cost Per Mille) in micros and standard units
(cost_micros::numeric / impressions_nz) * 1000 as cpm_micros,
(cost_micros::numeric / impressions_nz) * 1000 / 1000000.0 as cpm,
-- CPC (Cost Per Click) in micros and standard units
cost_micros::numeric / clicks_nz as cpc_micros,
cost_micros::numeric / clicks_nz / 1000000.0 as cpc,
-- CPL (Cost Per Lead) in micros and standard units
cost_micros::numeric / leads_nz as cpl_micros,
cost_micros::numeric / leads_nz / 1000000.0 as cpl,
-- Conversion Rate
(leads::numeric / clicks_nz) * 100 as conversion_rate,
-- Engagement Rate
(engagements::numeric / impressions_nz) * 100 as engagement_rate
FROM (
SELECT
segments_date as time,
customer_id as account_id,
sum(metrics_clicks) as clicks,
sum(metrics_impressions) as impressions,
sum(metrics_interactions) as interactions,
sum(metrics_cost_micros) as cost_micros,
sum(metrics_conversions) as leads,
sum(metrics_engagements) as engagements,
customer_currency_code,
customer_descriptive_name as account_name,
-- Null-safe denominators
NULLIF(sum(metrics_clicks), 0) as clicks_nz,
NULLIF(sum(metrics_impressions), 0) as impressions_nz,
NULLIF(sum(metrics_conversions), 0) as leads_nz
FROM google.account_performance_report
GROUP BY account_id, time, customer_currency_code, account_name
) base;

View File

@@ -0,0 +1,38 @@
DROP MATERIALIZED VIEW IF EXISTS adset_insights_flattened CASCADE;
CREATE MATERIALIZED VIEW adset_insights_flattened AS
SELECT
time,
adset_id,
campaign_id,
account_id,
impressions,
clicks,
spend,
reach,
ctr,
cpc,
cpm,
date_preset,
date_start,
date_stop,
fetched_at,
(SELECT (value->>'value')::numeric
FROM jsonb_array_elements(actions)
WHERE value->>'action_type' = 'link_click') AS link_click,
(SELECT (value->>'value')::numeric
FROM jsonb_array_elements(actions)
WHERE value->>'action_type' = 'landing_page_view') AS landing_page_view,
(SELECT (value->>'value')::numeric
FROM jsonb_array_elements(actions)
WHERE value->>'action_type' = 'lead') AS lead
FROM adset_insights;
-- Add indexes for common query patterns
CREATE INDEX idx_adset_insights_flat_campaign ON adset_insights_flattened(campaign_id);
CREATE INDEX idx_adset_insights_flat_date ON adset_insights_flattened(date_start, date_stop);
CREATE UNIQUE INDEX idx_adset_insights_flat_unique ON adset_insights_flattened(time, adset_id);
REFRESH MATERIALIZED VIEW CONCURRENTLY adset_insights_flattened;

View File

@@ -0,0 +1,33 @@
--- campaign insights
DROP MATERIALIZED VIEW IF EXISTS campaign_insights_flattened CASCADE;
CREATE MATERIALIZED VIEW campaign_insights_flattened AS
SELECT date_start AS "time",
concat('act_', account_id) AS account_id,
campaign_id,
impressions,
clicks,
spend,
reach,
ctr,
cpc,
cpm,
date_start,
date_stop,
( SELECT (jsonb_array_elements.value ->> 'value'::text)::numeric AS "numeric"
FROM jsonb_array_elements(customcampaign_insights.actions) jsonb_array_elements(value)
WHERE (jsonb_array_elements.value ->> 'action_type'::text) = 'link_click'::text) AS link_click,
( SELECT (jsonb_array_elements.value ->> 'value'::text)::numeric AS "numeric"
FROM jsonb_array_elements(customcampaign_insights.actions) jsonb_array_elements(value)
WHERE (jsonb_array_elements.value ->> 'action_type'::text) = 'landing_page_view'::text) AS landing_page_view,
( SELECT (jsonb_array_elements.value ->> 'value'::text)::numeric AS "numeric"
FROM jsonb_array_elements(customcampaign_insights.actions) jsonb_array_elements(value)
WHERE (jsonb_array_elements.value ->> 'action_type'::text) = 'lead'::text) AS lead
FROM meta.customcampaign_insights;
CREATE INDEX idx_campaign_insights_flat_date ON campaign_insights_flattened(date_start, date_stop);
CREATE UNIQUE INDEX idx_campaign_insights_flat_unique ON campaign_insights_flattened(time, campaign_id);
REFRESH MATERIALIZED VIEW CONCURRENTLY campaign_insights_flattened;

View File

@@ -0,0 +1,36 @@
--- campaign insights by country
DROP MATERIALIZED VIEW IF EXISTS campaign_insights_by_country_flattened CASCADE;
CREATE MATERIALIZED VIEW campaign_insights_by_country_flattened AS
SELECT date_start AS "time",
concat('act_', account_id) AS account_id,
campaign_id,
country,
impressions,
clicks,
spend,
reach,
frequency,
ctr,
cpc,
cpm,
date_start,
date_stop,
(SELECT (value->>'value')::numeric
FROM jsonb_array_elements(actions)
WHERE value->>'action_type' = 'link_click') AS link_click,
(SELECT (value->>'value')::numeric
FROM jsonb_array_elements(actions)
WHERE value->>'action_type' = 'landing_page_view') AS landing_page_view,
(SELECT (value->>'value')::numeric
FROM jsonb_array_elements(actions)
WHERE value->>'action_type' = 'lead') AS lead
FROM meta.custom_campaign_country;
CREATE INDEX idx_campaign_insights_by_country_flat_date ON campaign_insights_by_country_flattened(date_start, date_stop);
CREATE UNIQUE INDEX idx_campaign_insights_by_country_flat_unique ON campaign_insights_by_country_flattened(time, campaign_id, country);
REFRESH MATERIALIZED VIEW CONCURRENTLY campaign_insights_by_country_flattened;

View File

@@ -0,0 +1,32 @@
--- campaign insights by device
DROP MATERIALIZED VIEW IF EXISTS campaign_insights_by_device_flattened CASCADE;
CREATE MATERIALIZED VIEW campaign_insights_by_device_flattened AS
SELECT date_start AS "time",
concat('act_', account_id) AS account_id,
campaign_id,
device_platform,
impressions,
clicks,
spend,
reach,
frequency,
date_start,
date_stop,
(SELECT (value->>'value')::numeric
FROM jsonb_array_elements(actions)
WHERE value->>'action_type' = 'link_click') AS link_click,
(SELECT (value->>'value')::numeric
FROM jsonb_array_elements(actions)
WHERE value->>'action_type' = 'landing_page_view') AS landing_page_view,
(SELECT (value->>'value')::numeric
FROM jsonb_array_elements(actions)
WHERE value->>'action_type' = 'lead') AS lead
FROM meta.custom_campaign_device;
CREATE INDEX idx_campaign_insights_by_device_flat_date ON campaign_insights_by_device_flattened(date_start, date_stop);
CREATE UNIQUE INDEX idx_campaign_insights_by_device_flat_unique ON campaign_insights_by_device_flattened(time, campaign_id, device_platform);
REFRESH MATERIALIZED VIEW CONCURRENTLY campaign_insights_by_device_flattened;

View File

@@ -0,0 +1,71 @@
--- campaign insights by gender and age
DROP MATERIALIZED VIEW IF EXISTS campaign_insights_by_gender_and_age CASCADE;
CREATE MATERIALIZED VIEW campaign_insights_by_gender_and_age AS
SELECT date_start AS "time",
concat('act_', account_id) AS account_id,
campaign_id,
gender,
age,
impressions,
clicks,
spend,
reach,
frequency,
ctr,
cpc,
cpm,
date_start,
date_stop,
(SELECT (value->>'value')::numeric
FROM jsonb_array_elements(actions)
WHERE value->>'action_type' = 'link_click') AS link_click,
(SELECT (value->>'value')::numeric
FROM jsonb_array_elements(actions)
WHERE value->>'action_type' = 'landing_page_view') AS landing_page_view,
(SELECT (value->>'value')::numeric
FROM jsonb_array_elements(actions)
WHERE value->>'action_type' = 'lead') AS lead
FROM meta.custom_campaign_gender;
CREATE INDEX idx_campaign_insights_by_gender_and_age_date ON campaign_insights_by_gender_and_age(date_start, date_stop);
CREATE UNIQUE INDEX idx_campaign_insights_by_gender_and_age_unique ON campaign_insights_by_gender_and_age(time, campaign_id, gender, age);
REFRESH MATERIALIZED VIEW CONCURRENTLY campaign_insights_by_gender_and_age;
DROP VIEW IF EXISTS campaign_insights_by_gender CASCADE;
create view campaign_insights_by_gender as
Select time,
sum(clicks) as clicks,
sum(link_click) as link_click,
sum(lead) as lead,
sum(landing_page_view) as landing_page_view,
sum(spend) as spend,
sum(reach) as reach,
sum(impressions) as impressions,
gender,
campaign_id,
account_id
from campaign_insights_by_gender_and_age
group by time, gender, account_id, campaign_id, date_start, date_stop;
DROP VIEW IF EXISTS campaign_insights_by_age CASCADE;
create view campaign_insights_by_age as
Select time,
sum(clicks) as clicks,
sum(link_click) as link_click,
sum(lead) as lead,
sum(landing_page_view) as landing_page_view,
sum(spend) as spend,
sum(reach) as reach,
sum(impressions) as impressions,
age,
campaign_id,
account_id
from campaign_insights_by_gender_and_age
group by time, age, account_id, campaign_id, date_start, date_stop;

View File

@@ -0,0 +1,11 @@
-- Grant SELECT permissions for Grafana user on flattened views
GRANT SELECT ON account_insights_flattened TO grafana;
GRANT SELECT ON campaign_insights_flattened TO grafana;
GRANT SELECT ON adset_insights_flattened TO grafana;
-- Grant SELECT on all existing tables and views in the schema
GRANT SELECT ON ALL TABLES IN SCHEMA public TO grafana;
-- Grant SELECT on all future tables and views in the schema
ALTER DEFAULT PRIVILEGES IN SCHEMA public
GRANT SELECT ON TABLES TO grafana;

tests/README.md Normal file (115 additions)
View File

@@ -0,0 +1,115 @@
# Tests
This directory contains tests for the meta_api_grabber project.
## Running Tests
Install test dependencies:
```bash
uv sync --extra test
```
Run all tests:
```bash
uv run pytest
```
Run specific test file:
```bash
uv run pytest tests/test_field_schema_validation.py -v
```
Run with verbose output:
```bash
uv run pytest tests/test_field_schema_validation.py -v -s
```
## Test Files
### `test_field_schema_validation.py` (Integration Test)
This is a critical integration test that validates that all fields requested by the grab_* methods in `scheduled_grabber.py` exist in the actual database schema.
**What it does:**
1. Parses `db_schema.sql` to extract actual table columns
2. Checks fields requested by each grab method:
- `grab_account_insights()` → `account_insights` table
- `grab_campaign_insights()` → `campaign_insights` table
- `grab_adset_insights()` → `adset_insights` table
- `grab_campaign_insights_by_country()` → `campaign_insights_by_country` table
3. Verifies all requested fields exist in the corresponding database table
**Why this test is important:** When new fields are added to the Meta API field lists, this test quickly alerts you if the corresponding database columns need to be added. Since fields are only added (never removed), the test helps catch schema mismatches early.
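At its core, each test method is a set comparison: the fields a grab method requests, minus the metadata-only fields, must all exist as columns of the corresponding table. A minimal sketch of that check with illustrative values (the real test builds these sets by parsing `scheduled_grabber.py` and `db_schema.sql`):
```python
# Illustrative values only -- the real sets come from parsing the source files.
requested_fields = {"impressions", "clicks", "spend", "campaign_id", "campaign_name"}
table_columns = {"time", "campaign_id", "account_id", "impressions", "clicks", "spend"}

# IDs/names live in the metadata tables (campaigns, adsets), not the insights tables.
METADATA_ONLY_FIELDS = {"campaign_id", "campaign_name", "adset_id", "adset_name"}

missing = (requested_fields - METADATA_ONLY_FIELDS) - table_columns
assert not missing, f"campaign_insights table missing columns: {missing}"
```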
**Test methods:**
- `test_account_insights_fields()` - Validates account-level insight fields
- `test_campaign_insights_fields()` - Validates campaign-level insight fields
- `test_adset_insights_fields()` - Validates ad set-level insight fields
- `test_campaign_insights_by_country_fields()` - Validates country breakdown fields
- `test_all_tables_exist()` - Ensures all required insight tables exist
- `test_schema_documentation()` - Prints out the parsed schema for reference
**Output example:**
```
Table: account_insights
Columns (17): account_id, actions, clicks, cost_per_action_type, cpc, cpm, cpp, ctr, ...
Table: campaign_insights
Columns (15): account_id, actions, campaign_id, clicks, cpc, cpm, ctr, ...
```
## Writing Tests
Use markers to categorize tests:
```python
@pytest.mark.unit
def test_something():
pass
@pytest.mark.integration
async def test_database_connection():
pass
```
Run only unit tests:
```bash
uv run pytest -m unit
```
Run everything except integration tests:
```bash
uv run pytest -m "not integration"
```
## Schema Validation Workflow
When you add new fields to a grab method:
1. **Add fields to `scheduled_grabber.py`:**
```python
fields = [
...
AdsInsights.Field.new_field, # New field added
]
```
2. **Run tests to see what's missing:**
```bash
uv run pytest tests/test_field_schema_validation.py -v -s
```
3. **Test output will show:**
```
adset_insights table missing columns: {'new_field'}
Available: [account_id, actions, adset_id, ...]
```
4. **Update `db_schema.sql` with the new column:**
```sql
ALTER TABLE adset_insights ADD COLUMN IF NOT EXISTS new_field TYPE;
```
5. **Run tests again to verify:**
```bash
uv run pytest tests/test_field_schema_validation.py -v
```

tests/__init__.py Normal file (1 addition)
View File

@@ -0,0 +1 @@
"""Tests for meta_api_grabber package."""

tests/conftest.py Normal file (13 additions)
View File

@@ -0,0 +1,13 @@
"""Pytest configuration and fixtures."""
import pytest
def pytest_configure(config):
"""Configure pytest."""
config.addinivalue_line(
"markers", "integration: marks tests as integration tests (deselect with '-m \"not integration\"')"
)
config.addinivalue_line(
"markers", "unit: marks tests as unit tests"
)

View File

@@ -0,0 +1,360 @@
"""
Integration test that validates that all fields requested by grab_* methods exist in the database schema.
This test:
1. Parses the SQL schema file (db_schema.sql) to extract actual table columns
2. Reads scheduled_grabber.py to find which methods call which tables
3. Verifies that all requested fields exist in the actual database schema
"""
import re
import pathlib
from typing import Dict, Set, List
import pytest
def parse_sql_schema() -> Dict[str, Set[str]]:
"""
Parse db_schema.sql to extract table columns.
Returns:
Dictionary mapping table names to sets of column names
"""
schema_file = pathlib.Path(__file__).parent.parent / "src" / "meta_api_grabber" / "db_schema.sql"
if not schema_file.exists():
raise FileNotFoundError(f"Schema file not found: {schema_file}")
with open(schema_file, 'r') as f:
content = f.read()
tables = {}
# Parse CREATE TABLE statements
# Pattern: CREATE TABLE IF NOT EXISTS table_name (...)
create_table_pattern = r'CREATE TABLE IF NOT EXISTS (\w+)\s*\((.*?)\);'
for match in re.finditer(create_table_pattern, content, re.DOTALL):
table_name = match.group(1)
table_body = match.group(2)
# Extract column names (first word before space/comma)
# Pattern: column_name TYPE ...
column_pattern = r'^\s*(\w+)\s+\w+'
columns = set()
for line in table_body.split('\n'):
line = line.strip()
if not line or line.startswith('--') or line.startswith('PRIMARY') or line.startswith('FOREIGN') or line.startswith('CONSTRAINT'):
continue
col_match = re.match(column_pattern, line)
if col_match:
columns.add(col_match.group(1))
if columns:
tables[table_name] = columns
return tables
def get_field_name(field_str: str) -> str:
"""
Extract field name from AdsInsights.Field.xxx notation.
Example: 'impressions' from 'AdsInsights.Field.impressions'
"""
if '.' in field_str:
return field_str.split('.')[-1]
return field_str
def extract_fields_from_grabber_source() -> Dict[str, List[str]]:
"""
Extract field lists from grab_* methods by reading scheduled_grabber.py source.
Returns:
Dictionary mapping method names to lists of field names
"""
grabber_file = pathlib.Path(__file__).parent.parent / "src" / "meta_api_grabber" / "scheduled_grabber.py"
if not grabber_file.exists():
raise FileNotFoundError(f"scheduled_grabber.py not found: {grabber_file}")
with open(grabber_file, 'r') as f:
source = f.read()
methods_to_table = {
'grab_account_insights': 'account_insights',
'grab_campaign_insights': 'campaign_insights',
'grab_adset_insights': 'adset_insights',
'grab_campaign_insights_by_country': 'campaign_insights_by_country',
}
result = {}
for method_name in methods_to_table.keys():
# Find the method definition by looking for: async def method_name(...)
method_pattern = rf'async def {method_name}\s*\('
method_match = re.search(method_pattern, source)
if not method_match:
continue
# Get the position after the method name pattern
start_pos = method_match.end()
# Now find where the method body actually starts (after the closing paren and docstring)
# Skip to the opening paren
open_paren_pos = start_pos - 1
# Count parentheses to find the closing paren of the function signature
paren_count = 1
pos = open_paren_pos + 1
while pos < len(source) and paren_count > 0:
if source[pos] == '(':
paren_count += 1
elif source[pos] == ')':
paren_count -= 1
pos += 1
# Now pos is after the closing paren. Find the colon
colon_pos = source.find(':', pos)
# Skip past any docstring if present
after_colon = source[colon_pos + 1:colon_pos + 10].lstrip()
if after_colon.startswith('"""') or after_colon.startswith("'''"):
quote_type = '"""' if after_colon.startswith('"""') else "'''"
docstring_start = source.find(quote_type, colon_pos)
docstring_end = source.find(quote_type, docstring_start + 3) + 3
method_body_start = docstring_end
else:
method_body_start = colon_pos + 1
# Find the next method definition to know where this method ends
next_method_pattern = r'async def \w+\s*\('
next_match = re.search(next_method_pattern, source[method_body_start:])
if next_match:
method_body_end = method_body_start + next_match.start()
else:
# Last method - use rest of file
method_body_end = len(source)
method_body = source[method_body_start:method_body_end]
# Extract fields from the method body
# Look for: fields = [...] or fields = common_fields + [...]
# First check if this method uses common_fields
uses_common_fields = 'common_fields' in method_body[:500]
if uses_common_fields:
# Pattern: fields = common_fields + [...]
fields_pattern = r'fields\s*=\s*common_fields\s*\+\s*\[(.*?)\]'
fields_match = re.search(fields_pattern, method_body, re.DOTALL)
if fields_match:
fields_str = fields_match.group(1)
# Extract individual field names
field_pattern = r'AdsInsights\.Field\.(\w+)'
fields = re.findall(field_pattern, fields_str)
# Also get common_fields from the module level
common_pattern = r'common_fields\s*=\s*\[(.*?)\]'
common_match = re.search(common_pattern, source, re.DOTALL)
if common_match:
common_str = common_match.group(1)
common_fields_list = re.findall(field_pattern, common_str)
fields = common_fields_list + fields
result[method_name] = fields
else:
# Pattern: fields = [...]
# Use bracket matching to find the correct field list
fields_keyword_pos = method_body.find('fields =')
if fields_keyword_pos != -1:
# Find the opening bracket after fields =
bracket_pos = method_body.find('[', fields_keyword_pos)
if bracket_pos != -1:
# Count brackets to find the matching closing bracket
bracket_count = 0
end_pos = bracket_pos
for i, char in enumerate(method_body[bracket_pos:]):
if char == '[':
bracket_count += 1
elif char == ']':
bracket_count -= 1
if bracket_count == 0:
end_pos = bracket_pos + i
break
fields_str = method_body[bracket_pos + 1:end_pos]
field_pattern = r'AdsInsights\.Field\.(\w+)'
fields = re.findall(field_pattern, fields_str)
result[method_name] = fields
return result
@pytest.fixture(scope="module")
def schema_columns():
"""Parse and cache the schema columns."""
return parse_sql_schema()
@pytest.fixture(scope="module")
def extracted_fields_by_method():
"""Extract and cache the fields from each grab_* method."""
return extract_fields_from_grabber_source()
# Mapping of method names to their insight table names
METHOD_TO_TABLE = {
'grab_account_insights': 'account_insights',
'grab_campaign_insights': 'campaign_insights',
'grab_adset_insights': 'adset_insights',
'grab_campaign_insights_by_country': 'campaign_insights_by_country',
}
# Fields that are IDs/names stored in metadata tables, not in the insights table
METADATA_ONLY_FIELDS = {
'campaign_id', 'campaign_name',
'adset_id', 'adset_name',
}
class TestFieldSchemaValidation:
"""Validate that all API field requests have corresponding database columns."""
def test_grab_account_insights_fields(self, schema_columns, extracted_fields_by_method):
"""Test that grab_account_insights fields exist in schema."""
method_name = 'grab_account_insights'
table_name = METHOD_TO_TABLE[method_name]
assert method_name in extracted_fields_by_method, f"Could not extract fields from {method_name}"
extracted_fields = set(extracted_fields_by_method[method_name])
table_cols = schema_columns.get(table_name, set())
assert table_cols, f"Table {table_name} not found in schema"
missing = extracted_fields - table_cols
assert not missing, \
f"{table_name} table missing columns: {missing}\n" \
f"Method requests: {sorted(extracted_fields)}\n" \
f"Available: {sorted(table_cols)}"
print(f"{method_name}{table_name}: {len(extracted_fields)} fields validated")
def test_grab_campaign_insights_fields(self, schema_columns, extracted_fields_by_method):
"""Test that grab_campaign_insights fields exist in schema."""
method_name = 'grab_campaign_insights'
table_name = METHOD_TO_TABLE[method_name]
assert method_name in extracted_fields_by_method, f"Could not extract fields from {method_name}"
extracted_fields = set(extracted_fields_by_method[method_name])
table_cols = schema_columns.get(table_name, set())
assert table_cols, f"Table {table_name} not found in schema"
# Remove ID/name fields (stored in metadata tables, not insights table)
insight_only_fields = extracted_fields - METADATA_ONLY_FIELDS
missing = insight_only_fields - table_cols
assert not missing, \
f"{table_name} table missing columns: {missing}\n" \
f"Method requests: {sorted(extracted_fields)}\n" \
f"Available: {sorted(table_cols)}"
print(f"{method_name}{table_name}: {len(extracted_fields)} fields validated")
def test_grab_adset_insights_fields(self, schema_columns, extracted_fields_by_method):
"""Test that grab_adset_insights fields exist in schema."""
method_name = 'grab_adset_insights'
table_name = METHOD_TO_TABLE[method_name]
assert method_name in extracted_fields_by_method, f"Could not extract fields from {method_name}"
extracted_fields = set(extracted_fields_by_method[method_name])
table_cols = schema_columns.get(table_name, set())
assert table_cols, f"Table {table_name} not found in schema"
# Remove ID/name fields (stored in metadata tables, not insights table)
insight_only_fields = extracted_fields - METADATA_ONLY_FIELDS
missing = insight_only_fields - table_cols
assert not missing, \
f"{table_name} table missing columns: {missing}\n" \
f"Method requests: {sorted(extracted_fields)}\n" \
f"Available: {sorted(table_cols)}"
print(f"{method_name}{table_name}: {len(extracted_fields)} fields validated")
def test_grab_campaign_insights_by_country_fields(self, schema_columns, extracted_fields_by_method):
"""Test that grab_campaign_insights_by_country fields exist in schema."""
method_name = 'grab_campaign_insights_by_country'
table_name = METHOD_TO_TABLE[method_name]
assert method_name in extracted_fields_by_method, f"Could not extract fields from {method_name}"
extracted_fields = set(extracted_fields_by_method[method_name])
table_cols = schema_columns.get(table_name, set())
assert table_cols, f"Table {table_name} not found in schema"
# Remove ID/name fields (stored in metadata tables, not insights table)
insight_only_fields = extracted_fields - METADATA_ONLY_FIELDS
# Country is special - it's part of the breakdown
assert "country" in table_cols, \
f"country field missing in {table_name} table\n" \
f"Available: {sorted(table_cols)}"
missing = insight_only_fields - table_cols
assert not missing, \
f"{table_name} table missing columns: {missing}\n" \
f"Method requests: {sorted(extracted_fields)}\n" \
f"Available: {sorted(table_cols)}"
print(f"{method_name}{table_name}: {len(extracted_fields)} fields validated")
def test_all_tables_exist(self, schema_columns):
"""Test that all required insight tables exist in schema."""
required_tables = {
"account_insights",
"campaign_insights",
"adset_insights",
"campaign_insights_by_country",
}
existing_tables = set(schema_columns.keys())
missing = required_tables - existing_tables
assert not missing, \
f"Missing tables: {missing}\n" \
f"Found: {sorted(existing_tables)}"
def test_schema_documentation(self, schema_columns):
"""Print out the parsed schema for verification."""
print("\n" + "="*80)
print("PARSED DATABASE SCHEMA")
print("="*80)
for table_name in sorted(schema_columns.keys()):
columns = sorted(schema_columns[table_name])
print(f"\nTable: {table_name}")
print(f"Columns ({len(columns)}): {', '.join(columns)}")
def test_extracted_fields_documentation(self, extracted_fields_by_method):
"""Print out extracted fields from each method."""
print("\n" + "="*80)
print("EXTRACTED FIELDS FROM GRAB METHODS")
print("="*80)
for method_name, fields in sorted(extracted_fields_by_method.items()):
print(f"\n{method_name}:")
print(f" Fields ({len(fields)}): {', '.join(sorted(set(fields)))}")
if __name__ == "__main__":
pytest.main([__file__, "-v"])

86
uv.lock generated
View File

@@ -193,6 +193,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/0a/4c/925909008ed5a988ccbb72dcc897407e5d6d3bd72410d69e051fc0c14647/charset_normalizer-3.4.4-py3-none-any.whl", hash = "sha256:7a32c560861a02ff789ad905a2fe94e3f840803362c84fecf1851cb4cf3dc37f", size = 53402, upload-time = "2025-10-14T04:42:31.76Z" },
]
[[package]]
name = "colorama"
version = "0.4.6"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" },
]
[[package]]
name = "curlify"
version = "3.0.0"
@@ -382,6 +391,8 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/ee/43/3cecdc0349359e1a527cbf2e3e28e5f8f06d3343aaf82ca13437a9aa290f/greenlet-3.2.4-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:23768528f2911bcd7e475210822ffb5254ed10d71f4028387e5a99b4c6699671", size = 610497, upload-time = "2025-08-07T13:18:31.636Z" },
{ url = "https://files.pythonhosted.org/packages/b8/19/06b6cf5d604e2c382a6f31cafafd6f33d5dea706f4db7bdab184bad2b21d/greenlet-3.2.4-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:00fadb3fedccc447f517ee0d3fd8fe49eae949e1cd0f6a611818f4f6fb7dc83b", size = 1121662, upload-time = "2025-08-07T13:42:41.117Z" },
{ url = "https://files.pythonhosted.org/packages/a2/15/0d5e4e1a66fab130d98168fe984c509249c833c1a3c16806b90f253ce7b9/greenlet-3.2.4-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:d25c5091190f2dc0eaa3f950252122edbbadbb682aa7b1ef2f8af0f8c0afefae", size = 1149210, upload-time = "2025-08-07T13:18:24.072Z" },
{ url = "https://files.pythonhosted.org/packages/1c/53/f9c440463b3057485b8594d7a638bed53ba531165ef0ca0e6c364b5cc807/greenlet-3.2.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6e343822feb58ac4d0a1211bd9399de2b3a04963ddeec21530fc426cc121f19b", size = 1564759, upload-time = "2025-11-04T12:42:19.395Z" },
{ url = "https://files.pythonhosted.org/packages/47/e4/3bb4240abdd0a8d23f4f88adec746a3099f0d86bfedb623f063b2e3b4df0/greenlet-3.2.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ca7f6f1f2649b89ce02f6f229d7c19f680a6238af656f61e0115b24857917929", size = 1634288, upload-time = "2025-11-04T12:42:21.174Z" },
{ url = "https://files.pythonhosted.org/packages/0b/55/2321e43595e6801e105fcfdee02b34c0f996eb71e6ddffca6b10b7e1d771/greenlet-3.2.4-cp313-cp313-win_amd64.whl", hash = "sha256:554b03b6e73aaabec3745364d6239e9e012d64c68ccd0b8430c64ccc14939a8b", size = 299685, upload-time = "2025-08-07T13:24:38.824Z" },
{ url = "https://files.pythonhosted.org/packages/22/5c/85273fd7cc388285632b0498dbbab97596e04b154933dfe0f3e68156c68c/greenlet-3.2.4-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:49a30d5fda2507ae77be16479bdb62a660fa51b1eb4928b524975b3bde77b3c0", size = 273586, upload-time = "2025-08-07T13:16:08.004Z" },
{ url = "https://files.pythonhosted.org/packages/d1/75/10aeeaa3da9332c2e761e4c50d4c3556c21113ee3f0afa2cf5769946f7a3/greenlet-3.2.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:299fd615cd8fc86267b47597123e3f43ad79c9d8a22bebdce535e53550763e2f", size = 686346, upload-time = "2025-08-07T13:42:59.944Z" },
@@ -389,6 +400,8 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/dc/8b/29aae55436521f1d6f8ff4e12fb676f3400de7fcf27fccd1d4d17fd8fecd/greenlet-3.2.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:b4a1870c51720687af7fa3e7cda6d08d801dae660f75a76f3845b642b4da6ee1", size = 694659, upload-time = "2025-08-07T13:53:17.759Z" },
{ url = "https://files.pythonhosted.org/packages/92/2e/ea25914b1ebfde93b6fc4ff46d6864564fba59024e928bdc7de475affc25/greenlet-3.2.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:061dc4cf2c34852b052a8620d40f36324554bc192be474b9e9770e8c042fd735", size = 695355, upload-time = "2025-08-07T13:18:34.517Z" },
{ url = "https://files.pythonhosted.org/packages/72/60/fc56c62046ec17f6b0d3060564562c64c862948c9d4bc8aa807cf5bd74f4/greenlet-3.2.4-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:44358b9bf66c8576a9f57a590d5f5d6e72fa4228b763d0e43fee6d3b06d3a337", size = 657512, upload-time = "2025-08-07T13:18:33.969Z" },
{ url = "https://files.pythonhosted.org/packages/23/6e/74407aed965a4ab6ddd93a7ded3180b730d281c77b765788419484cdfeef/greenlet-3.2.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:2917bdf657f5859fbf3386b12d68ede4cf1f04c90c3a6bc1f013dd68a22e2269", size = 1612508, upload-time = "2025-11-04T12:42:23.427Z" },
{ url = "https://files.pythonhosted.org/packages/0d/da/343cd760ab2f92bac1845ca07ee3faea9fe52bee65f7bcb19f16ad7de08b/greenlet-3.2.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:015d48959d4add5d6c9f6c5210ee3803a830dce46356e3bc326d6776bde54681", size = 1680760, upload-time = "2025-11-04T12:42:25.341Z" },
{ url = "https://files.pythonhosted.org/packages/e3/a5/6ddab2b4c112be95601c13428db1d8b6608a8b6039816f2ba09c346c08fc/greenlet-3.2.4-cp314-cp314-win_amd64.whl", hash = "sha256:e37ab26028f12dbb0ff65f29a8d3d44a765c61e729647bf2ddfbbed621726f01", size = 303425, upload-time = "2025-08-07T13:32:27.59Z" },
]
@@ -446,6 +459,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" },
]
[[package]]
name = "iniconfig"
version = "2.3.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503, upload-time = "2025-10-18T21:55:43.219Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" },
]
[[package]]
name = "mako"
version = "1.3.10"
@@ -525,6 +547,12 @@ dependencies = [
{ name = "sqlalchemy", extra = ["asyncio"] },
]
[package.optional-dependencies]
test = [
{ name = "pytest" },
{ name = "pytest-asyncio" },
]
[package.metadata]
requires-dist = [
{ name = "aiohttp", specifier = ">=3.13.1" },
@@ -532,10 +560,13 @@ requires-dist = [
{ name = "asyncpg", specifier = ">=0.30.0" },
{ name = "facebook-business", specifier = ">=23.0.3" },
{ name = "google-ads", specifier = ">=28.3.0" },
{ name = "pytest", marker = "extra == 'test'", specifier = ">=8.0.0" },
{ name = "pytest-asyncio", marker = "extra == 'test'", specifier = ">=0.25.0" },
{ name = "python-dotenv", specifier = ">=1.1.1" },
{ name = "requests-oauthlib", specifier = ">=2.0.0" },
{ name = "sqlalchemy", extras = ["asyncio"], specifier = ">=2.0.44" },
]
provides-extras = ["test"]
[[package]]
name = "multidict"
@@ -627,6 +658,24 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/be/9c/92789c596b8df838baa98fa71844d84283302f7604ed565dafe5a6b5041a/oauthlib-3.3.1-py3-none-any.whl", hash = "sha256:88119c938d2b8fb88561af5f6ee0eec8cc8d552b7bb1f712743136eb7523b7a1", size = 160065, upload-time = "2025-06-19T22:48:06.508Z" },
]
[[package]]
name = "packaging"
version = "25.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/a1/d4/1fc4078c65507b51b96ca8f8c3ba19e6a61c8253c72794544580a7b6c24d/packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f", size = 165727, upload-time = "2025-04-19T11:48:59.673Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" },
]
[[package]]
name = "pluggy"
version = "1.6.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" },
]
[[package]]
name = "propcache"
version = "0.4.1"
@@ -753,6 +802,43 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/b1/ec/1fb891d8a2660716aadb2143235481d15ed1cbfe3ad669194690b0604492/pycountry-24.6.1-py3-none-any.whl", hash = "sha256:f1a4fb391cd7214f8eefd39556d740adcc233c778a27f8942c8dca351d6ce06f", size = 6335189, upload-time = "2024-06-01T04:11:49.711Z" },
]
[[package]]
name = "pygments"
version = "2.19.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631, upload-time = "2025-06-21T13:39:12.283Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" },
]
[[package]]
name = "pytest"
version = "8.4.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "colorama", marker = "sys_platform == 'win32'" },
{ name = "iniconfig" },
{ name = "packaging" },
{ name = "pluggy" },
{ name = "pygments" },
]
sdist = { url = "https://files.pythonhosted.org/packages/a3/5c/00a0e072241553e1a7496d638deababa67c5058571567b92a7eaa258397c/pytest-8.4.2.tar.gz", hash = "sha256:86c0d0b93306b961d58d62a4db4879f27fe25513d4b969df351abdddb3c30e01", size = 1519618, upload-time = "2025-09-04T14:34:22.711Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/a8/a4/20da314d277121d6534b3a980b29035dcd51e6744bd79075a6ce8fa4eb8d/pytest-8.4.2-py3-none-any.whl", hash = "sha256:872f880de3fc3a5bdc88a11b39c9710c3497a547cfa9320bc3c5e62fbf272e79", size = 365750, upload-time = "2025-09-04T14:34:20.226Z" },
]
[[package]]
name = "pytest-asyncio"
version = "1.2.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "pytest" },
]
sdist = { url = "https://files.pythonhosted.org/packages/42/86/9e3c5f48f7b7b638b216e4b9e645f54d199d7abbbab7a64a13b4e12ba10f/pytest_asyncio-1.2.0.tar.gz", hash = "sha256:c609a64a2a8768462d0c99811ddb8bd2583c33fd33cf7f21af1c142e824ffb57", size = 50119, upload-time = "2025-09-12T07:33:53.816Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/04/93/2fa34714b7a4ae72f2f8dad66ba17dd9a2c793220719e736dda28b7aec27/pytest_asyncio-1.2.0-py3-none-any.whl", hash = "sha256:8e17ae5e46d8e7efe51ab6494dd2010f4ca8dae51652aa3c8d55acf50bfb2e99", size = 15095, upload-time = "2025-09-12T07:33:52.639Z" },
]
[[package]]
name = "python-dotenv"
version = "1.1.1"

View File

@@ -0,0 +1,150 @@
-- Must be refreshed explicitly after new entries are added (see the REFRESH MATERIALIZED VIEW statements below); plain materialized views do not refresh automatically. Additional action types can be extracted from the actions column if necessary.
CREATE MATERIALIZED VIEW adset_insights_flattened AS
SELECT
time,
adset_id,
campaign_id,
account_id,
impressions,
clicks,
spend,
reach,
ctr,
cpc,
cpm,
date_preset,
date_start,
date_stop,
fetched_at,
(SELECT (value->>'value')::numeric
FROM jsonb_array_elements(actions)
WHERE value->>'action_type' = 'link_click') AS link_click,
(SELECT (value->>'value')::numeric
FROM jsonb_array_elements(actions)
WHERE value->>'action_type' = 'landing_page_view') AS landing_page_view,
(SELECT (value->>'value')::numeric
FROM jsonb_array_elements(actions)
WHERE value->>'action_type' = 'lead') AS lead
FROM adset_insights;
-- Add indexes for common query patterns
CREATE INDEX idx_adset_insights_flat_campaign ON adset_insights_flattened(campaign_id);
CREATE INDEX idx_adset_insights_flat_date ON adset_insights_flattened(date_start, date_stop);
CREATE UNIQUE INDEX idx_adset_insights_flat_unique ON adset_insights_flattened(time, adset_id);
REFRESH MATERIALIZED VIEW CONCURRENTLY adset_insights_flattened;
--- same, or at least very similar, for account_insights
CREATE MATERIALIZED VIEW account_insights_flattened AS
SELECT
time,
account_id,
impressions,
clicks,
spend,
reach,
frequency,
ctr,
cpc,
cpm,
cpp,
date_preset,
date_start,
date_stop,
fetched_at,
(SELECT (value->>'value')::numeric
FROM jsonb_array_elements(actions)
WHERE value->>'action_type' = 'link_click') AS link_click,
(SELECT (value->>'value')::numeric
FROM jsonb_array_elements(actions)
WHERE value->>'action_type' = 'landing_page_view') AS landing_page_view,
(SELECT (value->>'value')::numeric
FROM jsonb_array_elements(actions)
WHERE value->>'action_type' = 'lead') AS lead,
(SELECT (value->>'value')::numeric
FROM jsonb_array_elements(cost_per_action_type)
WHERE value->>'action_type' = 'link_click') AS cost_per_link_click,
(SELECT (value->>'value')::numeric
FROM jsonb_array_elements(cost_per_action_type)
WHERE value->>'action_type' = 'landing_page_view') AS cost_per_landing_page_view,
(SELECT (value->>'value')::numeric
FROM jsonb_array_elements(cost_per_action_type)
WHERE value->>'action_type' = 'lead') AS cost_per_lead
FROM account_insights;
CREATE INDEX idx_account_insights_flat_date ON account_insights_flattened(date_start, date_stop);
CREATE UNIQUE INDEX idx_account_insights_flat_unique ON account_insights_flattened(time, account_id);
REFRESH MATERIALIZED VIEW CONCURRENTLY account_insights_flattened;
--- campaign insights
CREATE MATERIALIZED VIEW campaign_insights_flattened AS
SELECT
time,
account_id,
campaign_id,
impressions,
clicks,
spend,
reach,
ctr,
cpc,
cpm,
date_preset,
date_start,
date_stop,
fetched_at,
(SELECT (value->>'value')::numeric
FROM jsonb_array_elements(actions)
WHERE value->>'action_type' = 'link_click') AS link_click,
(SELECT (value->>'value')::numeric
FROM jsonb_array_elements(actions)
WHERE value->>'action_type' = 'landing_page_view') AS landing_page_view,
(SELECT (value->>'value')::numeric
FROM jsonb_array_elements(actions)
WHERE value->>'action_type' = 'lead') AS lead
FROM campaign_insights;
CREATE INDEX idx_campaign_insights_flat_date ON campaign_insights_flattened(date_start, date_stop);
CREATE UNIQUE INDEX idx_campaign_insights_flat_unique ON campaign_insights_flattened(time, campaign_id);
REFRESH MATERIALIZED VIEW CONCURRENTLY campaign_insights_flattened;
-- permissions
-- Grant SELECT on the existing materialized views
GRANT SELECT ON account_insights_flattened TO grafana;
GRANT SELECT ON campaign_insights_flattened TO grafana;
GRANT SELECT ON adset_insights_flattened TO grafana;
-- Grant SELECT on all existing tables and views in the schema
GRANT SELECT ON ALL TABLES IN SCHEMA public TO grafana;
-- Grant SELECT on all future tables and views in the schema
ALTER DEFAULT PRIVILEGES IN SCHEMA public
GRANT SELECT ON TABLES TO grafana;
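
The header comment notes that additional action types can be extracted from the actions column when needed. As an illustrative sketch (not part of the committed file; the 'purchase' action type is only an assumed example), the same correlated-subquery pattern can be tried against the raw table before baking a new column into one of the flattened views:

SELECT
    time,
    campaign_id,
    -- 'purchase' is an assumed example action_type; substitute whatever the account actually reports
    (SELECT (value->>'value')::numeric
     FROM jsonb_array_elements(actions)
     WHERE value->>'action_type' = 'purchase') AS purchase
FROM campaign_insights;

If the extra column earns its keep, it can be added to the corresponding *_flattened view, which in practice means dropping and recreating the materialized view and refreshing it again, since the defining query of a materialized view cannot be altered in place.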

10
view_sql_archive/view.md Normal file
View File

@@ -0,0 +1,10 @@
To make dashboard handling easier, it's good practice to create views that make the underlying data more accessible. Since the data is not updated very frequently, we can also use materialized views to speed up query performance at the cost of some extra storage.
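
For example, a dashboard panel can then read the pre-extracted action columns directly instead of unpacking the actions JSONB on every refresh; a minimal sketch against the account_insights_flattened view defined in view_sql_archive (the date_preset value 'today' is assumed):

SELECT
    time,
    spend,
    lead,
    cost_per_lead
FROM account_insights_flattened
WHERE date_preset = 'today'  -- assumed preset; use whichever presets the grabber stores
ORDER BY time DESC;
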
## Schemas
public: Contains the data from the meta_api_grabber application (all ad accounts).
meta: Contains data from the Airbyte Meta API connector. Unfortunately it is somewhat bugged for insights on the campaign and ad set level; aggregating from the ads level is possible but much more cumbersome and error-prone than querying those levels directly, which is why I'm continuing to use meta_api_grabber for now.
google: Will contain the Google data from the Airbyte connector, assuming we get access and the connector is good.