Compare commits
16 Commits
f962a1a83d
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| a1d9a19d04 | |||
|
|
c5fa92c4ec | ||
|
|
a92c5b699f | ||
|
|
5f83ecd7ee | ||
|
|
511f381ff2 | ||
|
|
03ae7ea61a | ||
|
|
987553ef74 | ||
|
|
8f4753ff20 | ||
|
|
3ae4ce0d83 | ||
| e577945e75 | |||
| 68223f664a | |||
| 750ff0d4ff | |||
|
|
630f541b4f | ||
| 9ff1ee31d0 | |||
| df6da80320 | |||
| d38bb5d4a8 |
@@ -1,2 +1,4 @@
|
||||
Uv managed python project that grabs data from the meta api and saves it in a timescaledb database.
|
||||
|
||||
Always use uv run to execute python related stuff
|
||||
|
||||
|
||||
@@ -2,6 +2,10 @@
|
||||
|
||||
Async data collection system for Meta's Marketing API with TimescaleDB time-series storage and dashboard support.
|
||||
|
||||
```
|
||||
docker build . -t gitea.99tales.net/jonas/meta_grabber:lastest
|
||||
```
|
||||
|
||||
## Features
|
||||
|
||||
- **OAuth2 Authentication** - Automated token generation flow
|
||||
|
||||
@@ -15,6 +15,12 @@ dependencies = [
|
||||
"sqlalchemy[asyncio]>=2.0.44",
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
test = [
|
||||
"pytest>=8.0.0",
|
||||
"pytest-asyncio>=0.25.0",
|
||||
]
|
||||
|
||||
[project.scripts]
|
||||
meta-auth = "meta_api_grabber.auth:main"
|
||||
meta-scheduled = "meta_api_grabber.scheduled_grabber:main"
|
||||
|
||||
@@ -310,7 +310,6 @@ class TimescaleDBClient:
|
||||
account_id: str,
|
||||
data: Dict[str, Any],
|
||||
date_preset: str = "today",
|
||||
cache_metadata: bool = True,
|
||||
):
|
||||
"""
|
||||
Insert campaign-level insights data.
|
||||
@@ -321,10 +320,9 @@ class TimescaleDBClient:
|
||||
account_id: Ad account ID
|
||||
data: Insights data dictionary from Meta API
|
||||
date_preset: Date preset used
|
||||
cache_metadata: If True, automatically cache campaign metadata from insights data
|
||||
"""
|
||||
# Cache campaign metadata if requested and available in the insights data
|
||||
if cache_metadata and data.get("campaign_name"):
|
||||
# Auto-cache campaign metadata if available in the insights data
|
||||
if data.get("campaign_name"):
|
||||
await self.upsert_campaign(
|
||||
campaign_id=campaign_id,
|
||||
account_id=account_id,
|
||||
@@ -389,7 +387,6 @@ class TimescaleDBClient:
|
||||
account_id: str,
|
||||
data: Dict[str, Any],
|
||||
date_preset: str = "today",
|
||||
cache_metadata: bool = True,
|
||||
):
|
||||
"""
|
||||
Insert ad set level insights data.
|
||||
@@ -401,21 +398,18 @@ class TimescaleDBClient:
|
||||
account_id: Ad account ID
|
||||
data: Insights data dictionary from Meta API
|
||||
date_preset: Date preset used
|
||||
cache_metadata: If True, automatically cache adset/campaign metadata from insights data
|
||||
"""
|
||||
# Cache metadata if requested and available in the insights data
|
||||
if cache_metadata:
|
||||
# Cache adset metadata if available
|
||||
# Note: Campaign should already exist from cache_campaigns_metadata or grab_campaign_insights
|
||||
# If it doesn't exist, the foreign key constraint will fail with a clear error
|
||||
# This is intentional - we should never silently create campaigns with 'Unknown' names
|
||||
if data.get("adset_name"):
|
||||
await self.upsert_adset(
|
||||
adset_id=adset_id,
|
||||
campaign_id=campaign_id,
|
||||
adset_name=data["adset_name"],
|
||||
status=None, # Not available in insights response
|
||||
)
|
||||
# Auto-cache adset metadata if available in the insights data
|
||||
# Note: Campaign should already exist from cache_campaigns_metadata or grab_campaign_insights
|
||||
# If it doesn't exist, the foreign key constraint will fail with a clear error
|
||||
# This is intentional - we should never silently create campaigns with 'Unknown' names
|
||||
if data.get("adset_name"):
|
||||
await self.upsert_adset(
|
||||
adset_id=adset_id,
|
||||
campaign_id=campaign_id,
|
||||
adset_name=data["adset_name"],
|
||||
status=None, # Not available in insights response
|
||||
)
|
||||
|
||||
query = """
|
||||
INSERT INTO adset_insights (
|
||||
@@ -465,6 +459,74 @@ class TimescaleDBClient:
|
||||
ctr, cpc, cpm, actions, date_preset, date_start, date_stop
|
||||
)
|
||||
|
||||
async def insert_campaign_insights_by_country(
|
||||
self,
|
||||
time: datetime,
|
||||
campaign_id: str,
|
||||
account_id: str,
|
||||
country: str,
|
||||
data: Dict[str, Any],
|
||||
date_preset: str = "today",
|
||||
):
|
||||
"""
|
||||
Insert campaign-level insights data broken down by country.
|
||||
|
||||
Args:
|
||||
time: Timestamp for the data point
|
||||
campaign_id: Campaign ID
|
||||
account_id: Ad account ID
|
||||
country: ISO 2-letter country code
|
||||
data: Insights data dictionary from Meta API
|
||||
date_preset: Date preset used
|
||||
"""
|
||||
query = """
|
||||
INSERT INTO campaign_insights_by_country (
|
||||
time, campaign_id, account_id, country, impressions, clicks, spend, reach,
|
||||
ctr, cpc, cpm, actions, date_preset, date_start, date_stop, fetched_at
|
||||
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, NOW())
|
||||
ON CONFLICT (time, campaign_id, country)
|
||||
DO UPDATE SET
|
||||
impressions = EXCLUDED.impressions,
|
||||
clicks = EXCLUDED.clicks,
|
||||
spend = EXCLUDED.spend,
|
||||
reach = EXCLUDED.reach,
|
||||
ctr = EXCLUDED.ctr,
|
||||
cpc = EXCLUDED.cpc,
|
||||
cpm = EXCLUDED.cpm,
|
||||
actions = EXCLUDED.actions,
|
||||
date_preset = EXCLUDED.date_preset,
|
||||
date_start = EXCLUDED.date_start,
|
||||
date_stop = EXCLUDED.date_stop,
|
||||
fetched_at = NOW()
|
||||
"""
|
||||
|
||||
impressions = int(data.get("impressions", 0)) if data.get("impressions") else None
|
||||
clicks = int(data.get("clicks", 0)) if data.get("clicks") else None
|
||||
spend = float(data.get("spend", 0)) if data.get("spend") else None
|
||||
reach = int(data.get("reach", 0)) if data.get("reach") else None
|
||||
ctr = float(data.get("ctr", 0)) if data.get("ctr") else None
|
||||
cpc = float(data.get("cpc", 0)) if data.get("cpc") else None
|
||||
cpm = float(data.get("cpm", 0)) if data.get("cpm") else None
|
||||
|
||||
# Extract date range from Meta API response and convert to date objects
|
||||
from datetime import date as Date
|
||||
date_start = None
|
||||
date_stop = None
|
||||
if data.get("date_start"):
|
||||
date_start = Date.fromisoformat(data["date_start"])
|
||||
if data.get("date_stop"):
|
||||
date_stop = Date.fromisoformat(data["date_stop"])
|
||||
|
||||
import json
|
||||
actions = json.dumps(data.get("actions", [])) if data.get("actions") else None
|
||||
|
||||
async with self.pool.acquire() as conn:
|
||||
await conn.execute(
|
||||
query,
|
||||
time, campaign_id, account_id, country, impressions, clicks, spend, reach,
|
||||
ctr, cpc, cpm, actions, date_preset, date_start, date_stop
|
||||
)
|
||||
|
||||
# ========================================================================
|
||||
# QUERY HELPERS
|
||||
# ========================================================================
|
||||
|
||||
@@ -14,10 +14,17 @@ ALTER TABLE IF EXISTS account_insights ADD COLUMN IF NOT EXISTS date_stop DATE;
|
||||
|
||||
ALTER TABLE IF EXISTS campaign_insights ADD COLUMN IF NOT EXISTS date_start DATE;
|
||||
ALTER TABLE IF EXISTS campaign_insights ADD COLUMN IF NOT EXISTS date_stop DATE;
|
||||
ALTER TABLE IF EXISTS campaign_insights ADD COLUMN IF NOT EXISTS frequency NUMERIC(10, 4);
|
||||
ALTER TABLE IF EXISTS campaign_insights ADD COLUMN IF NOT EXISTS cpp NUMERIC(10, 4);
|
||||
ALTER TABLE IF EXISTS campaign_insights ADD COLUMN IF NOT EXISTS cost_per_action_type JSONB;
|
||||
|
||||
ALTER TABLE IF EXISTS adset_insights ADD COLUMN IF NOT EXISTS date_start DATE;
|
||||
ALTER TABLE IF EXISTS adset_insights ADD COLUMN IF NOT EXISTS date_stop DATE;
|
||||
|
||||
ALTER TABLE IF EXISTS campaign_insights_by_country ADD COLUMN IF NOT EXISTS frequency NUMERIC(10, 4);
|
||||
ALTER TABLE IF EXISTS campaign_insights_by_country ADD COLUMN IF NOT EXISTS cpp NUMERIC(10, 4);
|
||||
ALTER TABLE IF EXISTS campaign_insights_by_country ADD COLUMN IF NOT EXISTS cost_per_action_type JSONB;
|
||||
|
||||
-- ============================================================================
|
||||
-- METADATA TABLES (Regular PostgreSQL tables for caching)
|
||||
-- ============================================================================
|
||||
@@ -115,14 +122,17 @@ CREATE TABLE IF NOT EXISTS campaign_insights (
|
||||
clicks BIGINT,
|
||||
spend NUMERIC(12, 2),
|
||||
reach BIGINT,
|
||||
frequency NUMERIC(10, 4),
|
||||
|
||||
-- Calculated metrics
|
||||
ctr NUMERIC(10, 6),
|
||||
cpc NUMERIC(10, 4),
|
||||
cpm NUMERIC(10, 4),
|
||||
cpp NUMERIC(10, 4), -- Cost per reach
|
||||
|
||||
-- Actions
|
||||
actions JSONB,
|
||||
cost_per_action_type JSONB,
|
||||
|
||||
-- Metadata
|
||||
date_preset VARCHAR(50),
|
||||
@@ -163,6 +173,7 @@ CREATE TABLE IF NOT EXISTS adset_insights (
|
||||
cpc NUMERIC(10, 4),
|
||||
cpm NUMERIC(10, 4),
|
||||
|
||||
|
||||
-- Actions
|
||||
actions JSONB,
|
||||
|
||||
@@ -189,6 +200,57 @@ CREATE INDEX IF NOT EXISTS idx_adset_insights_account_time
|
||||
ON adset_insights (account_id, time DESC);
|
||||
|
||||
|
||||
-- Campaign-level insights by country (time-series data)
|
||||
CREATE TABLE IF NOT EXISTS campaign_insights_by_country (
|
||||
time TIMESTAMPTZ NOT NULL,
|
||||
campaign_id VARCHAR(50) NOT NULL REFERENCES campaigns(campaign_id),
|
||||
account_id VARCHAR(50) NOT NULL REFERENCES ad_accounts(account_id),
|
||||
country VARCHAR(2) NOT NULL, -- ISO 2-letter country code
|
||||
|
||||
-- Core metrics
|
||||
impressions BIGINT,
|
||||
clicks BIGINT,
|
||||
spend NUMERIC(12, 2),
|
||||
reach BIGINT,
|
||||
frequency NUMERIC(10, 4),
|
||||
|
||||
-- Calculated metrics
|
||||
ctr NUMERIC(10, 6),
|
||||
cpc NUMERIC(10, 4),
|
||||
cpm NUMERIC(10, 4),
|
||||
cpp NUMERIC(10, 4), -- Cost per reach
|
||||
|
||||
-- Actions
|
||||
actions JSONB,
|
||||
cost_per_action_type JSONB,
|
||||
|
||||
-- Metadata
|
||||
date_preset VARCHAR(50),
|
||||
date_start DATE, -- Actual start date of the data range from Meta API
|
||||
date_stop DATE, -- Actual end date of the data range from Meta API
|
||||
fetched_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
|
||||
PRIMARY KEY (time, campaign_id, country)
|
||||
);
|
||||
|
||||
-- Convert to hypertable
|
||||
SELECT create_hypertable('campaign_insights_by_country', 'time',
|
||||
if_not_exists => TRUE,
|
||||
chunk_time_interval => INTERVAL '1 day'
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_campaign_insights_by_country_campaign_time
|
||||
ON campaign_insights_by_country (campaign_id, time DESC);
|
||||
CREATE INDEX IF NOT EXISTS idx_campaign_insights_by_country_account_time
|
||||
ON campaign_insights_by_country (account_id, time DESC);
|
||||
CREATE INDEX IF NOT EXISTS idx_campaign_insights_by_country_country
|
||||
ON campaign_insights_by_country (country, time DESC);
|
||||
|
||||
|
||||
-- Compression policy for campaign_insights_by_country
|
||||
SELECT add_compression_policy('campaign_insights_by_country', INTERVAL '7 days', if_not_exists => TRUE);
|
||||
|
||||
|
||||
-- ============================================================================
|
||||
-- CONTINUOUS AGGREGATES (Pre-computed rollups for dashboards)
|
||||
-- ============================================================================
|
||||
|
||||
@@ -17,6 +17,9 @@ from facebook_business.api import FacebookAdsApi
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
logger.setLevel(logging.DEBUG)
|
||||
|
||||
|
||||
class MetaRateLimiter:
|
||||
"""
|
||||
Rate limiter with exponential backoff for Meta Marketing API.
|
||||
@@ -613,13 +616,21 @@ class MetaRateLimiter:
|
||||
|
||||
# X-Ad-Account-Usage (per account)
|
||||
if stats['ad_account_usage']:
|
||||
output.append("X-Ad-Account-Usage (Per Account):")
|
||||
for account_id, usage in stats['ad_account_usage'].items():
|
||||
output.append(f" Account: {account_id}")
|
||||
output.append(f" Usage: {usage.get('acc_id_util_pct', 0):.1f}%")
|
||||
output.append(f" Reset Time: {usage.get('reset_time_duration', 0)}s")
|
||||
output.append(f" API Access Tier: {usage.get('ads_api_access_tier') or 'N/A'}")
|
||||
output.append("")
|
||||
# Only show accounts with data (skip "unknown" accounts with 0 usage)
|
||||
accounts_to_show = {
|
||||
account_id: usage
|
||||
for account_id, usage in stats['ad_account_usage'].items()
|
||||
if account_id != 'unknown' or usage.get('acc_id_util_pct', 0) > 0
|
||||
}
|
||||
|
||||
if accounts_to_show:
|
||||
output.append("X-Ad-Account-Usage (Per Account):")
|
||||
for account_id, usage in accounts_to_show.items():
|
||||
output.append(f" Account: {account_id}")
|
||||
output.append(f" Usage: {usage.get('acc_id_util_pct', 0):.1f}%")
|
||||
output.append(f" Reset Time: {usage.get('reset_time_duration', 0)}s")
|
||||
output.append(f" API Access Tier: {usage.get('ads_api_access_tier') or 'N/A'}")
|
||||
output.append("")
|
||||
|
||||
# X-Business-Use-Case-Usage
|
||||
if stats['buc_usage']:
|
||||
|
||||
@@ -20,11 +20,29 @@ from facebook_business.exceptions import FacebookRequestError
|
||||
from .database import TimescaleDBClient
|
||||
from .rate_limiter import MetaRateLimiter
|
||||
from .token_manager import MetaTokenManager
|
||||
from .view_manager import ViewManager
|
||||
|
||||
# Set up logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
common_fields = [
|
||||
AdsInsights.Field.impressions,
|
||||
AdsInsights.Field.clicks,
|
||||
AdsInsights.Field.spend,
|
||||
AdsInsights.Field.cpc,
|
||||
AdsInsights.Field.cpm,
|
||||
AdsInsights.Field.ctr,
|
||||
AdsInsights.Field.cpp,
|
||||
AdsInsights.Field.reach,
|
||||
AdsInsights.Field.frequency,
|
||||
AdsInsights.Field.actions,
|
||||
AdsInsights.Field.cost_per_action_type,
|
||||
AdsInsights.Field.date_start,
|
||||
AdsInsights.Field.date_stop,
|
||||
]
|
||||
|
||||
|
||||
class ScheduledInsightsGrabber:
|
||||
"""
|
||||
Scheduled grabber for Meta ad insights with TimescaleDB storage.
|
||||
@@ -84,6 +102,9 @@ class ScheduledInsightsGrabber:
|
||||
# Database client
|
||||
self.db: Optional[TimescaleDBClient] = None
|
||||
|
||||
# View manager for materialized views
|
||||
self.view_manager: Optional[ViewManager] = None
|
||||
|
||||
# Rate limiter with backoff (Meta best practices)
|
||||
self.rate_limiter = MetaRateLimiter(
|
||||
base_delay=2.0, # 2 seconds base delay
|
||||
@@ -406,6 +427,131 @@ class ScheduledInsightsGrabber:
|
||||
|
||||
print(f" {count} ad sets cached for {account_id}")
|
||||
|
||||
async def _master_grab_insights(
|
||||
self,
|
||||
account_id: str,
|
||||
fields: list,
|
||||
level: str,
|
||||
db_insert_func,
|
||||
date_preset: Optional[str] = None,
|
||||
start_date: Optional[date] = None,
|
||||
end_date: Optional[date] = None,
|
||||
breakdowns: Optional[list] = None,
|
||||
limit: Optional[int] = None,
|
||||
required_fields: Optional[dict] = None,
|
||||
extra_data_processor=None,
|
||||
) -> tuple[int, Optional[date]]:
|
||||
"""
|
||||
Master method to grab and store insights at any level.
|
||||
|
||||
Args:
|
||||
account_id: Ad account ID
|
||||
fields: List of AdsInsights fields to retrieve
|
||||
level: Insights level ("account", "campaign", "adset", etc.)
|
||||
db_insert_func: Database insert function to call for each insight
|
||||
date_preset: Meta date preset (e.g., "today", "yesterday"). Use either this or start_date/end_date
|
||||
start_date: Start date for custom date range (optional)
|
||||
end_date: End date for custom date range (optional)
|
||||
breakdowns: List of breakdown fields (optional)
|
||||
limit: Maximum number of results (optional)
|
||||
required_fields: Dict of field_name -> label for validation before insert
|
||||
extra_data_processor: Optional callable to process/add extra data to insight_dict
|
||||
|
||||
Returns:
|
||||
Tuple of (count of records stored, date_start from insights)
|
||||
"""
|
||||
# Build params
|
||||
params = {"level": level}
|
||||
|
||||
if date_preset:
|
||||
params["date_preset"] = date_preset
|
||||
date_preset_for_db = date_preset
|
||||
else:
|
||||
# Use time_range for custom date ranges
|
||||
params["time_range"] = {
|
||||
"since": start_date.isoformat(),
|
||||
"until": end_date.isoformat(),
|
||||
}
|
||||
params["time_increment"] = 1 # Daily breakdown
|
||||
date_preset_for_db = "custom"
|
||||
|
||||
if breakdowns:
|
||||
params["breakdowns"] = breakdowns
|
||||
|
||||
if limit:
|
||||
params["limit"] = limit
|
||||
|
||||
# Fetch insights from Meta API
|
||||
ad_account = AdAccount(account_id)
|
||||
try:
|
||||
insights = await self._rate_limited_request(
|
||||
ad_account.get_insights,
|
||||
fields=fields,
|
||||
params=params,
|
||||
)
|
||||
except FacebookRequestError as e:
|
||||
error_code = e.api_error_code()
|
||||
if error_code in [190, 102]: # Invalid OAuth token errors
|
||||
raise ValueError(f"Access token is invalid (error {error_code}): {e.api_error_message()}")
|
||||
raise
|
||||
|
||||
# Get account timezone from database
|
||||
account_timezone = await self._get_account_timezone(account_id)
|
||||
|
||||
# Store insights
|
||||
count = 0
|
||||
date_start_value = None
|
||||
|
||||
for insight in insights:
|
||||
insight_dict = dict(insight)
|
||||
|
||||
# Extract date_start if available (for return value)
|
||||
date_start_str = insight_dict.get("date_start")
|
||||
if date_start_str and date_start_value is None:
|
||||
date_start_value = date.fromisoformat(date_start_str)
|
||||
|
||||
# Check required fields before processing
|
||||
if required_fields:
|
||||
skip = False
|
||||
for field_name, field_label in required_fields.items():
|
||||
if not insight_dict.get(field_name):
|
||||
skip = True
|
||||
break
|
||||
if skip:
|
||||
continue
|
||||
|
||||
# Call extra processor if provided
|
||||
if extra_data_processor:
|
||||
extra_data_processor(insight_dict)
|
||||
|
||||
# Compute appropriate timestamp based on date_start and account timezone
|
||||
timestamp = self._compute_timestamp(date_start_str, account_timezone)
|
||||
|
||||
# Build kwargs for insert function based on level
|
||||
kwargs = {
|
||||
"time": timestamp,
|
||||
"account_id": account_id,
|
||||
"data": insight_dict,
|
||||
"date_preset": date_preset_for_db,
|
||||
}
|
||||
|
||||
# Add level-specific parameters
|
||||
if level == "campaign":
|
||||
kwargs["campaign_id"] = insight_dict.get("campaign_id")
|
||||
elif level == "adset":
|
||||
kwargs["adset_id"] = insight_dict.get("adset_id")
|
||||
kwargs["campaign_id"] = insight_dict.get("campaign_id")
|
||||
|
||||
# Add country for breakdown queries
|
||||
if "country" in insight_dict:
|
||||
kwargs["country"] = insight_dict.get("country")
|
||||
|
||||
# Call the appropriate database insert function with level-specific parameters
|
||||
await db_insert_func(**kwargs)
|
||||
count += 1
|
||||
|
||||
return count, date_start_value
|
||||
|
||||
async def grab_account_insights(self, account_id: str, date_preset: str = "today") -> Optional[date]:
|
||||
"""
|
||||
Grab and store account-level insights.
|
||||
@@ -433,50 +579,13 @@ class ScheduledInsightsGrabber:
|
||||
AdsInsights.Field.date_stop,
|
||||
]
|
||||
|
||||
params = {
|
||||
"date_preset": date_preset,
|
||||
"level": "account",
|
||||
}
|
||||
|
||||
ad_account = AdAccount(account_id)
|
||||
try:
|
||||
insights = await self._rate_limited_request(
|
||||
ad_account.get_insights,
|
||||
fields=fields,
|
||||
params=params,
|
||||
)
|
||||
except FacebookRequestError as e:
|
||||
# Check if it's a token error
|
||||
error_code = e.api_error_code()
|
||||
if error_code in [190, 102]: # Invalid OAuth token errors
|
||||
raise ValueError(f"Access token is invalid (error {error_code}): {e.api_error_message()}")
|
||||
raise
|
||||
|
||||
# Get account timezone from database
|
||||
account_timezone = await self._get_account_timezone(account_id)
|
||||
|
||||
# Store insights
|
||||
count = 0
|
||||
date_start_value = None
|
||||
|
||||
for insight in insights:
|
||||
insight_dict = dict(insight)
|
||||
|
||||
# Extract date_start if available
|
||||
date_start_str = insight_dict.get("date_start")
|
||||
if date_start_str and date_start_value is None:
|
||||
date_start_value = date.fromisoformat(date_start_str)
|
||||
|
||||
# Compute appropriate timestamp based on date_start and account timezone
|
||||
timestamp = self._compute_timestamp(date_start_str, account_timezone)
|
||||
|
||||
await self.db.insert_account_insights(
|
||||
time=timestamp,
|
||||
account_id=account_id,
|
||||
data=insight_dict,
|
||||
date_preset=date_preset,
|
||||
)
|
||||
count += 1
|
||||
count, date_start_value = await self._master_grab_insights(
|
||||
account_id=account_id,
|
||||
fields=fields,
|
||||
level="account",
|
||||
db_insert_func=self.db.insert_account_insights,
|
||||
date_preset=date_preset,
|
||||
)
|
||||
|
||||
print(f" Account insights stored for {account_id} ({count} records, date: {date_start_value})")
|
||||
return date_start_value
|
||||
@@ -490,64 +599,22 @@ class ScheduledInsightsGrabber:
|
||||
date_preset: Meta date preset
|
||||
limit: Maximum number of campaigns
|
||||
"""
|
||||
fields = [
|
||||
|
||||
fields = common_fields + [
|
||||
AdsInsights.Field.campaign_id,
|
||||
AdsInsights.Field.campaign_name,
|
||||
AdsInsights.Field.impressions,
|
||||
AdsInsights.Field.clicks,
|
||||
AdsInsights.Field.spend,
|
||||
AdsInsights.Field.ctr,
|
||||
AdsInsights.Field.cpc,
|
||||
AdsInsights.Field.cpm,
|
||||
AdsInsights.Field.reach,
|
||||
AdsInsights.Field.actions,
|
||||
AdsInsights.Field.date_start,
|
||||
AdsInsights.Field.date_stop,
|
||||
]
|
||||
|
||||
params = {
|
||||
"date_preset": date_preset,
|
||||
"level": "campaign",
|
||||
"limit": limit,
|
||||
}
|
||||
|
||||
ad_account = AdAccount(account_id)
|
||||
try:
|
||||
insights = await self._rate_limited_request(
|
||||
ad_account.get_insights,
|
||||
fields=fields,
|
||||
params=params,
|
||||
)
|
||||
except FacebookRequestError as e:
|
||||
error_code = e.api_error_code()
|
||||
if error_code in [190, 102]:
|
||||
raise ValueError(f"Access token is invalid (error {error_code}): {e.api_error_message()}")
|
||||
raise
|
||||
|
||||
# Get account timezone from database
|
||||
account_timezone = await self._get_account_timezone(account_id)
|
||||
|
||||
# Store insights (metadata is automatically cached from insights data)
|
||||
count = 0
|
||||
for insight in insights:
|
||||
campaign_id = insight.get('campaign_id')
|
||||
if campaign_id:
|
||||
insight_dict = dict(insight)
|
||||
|
||||
# Compute appropriate timestamp based on date_preset and account timezone
|
||||
date_start_str = insight_dict.get("date_start")
|
||||
timestamp = self._compute_timestamp(date_start_str, account_timezone)
|
||||
|
||||
# Insert insights - metadata is automatically cached from the insights data
|
||||
await self.db.insert_campaign_insights(
|
||||
time=timestamp,
|
||||
campaign_id=campaign_id,
|
||||
account_id=account_id,
|
||||
data=insight_dict,
|
||||
date_preset=date_preset,
|
||||
cache_metadata=True, # Automatically cache campaign name from insights
|
||||
)
|
||||
count += 1
|
||||
count, _ = await self._master_grab_insights(
|
||||
account_id=account_id,
|
||||
fields=fields,
|
||||
level="campaign",
|
||||
db_insert_func=self.db.insert_campaign_insights,
|
||||
date_preset=date_preset,
|
||||
limit=limit,
|
||||
required_fields={"campaign_id": "campaign_id"},
|
||||
)
|
||||
|
||||
print(f" Campaign insights stored for {account_id} ({count} records)")
|
||||
|
||||
@@ -576,54 +643,45 @@ class ScheduledInsightsGrabber:
|
||||
AdsInsights.Field.date_stop,
|
||||
]
|
||||
|
||||
params = {
|
||||
"date_preset": date_preset,
|
||||
"level": "adset",
|
||||
"limit": limit,
|
||||
}
|
||||
|
||||
ad_account = AdAccount(account_id)
|
||||
try:
|
||||
insights = await self._rate_limited_request(
|
||||
ad_account.get_insights,
|
||||
fields=fields,
|
||||
params=params,
|
||||
)
|
||||
except FacebookRequestError as e:
|
||||
error_code = e.api_error_code()
|
||||
if error_code in [190, 102]:
|
||||
raise ValueError(f"Access token is invalid (error {error_code}): {e.api_error_message()}")
|
||||
raise
|
||||
|
||||
# Get account timezone from database
|
||||
account_timezone = await self._get_account_timezone(account_id)
|
||||
|
||||
# Store insights (metadata is automatically cached from insights data)
|
||||
count = 0
|
||||
for insight in insights:
|
||||
adset_id = insight.get('adset_id')
|
||||
campaign_id = insight.get('campaign_id')
|
||||
if adset_id and campaign_id:
|
||||
insight_dict = dict(insight)
|
||||
|
||||
# Compute appropriate timestamp based on date_preset and account timezone
|
||||
date_start_str = insight_dict.get("date_start")
|
||||
timestamp = self._compute_timestamp(date_start_str, account_timezone)
|
||||
|
||||
# Insert insights - metadata is automatically cached from the insights data
|
||||
await self.db.insert_adset_insights(
|
||||
time=timestamp,
|
||||
adset_id=adset_id,
|
||||
campaign_id=campaign_id,
|
||||
account_id=account_id,
|
||||
data=insight_dict,
|
||||
date_preset=date_preset,
|
||||
cache_metadata=True, # Automatically cache adset/campaign from insights
|
||||
)
|
||||
count += 1
|
||||
count, _ = await self._master_grab_insights(
|
||||
account_id=account_id,
|
||||
fields=fields,
|
||||
level="adset",
|
||||
db_insert_func=self.db.insert_adset_insights,
|
||||
date_preset=date_preset,
|
||||
limit=limit,
|
||||
required_fields={"adset_id": "adset_id", "campaign_id": "campaign_id"},
|
||||
)
|
||||
|
||||
print(f" Ad set insights stored for {account_id} ({count} records)")
|
||||
|
||||
async def grab_campaign_insights_by_country(self, account_id: str, date_preset: str = "today", limit: int = 50):
|
||||
"""
|
||||
Grab and store campaign-level insights broken down by country.
|
||||
|
||||
Args:
|
||||
account_id: Ad account ID
|
||||
date_preset: Meta date preset
|
||||
limit: Maximum number of campaigns
|
||||
"""
|
||||
fields = common_fields + [
|
||||
AdsInsights.Field.campaign_id,
|
||||
AdsInsights.Field.campaign_name,
|
||||
]
|
||||
|
||||
count, _ = await self._master_grab_insights(
|
||||
account_id=account_id,
|
||||
fields=fields,
|
||||
level="campaign",
|
||||
db_insert_func=self.db.insert_campaign_insights_by_country,
|
||||
date_preset=date_preset,
|
||||
breakdowns=[AdsInsights.Breakdowns.country],
|
||||
limit=limit,
|
||||
required_fields={"campaign_id": "campaign_id", "country": "country"},
|
||||
)
|
||||
|
||||
print(f" Campaign insights by country stored for {account_id} ({count} records)")
|
||||
|
||||
async def grab_account_insights_for_date_range(
|
||||
self,
|
||||
account_id: str,
|
||||
@@ -657,48 +715,14 @@ class ScheduledInsightsGrabber:
|
||||
AdsInsights.Field.date_stop,
|
||||
]
|
||||
|
||||
# Use time_range instead of date_preset for custom date ranges
|
||||
params = {
|
||||
"time_range": {
|
||||
"since": start_date.isoformat(),
|
||||
"until": end_date.isoformat(),
|
||||
},
|
||||
"level": "account",
|
||||
"time_increment": 1, # Daily breakdown
|
||||
}
|
||||
|
||||
ad_account = AdAccount(account_id)
|
||||
try:
|
||||
insights = await self._rate_limited_request(
|
||||
ad_account.get_insights,
|
||||
fields=fields,
|
||||
params=params,
|
||||
)
|
||||
except FacebookRequestError as e:
|
||||
error_code = e.api_error_code()
|
||||
if error_code in [190, 102]:
|
||||
raise ValueError(f"Access token is invalid (error {error_code}): {e.api_error_message()}")
|
||||
raise
|
||||
|
||||
# Get account timezone from database
|
||||
account_timezone = await self._get_account_timezone(account_id)
|
||||
|
||||
# Store insights
|
||||
count = 0
|
||||
for insight in insights:
|
||||
insight_dict = dict(insight)
|
||||
|
||||
# Compute appropriate timestamp based on date_start and account timezone
|
||||
date_start_str = insight_dict.get("date_start")
|
||||
timestamp = self._compute_timestamp(date_start_str, account_timezone)
|
||||
|
||||
await self.db.insert_account_insights(
|
||||
time=timestamp,
|
||||
account_id=account_id,
|
||||
data=insight_dict,
|
||||
date_preset="custom", # Indicate this was a custom date range
|
||||
)
|
||||
count += 1
|
||||
count, _ = await self._master_grab_insights(
|
||||
account_id=account_id,
|
||||
fields=fields,
|
||||
level="account",
|
||||
db_insert_func=self.db.insert_account_insights,
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
|
||||
return count
|
||||
|
||||
@@ -736,51 +760,16 @@ class ScheduledInsightsGrabber:
|
||||
AdsInsights.Field.date_stop,
|
||||
]
|
||||
|
||||
params = {
|
||||
"time_range": {
|
||||
"since": start_date.isoformat(),
|
||||
"until": end_date.isoformat(),
|
||||
},
|
||||
"level": "campaign",
|
||||
"time_increment": 1, # Daily breakdown
|
||||
"limit": limit,
|
||||
}
|
||||
|
||||
ad_account = AdAccount(account_id)
|
||||
try:
|
||||
insights = await self._rate_limited_request(
|
||||
ad_account.get_insights,
|
||||
fields=fields,
|
||||
params=params,
|
||||
)
|
||||
except FacebookRequestError as e:
|
||||
error_code = e.api_error_code()
|
||||
if error_code in [190, 102]:
|
||||
raise ValueError(f"Access token is invalid (error {error_code}): {e.api_error_message()}")
|
||||
raise
|
||||
|
||||
# Get account timezone from database
|
||||
account_timezone = await self._get_account_timezone(account_id)
|
||||
|
||||
# Store insights
|
||||
count = 0
|
||||
for insight in insights:
|
||||
campaign_id = insight.get('campaign_id')
|
||||
if campaign_id:
|
||||
insight_dict = dict(insight)
|
||||
|
||||
date_start_str = insight_dict.get("date_start")
|
||||
timestamp = self._compute_timestamp(date_start_str, account_timezone)
|
||||
|
||||
await self.db.insert_campaign_insights(
|
||||
time=timestamp,
|
||||
campaign_id=campaign_id,
|
||||
account_id=account_id,
|
||||
data=insight_dict,
|
||||
date_preset="custom",
|
||||
cache_metadata=True,
|
||||
)
|
||||
count += 1
|
||||
count, _ = await self._master_grab_insights(
|
||||
account_id=account_id,
|
||||
fields=fields,
|
||||
level="campaign",
|
||||
db_insert_func=self.db.insert_campaign_insights,
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
limit=limit,
|
||||
required_fields={"campaign_id": "campaign_id"},
|
||||
)
|
||||
|
||||
return count
|
||||
|
||||
@@ -819,53 +808,16 @@ class ScheduledInsightsGrabber:
|
||||
AdsInsights.Field.date_stop,
|
||||
]
|
||||
|
||||
params = {
|
||||
"time_range": {
|
||||
"since": start_date.isoformat(),
|
||||
"until": end_date.isoformat(),
|
||||
},
|
||||
"level": "adset",
|
||||
"time_increment": 1, # Daily breakdown
|
||||
"limit": limit,
|
||||
}
|
||||
|
||||
ad_account = AdAccount(account_id)
|
||||
try:
|
||||
insights = await self._rate_limited_request(
|
||||
ad_account.get_insights,
|
||||
fields=fields,
|
||||
params=params,
|
||||
)
|
||||
except FacebookRequestError as e:
|
||||
error_code = e.api_error_code()
|
||||
if error_code in [190, 102]:
|
||||
raise ValueError(f"Access token is invalid (error {error_code}): {e.api_error_message()}")
|
||||
raise
|
||||
|
||||
# Get account timezone from database
|
||||
account_timezone = await self._get_account_timezone(account_id)
|
||||
|
||||
# Store insights
|
||||
count = 0
|
||||
for insight in insights:
|
||||
adset_id = insight.get('adset_id')
|
||||
campaign_id = insight.get('campaign_id')
|
||||
if adset_id and campaign_id:
|
||||
insight_dict = dict(insight)
|
||||
|
||||
date_start_str = insight_dict.get("date_start")
|
||||
timestamp = self._compute_timestamp(date_start_str, account_timezone)
|
||||
|
||||
await self.db.insert_adset_insights(
|
||||
time=timestamp,
|
||||
adset_id=adset_id,
|
||||
campaign_id=campaign_id,
|
||||
account_id=account_id,
|
||||
data=insight_dict,
|
||||
date_preset="custom",
|
||||
cache_metadata=True,
|
||||
)
|
||||
count += 1
|
||||
count, _ = await self._master_grab_insights(
|
||||
account_id=account_id,
|
||||
fields=fields,
|
||||
level="adset",
|
||||
db_insert_func=self.db.insert_adset_insights,
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
limit=limit,
|
||||
required_fields={"adset_id": "adset_id", "campaign_id": "campaign_id"},
|
||||
)
|
||||
|
||||
return count
|
||||
|
||||
@@ -1032,6 +984,7 @@ class ScheduledInsightsGrabber:
|
||||
print("Grabbing today's insights...")
|
||||
date_start = await self.grab_account_insights(account_id, date_preset="today")
|
||||
await self.grab_campaign_insights(account_id, date_preset="today", limit=50)
|
||||
await self.grab_campaign_insights_by_country(account_id, date_preset="today", limit=50)
|
||||
await self.grab_adset_insights(account_id, date_preset="today", limit=50)
|
||||
|
||||
# Track today's date from first account
|
||||
@@ -1084,6 +1037,7 @@ class ScheduledInsightsGrabber:
|
||||
print("Grabbing yesterday's insights...")
|
||||
await self.grab_account_insights(account_id, date_preset="yesterday")
|
||||
await self.grab_campaign_insights(account_id, date_preset="yesterday", limit=50)
|
||||
await self.grab_campaign_insights_by_country(account_id, date_preset="yesterday", limit=50)
|
||||
await self.grab_adset_insights(account_id, date_preset="yesterday", limit=50)
|
||||
|
||||
print(f"✓ Completed yesterday's data for {account_id}")
|
||||
@@ -1135,6 +1089,17 @@ class ScheduledInsightsGrabber:
|
||||
print("\n" + "-" * 60)
|
||||
self.rate_limiter.print_stats()
|
||||
|
||||
# Refresh materialized views after new data has been inserted
|
||||
if self.view_manager:
|
||||
print("\n" + "-" * 60)
|
||||
print("Refreshing materialized views...")
|
||||
try:
|
||||
await self.view_manager.refresh_all_views()
|
||||
print("✓ Materialized views refreshed successfully")
|
||||
except Exception as e:
|
||||
print(f"⚠️ Warning: Failed to refresh materialized views: {e}")
|
||||
# Don't fail the entire cycle, just log the warning
|
||||
|
||||
print("\n" + "="*60)
|
||||
print("COLLECTION CYCLE COMPLETE")
|
||||
print("="*60 + "\n")
|
||||
@@ -1164,6 +1129,10 @@ class ScheduledInsightsGrabber:
|
||||
# Initialize database schema (idempotent - safe to run multiple times)
|
||||
await self.db.initialize_schema()
|
||||
|
||||
# Initialize view manager and create/ensure views exist
|
||||
self.view_manager = ViewManager(self.db.pool)
|
||||
await self.view_manager.initialize_views()
|
||||
|
||||
# Load all accessible ad accounts
|
||||
await self.load_ad_accounts()
|
||||
|
||||
|
||||
118
src/meta_api_grabber/view_manager.py
Normal file
118
src/meta_api_grabber/view_manager.py
Normal file
@@ -0,0 +1,118 @@
|
||||
"""
|
||||
View manager for TimescaleDB materialized views.
|
||||
Handles creation, updates, and refresh of materialized views for flattened insights data.
|
||||
Views are loaded from individual SQL files in the views directory.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import pathlib
|
||||
from typing import List, Optional
|
||||
|
||||
import asyncpg
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ViewManager:
|
||||
"""Manages materialized views for insights data flattening."""
|
||||
|
||||
def __init__(self, pool: asyncpg.Pool):
|
||||
"""
|
||||
Initialize view manager with a database connection pool.
|
||||
|
||||
Args:
|
||||
pool: asyncpg connection pool
|
||||
"""
|
||||
self.pool = pool
|
||||
self.views_dir = pathlib.Path(__file__).parent / "views"
|
||||
|
||||
async def initialize_views(self) -> None:
|
||||
"""
|
||||
Initialize all materialized views at startup.
|
||||
Loads and executes SQL files from the views directory in alphabetical order.
|
||||
Creates views if they don't exist, idempotent operation.
|
||||
"""
|
||||
logger.info("Initializing materialized views...")
|
||||
|
||||
if not self.views_dir.exists():
|
||||
logger.warning(f"Views directory not found at {self.views_dir}")
|
||||
return
|
||||
|
||||
# Get all .sql files in alphabetical order for consistent execution
|
||||
view_files = sorted(self.views_dir.glob("*.sql"))
|
||||
if not view_files:
|
||||
logger.warning(f"No SQL files found in {self.views_dir}")
|
||||
return
|
||||
|
||||
async with self.pool.acquire() as conn:
|
||||
for view_file in view_files:
|
||||
logger.debug(f"Loading view file: {view_file.name}")
|
||||
await self._execute_view_file(conn, view_file)
|
||||
|
||||
logger.info("✓ Materialized views initialized successfully")
|
||||
|
||||
async def _execute_view_file(self, conn: asyncpg.Connection, view_file: pathlib.Path) -> None:
|
||||
"""
|
||||
Execute SQL statements from a view file.
|
||||
|
||||
Args:
|
||||
conn: asyncpg connection
|
||||
view_file: Path to SQL file
|
||||
"""
|
||||
with open(view_file, 'r') as f:
|
||||
view_sql = f.read()
|
||||
|
||||
statements = [s.strip() for s in view_sql.split(';') if s.strip()]
|
||||
|
||||
for i, stmt in enumerate(statements, 1):
|
||||
if not stmt:
|
||||
continue
|
||||
|
||||
try:
|
||||
await conn.execute(stmt)
|
||||
logger.debug(f"{view_file.name}: Executed statement {i}")
|
||||
except Exception as e:
|
||||
error_msg = str(e).lower()
|
||||
if "does not exist" in error_msg:
|
||||
# Could be a missing dependent view or table, log it
|
||||
logger.debug(f"{view_file.name}: View or table does not exist (statement {i})")
|
||||
else:
|
||||
# Log other errors but don't fail - could be incompatible schema changes
|
||||
logger.warning(f"{view_file.name}: Error in statement {i}: {e}")
|
||||
|
||||
async def refresh_views(self, view_names: Optional[List[str]] = None) -> None:
|
||||
"""
|
||||
Refresh specified materialized views.
|
||||
|
||||
Args:
|
||||
view_names: List of view names to refresh. If None, refreshes all views.
|
||||
"""
|
||||
if view_names is None:
|
||||
view_names = [
|
||||
"adset_insights_flattened",
|
||||
"account_insights_flattened",
|
||||
"campaign_insights_flattened",
|
||||
"campaign_insights_by_country_flattened",
|
||||
#"campaign_insights_by_device_flattened",
|
||||
#"campaign_insights_by_gender_flattened",
|
||||
]
|
||||
|
||||
async with self.pool.acquire() as conn:
|
||||
for view_name in view_names:
|
||||
try:
|
||||
# Use CONCURRENTLY to avoid locking
|
||||
await conn.execute(
|
||||
f"REFRESH MATERIALIZED VIEW CONCURRENTLY {view_name};"
|
||||
)
|
||||
logger.debug(f"Refreshed materialized view: {view_name}")
|
||||
except Exception as e:
|
||||
error_msg = str(e).lower()
|
||||
# View might not exist if not initialized, that's okay
|
||||
if "does not exist" in error_msg:
|
||||
logger.debug(f"View does not exist, skipping refresh: {view_name}")
|
||||
else:
|
||||
logger.warning(f"Error refreshing view {view_name}: {e}")
|
||||
|
||||
async def refresh_all_views(self) -> None:
|
||||
"""Refresh all materialized views."""
|
||||
await self.refresh_views()
|
||||
25
src/meta_api_grabber/views/SETUP_PERMISSIONS.md
Normal file
25
src/meta_api_grabber/views/SETUP_PERMISSIONS.md
Normal file
@@ -0,0 +1,25 @@
|
||||
# View Permissions Setup
|
||||
|
||||
The scheduled grabber needs the `meta_user` to have permissions to create/drop/modify views.
|
||||
|
||||
## One-time Setup (run as superuser or database owner)
|
||||
|
||||
```sql
|
||||
-- Give meta_user the ability to create views in the public schema
|
||||
GRANT CREATE ON SCHEMA public TO meta_user;
|
||||
|
||||
-- Alternative: Make meta_user the owner of all views (if they already exist)
|
||||
-- ALTER MATERIALIZED VIEW account_insights_flattened OWNER TO meta_user;
|
||||
-- ALTER MATERIALIZED VIEW campaign_insights_flattened OWNER TO meta_user;
|
||||
-- ALTER MATERIALIZED VIEW adset_insights_flattened OWNER TO meta_user;
|
||||
```
|
||||
|
||||
Run these commands once as a superuser/database owner, then the scheduled grabber can manage views normally.
|
||||
|
||||
## Why This Is Needed
|
||||
|
||||
PostgreSQL materialized views must be owned by the user who created them. Since the scheduled grabber recreates views on startup (to apply schema changes), it needs permission to:
|
||||
- `DROP MATERIALIZED VIEW` - remove old views
|
||||
- `CREATE MATERIALIZED VIEW` - create new views
|
||||
|
||||
Without proper schema permissions, the `meta_user` cannot perform these operations.
|
||||
44
src/meta_api_grabber/views/account_insights.sql
Normal file
44
src/meta_api_grabber/views/account_insights.sql
Normal file
@@ -0,0 +1,44 @@
|
||||
DROP MATERIALIZED VIEW IF EXISTS account_insights_flattened CASCADE;
|
||||
|
||||
CREATE MATERIALIZED VIEW account_insights_flattened AS
|
||||
SELECT
|
||||
time,
|
||||
account_id,
|
||||
impressions,
|
||||
clicks,
|
||||
spend,
|
||||
reach,
|
||||
frequency,
|
||||
ctr,
|
||||
cpc,
|
||||
cpm,
|
||||
cpp,
|
||||
date_preset,
|
||||
date_start,
|
||||
date_stop,
|
||||
fetched_at,
|
||||
(SELECT (value->>'value')::numeric
|
||||
FROM jsonb_array_elements(actions)
|
||||
WHERE value->>'action_type' = 'link_click') AS link_click,
|
||||
(SELECT (value->>'value')::numeric
|
||||
FROM jsonb_array_elements(actions)
|
||||
WHERE value->>'action_type' = 'landing_page_view') AS landing_page_view,
|
||||
(SELECT (value->>'value')::numeric
|
||||
FROM jsonb_array_elements(actions)
|
||||
WHERE value->>'action_type' = 'lead') AS lead,
|
||||
(SELECT (value->>'value')::numeric
|
||||
FROM jsonb_array_elements(cost_per_action_type)
|
||||
WHERE value->>'action_type' = 'link_click') AS cost_per_link_click,
|
||||
(SELECT (value->>'value')::numeric
|
||||
FROM jsonb_array_elements(cost_per_action_type)
|
||||
WHERE value->>'action_type' = 'landing_page_view') AS cost_per_landing_page_view,
|
||||
(SELECT (value->>'value')::numeric
|
||||
FROM jsonb_array_elements(cost_per_action_type)
|
||||
WHERE value->>'action_type' = 'lead') AS cost_per_lead
|
||||
FROM account_insights;
|
||||
|
||||
CREATE INDEX idx_account_insights_flat_date ON account_insights_flattened(date_start, date_stop);
|
||||
|
||||
CREATE UNIQUE INDEX idx_account_insights_flat_unique ON account_insights_flattened(time, account_id);
|
||||
|
||||
REFRESH MATERIALIZED VIEW CONCURRENTLY account_insights_flattened;
|
||||
20
src/meta_api_grabber/views/account_insights_by_device.sql
Normal file
20
src/meta_api_grabber/views/account_insights_by_device.sql
Normal file
@@ -0,0 +1,20 @@
|
||||
--- account insights by gender
|
||||
|
||||
|
||||
DROP VIEW IF EXISTS account_insights_by_device CASCADE;
|
||||
|
||||
CREATE VIEW account_insights_by_device AS
|
||||
SELECT
|
||||
time,
|
||||
account_id,
|
||||
device_platform,
|
||||
SUM(impressions) AS impressions,
|
||||
SUM(clicks) AS clicks,
|
||||
SUM(spend) AS spend,
|
||||
SUM(link_click) AS link_click,
|
||||
SUM(landing_page_view) AS landing_page_view,
|
||||
SUM(lead) AS lead
|
||||
FROM campaign_insights_by_device_flattened
|
||||
GROUP BY time, account_id, device_platform;
|
||||
|
||||
|
||||
53
src/meta_api_grabber/views/account_insights_by_gender.sql
Normal file
53
src/meta_api_grabber/views/account_insights_by_gender.sql
Normal file
@@ -0,0 +1,53 @@
|
||||
|
||||
|
||||
|
||||
DROP VIEW IF EXISTS account_insights_by_gender CASCADE;
|
||||
|
||||
CREATE VIEW account_insights_by_gender AS
|
||||
SELECT
|
||||
time,
|
||||
account_id,
|
||||
gender,
|
||||
SUM(impressions) AS impressions,
|
||||
SUM(clicks) AS clicks,
|
||||
SUM(spend) AS spend,
|
||||
SUM(link_click) AS link_click,
|
||||
SUM(landing_page_view) AS landing_page_view,
|
||||
SUM(lead) AS lead
|
||||
FROM campaign_insights_by_gender
|
||||
GROUP BY time, account_id, gender;
|
||||
|
||||
|
||||
DROP VIEW IF EXISTS account_insights_by_age CASCADE;
|
||||
|
||||
CREATE VIEW account_insights_by_age AS
|
||||
SELECT
|
||||
time,
|
||||
account_id,
|
||||
age,
|
||||
SUM(impressions) AS impressions,
|
||||
SUM(clicks) AS clicks,
|
||||
SUM(spend) AS spend,
|
||||
SUM(link_click) AS link_click,
|
||||
SUM(landing_page_view) AS landing_page_view,
|
||||
SUM(lead) AS lead
|
||||
FROM campaign_insights_by_age
|
||||
GROUP BY time, account_id, age;
|
||||
|
||||
DROP VIEW IF EXISTS account_insights_by_gender_and_age CASCADE;
|
||||
|
||||
CREATE VIEW account_insights_by_gender_and_age AS
|
||||
SELECT
|
||||
time,
|
||||
account_id,
|
||||
gender,
|
||||
age,
|
||||
SUM(impressions) AS impressions,
|
||||
SUM(clicks) AS clicks,
|
||||
SUM(spend) AS spend,
|
||||
SUM(link_click) AS link_click,
|
||||
SUM(landing_page_view) AS landing_page_view,
|
||||
SUM(lead) AS lead
|
||||
FROM campaign_insights_by_gender_and_age
|
||||
GROUP BY time, account_id, age, gender;
|
||||
|
||||
55
src/meta_api_grabber/views/account_insights_google.sql
Normal file
55
src/meta_api_grabber/views/account_insights_google.sql
Normal file
@@ -0,0 +1,55 @@
|
||||
DROP VIEW IF EXISTS g_account_insights CASCADE;
|
||||
CREATE VIEW g_account_insights AS
|
||||
SELECT
|
||||
time,
|
||||
account_id,
|
||||
clicks,
|
||||
impressions,
|
||||
interactions,
|
||||
cost_micros,
|
||||
cost_micros / 1000000.0 as cost,
|
||||
leads,
|
||||
engagements,
|
||||
customer_currency_code,
|
||||
account_name,
|
||||
|
||||
-- CTR (Click-Through Rate)
|
||||
(clicks::numeric / impressions_nz) * 100 as ctr,
|
||||
|
||||
-- CPM (Cost Per Mille) in micros and standard units
|
||||
(cost_micros::numeric / impressions_nz) * 1000 as cpm_micros,
|
||||
(cost_micros::numeric / impressions_nz) * 1000 / 1000000.0 as cpm,
|
||||
|
||||
-- CPC (Cost Per Click) in micros and standard units
|
||||
cost_micros::numeric / clicks_nz as cpc_micros,
|
||||
cost_micros::numeric / clicks_nz / 1000000.0 as cpc,
|
||||
|
||||
-- CPL (Cost Per Lead) in micros and standard units
|
||||
cost_micros::numeric / leads_nz as cpl_micros,
|
||||
cost_micros::numeric / leads_nz / 1000000.0 as cpl,
|
||||
|
||||
-- Conversion Rate
|
||||
(leads::numeric / clicks_nz) * 100 as conversion_rate,
|
||||
|
||||
-- Engagement Rate
|
||||
(engagements::numeric / impressions_nz) * 100 as engagement_rate
|
||||
|
||||
FROM (
|
||||
SELECT
|
||||
segments_date as time,
|
||||
customer_id as account_id,
|
||||
sum(metrics_clicks) as clicks,
|
||||
sum(metrics_impressions) as impressions,
|
||||
sum(metrics_interactions) as interactions,
|
||||
sum(metrics_cost_micros) as cost_micros,
|
||||
sum(metrics_conversions) as leads,
|
||||
sum(metrics_engagements) as engagements,
|
||||
customer_currency_code,
|
||||
customer_descriptive_name as account_name,
|
||||
-- Null-safe denominators
|
||||
NULLIF(sum(metrics_clicks), 0) as clicks_nz,
|
||||
NULLIF(sum(metrics_impressions), 0) as impressions_nz,
|
||||
NULLIF(sum(metrics_conversions), 0) as leads_nz
|
||||
FROM google.account_performance_report
|
||||
GROUP BY account_id, time, customer_currency_code, account_name
|
||||
) base;
|
||||
38
src/meta_api_grabber/views/adset_insights.sql
Normal file
38
src/meta_api_grabber/views/adset_insights.sql
Normal file
@@ -0,0 +1,38 @@
|
||||
DROP MATERIALIZED VIEW IF EXISTS adset_insights_flattened CASCADE;
|
||||
|
||||
CREATE MATERIALIZED VIEW adset_insights_flattened AS
|
||||
SELECT
|
||||
time,
|
||||
adset_id,
|
||||
campaign_id,
|
||||
account_id,
|
||||
impressions,
|
||||
clicks,
|
||||
spend,
|
||||
reach,
|
||||
ctr,
|
||||
cpc,
|
||||
cpm,
|
||||
date_preset,
|
||||
date_start,
|
||||
date_stop,
|
||||
fetched_at,
|
||||
(SELECT (value->>'value')::numeric
|
||||
FROM jsonb_array_elements(actions)
|
||||
WHERE value->>'action_type' = 'link_click') AS link_click,
|
||||
(SELECT (value->>'value')::numeric
|
||||
FROM jsonb_array_elements(actions)
|
||||
WHERE value->>'action_type' = 'landing_page_view') AS landing_page_view,
|
||||
(SELECT (value->>'value')::numeric
|
||||
FROM jsonb_array_elements(actions)
|
||||
WHERE value->>'action_type' = 'lead') AS lead
|
||||
FROM adset_insights;
|
||||
|
||||
-- Add indexes for common query patterns
|
||||
|
||||
CREATE INDEX idx_adset_insights_flat_campaign ON adset_insights_flattened(campaign_id);
|
||||
CREATE INDEX idx_adset_insights_flat_date ON adset_insights_flattened(date_start, date_stop);
|
||||
|
||||
|
||||
CREATE UNIQUE INDEX idx_adset_insights_flat_unique ON adset_insights_flattened(time, adset_id);
|
||||
REFRESH MATERIALIZED VIEW CONCURRENTLY adset_insights_flattened;
|
||||
33
src/meta_api_grabber/views/campaign_insights.sql
Normal file
33
src/meta_api_grabber/views/campaign_insights.sql
Normal file
@@ -0,0 +1,33 @@
|
||||
--- campaign insights
|
||||
|
||||
DROP MATERIALIZED VIEW IF EXISTS campaign_insights_flattened CASCADE;
|
||||
|
||||
CREATE MATERIALIZED VIEW campaign_insights_flattened AS
|
||||
SELECT date_start AS "time",
|
||||
concat('act_', account_id) AS account_id,
|
||||
campaign_id,
|
||||
impressions,
|
||||
clicks,
|
||||
spend,
|
||||
reach,
|
||||
ctr,
|
||||
cpc,
|
||||
cpm,
|
||||
date_start,
|
||||
date_stop,
|
||||
( SELECT (jsonb_array_elements.value ->> 'value'::text)::numeric AS "numeric"
|
||||
FROM jsonb_array_elements(customcampaign_insights.actions) jsonb_array_elements(value)
|
||||
WHERE (jsonb_array_elements.value ->> 'action_type'::text) = 'link_click'::text) AS link_click,
|
||||
( SELECT (jsonb_array_elements.value ->> 'value'::text)::numeric AS "numeric"
|
||||
FROM jsonb_array_elements(customcampaign_insights.actions) jsonb_array_elements(value)
|
||||
WHERE (jsonb_array_elements.value ->> 'action_type'::text) = 'landing_page_view'::text) AS landing_page_view,
|
||||
( SELECT (jsonb_array_elements.value ->> 'value'::text)::numeric AS "numeric"
|
||||
FROM jsonb_array_elements(customcampaign_insights.actions) jsonb_array_elements(value)
|
||||
WHERE (jsonb_array_elements.value ->> 'action_type'::text) = 'lead'::text) AS lead
|
||||
FROM meta.customcampaign_insights;
|
||||
|
||||
CREATE INDEX idx_campaign_insights_flat_date ON campaign_insights_flattened(date_start, date_stop);
|
||||
|
||||
CREATE UNIQUE INDEX idx_campaign_insights_flat_unique ON campaign_insights_flattened(time, campaign_id);
|
||||
|
||||
REFRESH MATERIALIZED VIEW CONCURRENTLY campaign_insights_flattened;
|
||||
36
src/meta_api_grabber/views/campaign_insights_by_country.sql
Normal file
36
src/meta_api_grabber/views/campaign_insights_by_country.sql
Normal file
@@ -0,0 +1,36 @@
|
||||
--- campaign insights by country
|
||||
|
||||
DROP MATERIALIZED VIEW IF EXISTS campaign_insights_by_country_flattened CASCADE;
|
||||
|
||||
CREATE MATERIALIZED VIEW campaign_insights_by_country_flattened AS
|
||||
SELECT date_start AS "time",
|
||||
concat('act_', account_id) AS account_id,
|
||||
campaign_id,
|
||||
country,
|
||||
impressions,
|
||||
clicks,
|
||||
spend,
|
||||
reach,
|
||||
frequency,
|
||||
ctr,
|
||||
cpc,
|
||||
cpm,
|
||||
date_start,
|
||||
date_stop,
|
||||
(SELECT (value->>'value')::numeric
|
||||
FROM jsonb_array_elements(actions)
|
||||
WHERE value->>'action_type' = 'link_click') AS link_click,
|
||||
(SELECT (value->>'value')::numeric
|
||||
FROM jsonb_array_elements(actions)
|
||||
WHERE value->>'action_type' = 'landing_page_view') AS landing_page_view,
|
||||
(SELECT (value->>'value')::numeric
|
||||
FROM jsonb_array_elements(actions)
|
||||
WHERE value->>'action_type' = 'lead') AS lead
|
||||
|
||||
FROM meta.custom_campaign_country;
|
||||
|
||||
CREATE INDEX idx_campaign_insights_by_country_flat_date ON campaign_insights_by_country_flattened(date_start, date_stop);
|
||||
|
||||
CREATE UNIQUE INDEX idx_campaign_insights_by_country_flat_unique ON campaign_insights_by_country_flattened(time, campaign_id, country);
|
||||
|
||||
REFRESH MATERIALIZED VIEW CONCURRENTLY campaign_insights_by_country_flattened;
|
||||
32
src/meta_api_grabber/views/campaign_insights_by_device.sql
Normal file
32
src/meta_api_grabber/views/campaign_insights_by_device.sql
Normal file
@@ -0,0 +1,32 @@
|
||||
--- campaign insights by device
|
||||
|
||||
DROP MATERIALIZED VIEW IF EXISTS campaign_insights_by_device_flattened CASCADE;
|
||||
|
||||
CREATE MATERIALIZED VIEW campaign_insights_by_device_flattened AS
|
||||
SELECT date_start AS "time",
|
||||
concat('act_', account_id) AS account_id,
|
||||
campaign_id,
|
||||
device_platform,
|
||||
impressions,
|
||||
clicks,
|
||||
spend,
|
||||
reach,
|
||||
frequency,
|
||||
date_start,
|
||||
date_stop,
|
||||
(SELECT (value->>'value')::numeric
|
||||
FROM jsonb_array_elements(actions)
|
||||
WHERE value->>'action_type' = 'link_click') AS link_click,
|
||||
(SELECT (value->>'value')::numeric
|
||||
FROM jsonb_array_elements(actions)
|
||||
WHERE value->>'action_type' = 'landing_page_view') AS landing_page_view,
|
||||
(SELECT (value->>'value')::numeric
|
||||
FROM jsonb_array_elements(actions)
|
||||
WHERE value->>'action_type' = 'lead') AS lead
|
||||
|
||||
FROM meta.custom_campaign_device;
|
||||
|
||||
CREATE INDEX idx_campaign_insights_by_device_flat_date ON campaign_insights_by_device_flattened(date_start, date_stop);
|
||||
|
||||
CREATE UNIQUE INDEX idx_campaign_insights_by_device_flat_unique ON campaign_insights_by_device_flattened(time, campaign_id, device_platform);
|
||||
REFRESH MATERIALIZED VIEW CONCURRENTLY campaign_insights_by_device_flattened;
|
||||
71
src/meta_api_grabber/views/campaign_insights_by_gender.sql
Normal file
71
src/meta_api_grabber/views/campaign_insights_by_gender.sql
Normal file
@@ -0,0 +1,71 @@
|
||||
--- campaign insights by country
|
||||
|
||||
DROP MATERIALIZED VIEW IF EXISTS campaign_insights_by_gender_and_age CASCADE;
|
||||
|
||||
CREATE MATERIALIZED VIEW campaign_insights_by_gender_and_age AS
|
||||
SELECT date_start AS "time",
|
||||
concat('act_', account_id) AS account_id,
|
||||
campaign_id,
|
||||
gender,
|
||||
age,
|
||||
impressions,
|
||||
clicks,
|
||||
spend,
|
||||
reach,
|
||||
frequency,
|
||||
ctr,
|
||||
cpc,
|
||||
cpm,
|
||||
date_start,
|
||||
date_stop,
|
||||
(SELECT (value->>'value')::numeric
|
||||
FROM jsonb_array_elements(actions)
|
||||
WHERE value->>'action_type' = 'link_click') AS link_click,
|
||||
(SELECT (value->>'value')::numeric
|
||||
FROM jsonb_array_elements(actions)
|
||||
WHERE value->>'action_type' = 'landing_page_view') AS landing_page_view,
|
||||
(SELECT (value->>'value')::numeric
|
||||
FROM jsonb_array_elements(actions)
|
||||
WHERE value->>'action_type' = 'lead') AS lead
|
||||
|
||||
FROM meta.custom_campaign_gender;
|
||||
|
||||
CREATE INDEX idx_campaign_insights_by_gender_and_age_date ON campaign_insights_by_gender_and_age(date_start, date_stop);
|
||||
|
||||
CREATE UNIQUE INDEX idx_campaign_insights_by_gender_and_age_unique ON campaign_insights_by_gender_and_age(time, campaign_id, gender, age);
|
||||
REFRESH MATERIALIZED VIEW CONCURRENTLY campaign_insights_by_gender_and_age;
|
||||
|
||||
|
||||
DROP VIEW IF EXISTS campaign_insights_by_gender CASCADE;
|
||||
|
||||
create view campaign_insights_by_gender as
|
||||
Select time,
|
||||
sum(clicks) as clicks,
|
||||
sum(link_click) as link_click,
|
||||
sum(lead) as lead,
|
||||
sum(landing_page_view) as landing_page_view,
|
||||
sum(spend) as spend,
|
||||
sum(reach) as reach,
|
||||
sum(impressions) as impressions,
|
||||
gender,
|
||||
campaign_id,
|
||||
account_id
|
||||
from campaign_insights_by_gender_and_age
|
||||
group by time, gender, account_id, campaign_id, date_start, date_stop;
|
||||
|
||||
DROP VIEW IF EXISTS campaign_insights_by_age CASCADE;
|
||||
|
||||
create view campaign_insights_by_age as
|
||||
Select time,
|
||||
sum(clicks) as clicks,
|
||||
sum(link_click) as link_click,
|
||||
sum(lead) as lead,
|
||||
sum(landing_page_view) as landing_page_view,
|
||||
sum(spend) as spend,
|
||||
sum(reach) as reach,
|
||||
sum(impressions) as impressions,
|
||||
age,
|
||||
campaign_id,
|
||||
account_id
|
||||
from campaign_insights_by_gender_and_age
|
||||
group by time, age, account_id, campaign_id, date_start, date_stop;
|
||||
11
src/meta_api_grabber/views/grafana_permissions.sql
Normal file
11
src/meta_api_grabber/views/grafana_permissions.sql
Normal file
@@ -0,0 +1,11 @@
|
||||
-- Grant SELECT permissions for Grafana user on flattened views
|
||||
GRANT SELECT ON account_insights_flattened TO grafana;
|
||||
GRANT SELECT ON campaign_insights_flattened TO grafana;
|
||||
GRANT SELECT ON adset_insights_flattened TO grafana;
|
||||
|
||||
-- Grant SELECT on all existing tables and views in the schema
|
||||
GRANT SELECT ON ALL TABLES IN SCHEMA public TO grafana;
|
||||
|
||||
-- Grant SELECT on all future tables and views in the schema
|
||||
ALTER DEFAULT PRIVILEGES IN SCHEMA public
|
||||
GRANT SELECT ON TABLES TO grafana;
|
||||
@@ -151,7 +151,14 @@ async def test_rate_limiter():
|
||||
print(f"Max usage: {limiter.get_max_usage_pct():.1f}%")
|
||||
print(f"Throttle delay: {limiter.get_throttle_delay():.1f}s")
|
||||
print(f"Estimated time to regain access: {limiter.estimated_time_to_regain_access} min")
|
||||
print(f"Reset time duration: {limiter.reset_time_duration}s")
|
||||
|
||||
# Show per-account reset times
|
||||
if limiter.ad_account_usage:
|
||||
print("Per-account reset times:")
|
||||
for account_id, usage in limiter.ad_account_usage.items():
|
||||
reset_time = usage.get('reset_time_duration', 0)
|
||||
if reset_time > 0:
|
||||
print(f" {account_id}: {reset_time}s")
|
||||
|
||||
# Test 7: Empty/missing headers
|
||||
print("\n--- Test 7: Missing Headers ---")
|
||||
|
||||
115
tests/README.md
Normal file
115
tests/README.md
Normal file
@@ -0,0 +1,115 @@
|
||||
# Tests
|
||||
|
||||
This directory contains tests for the meta_api_grabber project.
|
||||
|
||||
## Running Tests
|
||||
|
||||
Install test dependencies:
|
||||
```bash
|
||||
uv sync --extra test
|
||||
```
|
||||
|
||||
Run all tests:
|
||||
```bash
|
||||
uv run pytest
|
||||
```
|
||||
|
||||
Run specific test file:
|
||||
```bash
|
||||
uv run pytest tests/test_field_schema_validation.py -v
|
||||
```
|
||||
|
||||
Run with verbose output:
|
||||
```bash
|
||||
uv run pytest tests/test_field_schema_validation.py -v -s
|
||||
```
|
||||
|
||||
## Test Files
|
||||
|
||||
### `test_field_schema_validation.py` (Integration Test)
|
||||
|
||||
This is a critical integration test that validates all fields requested by the grab_* methods in `scheduled_grabber.py` exist in the actual database schema.
|
||||
|
||||
**What it does:**
|
||||
1. Parses `db_schema.sql` to extract actual table columns
|
||||
2. Checks fields requested by each grab method:
|
||||
- `grab_account_insights()` → `account_insights` table
|
||||
- `grab_campaign_insights()` → `campaign_insights` table
|
||||
- `grab_adset_insights()` → `adset_insights` table
|
||||
- `grab_campaign_insights_by_country()` → `campaign_insights_by_country` table
|
||||
3. Verifies all requested fields exist in the corresponding database table
|
||||
|
||||
**Why this test is important:** When new fields are added to the Meta API field lists, this test quickly alerts you if the corresponding database columns need to be added. Since fields are only added (never removed), the test helps catch schema mismatches early.
|
||||
|
||||
**Test methods:**
|
||||
- `test_account_insights_fields()` - Validates account-level insight fields
|
||||
- `test_campaign_insights_fields()` - Validates campaign-level insight fields
|
||||
- `test_adset_insights_fields()` - Validates ad set-level insight fields
|
||||
- `test_campaign_insights_by_country_fields()` - Validates country breakdown fields
|
||||
- `test_all_tables_exist()` - Ensures all required insight tables exist
|
||||
- `test_schema_documentation()` - Prints out the parsed schema for reference
|
||||
|
||||
**Output example:**
|
||||
```
|
||||
Table: account_insights
|
||||
Columns (17): account_id, actions, clicks, cost_per_action_type, cpc, cpm, cpp, ctr, ...
|
||||
|
||||
Table: campaign_insights
|
||||
Columns (15): account_id, actions, campaign_id, clicks, cpc, cpm, ctr, ...
|
||||
```
|
||||
|
||||
## Writing Tests
|
||||
|
||||
Use markers to categorize tests:
|
||||
```python
|
||||
@pytest.mark.unit
|
||||
def test_something():
|
||||
pass
|
||||
|
||||
@pytest.mark.integration
|
||||
async def test_database_connection():
|
||||
pass
|
||||
```
|
||||
|
||||
Run only unit tests:
|
||||
```bash
|
||||
uv run pytest -m unit
|
||||
```
|
||||
|
||||
Run everything except integration tests:
|
||||
```bash
|
||||
uv run pytest -m "not integration"
|
||||
```
|
||||
|
||||
## Schema Validation Workflow
|
||||
|
||||
When you add new fields to a grab method:
|
||||
|
||||
1. **Add fields to `scheduled_grabber.py`:**
|
||||
```python
|
||||
fields = [
|
||||
...
|
||||
AdsInsights.Field.new_field, # New field added
|
||||
]
|
||||
```
|
||||
|
||||
2. **Run tests to see what's missing:**
|
||||
```bash
|
||||
uv run pytest tests/test_field_schema_validation.py -v -s
|
||||
```
|
||||
|
||||
3. **Test output will show:**
|
||||
```
|
||||
adset_insights table missing columns: {'new_field'}
|
||||
Available: [account_id, actions, adset_id, ...]
|
||||
```
|
||||
|
||||
4. **Update `db_schema.sql` with the new column:**
|
||||
```sql
|
||||
ALTER TABLE adset_insights ADD COLUMN IF NOT EXISTS new_field TYPE;
|
||||
```
|
||||
|
||||
5. **Run tests again to verify:**
|
||||
```bash
|
||||
uv run pytest tests/test_field_schema_validation.py -v
|
||||
```
|
||||
1
tests/__init__.py
Normal file
1
tests/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""Tests for meta_api_grabber package."""
|
||||
13
tests/conftest.py
Normal file
13
tests/conftest.py
Normal file
@@ -0,0 +1,13 @@
|
||||
"""Pytest configuration and fixtures."""
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
def pytest_configure(config):
|
||||
"""Configure pytest."""
|
||||
config.addinivalue_line(
|
||||
"markers", "integration: marks tests as integration tests (deselect with '-m \"not integration\"')"
|
||||
)
|
||||
config.addinivalue_line(
|
||||
"markers", "unit: marks tests as unit tests"
|
||||
)
|
||||
360
tests/test_field_schema_validation.py
Normal file
360
tests/test_field_schema_validation.py
Normal file
@@ -0,0 +1,360 @@
|
||||
"""
|
||||
Integration test that validates all fields requested by grab_* methods exist in the database schema.
|
||||
|
||||
This test:
|
||||
1. Parses the SQL schema file (db_schema.sql) to extract actual table columns
|
||||
2. Reads scheduled_grabber.py to find which methods call which tables
|
||||
3. Verifies that all requested fields exist in the actual database schema
|
||||
"""
|
||||
|
||||
import re
|
||||
import pathlib
|
||||
from typing import Dict, Set, List
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
def parse_sql_schema() -> Dict[str, Set[str]]:
|
||||
"""
|
||||
Parse db_schema.sql to extract table columns.
|
||||
|
||||
Returns:
|
||||
Dictionary mapping table names to sets of column names
|
||||
"""
|
||||
schema_file = pathlib.Path(__file__).parent.parent / "src" / "meta_api_grabber" / "db_schema.sql"
|
||||
|
||||
if not schema_file.exists():
|
||||
raise FileNotFoundError(f"Schema file not found: {schema_file}")
|
||||
|
||||
with open(schema_file, 'r') as f:
|
||||
content = f.read()
|
||||
|
||||
tables = {}
|
||||
|
||||
# Parse CREATE TABLE statements
|
||||
# Pattern: CREATE TABLE IF NOT EXISTS table_name (...)
|
||||
create_table_pattern = r'CREATE TABLE IF NOT EXISTS (\w+)\s*\((.*?)\);'
|
||||
|
||||
for match in re.finditer(create_table_pattern, content, re.DOTALL):
|
||||
table_name = match.group(1)
|
||||
table_body = match.group(2)
|
||||
|
||||
# Extract column names (first word before space/comma)
|
||||
# Pattern: column_name TYPE ...
|
||||
column_pattern = r'^\s*(\w+)\s+\w+'
|
||||
columns = set()
|
||||
|
||||
for line in table_body.split('\n'):
|
||||
line = line.strip()
|
||||
if not line or line.startswith('--') or line.startswith('PRIMARY') or line.startswith('FOREIGN') or line.startswith('CONSTRAINT'):
|
||||
continue
|
||||
|
||||
col_match = re.match(column_pattern, line)
|
||||
if col_match:
|
||||
columns.add(col_match.group(1))
|
||||
|
||||
if columns:
|
||||
tables[table_name] = columns
|
||||
|
||||
return tables
|
||||
|
||||
|
||||
def get_field_name(field_str: str) -> str:
|
||||
"""
|
||||
Extract field name from AdsInsights.Field.xxx notation.
|
||||
|
||||
Example: 'impressions' from 'AdsInsights.Field.impressions'
|
||||
"""
|
||||
if '.' in field_str:
|
||||
return field_str.split('.')[-1]
|
||||
return field_str
|
||||
|
||||
|
||||
def extract_fields_from_grabber_source() -> Dict[str, List[str]]:
|
||||
"""
|
||||
Extract field lists from grab_* methods by reading scheduled_grabber.py source.
|
||||
|
||||
Returns:
|
||||
Dictionary mapping method names to lists of field names
|
||||
"""
|
||||
grabber_file = pathlib.Path(__file__).parent.parent / "src" / "meta_api_grabber" / "scheduled_grabber.py"
|
||||
|
||||
if not grabber_file.exists():
|
||||
raise FileNotFoundError(f"scheduled_grabber.py not found: {grabber_file}")
|
||||
|
||||
with open(grabber_file, 'r') as f:
|
||||
source = f.read()
|
||||
|
||||
methods_to_table = {
|
||||
'grab_account_insights': 'account_insights',
|
||||
'grab_campaign_insights': 'campaign_insights',
|
||||
'grab_adset_insights': 'adset_insights',
|
||||
'grab_campaign_insights_by_country': 'campaign_insights_by_country',
|
||||
}
|
||||
|
||||
result = {}
|
||||
|
||||
for method_name in methods_to_table.keys():
|
||||
# Find the method definition by looking for: async def method_name(...)
|
||||
method_pattern = rf'async def {method_name}\s*\('
|
||||
method_match = re.search(method_pattern, source)
|
||||
|
||||
if not method_match:
|
||||
continue
|
||||
|
||||
# Get the position after the method name pattern
|
||||
start_pos = method_match.end()
|
||||
|
||||
# Now find where the method body actually starts (after the closing paren and docstring)
|
||||
# Skip to the opening paren
|
||||
open_paren_pos = start_pos - 1
|
||||
|
||||
# Count parentheses to find the closing paren of the function signature
|
||||
paren_count = 1
|
||||
pos = open_paren_pos + 1
|
||||
while pos < len(source) and paren_count > 0:
|
||||
if source[pos] == '(':
|
||||
paren_count += 1
|
||||
elif source[pos] == ')':
|
||||
paren_count -= 1
|
||||
pos += 1
|
||||
|
||||
# Now pos is after the closing paren. Find the colon
|
||||
colon_pos = source.find(':', pos)
|
||||
|
||||
# Skip past any docstring if present
|
||||
after_colon = source[colon_pos + 1:colon_pos + 10].lstrip()
|
||||
if after_colon.startswith('"""') or after_colon.startswith("'''"):
|
||||
quote_type = '"""' if after_colon.startswith('"""') else "'''"
|
||||
docstring_start = source.find(quote_type, colon_pos)
|
||||
docstring_end = source.find(quote_type, docstring_start + 3) + 3
|
||||
method_body_start = docstring_end
|
||||
else:
|
||||
method_body_start = colon_pos + 1
|
||||
|
||||
# Find the next method definition to know where this method ends
|
||||
next_method_pattern = r'async def \w+\s*\('
|
||||
next_match = re.search(next_method_pattern, source[method_body_start:])
|
||||
|
||||
if next_match:
|
||||
method_body_end = method_body_start + next_match.start()
|
||||
else:
|
||||
# Last method - use rest of file
|
||||
method_body_end = len(source)
|
||||
|
||||
method_body = source[method_body_start:method_body_end]
|
||||
|
||||
# Extract fields from the method body
|
||||
# Look for: fields = [...] or fields = common_fields + [...]
|
||||
|
||||
# First check if this method uses common_fields
|
||||
uses_common_fields = 'common_fields' in method_body[:500]
|
||||
|
||||
if uses_common_fields:
|
||||
# Pattern: fields = common_fields + [...]
|
||||
fields_pattern = r'fields\s*=\s*common_fields\s*\+\s*\[(.*?)\]'
|
||||
fields_match = re.search(fields_pattern, method_body, re.DOTALL)
|
||||
if fields_match:
|
||||
fields_str = fields_match.group(1)
|
||||
# Extract individual field names
|
||||
field_pattern = r'AdsInsights\.Field\.(\w+)'
|
||||
fields = re.findall(field_pattern, fields_str)
|
||||
|
||||
# Also get common_fields from the module level
|
||||
common_pattern = r'common_fields\s*=\s*\[(.*?)\]'
|
||||
common_match = re.search(common_pattern, source, re.DOTALL)
|
||||
if common_match:
|
||||
common_str = common_match.group(1)
|
||||
common_fields_list = re.findall(field_pattern, common_str)
|
||||
fields = common_fields_list + fields
|
||||
|
||||
result[method_name] = fields
|
||||
else:
|
||||
# Pattern: fields = [...]
|
||||
# Use bracket matching to find the correct field list
|
||||
fields_keyword_pos = method_body.find('fields =')
|
||||
|
||||
if fields_keyword_pos != -1:
|
||||
# Find the opening bracket after fields =
|
||||
bracket_pos = method_body.find('[', fields_keyword_pos)
|
||||
if bracket_pos != -1:
|
||||
# Count brackets to find the matching closing bracket
|
||||
bracket_count = 0
|
||||
end_pos = bracket_pos
|
||||
for i, char in enumerate(method_body[bracket_pos:]):
|
||||
if char == '[':
|
||||
bracket_count += 1
|
||||
elif char == ']':
|
||||
bracket_count -= 1
|
||||
if bracket_count == 0:
|
||||
end_pos = bracket_pos + i
|
||||
break
|
||||
|
||||
fields_str = method_body[bracket_pos + 1:end_pos]
|
||||
field_pattern = r'AdsInsights\.Field\.(\w+)'
|
||||
fields = re.findall(field_pattern, fields_str)
|
||||
result[method_name] = fields
|
||||
|
||||
return result
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def schema_columns():
|
||||
"""Parse and cache the schema columns."""
|
||||
return parse_sql_schema()
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def extracted_fields_by_method():
|
||||
"""Extract and cache the fields from each grab_* method."""
|
||||
return extract_fields_from_grabber_source()
|
||||
|
||||
|
||||
# Mapping of method names to their insight table names
|
||||
METHOD_TO_TABLE = {
|
||||
'grab_account_insights': 'account_insights',
|
||||
'grab_campaign_insights': 'campaign_insights',
|
||||
'grab_adset_insights': 'adset_insights',
|
||||
'grab_campaign_insights_by_country': 'campaign_insights_by_country',
|
||||
}
|
||||
|
||||
# Fields that are IDs/names stored in metadata tables, not in the insights table
|
||||
METADATA_ONLY_FIELDS = {
|
||||
'campaign_id', 'campaign_name',
|
||||
'adset_id', 'adset_name',
|
||||
}
|
||||
|
||||
|
||||
class TestFieldSchemaValidation:
|
||||
"""Validate that all API field requests have corresponding database columns."""
|
||||
|
||||
def test_grab_account_insights_fields(self, schema_columns, extracted_fields_by_method):
|
||||
"""Test that grab_account_insights fields exist in schema."""
|
||||
method_name = 'grab_account_insights'
|
||||
table_name = METHOD_TO_TABLE[method_name]
|
||||
|
||||
assert method_name in extracted_fields_by_method, f"Could not extract fields from {method_name}"
|
||||
|
||||
extracted_fields = set(extracted_fields_by_method[method_name])
|
||||
table_cols = schema_columns.get(table_name, set())
|
||||
assert table_cols, f"Table {table_name} not found in schema"
|
||||
|
||||
missing = extracted_fields - table_cols
|
||||
assert not missing, \
|
||||
f"{table_name} table missing columns: {missing}\n" \
|
||||
f"Method requests: {sorted(extracted_fields)}\n" \
|
||||
f"Available: {sorted(table_cols)}"
|
||||
|
||||
print(f"✓ {method_name} → {table_name}: {len(extracted_fields)} fields validated")
|
||||
|
||||
def test_grab_campaign_insights_fields(self, schema_columns, extracted_fields_by_method):
|
||||
"""Test that grab_campaign_insights fields exist in schema."""
|
||||
method_name = 'grab_campaign_insights'
|
||||
table_name = METHOD_TO_TABLE[method_name]
|
||||
|
||||
assert method_name in extracted_fields_by_method, f"Could not extract fields from {method_name}"
|
||||
|
||||
extracted_fields = set(extracted_fields_by_method[method_name])
|
||||
table_cols = schema_columns.get(table_name, set())
|
||||
assert table_cols, f"Table {table_name} not found in schema"
|
||||
|
||||
# Remove ID/name fields (stored in metadata tables, not insights table)
|
||||
insight_only_fields = extracted_fields - METADATA_ONLY_FIELDS
|
||||
|
||||
missing = insight_only_fields - table_cols
|
||||
assert not missing, \
|
||||
f"{table_name} table missing columns: {missing}\n" \
|
||||
f"Method requests: {sorted(extracted_fields)}\n" \
|
||||
f"Available: {sorted(table_cols)}"
|
||||
|
||||
print(f"✓ {method_name} → {table_name}: {len(extracted_fields)} fields validated")
|
||||
|
||||
def test_grab_adset_insights_fields(self, schema_columns, extracted_fields_by_method):
|
||||
"""Test that grab_adset_insights fields exist in schema."""
|
||||
method_name = 'grab_adset_insights'
|
||||
table_name = METHOD_TO_TABLE[method_name]
|
||||
|
||||
assert method_name in extracted_fields_by_method, f"Could not extract fields from {method_name}"
|
||||
|
||||
extracted_fields = set(extracted_fields_by_method[method_name])
|
||||
table_cols = schema_columns.get(table_name, set())
|
||||
assert table_cols, f"Table {table_name} not found in schema"
|
||||
|
||||
# Remove ID/name fields (stored in metadata tables, not insights table)
|
||||
insight_only_fields = extracted_fields - METADATA_ONLY_FIELDS
|
||||
|
||||
missing = insight_only_fields - table_cols
|
||||
assert not missing, \
|
||||
f"{table_name} table missing columns: {missing}\n" \
|
||||
f"Method requests: {sorted(extracted_fields)}\n" \
|
||||
f"Available: {sorted(table_cols)}"
|
||||
|
||||
print(f"✓ {method_name} → {table_name}: {len(extracted_fields)} fields validated")
|
||||
|
||||
def test_grab_campaign_insights_by_country_fields(self, schema_columns, extracted_fields_by_method):
|
||||
"""Test that grab_campaign_insights_by_country fields exist in schema."""
|
||||
method_name = 'grab_campaign_insights_by_country'
|
||||
table_name = METHOD_TO_TABLE[method_name]
|
||||
|
||||
assert method_name in extracted_fields_by_method, f"Could not extract fields from {method_name}"
|
||||
|
||||
extracted_fields = set(extracted_fields_by_method[method_name])
|
||||
table_cols = schema_columns.get(table_name, set())
|
||||
assert table_cols, f"Table {table_name} not found in schema"
|
||||
|
||||
# Remove ID/name fields (stored in metadata tables, not insights table)
|
||||
insight_only_fields = extracted_fields - METADATA_ONLY_FIELDS
|
||||
|
||||
# Country is special - it's part of the breakdown
|
||||
assert "country" in table_cols, \
|
||||
f"country field missing in {table_name} table\n" \
|
||||
f"Available: {sorted(table_cols)}"
|
||||
|
||||
missing = insight_only_fields - table_cols
|
||||
assert not missing, \
|
||||
f"{table_name} table missing columns: {missing}\n" \
|
||||
f"Method requests: {sorted(extracted_fields)}\n" \
|
||||
f"Available: {sorted(table_cols)}"
|
||||
|
||||
print(f"✓ {method_name} → {table_name}: {len(extracted_fields)} fields validated")
|
||||
|
||||
def test_all_tables_exist(self, schema_columns):
|
||||
"""Test that all required insight tables exist in schema."""
|
||||
required_tables = {
|
||||
"account_insights",
|
||||
"campaign_insights",
|
||||
"adset_insights",
|
||||
"campaign_insights_by_country",
|
||||
}
|
||||
|
||||
existing_tables = set(schema_columns.keys())
|
||||
missing = required_tables - existing_tables
|
||||
|
||||
assert not missing, \
|
||||
f"Missing tables: {missing}\n" \
|
||||
f"Found: {sorted(existing_tables)}"
|
||||
|
||||
def test_schema_documentation(self, schema_columns):
|
||||
"""Print out the parsed schema for verification."""
|
||||
print("\n" + "="*80)
|
||||
print("PARSED DATABASE SCHEMA")
|
||||
print("="*80)
|
||||
|
||||
for table_name in sorted(schema_columns.keys()):
|
||||
columns = sorted(schema_columns[table_name])
|
||||
print(f"\nTable: {table_name}")
|
||||
print(f"Columns ({len(columns)}): {', '.join(columns)}")
|
||||
|
||||
def test_extracted_fields_documentation(self, extracted_fields_by_method):
|
||||
"""Print out extracted fields from each method."""
|
||||
print("\n" + "="*80)
|
||||
print("EXTRACTED FIELDS FROM GRAB METHODS")
|
||||
print("="*80)
|
||||
|
||||
for method_name, fields in sorted(extracted_fields_by_method.items()):
|
||||
print(f"\n{method_name}:")
|
||||
print(f" Fields ({len(fields)}): {', '.join(sorted(set(fields)))}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
pytest.main([__file__, "-v"])
|
||||
86
uv.lock
generated
86
uv.lock
generated
@@ -193,6 +193,15 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/0a/4c/925909008ed5a988ccbb72dcc897407e5d6d3bd72410d69e051fc0c14647/charset_normalizer-3.4.4-py3-none-any.whl", hash = "sha256:7a32c560861a02ff789ad905a2fe94e3f840803362c84fecf1851cb4cf3dc37f", size = 53402, upload-time = "2025-10-14T04:42:31.76Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "colorama"
|
||||
version = "0.4.6"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "curlify"
|
||||
version = "3.0.0"
|
||||
@@ -382,6 +391,8 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/ee/43/3cecdc0349359e1a527cbf2e3e28e5f8f06d3343aaf82ca13437a9aa290f/greenlet-3.2.4-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:23768528f2911bcd7e475210822ffb5254ed10d71f4028387e5a99b4c6699671", size = 610497, upload-time = "2025-08-07T13:18:31.636Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/b8/19/06b6cf5d604e2c382a6f31cafafd6f33d5dea706f4db7bdab184bad2b21d/greenlet-3.2.4-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:00fadb3fedccc447f517ee0d3fd8fe49eae949e1cd0f6a611818f4f6fb7dc83b", size = 1121662, upload-time = "2025-08-07T13:42:41.117Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/a2/15/0d5e4e1a66fab130d98168fe984c509249c833c1a3c16806b90f253ce7b9/greenlet-3.2.4-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:d25c5091190f2dc0eaa3f950252122edbbadbb682aa7b1ef2f8af0f8c0afefae", size = 1149210, upload-time = "2025-08-07T13:18:24.072Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/1c/53/f9c440463b3057485b8594d7a638bed53ba531165ef0ca0e6c364b5cc807/greenlet-3.2.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6e343822feb58ac4d0a1211bd9399de2b3a04963ddeec21530fc426cc121f19b", size = 1564759, upload-time = "2025-11-04T12:42:19.395Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/47/e4/3bb4240abdd0a8d23f4f88adec746a3099f0d86bfedb623f063b2e3b4df0/greenlet-3.2.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ca7f6f1f2649b89ce02f6f229d7c19f680a6238af656f61e0115b24857917929", size = 1634288, upload-time = "2025-11-04T12:42:21.174Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/0b/55/2321e43595e6801e105fcfdee02b34c0f996eb71e6ddffca6b10b7e1d771/greenlet-3.2.4-cp313-cp313-win_amd64.whl", hash = "sha256:554b03b6e73aaabec3745364d6239e9e012d64c68ccd0b8430c64ccc14939a8b", size = 299685, upload-time = "2025-08-07T13:24:38.824Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/22/5c/85273fd7cc388285632b0498dbbab97596e04b154933dfe0f3e68156c68c/greenlet-3.2.4-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:49a30d5fda2507ae77be16479bdb62a660fa51b1eb4928b524975b3bde77b3c0", size = 273586, upload-time = "2025-08-07T13:16:08.004Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d1/75/10aeeaa3da9332c2e761e4c50d4c3556c21113ee3f0afa2cf5769946f7a3/greenlet-3.2.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:299fd615cd8fc86267b47597123e3f43ad79c9d8a22bebdce535e53550763e2f", size = 686346, upload-time = "2025-08-07T13:42:59.944Z" },
|
||||
@@ -389,6 +400,8 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/dc/8b/29aae55436521f1d6f8ff4e12fb676f3400de7fcf27fccd1d4d17fd8fecd/greenlet-3.2.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:b4a1870c51720687af7fa3e7cda6d08d801dae660f75a76f3845b642b4da6ee1", size = 694659, upload-time = "2025-08-07T13:53:17.759Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/92/2e/ea25914b1ebfde93b6fc4ff46d6864564fba59024e928bdc7de475affc25/greenlet-3.2.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:061dc4cf2c34852b052a8620d40f36324554bc192be474b9e9770e8c042fd735", size = 695355, upload-time = "2025-08-07T13:18:34.517Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/72/60/fc56c62046ec17f6b0d3060564562c64c862948c9d4bc8aa807cf5bd74f4/greenlet-3.2.4-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:44358b9bf66c8576a9f57a590d5f5d6e72fa4228b763d0e43fee6d3b06d3a337", size = 657512, upload-time = "2025-08-07T13:18:33.969Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/23/6e/74407aed965a4ab6ddd93a7ded3180b730d281c77b765788419484cdfeef/greenlet-3.2.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:2917bdf657f5859fbf3386b12d68ede4cf1f04c90c3a6bc1f013dd68a22e2269", size = 1612508, upload-time = "2025-11-04T12:42:23.427Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/0d/da/343cd760ab2f92bac1845ca07ee3faea9fe52bee65f7bcb19f16ad7de08b/greenlet-3.2.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:015d48959d4add5d6c9f6c5210ee3803a830dce46356e3bc326d6776bde54681", size = 1680760, upload-time = "2025-11-04T12:42:25.341Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e3/a5/6ddab2b4c112be95601c13428db1d8b6608a8b6039816f2ba09c346c08fc/greenlet-3.2.4-cp314-cp314-win_amd64.whl", hash = "sha256:e37ab26028f12dbb0ff65f29a8d3d44a765c61e729647bf2ddfbbed621726f01", size = 303425, upload-time = "2025-08-07T13:32:27.59Z" },
|
||||
]
|
||||
|
||||
@@ -446,6 +459,15 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "iniconfig"
|
||||
version = "2.3.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503, upload-time = "2025-10-18T21:55:43.219Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "mako"
|
||||
version = "1.3.10"
|
||||
@@ -525,6 +547,12 @@ dependencies = [
|
||||
{ name = "sqlalchemy", extra = ["asyncio"] },
|
||||
]
|
||||
|
||||
[package.optional-dependencies]
|
||||
test = [
|
||||
{ name = "pytest" },
|
||||
{ name = "pytest-asyncio" },
|
||||
]
|
||||
|
||||
[package.metadata]
|
||||
requires-dist = [
|
||||
{ name = "aiohttp", specifier = ">=3.13.1" },
|
||||
@@ -532,10 +560,13 @@ requires-dist = [
|
||||
{ name = "asyncpg", specifier = ">=0.30.0" },
|
||||
{ name = "facebook-business", specifier = ">=23.0.3" },
|
||||
{ name = "google-ads", specifier = ">=28.3.0" },
|
||||
{ name = "pytest", marker = "extra == 'test'", specifier = ">=8.0.0" },
|
||||
{ name = "pytest-asyncio", marker = "extra == 'test'", specifier = ">=0.25.0" },
|
||||
{ name = "python-dotenv", specifier = ">=1.1.1" },
|
||||
{ name = "requests-oauthlib", specifier = ">=2.0.0" },
|
||||
{ name = "sqlalchemy", extras = ["asyncio"], specifier = ">=2.0.44" },
|
||||
]
|
||||
provides-extras = ["test"]
|
||||
|
||||
[[package]]
|
||||
name = "multidict"
|
||||
@@ -627,6 +658,24 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/be/9c/92789c596b8df838baa98fa71844d84283302f7604ed565dafe5a6b5041a/oauthlib-3.3.1-py3-none-any.whl", hash = "sha256:88119c938d2b8fb88561af5f6ee0eec8cc8d552b7bb1f712743136eb7523b7a1", size = 160065, upload-time = "2025-06-19T22:48:06.508Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "packaging"
|
||||
version = "25.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/a1/d4/1fc4078c65507b51b96ca8f8c3ba19e6a61c8253c72794544580a7b6c24d/packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f", size = 165727, upload-time = "2025-04-19T11:48:59.673Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pluggy"
|
||||
version = "1.6.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "propcache"
|
||||
version = "0.4.1"
|
||||
@@ -753,6 +802,43 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/b1/ec/1fb891d8a2660716aadb2143235481d15ed1cbfe3ad669194690b0604492/pycountry-24.6.1-py3-none-any.whl", hash = "sha256:f1a4fb391cd7214f8eefd39556d740adcc233c778a27f8942c8dca351d6ce06f", size = 6335189, upload-time = "2024-06-01T04:11:49.711Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pygments"
|
||||
version = "2.19.2"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631, upload-time = "2025-06-21T13:39:12.283Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pytest"
|
||||
version = "8.4.2"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "colorama", marker = "sys_platform == 'win32'" },
|
||||
{ name = "iniconfig" },
|
||||
{ name = "packaging" },
|
||||
{ name = "pluggy" },
|
||||
{ name = "pygments" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/a3/5c/00a0e072241553e1a7496d638deababa67c5058571567b92a7eaa258397c/pytest-8.4.2.tar.gz", hash = "sha256:86c0d0b93306b961d58d62a4db4879f27fe25513d4b969df351abdddb3c30e01", size = 1519618, upload-time = "2025-09-04T14:34:22.711Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/a8/a4/20da314d277121d6534b3a980b29035dcd51e6744bd79075a6ce8fa4eb8d/pytest-8.4.2-py3-none-any.whl", hash = "sha256:872f880de3fc3a5bdc88a11b39c9710c3497a547cfa9320bc3c5e62fbf272e79", size = 365750, upload-time = "2025-09-04T14:34:20.226Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pytest-asyncio"
|
||||
version = "1.2.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "pytest" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/42/86/9e3c5f48f7b7b638b216e4b9e645f54d199d7abbbab7a64a13b4e12ba10f/pytest_asyncio-1.2.0.tar.gz", hash = "sha256:c609a64a2a8768462d0c99811ddb8bd2583c33fd33cf7f21af1c142e824ffb57", size = 50119, upload-time = "2025-09-12T07:33:53.816Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/04/93/2fa34714b7a4ae72f2f8dad66ba17dd9a2c793220719e736dda28b7aec27/pytest_asyncio-1.2.0-py3-none-any.whl", hash = "sha256:8e17ae5e46d8e7efe51ab6494dd2010f4ca8dae51652aa3c8d55acf50bfb2e99", size = 15095, upload-time = "2025-09-12T07:33:52.639Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "python-dotenv"
|
||||
version = "1.1.1"
|
||||
|
||||
150
view_sql_archive/public/insights_flattened.sql
Normal file
150
view_sql_archive/public/insights_flattened.sql
Normal file
@@ -0,0 +1,150 @@
|
||||
-- Auto refreshes when new entries get added. New things can be extracted from actions if necessary.
|
||||
|
||||
CREATE MATERIALIZED VIEW adset_insights_flattened AS
|
||||
SELECT
|
||||
time,
|
||||
adset_id,
|
||||
campaign_id,
|
||||
account_id,
|
||||
impressions,
|
||||
clicks,
|
||||
spend,
|
||||
reach,
|
||||
ctr,
|
||||
cpc,
|
||||
cpm,
|
||||
date_preset,
|
||||
date_start,
|
||||
date_stop,
|
||||
fetched_at,
|
||||
(SELECT (value->>'value')::numeric
|
||||
FROM jsonb_array_elements(actions)
|
||||
WHERE value->>'action_type' = 'link_click') AS link_click,
|
||||
(SELECT (value->>'value')::numeric
|
||||
FROM jsonb_array_elements(actions)
|
||||
WHERE value->>'action_type' = 'landing_page_view') AS landing_page_view,
|
||||
(SELECT (value->>'value')::numeric
|
||||
FROM jsonb_array_elements(actions)
|
||||
WHERE value->>'action_type' = 'lead') AS lead
|
||||
FROM adset_insights;
|
||||
|
||||
-- Add indexes for common query patterns
|
||||
|
||||
CREATE INDEX idx_adset_insights_flat_campaign ON adset_insights_flattened(campaign_id);
|
||||
CREATE INDEX idx_adset_insights_flat_date ON adset_insights_flattened(date_start, date_stop);
|
||||
|
||||
|
||||
CREATE UNIQUE INDEX idx_adset_insights_flat_unique ON adset_insights_flattened(time, adset_id);
|
||||
REFRESH MATERIALIZED VIEW CONCURRENTLY adset_insights_flattened;
|
||||
|
||||
|
||||
|
||||
--- same or atleast very similar for account_insights
|
||||
|
||||
CREATE MATERIALIZED VIEW account_insights_flattened AS
|
||||
SELECT
|
||||
time,
|
||||
account_id,
|
||||
impressions,
|
||||
clicks,
|
||||
spend,
|
||||
reach,
|
||||
frequency,
|
||||
ctr,
|
||||
cpc,
|
||||
cpm,
|
||||
cpp,
|
||||
date_preset,
|
||||
date_start,
|
||||
date_stop,
|
||||
fetched_at,
|
||||
(SELECT (value->>'value')::numeric
|
||||
FROM jsonb_array_elements(actions)
|
||||
WHERE value->>'action_type' = 'link_click') AS link_click,
|
||||
(SELECT (value->>'value')::numeric
|
||||
FROM jsonb_array_elements(actions)
|
||||
WHERE value->>'action_type' = 'landing_page_view') AS landing_page_view,
|
||||
(SELECT (value->>'value')::numeric
|
||||
FROM jsonb_array_elements(actions)
|
||||
WHERE value->>'action_type' = 'lead') AS lead,
|
||||
(SELECT (value->>'value')::numeric
|
||||
FROM jsonb_array_elements(cost_per_action_type)
|
||||
WHERE value->>'action_type' = 'link_click') AS cost_per_link_click,
|
||||
(SELECT (value->>'value')::numeric
|
||||
FROM jsonb_array_elements(cost_per_action_type)
|
||||
WHERE value->>'action_type' = 'landing_page_view') AS cost_per_landing_page_view,
|
||||
(SELECT (value->>'value')::numeric
|
||||
FROM jsonb_array_elements(cost_per_action_type)
|
||||
WHERE value->>'action_type' = 'lead') AS cost_per_lead
|
||||
|
||||
FROM account_insights;
|
||||
|
||||
|
||||
CREATE INDEX idx_account_insights_flat_date ON account_insights_flattened(date_start, date_stop);
|
||||
|
||||
|
||||
CREATE UNIQUE INDEX idx_account_insights_flat_unique ON account_insights_flattened(time, account_id);
|
||||
REFRESH MATERIALIZED VIEW CONCURRENTLY account_insights_flattened;
|
||||
|
||||
|
||||
--- campaign insights
|
||||
|
||||
CREATE MATERIALIZED VIEW campaign_insights_flattened AS
|
||||
SELECT
|
||||
time,
|
||||
account_id,
|
||||
campaign_id,
|
||||
impressions,
|
||||
clicks,
|
||||
spend,
|
||||
reach,
|
||||
ctr,
|
||||
cpc,
|
||||
cpm,
|
||||
date_preset,
|
||||
date_start,
|
||||
date_stop,
|
||||
fetched_at,
|
||||
(SELECT (value->>'value')::numeric
|
||||
FROM jsonb_array_elements(actions)
|
||||
WHERE value->>'action_type' = 'link_click') AS link_click,
|
||||
(SELECT (value->>'value')::numeric
|
||||
FROM jsonb_array_elements(actions)
|
||||
WHERE value->>'action_type' = 'landing_page_view') AS landing_page_view,
|
||||
(SELECT (value->>'value')::numeric
|
||||
FROM jsonb_array_elements(actions)
|
||||
WHERE value->>'action_type' = 'lead') AS lead
|
||||
|
||||
|
||||
FROM campaign_insights;
|
||||
|
||||
CREATE INDEX idx_campaign_insights_flat_date ON campaign_insights_flattened(date_start, date_stop);
|
||||
|
||||
CREATE UNIQUE INDEX idx_campaign_insights_flat_unique ON campaign_insights_flattened(time, campaign_id);
|
||||
|
||||
REFRESH MATERIALIZED VIEW CONCURRENTLY campaign_insights_flattened;
|
||||
|
||||
|
||||
-- permissinos
|
||||
|
||||
-- Grant SELECT on the existing materialized view
|
||||
GRANT SELECT ON account_insights_flattened TO grafana;
|
||||
|
||||
GRANT SELECT ON campaign_insights_flattened TO grafana;
|
||||
|
||||
GRANT SELECT ON adset_insights_flattened TO grafana;
|
||||
|
||||
|
||||
-- Grant SELECT on all existing tables and views in the schema
|
||||
GRANT SELECT ON ALL TABLES IN SCHEMA public TO grafana;
|
||||
|
||||
-- Grant SELECT on all future tables and views in the schema
|
||||
ALTER DEFAULT PRIVILEGES IN SCHEMA public
|
||||
GRANT SELECT ON TABLES TO grafana;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
10
view_sql_archive/view.md
Normal file
10
view_sql_archive/view.md
Normal file
@@ -0,0 +1,10 @@
|
||||
To make handling the dashboard easier its good practice to create views that make the underlying data more accessible. Since data does not get updated that frequently we can also use materialized views to speed up query performance at the cost of storage.
|
||||
|
||||
## Schemas
|
||||
|
||||
public: Contains the data from the meta_api_grabber application. All ad accounts.
|
||||
|
||||
meta: Contains data from airbyte meta api connector. Unfortunatly somewhat bugged for insights on campaign and adset level. Aggregating data from the ads level is possible but much more cumbersome and error prone then querying the stuff directly. Thats why for now I'm continuing to use the meta_api_grabber
|
||||
|
||||
google: Will contain the data from google from the airbyte connector assuming we get access and the connector is good.
|
||||
|
||||
Reference in New Issue
Block a user