{
"name": "ecommerce-daily-pipeline",
"tags": {
"team": "data-engineering",
"project": "ecommerce",
"cost_center": "DE-001"
},
"tasks": [
{
"task_key": "ingest_orders",
"description": "S3에서 주문 데이터 수집",
"pipeline_task": {
"pipeline_id": "abc-123-def"
},
"timeout_seconds": 1800
},
{
"task_key": "ingest_customers",
"description": "고객 마스터 데이터 동기화",
"notebook_task": {
"notebook_path": "/Workspace/etl/ingest_customers",
"base_parameters": {
"mode": "incremental"
}
},
"new_cluster": {
"spark_version": "15.4.x-scala2.12",
"num_workers": 2,
"node_type_id": "m5.xlarge",
"aws_attributes": {
"availability": "SPOT_WITH_FALLBACK",
"first_on_demand": 1
}
},
"timeout_seconds": 1800,
"max_retries": 2,
"min_retry_interval_millis": 60000
},
{
"task_key": "transform_and_join",
"description": "주문-고객 조인 및 변환",
"depends_on": [
{"task_key": "ingest_orders"},
{"task_key": "ingest_customers"}
],
"notebook_task": {
"notebook_path": "/Workspace/etl/transform_orders",
"base_parameters": {
"target_date": "{{job.parameters.target_date}}"
}
},
"new_cluster": {
"spark_version": "15.4.x-scala2.12",
"num_workers": 4,
"node_type_id": "r5.xlarge"
},
"timeout_seconds": 3600,
"max_retries": 1
},
{
"task_key": "validate_results",
"description": "데이터 품질 검증",
"depends_on": [
{"task_key": "transform_and_join"}
],
"sql_task": {
"query": {
"query_text": "SELECT CASE WHEN COUNT(*) > 0 THEN 'PASS' ELSE 'FAIL' END AS result FROM gold.daily_revenue WHERE sale_date = CURRENT_DATE()"
},
"warehouse_id": "warehouse-id-here"
}
},
{
"task_key": "build_aggregates",
"description": "Gold 집계 테이블 생성",
"depends_on": [
{"task_key": "validate_results"}
],
"dbt_task": {
"project_directory": "/Workspace/dbt/ecommerce",
"commands": ["dbt run --select gold_models"],
"warehouse_id": "warehouse-id-here"
}
}
],
"parameters": [
{
"name": "target_date",
"default": ""
}
],
"email_notifications": {
"on_failure": ["data-team@company.com"]
},
"webhook_notifications": {
"on_failure": [{"id": "slack-webhook-id"}]
},
"schedule": {
"quartz_cron_expression": "0 0 2 * * ?",
"timezone_id": "Asia/Seoul"
},
"max_concurrent_runs": 1,
"health": {
"rules": [
{
"metric": "RUN_DURATION_SECONDS",
"op": "GREATER_THAN",
"value": 7200
}
]
}
}