cube-studio/myapp/init/init-etl-pipeline.json

[
{
"name": "dau",
"describe": "dau计算",
"config": {
"alert_user": "admin"
},
"workflow": "airflow",
"dag_json": {
"cos导入hdfs-1686184253953": {
"label": "数据导入",
"location": [
304,
96
],
"color": {
"color": "rgba(0,170,200,1)",
"bg": "rgba(0,170,200,0.02)"
},
"template": "cos导入hdfs",
"template-group": "出库入库",
"task-config": {
"crontab": "1 1 * * *",
"selfDepend": "单实例运行",
"ResourceGroup": "default",
"alert_user": "admin,",
"timeout": "0",
"retry": "0",
"hdfsPath": "hdfs://xx/xxx",
"cosPath": "/xx/${YYYYMMDD}.tar.gz",
"ifNeedZip": "1",
"label": "数据导入"
},
"upstream": [],
"task_id": 1
},
"hdfs入库至hive-1686184263002": {
"label": "数据入库",
"location": [
304,
224
],
"color": {
"color": "rgba(0,170,200,1)",
"bg": "rgba(0,170,200,0.02)"
},
"template": "hdfs入库至hive",
"template-group": "出库入库",
"task-config": {
"crontab": "1 1 * * *",
"selfDepend": "单实例运行",
"ResourceGroup": "default",
"alert_user": "admin,",
"timeout": "0",
"retry": "0",
"charSet": "UTF-8",
"databaseName": "",
"tableName": "",
"delimiter": "9",
"failedOnZeroWrited": "1",
"partitionType": "P_${YYYYMMDDHH}",
"sourceFilePath": "",
"sourceFileNames": "*",
"sourceColumnNames": "",
"targetColumnNames": "",
"loadMode": "TRUNCATE",
"label": "数据入库"
},
"upstream": [
"cos导入hdfs-1686184253953"
],
"task_id": 2
},
"SQL-1686184276800": {
"label": "局部特征计算",
"location": [
-16,
352
],
"color": {
"color": "rgba(0,200,153,1)",
"bg": "rgba(0,200,153,0.02)"
},
"template": "SQL",
"template-group": "数据计算",
"task-config": {
"crontab": "1 1 * * *",
"selfDepend": "单实例运行",
"ResourceGroup": "default",
"alert_user": "admin,",
"timeout": "0",
"retry": "0",
"filterSQL": "\n --库名替换下面的demo_database\n use demo_database;\n\n --建表语句替换下面的demo_table修改字段。一定要加“if not exists”这样使只在第一次运行时建表\n CREATE TABLE if not exists demo_table(\n qimei36 STRING COMMENT '唯一设备ID',\n userid_id STRING COMMENT '用户id各app的用户id',\n device_id STRING COMMENT '设备id各app的device_id',\n ftime INT COMMENT '数据分区时间 格式yyyymmdd'\n )\n PARTITION BY LIST( ftime ) --定义分区字段替换掉ftime。\n (\n PARTITION p_20220323 VALUES IN ( 20220323 ), --初始分区分区名替换p_20220323分区值替换20220323\n PARTITION default\n )\n STORED AS ORCFILE COMPRESS;\n\n -- 分区,根据时间参数新建分区。\n alter table demo_table drop partition (p_${YYYYMMDD});\n alter table demo_table add partition p_${YYYYMMDD} values in (${YYYYMMDD});\n\n -- 写入用你的sql逻辑替换。\n insert table demo_table\n select * from other_db::other_table partition(p_${YYYYMMDD}) t;\n ",
"special_para": "set hive.exec.parallel = true;set hive.execute.engine=spark;set hive.multi.join.use.hive=false;set hive.spark.failed.retry=false;",
"label": "局部特征计算"
},
"upstream": [
"hdfs入库至hive-1686184263002"
],
"task_id": 3
},
"SparkScala-1686184279367": {
"label": "局部特征计算",
"location": [
304,
352
],
"color": {
"color": "rgba(0,200,153,1)",
"bg": "rgba(0,200,153,0.02)"
},
"template": "SparkScala",
"template-group": "数据计算",
"task-config": {
"crontab": "1 1 * * *",
"selfDepend": "单实例运行",
"ResourceGroup": "default",
"alert_user": "admin,",
"timeout": "0",
"retry": "0",
"jar_path": "",
"className": "",
"files": "",
"programSpecificParams": "",
"options": "",
"dynamicAllocation": "1",
"driver_memory": "2g",
"num_executors": "4",
"executor_memory": "2g",
"executor_cores": "2",
"task.main.timeout": "480",
"task.check.timeout": "5",
"label": "局部特征计算"
},
"upstream": [
"hdfs入库至hive-1686184263002"
],
"task_id": 4
},
"pyspark-1686184281148": {
"label": "局部特征计算",
"location": [
608,
352
],
"color": {
"color": "rgba(0,200,153,1)",
"bg": "rgba(0,200,153,0.02)"
},
"template": "pyspark",
"template-group": "数据计算",
"task-config": {
"crontab": "1 1 * * *",
"selfDepend": "单实例运行",
"ResourceGroup": "default",
"alert_user": "admin,",
"timeout": "0",
"retry": "0",
"py_script_path": "",
"files": "",
"pyFiles": "",
"programSpecificParams": "",
"options": "",
"dynamicAllocation": 1,
"driver_memory": "2g",
"num_executors": 4,
"executor_memory": "2g",
"executor_cores": 2,
"task.main.timeout": 480,
"task.check.timeout": "5",
"label": "局部特征计算"
},
"upstream": [
"hdfs入库至hive-1686184263002"
],
"task_id": 5
},
"hive出库至hdfs-1686184293917": {
"label": "结果计算",
"location": [
304,
496
],
"color": {
"color": "rgba(0,170,200,1)",
"bg": "rgba(0,170,200,0.02)"
},
"template": "hive出库至hdfs",
"template-group": "出库入库",
"task-config": {
"crontab": "1 1 * * *",
"selfDepend": "单实例运行",
"ResourceGroup": "default",
"alert_user": "admin,",
"timeout": "0",
"retry": "0",
"databaseName": "",
"destCheckFileName": "",
"destCheckFilePath": "",
"destFileDelimiter": "9",
"destFilePath": "",
"filterSQL": "select t1,t2,t3 from your_table where imp_date=${YYYYMMDD}",
"label": "结果计算"
},
"upstream": [
"SQL-1686184276800",
"pyspark-1686184281148",
"SparkScala-1686184279367"
],
"task_id": 6
},
"hdfs导入cos-1686184296749": {
"label": "数据导出",
"location": [
304,
608
],
"color": {
"color": "rgba(0,170,200,1)",
"bg": "rgba(0,170,200,0.02)"
},
"template": "hdfs导入cos",
"template-group": "出库入库",
"task-config": {
"crontab": "1 1 * * *",
"selfDepend": "单实例运行",
"ResourceGroup": "default",
"alert_user": "admin,",
"timeout": "0",
"retry": "0",
"hdfsPath": "hdfs://xx/xxx",
"cosPath": "/xx/xx/${YYYYMMDD}.tar.gz",
"ifNeedZip": "1",
"label": "数据导出"
},
"upstream": [
"hive出库至hdfs-1686184293917"
],
"task_id": 7
}
}
}
]
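
For orientation, below is a minimal Python sketch of how a consumer of this init file might read it: load the pipeline list, expand the ${YYYYMMDD}/${YYYYMMDDHH} date macros, and visit the dag_json tasks in dependency order via their "upstream" lists. The file path, the macro semantics (partition date equals run date), and the traversal are illustrative assumptions, not cube-studio's actual loader.

# Minimal sketch, not the cube-studio loader: parse the init file, expand
# the assumed date macros, and topologically walk tasks over "upstream" edges.
import json
from collections import deque
from datetime import datetime


def expand_macros(text, run_time):
    # Assumed macro set; the real platform may support more variables.
    return (text.replace("${YYYYMMDD}", run_time.strftime("%Y%m%d"))
                .replace("${YYYYMMDDHH}", run_time.strftime("%Y%m%d%H")))


def topo_order(dag):
    # Simple topological sort: repeatedly emit tasks whose upstream set is empty.
    pending = {name: set(node.get("upstream", [])) for name, node in dag.items()}
    ready = deque(sorted(n for n, ups in pending.items() if not ups))
    order = []
    while ready:
        name = ready.popleft()
        order.append(name)
        for other, ups in pending.items():
            if name in ups:
                ups.remove(name)
                if not ups:
                    ready.append(other)
    if len(order) != len(dag):
        raise ValueError("cycle or missing upstream reference in dag_json")
    return order


if __name__ == "__main__":
    # Path assumed relative to the repo root, matching this file's location.
    with open("myapp/init/init-etl-pipeline.json", encoding="utf-8") as f:
        pipelines = json.load(f)
    run_time = datetime.now()
    for pipeline in pipelines:
        dag = pipeline["dag_json"]
        for name in topo_order(dag):
            cfg = {k: expand_macros(v, run_time) if isinstance(v, str) else v
                   for k, v in dag[name]["task-config"].items()}
            print(pipeline["name"], name, "->", cfg.get("crontab"))

On this file the walk yields the import task first, then the Hive load, then the three parallel feature-computation tasks, and finally the export tasks, mirroring the upstream edges declared above.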