|
55 | 55 |
|
56 | 56 | PIPELINE_NAME = os.environ.get("GCP_DATAFUSION_PIPELINE_NAME", "airflow_test")
|
57 | 57 | PIPELINE = {
|
58 |
| - "name": "test-pipe", |
| 58 | + "artifact": { |
| 59 | + "name": "cdap-data-pipeline", |
| 60 | + "version": "6.5.1", |
| 61 | + "scope": "SYSTEM", |
| 62 | + "label": "Data Pipeline - System Test", |
| 63 | + }, |
59 | 64 | "description": "Data Pipeline Application",
|
60 |
| - "artifact": {"name": "cdap-data-pipeline", "version": "6.4.1", "scope": "SYSTEM"}, |
| 65 | + "name": "test-pipe", |
61 | 66 | "config": {
|
62 | 67 | "resources": {"memoryMB": 2048, "virtualCores": 1},
|
63 | 68 | "driverResources": {"memoryMB": 2048, "virtualCores": 1},
|
64 | 69 | "connections": [{"from": "GCS", "to": "GCS2"}],
|
65 | 70 | "comments": [],
|
66 | 71 | "postActions": [],
|
67 | 72 | "properties": {},
|
68 |
| - "processTimingEnabled": True, |
69 |
| - "stageLoggingEnabled": False, |
| 73 | + "processTimingEnabled": "true", |
| 74 | + "stageLoggingEnabled": "false", |
70 | 75 | "stages": [
|
71 | 76 | {
|
72 | 77 | "name": "GCS",
|
73 | 78 | "plugin": {
|
74 | 79 | "name": "GCSFile",
|
75 | 80 | "type": "batchsource",
|
76 | 81 | "label": "GCS",
|
77 |
| - "artifact": { |
78 |
| - "name": "google-cloud", |
79 |
| - "version": "0.17.3", |
80 |
| - "scope": "SYSTEM", |
81 |
| - }, |
| 82 | + "artifact": {"name": "google-cloud", "version": "0.18.1", "scope": "SYSTEM"}, |
82 | 83 | "properties": {
|
83 | 84 | "project": "auto-detect",
|
84 | 85 | "format": "text",
|
|
87 | 88 | "filenameOnly": "false",
|
88 | 89 | "recursive": "false",
|
89 | 90 | "encrypted": "false",
|
90 |
| - "schema": '{"type":"record","name":"etlSchemaBody","fields":' |
91 |
| - '[{"name":"offset","type":"long"},{"name":"body","type":"string"}]}', |
| 91 | + "schema": "{\"type\":\"record\",\"name\":\"textfile\",\"fields\":[{\"name\"\ |
| 92 | + :\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", |
92 | 93 | "path": BUCKET_1_URI,
|
93 | 94 | "referenceName": "foo_bucket",
|
| 95 | + "useConnection": "false", |
| 96 | + "serviceAccountType": "filePath", |
| 97 | + "sampleSize": "1000", |
| 98 | + "fileEncoding": "UTF-8", |
94 | 99 | },
|
95 | 100 | },
|
96 |
| - "outputSchema": [ |
97 |
| - { |
98 |
| - "name": "etlSchemaBody", |
99 |
| - "schema": '{"type":"record","name":"etlSchemaBody","fields":' |
100 |
| - '[{"name":"offset","type":"long"},{"name":"body","type":"string"}]}', |
101 |
| - } |
102 |
| - ], |
| 101 | + "outputSchema": "{\"type\":\"record\",\"name\":\"textfile\",\"fields\"\ |
| 102 | + :[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", |
| 103 | + "id": "GCS", |
103 | 104 | },
|
104 | 105 | {
|
105 | 106 | "name": "GCS2",
|
106 | 107 | "plugin": {
|
107 | 108 | "name": "GCS",
|
108 | 109 | "type": "batchsink",
|
109 | 110 | "label": "GCS2",
|
110 |
| - "artifact": { |
111 |
| - "name": "google-cloud", |
112 |
| - "version": "0.17.3", |
113 |
| - "scope": "SYSTEM", |
114 |
| - }, |
| 111 | + "artifact": {"name": "google-cloud", "version": "0.18.1", "scope": "SYSTEM"}, |
115 | 112 | "properties": {
|
116 | 113 | "project": "auto-detect",
|
117 | 114 | "suffix": "yyyy-MM-dd-HH-mm",
|
118 | 115 | "format": "json",
|
119 | 116 | "serviceFilePath": "auto-detect",
|
120 | 117 | "location": "us",
|
121 |
| - "schema": '{"type":"record","name":"etlSchemaBody","fields":' |
122 |
| - '[{"name":"offset","type":"long"},{"name":"body","type":"string"}]}', |
| 118 | + "schema": "{\"type\":\"record\",\"name\":\"textfile\",\"fields\":[{\"name\"\ |
| 119 | + :\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", |
123 | 120 | "referenceName": "bar",
|
124 | 121 | "path": BUCKET_2_URI,
|
| 122 | + "serviceAccountType": "filePath", |
| 123 | + "contentType": "application/octet-stream", |
125 | 124 | },
|
126 | 125 | },
|
127 |
| - "outputSchema": [ |
128 |
| - { |
129 |
| - "name": "etlSchemaBody", |
130 |
| - "schema": '{"type":"record","name":"etlSchemaBody","fields":' |
131 |
| - '[{"name":"offset","type":"long"},{"name":"body","type":"string"}]}', |
132 |
| - } |
133 |
| - ], |
| 126 | + "outputSchema": "{\"type\":\"record\",\"name\":\"textfile\",\"fields\"\ |
| 127 | + :[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", |
134 | 128 | "inputSchema": [
|
135 | 129 | {
|
136 | 130 | "name": "GCS",
|
137 |
| - "schema": '{"type":"record","name":"etlSchemaBody","fields":' |
138 |
| - '[{"name":"offset","type":"long"},{"name":"body","type":"string"}]}', |
| 131 | + "schema": "{\"type\":\"record\",\"name\":\"textfile\",\"fields\":[{\"name\"\ |
| 132 | + :\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", |
139 | 133 | }
|
140 | 134 | ],
|
| 135 | + "id": "GCS2", |
141 | 136 | },
|
142 | 137 | ],
|
143 | 138 | "schedule": "0 * * * *",
|
144 | 139 | "engine": "spark",
|
145 | 140 | "numOfRecordsPreview": 100,
|
| 141 | + "description": "Data Pipeline Application", |
146 | 142 | "maxConcurrentRuns": 1,
|
147 | 143 | },
|
148 | 144 | }
|
|
0 commit comments