docker-compose.yaml
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Basic Airflow cluster configuration for CeleryExecutor with Redis and PostgreSQL.
#
# WARNING: This configuration is for local development. Do not use it in a production deployment.
#
# This configuration supports basic configuration using environment variables or an .env file
# The following variables are supported:
#
# AIRFLOW_IMAGE_NAME           - Docker image name used to run Airflow.
#                                Default: apache/airflow:2.8.1
# AIRFLOW_UID                  - User ID in Airflow containers
#                                Default: 50000
# AIRFLOW_PROJ_DIR             - Base path to which all the files will be volumed.
#                                Default: .
# Those configurations are useful mostly in case of standalone testing/running Airflow in test/try-out mode
#
# _AIRFLOW_WWW_USER_USERNAME   - Username for the administrator account (if requested).
#                                Default: airflow
# _AIRFLOW_WWW_USER_PASSWORD   - Password for the administrator account (if requested).
#                                Default: airflow
# _PIP_ADDITIONAL_REQUIREMENTS - Additional PIP requirements to add when starting all containers.
#                                Use this option ONLY for quick checks. Installing requirements at container
#                                startup is done EVERY TIME the service is started.
#                                A better way is to build a custom image or extend the official image
#                                as described in https://airflow.apache.org/docs/docker-stack/build.html.
#                                Default: ''
#
# Feel free to modify this file to suit your needs.
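#
# A minimal sketch of an .env file placed next to this docker-compose.yaml, covering the
# variables listed above; the values shown are illustrative defaults, not project settings:
#
#   AIRFLOW_UID=50000                        # on Linux, prefer the output of `id -u`
#   AIRFLOW_IMAGE_NAME=apache/airflow:2.8.1  # only used when the "image:" line below is active
#   AIRFLOW_PROJ_DIR=.
#   _AIRFLOW_WWW_USER_USERNAME=airflow
#   _AIRFLOW_WWW_USER_PASSWORD=airflow
#   _PIP_ADDITIONAL_REQUIREMENTS=            # e.g. "pandas==2.1.4", for quick checks only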
---
x-airflow-common:
  &airflow-common
  # In order to add custom dependencies or upgrade provider packages you can use your extended image.
  # Comment the image line, place your Dockerfile in the directory where you placed the docker-compose.yaml
  # and uncomment the "build" line below, then run `docker-compose build` to build the images.
  # image: ${AIRFLOW_IMAGE_NAME:-apache/airflow:2.8.1}
  build: .
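  # A minimal sketch of the Dockerfile that the "build: ." line above expects next to this
  # compose file; the base image tag and the requirements.txt name are assumptions used for
  # illustration, not files confirmed to exist in this project:
  #
  #   FROM apache/airflow:2.8.1
  #   COPY requirements.txt /requirements.txt
  #   RUN pip install --no-cache-dir -r /requirements.txt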
  environment:
    &airflow-common-env
    AIRFLOW__CORE__EXECUTOR: CeleryExecutor
    AIRFLOW__DATABASE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:airflow@postgres/airflow
    AIRFLOW__CELERY__RESULT_BACKEND: db+postgresql://airflow:airflow@postgres/airflow
    AIRFLOW__CELERY__BROKER_URL: redis://:@redis:6379/0
    AIRFLOW__CORE__FERNET_KEY: ''
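    # With the empty value above, Airflow stores connection passwords unencrypted; for a more
    # durable local setup you could generate a key (a sketch, assuming the cryptography package
    # is available on the host) and set it here:
    #   python -c "from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())"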
    AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION: 'true'
    AIRFLOW__CORE__LOAD_EXAMPLES: 'true'
    AIRFLOW__API__AUTH_BACKENDS: 'airflow.api.auth.backend.basic_auth,airflow.api.auth.backend.session'
    # yamllint disable rule:line-length
    # Use simple http server on scheduler for health checks
    # See https://airflow.apache.org/docs/apache-airflow/stable/administration-and-deployment/logging-monitoring/check-health.html#scheduler-health-check-server
    # yamllint enable rule:line-length
    AIRFLOW__SCHEDULER__ENABLE_HEALTH_CHECK: 'true'
    # WARNING: Use the _PIP_ADDITIONAL_REQUIREMENTS option ONLY for quick checks;
    # for any other purpose (development, test and especially production usage) build/extend the Airflow image.
    # - ${AIRFLOW_PROJ_DIR:-.}/scania_failures_2:/opt/airflow/
    _PIP_ADDITIONAL_REQUIREMENTS: ${_PIP_ADDITIONAL_REQUIREMENTS:-}
  volumes:
    - ${AIRFLOW_PROJ_DIR:-.}/dags:/opt/airflow/dags
    - ${AIRFLOW_PROJ_DIR:-.}/logs:/opt/airflow/logs
    - ${AIRFLOW_PROJ_DIR:-.}/config:/opt/airflow/config
    - ${AIRFLOW_PROJ_DIR:-.}/plugins:/opt/airflow/plugins
    - ${AIRFLOW_PROJ_DIR:-.}/my_project_secrets:/opt/airflow/my_project_secrets
    - ${AIRFLOW_PROJ_DIR:-.}/.dvc:/opt/airflow/.dvc
    - ${AIRFLOW_PROJ_DIR:-.}/.git:/opt/airflow/.git
    - ${AIRFLOW_PROJ_DIR:-.}/artifacts:/opt/airflow/artifacts
    - ${AIRFLOW_PROJ_DIR:-.}/schema.yaml:/opt/airflow/schema.yaml
  user: "${AIRFLOW_UID:-50000}:0"
  depends_on:
    &airflow-common-depends-on
    redis:
      condition: service_healthy
    postgres:
      condition: service_healthy

services:
  postgres:
    image: postgres:13
    environment:
      POSTGRES_USER: airflow
      POSTGRES_PASSWORD: airflow
      POSTGRES_DB: airflow
    volumes:
      - postgres-db-volume:/var/lib/postgresql/data
    healthcheck:
      test: ["CMD", "pg_isready", "-U", "airflow"]
      interval: 10s
      retries: 5
      start_period: 5s
    restart: always

  redis:
    image: redis:latest
    expose:
      - 6379
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 10s
      timeout: 30s
      retries: 50
      start_period: 30s
    restart: always

  airflow-webserver:
    <<: *airflow-common
    command: webserver
    ports:
      - "8080:8080"
    healthcheck:
      test: ["CMD", "curl", "--fail", "http://localhost:8080/health"]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 30s
    restart: always
    depends_on:
      <<: *airflow-common-depends-on
      airflow-init:
        condition: service_completed_successfully

  airflow-scheduler:
    <<: *airflow-common
    command: scheduler
    healthcheck:
      test: ["CMD", "curl", "--fail", "http://localhost:8974/health"]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 30s
    restart: always
    depends_on:
      <<: *airflow-common-depends-on
      airflow-init:
        condition: service_completed_successfully

  airflow-worker:
    <<: *airflow-common
    command: celery worker
    healthcheck:
      # yamllint disable rule:line-length
      test:
        - "CMD-SHELL"
        - 'celery --app airflow.providers.celery.executors.celery_executor.app inspect ping -d "celery@$${HOSTNAME}" || celery --app airflow.executors.celery_executor.app inspect ping -d "celery@$${HOSTNAME}"'
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 30s
    environment:
      <<: *airflow-common-env
      # Required to handle warm shutdown of the celery workers properly
      # See https://airflow.apache.org/docs/docker-stack/entrypoint.html#signal-propagation
      DUMB_INIT_SETSID: "0"
    restart: always
    depends_on:
      <<: *airflow-common-depends-on
      airflow-init:
        condition: service_completed_successfully

  airflow-triggerer:
    <<: *airflow-common
    command: triggerer
    healthcheck:
      test: ["CMD-SHELL", 'airflow jobs check --job-type TriggererJob --hostname "$${HOSTNAME}"']
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 30s
    restart: always
    depends_on:
      <<: *airflow-common-depends-on
      airflow-init:
        condition: service_completed_successfully

  airflow-init:
    <<: *airflow-common
    entrypoint: /bin/bash
    # yamllint disable rule:line-length
    command:
      - -c
      - |
        if [[ -z "${AIRFLOW_UID}" ]]; then
          echo
          echo -e "\033[1;33mWARNING!!!: AIRFLOW_UID not set!\e[0m"
          echo "If you are on Linux, you SHOULD follow the instructions below to set "
          echo "AIRFLOW_UID environment variable, otherwise files will be owned by root."
          echo "For other operating systems you can get rid of the warning with manually created .env file:"
          echo "    See: https://airflow.apache.org/docs/apache-airflow/stable/howto/docker-compose/index.html#setting-the-right-airflow-user"
          echo
        fi
        one_meg=1048576
        mem_available=$$(($$(getconf _PHYS_PAGES) * $$(getconf PAGE_SIZE) / one_meg))
        cpus_available=$$(grep -cE 'cpu[0-9]+' /proc/stat)
        disk_available=$$(df / | tail -1 | awk '{print $$4}')
        warning_resources="false"
        if (( mem_available < 4000 )) ; then
          echo
          echo -e "\033[1;33mWARNING!!!: Not enough memory available for Docker.\e[0m"
          echo "At least 4GB of memory required. You have $$(numfmt --to iec $$((mem_available * one_meg)))"
          echo
          warning_resources="true"
        fi
        if (( cpus_available < 2 )); then
          echo
          echo -e "\033[1;33mWARNING!!!: Not enough CPUS available for Docker.\e[0m"
          echo "At least 2 CPUs recommended. You have $${cpus_available}"
          echo
          warning_resources="true"
        fi
        if (( disk_available < one_meg * 10 )); then
          echo
          echo -e "\033[1;33mWARNING!!!: Not enough Disk space available for Docker.\e[0m"
          echo "At least 10 GBs recommended. You have $$(numfmt --to iec $$((disk_available * 1024 )))"
          echo
          warning_resources="true"
        fi
        if [[ $${warning_resources} == "true" ]]; then
          echo
          echo -e "\033[1;33mWARNING!!!: You have not enough resources to run Airflow (see above)!\e[0m"
          echo "Please follow the instructions to increase amount of resources available:"
          echo "   https://airflow.apache.org/docs/apache-airflow/stable/howto/docker-compose/index.html#before-you-begin"
          echo
        fi
        mkdir -p /sources/logs /sources/dags /sources/plugins
        mkdir -p /sources/my_project_secrets /sources/.dvc /sources/.git /sources/artifacts
        chown -R "${AIRFLOW_UID}:0" /sources/{logs,dags,plugins,my_project_secrets,.dvc,.git,artifacts,schema.yaml}
        exec /entrypoint airflow version
    # yamllint enable rule:line-length
    # chown -R "${AIRFLOW_UID}:0" /sources/{logs,dags,plugins}
    # chown -R "${AIRFLOW_UID}:0" /opt/airflow
    # chown -R "${AIRFLOW_UID}:0" /opt/airflow/Secrets
    environment:
      <<: *airflow-common-env
      _AIRFLOW_DB_MIGRATE: 'true'
      _AIRFLOW_WWW_USER_CREATE: 'true'
      _AIRFLOW_WWW_USER_USERNAME: ${_AIRFLOW_WWW_USER_USERNAME:-airflow}
      _AIRFLOW_WWW_USER_PASSWORD: ${_AIRFLOW_WWW_USER_PASSWORD:-airflow}
      _PIP_ADDITIONAL_REQUIREMENTS: ''
    user: "0:0"
    volumes:
      - ${AIRFLOW_PROJ_DIR:-.}:/sources

  airflow-cli:
    <<: *airflow-common
    profiles:
      - debug
    environment:
      <<: *airflow-common-env
      CONNECTION_CHECK_MAX_COUNT: "0"
    # Workaround for entrypoint issue. See: https://github.com/apache/airflow/issues/16252
    command:
      - bash
      - -c
      - airflow

  # You can enable flower by adding the "--profile flower" option, e.g. docker-compose --profile flower up,
  # or by explicitly targeting it on the command line, e.g. docker-compose up flower.
  # See: https://docs.docker.com/compose/profiles/
  flower:
    <<: *airflow-common
    command: celery flower
    profiles:
      - flower
    ports:
      - "5555:5555"
    healthcheck:
      test: ["CMD", "curl", "--fail", "http://localhost:5555/"]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 30s
    restart: always
    depends_on:
      <<: *airflow-common-depends-on
      airflow-init:
        condition: service_completed_successfully

volumes:
  postgres-db-volume:
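
# A typical local bring-up for this file, assuming Docker Compose v2 and that the mounted
# project paths (dags/, logs/, plugins/, my_project_secrets/, .dvc/, .git/, artifacts/,
# schema.yaml) exist next to it:
#
#   docker compose build             # build the extended image declared by "build: ."
#   docker compose up airflow-init   # run DB migrations and create the admin user
#   docker compose up -d             # start postgres, redis, webserver, scheduler, worker, triggerer
#
# The web UI should then be reachable at http://localhost:8080 (default credentials airflow/airflow).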