I am not able to connect to Elasticsearch from inside a Docker container. My Elasticsearch is accessed via Kubernetes and I have an index called 'radius_ml_posts'. I am using Elasticsearch's Python library to connect to it. When I run the whole process in my Python IDE (Spyder), it works just fine. However, when I try to run it inside a Docker container, I get connection issues. What am I missing? Below are my configs and code:
The response from localhost:9200:
{
  "name" : "elasticsearch-dev-client-6858c5f9dc-zbz8p",
  "cluster_name" : "elasticsearch",
  "cluster_uuid" : "lJJbPJpJRaC1j7k5IGhj7g",
  "version" : {
    "number" : "6.7.0",
    "build_flavor" : "oss",
    "build_type" : "docker",
    "build_hash" : "8453f77",
    "build_date" : "2019-03-21T15:32:29.844721Z",
    "build_snapshot" : false,
    "lucene_version" : "7.7.0",
    "minimum_wire_compatibility_version" : "5.6.0",
    "minimum_index_compatibility_version" : "5.0.0"
  },
  "tagline" : "You Know, for Search"
}
My Python code to connect to the Elasticsearch host:
from datetime import datetime

from elasticsearch import Elasticsearch, RequestsHttpConnection

def get_data_es(question):
    es = Elasticsearch(hosts=[{"host": "elastic", "port": 9200}], connection_class=RequestsHttpConnection,
                       max_retries=30, retry_on_timeout=True, request_timeout=30)
    # es = Elasticsearch(hosts='http://host.docker.internal:5000', connection_class=RequestsHttpConnection, max_retries=30, timeout=30)
    doc = {'author': 'gunner', 'text': 'event', "timestamp": datetime.now()}
    es.indices.refresh(index="radius_ml_posts")
    res = es.index(index="radius_ml_posts", id=1, body=doc)
    res = es.search(index="radius_ml_posts", size=30, body={
        "query": {
            "query_string": {
                "default_field": "search_text",
                "query": question
            }
        }
    })
    return res
My docker-compose.yml file:
version: '2.2'
services:
  elastic:
    image: docker.elastic.co/elasticsearch/elasticsearch-oss:7.7.0
    container_name: elastic
    environment:
      - discovery.type=single-node
      - bootstrap.memory_lock=true
      - "ES_JAVA_OPTS=-Xms512m -Xmx512m"
    ulimits:
      memlock:
        soft: -1
        hard: -1
    volumes:
      - data01:/usr/share/elasticsearch/data
    ports:
      - 9300:9300
      - 9200:9200
    networks:
      - elastic
  myimage:
    image: myimage:myversion
    ports:
      - 5000:5000
    expose:
      - 5000
    networks:
      - elastic
volumes:
  data01:
    driver: local
networks:
  elastic:
    driver: bridge
My Dockerfile:
FROM python:3.7.4
COPY . /app
WORKDIR /app
RUN pip install --upgrade pip
RUN pip3 install -U nltk
RUN python3 -m nltk.downloader all
RUN pip --default-timeout=100 install -r requirements.txt
EXPOSE 5000
ENTRYPOINT ["python"]
CMD ["main.py"]
The docker commands I am running stepwise:
docker build -t myimage:myversion .
docker-compose up
The error I am getting:
myimage_1 | Traceback (most recent call last):
myimage_1 | File "/usr/local/lib/python3.7/site-packages/flask/app.py", line 2446, in wsgi_app
myimage_1 | response = self.full_dispatch_request()
myimage_1 | File "/usr/local/lib/python3.7/site-packages/flask/app.py", line 1951, in full_dispatch_request
myimage_1 | rv = self.handle_user_exception(e)
myimage_1 | File "/usr/local/lib/python3.7/site-packages/flask/app.py", line 1820, in handle_user_exception
myimage_1 | reraise(exc_type, exc_value, tb)
myimage_1 | File "/usr/local/lib/python3.7/site-packages/flask/_compat.py", line 39, in reraise
myimage_1 | raise value
myimage_1 | File "/usr/local/lib/python3.7/site-packages/flask/app.py", line 1949, in full_dispatch_request
myimage_1 | rv = self.dispatch_request()
myimage_1 | File "/usr/local/lib/python3.7/site-packages/flask/app.py", line 1935, in dispatch_request
myimage_1 | return self.view_functions[rule.endpoint](**req.view_args)
myimage_1 | File "main.py", line 41, in launch_app
myimage_1 | ques = get_data_es(ques1)
myimage_1 | File "/app/Text_Cleaning.py", line 32, in get_data_es
myimage_1 | es.indices.refresh(index="radius_ml_posts")
myimage_1 | File "/usr/local/lib/python3.7/site-packages/elasticsearch/client/utils.py", line 92, in _wrapped
myimage_1 | return func(*args, params=params, headers=headers, **kwargs)
myimage_1 | File "/usr/local/lib/python3.7/site-packages/elasticsearch/client/indices.py", line 42, in refresh
myimage_1 | "POST", _make_path(index, "_refresh"), params=params, headers=headers
myimage_1 | File "/usr/local/lib/python3.7/site-packages/elasticsearch/transport.py", line 362, in perform_request
myimage_1 | timeout=timeout,
myimage_1 | File "/usr/local/lib/python3.7/site-packages/elasticsearch/connection/http_requests.py", line 157, in perform_request
myimage_1 | raise ConnectionError("N/A", str(e), e)
myimage_1 | elasticsearch.exceptions.ConnectionError: ConnectionError(HTTPConnectionPool(host='elastic', port=9200): Max retries exceeded with url: /radius_ml_posts/_refresh (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7f967a9b1710>: Failed to establish a new connection: [Errno -2] Name or service not known'))) caused by: ConnectionError(HTTPConnectionPool(host='elastic', port=9200): Max retries exceeded with url: /radius_ml_posts/_refresh (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7f967a9b1710>: Failed to establish a new connection: [Errno -2] Name or service not known')))
Please help in fixing the issue.
Thanks in advance.
I fixed it by using the host as:
host: "host.docker.internal"
Code change:
es = Elasticsearch(hosts=[{"host": "host.docker.internal", "port": 9200}], connection_class=RequestsHttpConnection,
                   max_retries=30, retry_on_timeout=True, request_timeout=30)
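A note on this fix: host.docker.internal is a Docker Desktop DNS name that resolves to the host machine, so it only works because port 9200 is published to the host. A related pitfall is the application container starting before Elasticsearch is reachable at all; the sketch below is not from the original post (wait_for_es is a hypothetical helper reusing the client settings above) and simply retries with es.ping() instead of failing on the first request:

import time
from elasticsearch import Elasticsearch, RequestsHttpConnection

def wait_for_es(host="host.docker.internal", port=9200, attempts=30, delay=2):
    # ping() returns False instead of raising when the node is unreachable
    es = Elasticsearch(hosts=[{"host": host, "port": port}],
                       connection_class=RequestsHttpConnection)
    for _ in range(attempts):
        if es.ping():
            return es
        time.sleep(delay)
    raise RuntimeError("Elasticsearch did not become reachable")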
You can try setting an ELASTICSEARCH_NODES variable in your application's environment section, as shown below, and then reading that variable in your Python code to get the Elasticsearch URL:
version: '2.2'
services:
  elastic:
    image: docker.elastic.co/elasticsearch/elasticsearch-oss:7.7.0
    container_name: elastic
    environment:
      - discovery.type=single-node
      - bootstrap.memory_lock=true
      - "ES_JAVA_OPTS=-Xms512m -Xmx512m"
    ulimits:
      memlock:
        soft: -1
        hard: -1
    volumes:
      - data01:/usr/share/elasticsearch/data
    ports:
      - 9300:9300
      - 9200:9200
    networks:
      - elastic
  myimage:
    image: myimage:myversion
    depends_on:
      - elastic
    environment:
      - ELASTICSEARCH_NODES=http://elastic:9200
    ports:
      - 5000:5000
    expose:
      - 5000
    networks:
      - elastic
volumes:
  data01:
    driver: local
networks:
  elastic:
    driver: bridge
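On the Python side, consuming that variable could look like the minimal sketch below (the fallback URL is only a convenience for local runs; adjust to your setup):

import os
from elasticsearch import Elasticsearch, RequestsHttpConnection

# ELASTICSEARCH_NODES is set in the compose file above, e.g. http://elastic:9200
es_url = os.environ.get("ELASTICSEARCH_NODES", "http://elastic:9200")
es = Elasticsearch(hosts=[es_url],
                   connection_class=RequestsHttpConnection,
                   max_retries=30, retry_on_timeout=True)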
Related
I've been trying to connect Flask with MongoDB over Docker but I constantly get a timeout error. My code and the error are below. Please let me know where I've gone wrong. Thanks.
Also, I've intentionally chosen port 27018 instead of 27017.
app.py code:
from pymongo import MongoClient

client = MongoClient(host="test_mongodb",
                     port=27018,
                     username="root",
                     password="rootpassword",
                     authSource="admin"
                     )

# db is same as directory created to identify database
# default port is 27017
db = client.aNewDB
# db is a new database

UserNum = db["UserNum"]
# UserNum is a new Collection

UserNum.insert_one({'num_of_users': 0})
docker-compose.yml
version: '3'
services:
  web:
    build: ./Web
    ports:
      - "5000:5000"
    links:
      - db # Web is dependent on db
  db:
    image: mongo:latest
    hostname: test_mongodb
    environment:
      - MONGO_INITDB_ROOT_USERNAME=admin
      - MONGO_INITDB_ROOT_PASSWORD=password
    ports:
      - 27018:27018
Error during docker-compose up:
web_1 | Traceback (most recent call last):
web_1 | File "app.py", line 21, in <module>
web_1 | UserNum.insert_one({'num_of_users':0})
web_1 | File "/usr/local/lib/python3.7/site-packages/pymongo/collection.py", line 628, in insert_one
web_1 | comment=comment,
web_1 | File "/usr/local/lib/python3.7/site-packages/pymongo/collection.py", line 562, in _insert_one
web_1 | self.__database.client._retryable_write(acknowledged, _insert_command, session)
web_1 | File "/usr/local/lib/python3.7/site-packages/pymongo/mongo_client.py", line 1447, in _retryable_write
web_1 | with self._tmp_session(session) as s:
web_1 | File "/usr/local/lib/python3.7/contextlib.py", line 112, in __enter__
web_1 | return next(self.gen)
web_1 | File "/usr/local/lib/python3.7/site-packages/pymongo/mongo_client.py", line 1729, in _tmp_session
web_1 | s = self._ensure_session(session)
web_1 | File "/usr/local/lib/python3.7/site-packages/pymongo/mongo_client.py", line 1712, in _ensure_session
web_1 | return self.__start_session(True, causal_consistency=False)
web_1 | File "/usr/local/lib/python3.7/site-packages/pymongo/mongo_client.py", line 1657, in __start_session
web_1 | self._topology._check_implicit_session_support()
web_1 | File "/usr/local/lib/python3.7/site-packages/pymongo/topology.py", line 538, in _check_implicit_session_support
web_1 | self._check_session_support()
web_1 | File "/usr/local/lib/python3.7/site-packages/pymongo/topology.py", line 555, in _check_session_support
web_1 | readable_server_selector, self.get_server_selection_timeout(), None
web_1 | File "/usr/local/lib/python3.7/site-packages/pymongo/topology.py", line 240, in _select_servers_loop
web_1 | % (self._error_message(selector), timeout, self.description)
web_1 | pymongo.errors.ServerSelectionTimeoutError: test_mongodb:27018: timed out, Timeout: 30s, Topology Description: <TopologyDescription id: 62fa0685c58c2b61f79ea52e, topology_type: Unknown, servers: [<ServerDescription ('test_mongodb', 27018) server_type: Unknown, rtt: None, error=NetworkTimeout('test_mongodb:27018: timed out')>]>
flask_project_web_1 exited with code 1
In a docker-compose file, ports only publishes the specified container ports to the host network; it does not tell the MongoDB container to serve on port 27018. MongoDB will still listen on 27017 even with that port mapping, so you have to tell the container which port to use via the command option.
Add the line command: mongod --port 27018 to the db service, and it should work.
like:
db:
  image: mongo:latest
  hostname: test_mongodb
  command: mongod --port 27018
  environment:
    - MONGO_INITDB_ROOT_USERNAME=admin
    - MONGO_INITDB_ROOT_PASSWORD=password
  ports:
    - 27018:27018
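To confirm the fix from the web container, a quick ping against the service name works. This is just a sketch reusing the connection details from the question; serverSelectionTimeoutMS only keeps a failure fast instead of waiting the default 30 seconds:

from pymongo import MongoClient

client = MongoClient(host="test_mongodb", port=27018,
                     username="root", password="rootpassword",
                     authSource="admin",
                     serverSelectionTimeoutMS=5000)
client.admin.command("ping")  # raises if MongoDB is not reachable on 27018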
I'm trying to connect Python with MySQL; they run in separate Docker containers. I can access MySQL from my Ubuntu terminal, but when I try to access it with the URL I use in Python, it doesn't work.
Docker-compose
version: "3.9" # optional since v1.27.0
services:
mysql:
image: 'mysql:latest'
restart: always
volumes:
- './my-vol/mysql_data:/var/lib/mysql'
ports:
- '3306:3306'
web:
build: .
ports:
- "5000:5000"
volumes:
- .:/my-vol
Python file
from flask import Flask

app = Flask(__name__)

import sqlalchemy as db
import mysql.connector
from mysql.connector import Error

@app.route('/db')
def python():
    connection = mysql.connector.connect(host="mysql", user="root", password="root", database="test")
    cursor = connection.cursor()
    with connection.cursor() as cursor:
        cursor.execute("Select * from test_table")
        for (userId, firstName, lastName) in cursor:
            return print("{}, {}, {}".format(userId, firstName, lastName))
Finally, this is the complete error that appears when I try to access the /db URL.
[2021-09-13 08:34:19,119] ERROR in app: Exception on /db [GET]
web_1 | Traceback (most recent call last):
web_1 | File "/usr/local/lib/python3.8/site-packages/flask/app.py", line 2070, in wsgi_app
web_1 | response = self.full_dispatch_request()
web_1 | File "/usr/local/lib/python3.8/site-packages/flask/app.py", line 1516, in full_dispatch_request
web_1 | return self.finalize_request(rv)
web_1 | File "/usr/local/lib/python3.8/site-packages/flask/app.py", line 1535, in finalize_request
web_1 | response = self.make_response(rv)
web_1 | File "/usr/local/lib/python3.8/site-packages/flask/app.py", line 1698, in make_response
web_1 | raise TypeError(
web_1 | TypeError: The view function for 'python' did not return a valid response. The function either returned None or ended without a return statement.
web_1 | 172.24.0.1 - - [13/Sep/2021 08:34:19] "GET /db HTTP/1.1" 500 -
Your view should return a response object. Instead, your view returns the result of the print function, which is always None.
Also, your for loop will exit on the first iteration because of the return statement. It looks like that is not what you wanted to achieve.
Your code successfully connects to MySQL; otherwise you'd get a different exception.
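For illustration, a corrected view might look like the sketch below. It keeps the names from the question, collects the rows first instead of returning inside the loop, and uses jsonify so Flask gets a valid response:

from flask import Flask, jsonify
import mysql.connector

app = Flask(__name__)

@app.route('/db')
def python():
    connection = mysql.connector.connect(host="mysql", user="root",
                                         password="root", database="test")
    try:
        with connection.cursor() as cursor:
            cursor.execute("SELECT * FROM test_table")
            rows = [{"userId": userId, "firstName": firstName, "lastName": lastName}
                    for (userId, firstName, lastName) in cursor]
        return jsonify(rows)  # a valid Flask response
    finally:
        connection.close()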
I'm trying to run a DAG that calls a Docker container and executes a command inside it, but Airflow cannot execute the task. The error is below:
*** Reading local file: /opt/airflow/logs/docker_operator_dag/docker_command_hello/2021-05-26T02:40:13.171571+00:00/2.log
[2021-05-26 02:45:26,001] {taskinstance.py:877} INFO - Dependencies all met for <TaskInstance: docker_operator_dag.docker_command_hello 2021-05-26T02:40:13.171571+00:00 [queued]>
[2021-05-26 02:45:26,030] {taskinstance.py:877} INFO - Dependencies all met for <TaskInstance: docker_operator_dag.docker_command_hello 2021-05-26T02:40:13.171571+00:00 [queued]>
[2021-05-26 02:45:26,031] {taskinstance.py:1068} INFO -
--------------------------------------------------------------------------------
[2021-05-26 02:45:26,031] {taskinstance.py:1069} INFO - Starting attempt 2 of 2
[2021-05-26 02:45:26,032] {taskinstance.py:1070} INFO -
--------------------------------------------------------------------------------
[2021-05-26 02:45:26,060] {taskinstance.py:1089} INFO - Executing <Task(DockerOperator): docker_command_hello> on 2021-05-26T02:40:13.171571+00:00
[2021-05-26 02:45:26,080] {standard_task_runner.py:52} INFO - Started process 67 to run task
[2021-05-26 02:45:26,083] {standard_task_runner.py:76} INFO - Running: ['airflow', 'tasks', 'run', 'docker_operator_dag', 'docker_command_hello', '2021-05-26T02:40:13.171571+00:00', '--job-id', '11', '--pool', 'default_pool', '--raw', '--subdir', 'DAGS_FOLDER/docker_job/docker-job.py', '--cfg-path', '/tmp/tmp3vl5dv7x', '--error-file', '/tmp/tmptazwx_tc']
[2021-05-26 02:45:26,084] {standard_task_runner.py:77} INFO - Job 11: Subtask docker_command_hello
[2021-05-26 02:45:26,181] {logging_mixin.py:104} INFO - Running <TaskInstance: docker_operator_dag.docker_command_hello 2021-05-26T02:40:13.171571+00:00 [running]> on host f1e5cfe4a07f
[2021-05-26 02:45:26,219] {taskinstance.py:1283} INFO - Exporting the following env vars:
AIRFLOW_CTX_DAG_OWNER=airflow
AIRFLOW_CTX_DAG_ID=docker_operator_dag
AIRFLOW_CTX_TASK_ID=docker_command_hello
AIRFLOW_CTX_EXECUTION_DATE=2021-05-26T02:40:13.171571+00:00
AIRFLOW_CTX_DAG_RUN_ID=manual__2021-05-26T02:40:13.171571+00:00
[2021-05-26 02:45:26,227] {taskinstance.py:1482} ERROR - Task failed with exception
Traceback (most recent call last):
File "/home/airflow/.local/lib/python3.6/site-packages/urllib3/connectionpool.py", line 677, in urlopen
chunked=chunked,
File "/home/airflow/.local/lib/python3.6/site-packages/urllib3/connectionpool.py", line 392, in _make_request
conn.request(method, url, **httplib_request_kw)
File "/usr/local/lib/python3.6/http/client.py", line 1287, in request
self._send_request(method, url, body, headers, encode_chunked)
File "/usr/local/lib/python3.6/http/client.py", line 1333, in _send_request
self.endheaders(body, encode_chunked=encode_chunked)
File "/usr/local/lib/python3.6/http/client.py", line 1282, in endheaders
self._send_output(message_body, encode_chunked=encode_chunked)
File "/usr/local/lib/python3.6/http/client.py", line 1042, in _send_output
self.send(msg)
File "/usr/local/lib/python3.6/http/client.py", line 980, in send
self.connect()
File "/home/airflow/.local/lib/python3.6/site-packages/docker/transport/unixconn.py", line 43, in connect
sock.connect(self.unix_socket)
ConnectionRefusedError: [Errno 111] Connection refused
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/airflow/.local/lib/python3.6/site-packages/requests/adapters.py", line 449, in send
timeout=timeout
File "/home/airflow/.local/lib/python3.6/site-packages/urllib3/connectionpool.py", line 727, in urlopen
method, url, error=e, _pool=self, _stacktrace=sys.exc_info()[2]
File "/home/airflow/.local/lib/python3.6/site-packages/urllib3/util/retry.py", line 410, in increment
raise six.reraise(type(error), error, _stacktrace)
File "/home/airflow/.local/lib/python3.6/site-packages/urllib3/packages/six.py", line 734, in reraise
raise value.with_traceback(tb)
File "/home/airflow/.local/lib/python3.6/site-packages/urllib3/connectionpool.py", line 677, in urlopen
chunked=chunked,
File "/home/airflow/.local/lib/python3.6/site-packages/urllib3/connectionpool.py", line 392, in _make_request
conn.request(method, url, **httplib_request_kw)
File "/usr/local/lib/python3.6/http/client.py", line 1287, in request
self._send_request(method, url, body, headers, encode_chunked)
File "/usr/local/lib/python3.6/http/client.py", line 1333, in _send_request
self.endheaders(body, encode_chunked=encode_chunked)
File "/usr/local/lib/python3.6/http/client.py", line 1282, in endheaders
self._send_output(message_body, encode_chunked=encode_chunked)
File "/usr/local/lib/python3.6/http/client.py", line 1042, in _send_output
self.send(msg)
File "/usr/local/lib/python3.6/http/client.py", line 980, in send
self.connect()
File "/home/airflow/.local/lib/python3.6/site-packages/docker/transport/unixconn.py", line 43, in connect
sock.connect(self.unix_socket)
urllib3.exceptions.ProtocolError: ('Connection aborted.', ConnectionRefusedError(111, 'Connection refused'))
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/airflow/.local/lib/python3.6/site-packages/docker/api/client.py", line 207, in _retrieve_server_version
return self.version(api_version=False)["ApiVersion"]
File "/home/airflow/.local/lib/python3.6/site-packages/docker/api/daemon.py", line 181, in version
return self._result(self._get(url), json=True)
File "/home/airflow/.local/lib/python3.6/site-packages/docker/utils/decorators.py", line 46, in inner
return f(self, *args, **kwargs)
File "/home/airflow/.local/lib/python3.6/site-packages/docker/api/client.py", line 230, in _get
return self.get(url, **self._set_request_timeout(kwargs))
File "/home/airflow/.local/lib/python3.6/site-packages/requests/sessions.py", line 555, in get
return self.request('GET', url, **kwargs)
File "/home/airflow/.local/lib/python3.6/site-packages/requests/sessions.py", line 542, in request
resp = self.send(prep, **send_kwargs)
File "/home/airflow/.local/lib/python3.6/site-packages/requests/sessions.py", line 655, in send
r = adapter.send(request, **kwargs)
File "/home/airflow/.local/lib/python3.6/site-packages/requests/adapters.py", line 498, in send
raise ConnectionError(err, request=request)
requests.exceptions.ConnectionError: ('Connection aborted.', ConnectionRefusedError(111, 'Connection refused'))
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/airflow/.local/lib/python3.6/site-packages/airflow/models/taskinstance.py", line 1138, in _run_raw_task
self._prepare_and_execute_task_with_callbacks(context, task)
File "/home/airflow/.local/lib/python3.6/site-packages/airflow/models/taskinstance.py", line 1311, in _prepare_and_execute_task_with_callbacks
result = self._execute_task(context, task_copy)
File "/home/airflow/.local/lib/python3.6/site-packages/airflow/models/taskinstance.py", line 1341, in _execute_task
result = task_copy.execute(context=context)
File "/home/airflow/.local/lib/python3.6/site-packages/airflow/providers/docker/operators/docker.py", line 287, in execute
self.cli = self._get_cli()
File "/home/airflow/.local/lib/python3.6/site-packages/airflow/providers/docker/operators/docker.py", line 319, in _get_cli
return APIClient(base_url=self.docker_url, version=self.api_version, tls=tls_config)
File "/home/airflow/.local/lib/python3.6/site-packages/docker/api/client.py", line 190, in __init__
self._version = self._retrieve_server_version()
File "/home/airflow/.local/lib/python3.6/site-packages/docker/api/client.py", line 215, in _retrieve_server_version
'Error while fetching server API version: {0}'.format(e)
docker.errors.DockerException: Error while fetching server API version: ('Connection aborted.', ConnectionRefusedError(111, 'Connection refused'))
[2021-05-26 02:45:26,230] {taskinstance.py:1532} INFO - Marking task as FAILED. dag_id=docker_operator_dag, task_id=docker_command_hello, execution_date=20210526T024013, start_date=20210526T024526, end_date=20210526T024526
[2021-05-26 02:45:26,261] {local_task_job.py:146} INFO - Task exited with return code 1
I'm using the Airflow docker-compose found here https://airflow.apache.org/docs/apache-airflow/stable/start/docker.html and trying to run the following DAG:
from datetime import datetime, timedelta
from airflow import DAG
from airflow.operators.bash_operator import BashOperator
from airflow.operators.docker_operator import DockerOperator
from airflow.operators.python_operator import BranchPythonOperator
from airflow.operators.dummy_operator import DummyOperator

default_args = {
    'owner': 'airflow',
    'description': 'Use of the DockerOperator',
    'depend_on_past': False,
    'start_date': datetime(2021, 5, 1),
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5)
}

with DAG('docker_operator_dag', default_args=default_args, schedule_interval="5 * * * *", catchup=False) as dag:
    start_dag = DummyOperator(
        task_id='start_dag'
    )

    end_dag = DummyOperator(
        task_id='end_dag'
    )

    t1 = BashOperator(
        task_id='print_current_date',
        bash_command='date'
    )

    t2 = DockerOperator(
        task_id='docker_command_sleep',
        image='docker_image_task',
        container_name='task___command_sleep',
        api_version='auto',
        auto_remove=True,
        command="/bin/sleep 30",
        docker_url="unix://var/run/docker.sock",
        network_mode="bridge"
    )

    t3 = DockerOperator(
        task_id='docker_command_hello',
        image='docker_image_task',
        container_name='task___command_hello',
        api_version='auto',
        auto_remove=True,
        command="/bin/sleep 40",
        docker_url="unix://var/run/docker.sock",
        network_mode="bridge"
    )

    t4 = BashOperator(
        task_id='print_hello',
        bash_command='echo "hello world"'
    )

    start_dag >> t1
    t1 >> t2 >> t4
    t1 >> t3 >> t4
    t4 >> end_dag
Also, I'm using Windows 10. I've tried adding the following under volumes: - "/var/run/docker.sock:/var/run/docker.sock". This succeeded on Ubuntu, but on Windows it doesn't.
As requested, here is the docker-compose file:
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Basic Airflow cluster configuration for CeleryExecutor with Redis and PostgreSQL.
#
# WARNING: This configuration is for local development. Do not use it in a production deployment.
#
# This configuration supports basic configuration using environment variables or an .env file
# The following variables are supported:
#
# AIRFLOW_IMAGE_NAME - Docker image name used to run Airflow.
# Default: apache/airflow:master-python3.8
# AIRFLOW_UID - User ID in Airflow containers
# Default: 50000
# AIRFLOW_GID - Group ID in Airflow containers
# Default: 50000
# _AIRFLOW_WWW_USER_USERNAME - Username for the administrator account.
# Default: airflow
# _AIRFLOW_WWW_USER_PASSWORD - Password for the administrator account.
# Default: airflow
#
# Feel free to modify this file to suit your needs.
---
version: '3'
x-airflow-common:
  &airflow-common
  image: ${AIRFLOW_IMAGE_NAME:-apache/airflow:2.0.2}
  environment:
    &airflow-common-env
    AIRFLOW__CORE__EXECUTOR: CeleryExecutor
    AIRFLOW__CORE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:airflow@postgres/airflow
    AIRFLOW__CELERY__RESULT_BACKEND: db+postgresql://airflow:airflow@postgres/airflow
    AIRFLOW__CELERY__BROKER_URL: redis://:@redis:6379/0
    AIRFLOW__CORE__FERNET_KEY: ''
    AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION: 'true'
    AIRFLOW__CORE__LOAD_EXAMPLES: 'false'
    AIRFLOW__SCHEDULER__DAG_DIR_LIST_INTERVAL: 5 # Just to have a fast load in the front-end. Do not use it in production with those configurations.
    AIRFLOW__API__AUTH_BACKEND: 'airflow.api.auth.backend.basic_auth'
    AIRFLOW__CORE__ENABLE_XCOM_PICKLING: 'true' # "_run_image of the DockerOperator returns now a python string, not a byte string" Ref: https://github.com/apache/airflow/issues/13487
  volumes:
    - ./dags:/opt/airflow/dags
    - ./logs:/opt/airflow/logs
    - ./plugins:/opt/airflow/plugins
    - "/var/run/docker.sock:/var/run/docker.sock" # We will pass the Docker Deamon as a volume to allow the webserver containers start docker images. Ref: https://stackoverflow.com/q/51342810/7024760
  user: "${AIRFLOW_UID:-50000}:${AIRFLOW_GID:-50000}"
  depends_on:
    redis:
      condition: service_healthy
    postgres:
      condition: service_healthy

services:
  postgres:
    image: postgres:13
    environment:
      POSTGRES_USER: airflow
      POSTGRES_PASSWORD: airflow
      POSTGRES_DB: airflow
    volumes:
      - postgres-db-volume:/var/lib/postgresql/data
    healthcheck:
      test: ["CMD", "pg_isready", "-U", "airflow"]
      interval: 5s
      retries: 5
    restart: always

  redis:
    image: redis:latest
    ports:
      - 6379:6379
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 5s
      timeout: 30s
      retries: 50
    restart: always

  airflow-webserver:
    <<: *airflow-common
    command: webserver
    ports:
      - 8080:8080
    healthcheck:
      test: ["CMD", "curl", "--fail", "http://localhost:8080/health"]
      interval: 10s
      timeout: 10s
      retries: 5
    restart: always

  airflow-scheduler:
    <<: *airflow-common
    command: scheduler
    restart: always

  airflow-worker:
    <<: *airflow-common
    command: celery worker
    restart: always

  airflow-init:
    <<: *airflow-common
    command: version
    environment:
      <<: *airflow-common-env
      _AIRFLOW_DB_UPGRADE: 'true'
      _AIRFLOW_WWW_USER_CREATE: 'true'
      _AIRFLOW_WWW_USER_USERNAME: ${_AIRFLOW_WWW_USER_USERNAME:-airflow}
      _AIRFLOW_WWW_USER_PASSWORD: ${_AIRFLOW_WWW_USER_PASSWORD:-airflow}

  flower:
    <<: *airflow-common
    command: celery flower
    ports:
      - 5555:5555
    healthcheck:
      test: ["CMD", "curl", "--fail", "http://localhost:5555/"]
      interval: 10s
      timeout: 10s
      retries: 5
    restart: always

volumes:
  postgres-db-volume:
Here is my docker-compose.yml used to create the database container.
version: '3.7'
services:
  application:
    build:
      context: ./app
      dockerfile: dockerfile # dockerfile-prod
    depends_on:
      - database_mongo
      - database_neo4j
      - etl_pipeline
    environment:
      - flask_env=dev # flask_env=prod
    volumes:
      - ./app:/app
    ports:
      - "8080:8080"
  database_mongo:
    image: "mongo:4.2"
    expose:
      - 27017
    volumes:
      - ./data/database/mongo:/data/db
  database_neo4j:
    image: neo4j:latest
    expose:
      - 27018
    volumes:
      - ./data/database/neo4j:/data
    ports:
      - "7474:7474" # web client
      - "7687:7687" # DB default port
    environment:
      - NEO4J_AUTH=none
  etl_pipeline:
    depends_on:
      - database_mongo
      - database_neo4j
    build:
      context: ./data/etl
      dockerfile: dockerfile # dockerfile-prod
    volumes:
      - ./data/:/data/
      - ./data/etl:/app/
I'm trying to connect to my Neo4j database with the Python driver. I have already been able to connect to MongoDB with this line:
mongo_client = MongoClient(host="database_mongo")
I'm trying to do something similar for Neo4j, using GraphDatabase from the neo4j driver, like this:
url = "{scheme}://{host_name}:{port}".format(scheme = "bolt", host_name="database_neo4j", port = 7687)
baseNeo4j = GraphDatabase.driver(url, encrypted=False)
or with py2neo like this
neo_client = Graph(host="database_neo4j")
However, none of this has worked yet, so I'm not sure whether I'm using the right syntax to reach Neo4j from Docker. I've tried many things and looked around, but couldn't find the answer...
The whole error message is:
etl_pipeline_1 | MongoClient(host=['database_mongo:27017'], document_class=dict, tz_aware=False, connect=True)
etl_pipeline_1 | Traceback (most recent call last):
etl_pipeline_1 | File "/usr/local/lib/python3.7/site-packages/neo4j/io/__init__.py", line 929, in _connect
etl_pipeline_1 | s.connect(resolved_address)
etl_pipeline_1 | ConnectionRefusedError: [Errno 111] Connection refused
etl_pipeline_1 |
etl_pipeline_1 | During handling of the above exception, another exception occurred:
etl_pipeline_1 |
etl_pipeline_1 | Traceback (most recent call last):
etl_pipeline_1 | File "main.py", line 26, in <module>
etl_pipeline_1 | baseNeo4j = GraphDatabase.driver(url, encrypted=False)
etl_pipeline_1 | File "/usr/local/lib/python3.7/site-packages/neo4j/__init__.py", line 183, in driver
etl_pipeline_1 | return cls.bolt_driver(parsed.netloc, auth=auth, **config)
etl_pipeline_1 | File "/usr/local/lib/python3.7/site-packages/neo4j/__init__.py", line 196, in bolt_driver
etl_pipeline_1 | return BoltDriver.open(target, auth=auth, **config)
etl_pipeline_1 | File "/usr/local/lib/python3.7/site-packages/neo4j/__init__.py", line 359, in open
etl_pipeline_1 | pool = BoltPool.open(address, auth=auth, pool_config=pool_config, workspace_config=default_workspace_config)
etl_pipeline_1 | File "/usr/local/lib/python3.7/site-packages/neo4j/io/__init__.py", line 531, in open
etl_pipeline_1 | seeds = [pool.acquire() for _ in range(pool_config.init_size)]
etl_pipeline_1 | File "/usr/local/lib/python3.7/site-packages/neo4j/io/__init__.py", line 531, in <listcomp>
etl_pipeline_1 | seeds = [pool.acquire() for _ in range(pool_config.init_size)]
etl_pipeline_1 | File "/usr/local/lib/python3.7/site-packages/neo4j/io/__init__.py", line 545, in acquire
etl_pipeline_1 | return self._acquire(self.address, timeout)
etl_pipeline_1 | File "/usr/local/lib/python3.7/site-packages/neo4j/io/__init__.py", line 409, in _acquire
etl_pipeline_1 | connection = self.opener(address, timeout)
etl_pipeline_1 | File "/usr/local/lib/python3.7/site-packages/neo4j/io/__init__.py", line 528, in opener
etl_pipeline_1 | return Bolt.open(addr, auth=auth, timeout=timeout, routing_context=routing_context, **pool_config)
etl_pipeline_1 | File "/usr/local/lib/python3.7/site-packages/neo4j/io/__init__.py", line 198, in open
etl_pipeline_1 | keep_alive=pool_config.keep_alive,
etl_pipeline_1 | File "/usr/local/lib/python3.7/site-packages/neo4j/io/__init__.py", line 1049, in connect
etl_pipeline_1 | raise last_error
etl_pipeline_1 | File "/usr/local/lib/python3.7/site-packages/neo4j/io/__init__.py", line 1039, in connect
etl_pipeline_1 | s = _connect(resolved_address, timeout, keep_alive)
etl_pipeline_1 | File "/usr/local/lib/python3.7/site-packages/neo4j/io/__init__.py", line 943, in _connect
etl_pipeline_1 | raise ServiceUnavailable("Failed to establish connection to {!r} (reason {})".format(resolved_address, error))
etl_pipeline_1 | neo4j.exceptions.ServiceUnavailable: Failed to establish connection to IPv4Address(('172.29.0.2', 7687)) (reason [Errno 111] Connection refused)
OK, so it may not be the best answer, but for anyone else who has this problem: I was able to solve it by adding a sleep(30) at the beginning of main.
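The fixed sleep works, but a bounded retry is a bit more robust. Here is a minimal sketch under the same setup (wait_for_neo4j is a hypothetical helper, using the bolt URL built in the question):

import time
from neo4j import GraphDatabase
from neo4j.exceptions import ServiceUnavailable

def wait_for_neo4j(url="bolt://database_neo4j:7687", attempts=30, delay=2):
    for _ in range(attempts):
        try:
            driver = GraphDatabase.driver(url, encrypted=False)
            with driver.session() as session:
                session.run("RETURN 1")  # cheap connectivity check
            return driver
        except ServiceUnavailable:
            time.sleep(delay)
    raise RuntimeError("Neo4j did not become reachable")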
You can try to create a network and use it across your services. Something like this:
networks:
  neo4j_network:
    driver: bridge

services:
  neo4j:
    image: neo4j:latest
    expose:
      - 27018
    volumes:
      - ./data/database/neo4j:/data
    ports:
      - "7474:7474" # web client
      - "7687:7687" # DB default port
    environment:
      - NEO4J_AUTH=none
    networks:
      - neo4j_network
  application:
    build:
      context: ./app
      dockerfile: dockerfile # dockerfile-prod
    depends_on:
      - database_mongo
      - database_neo4j
      - etl_pipeline
    environment:
      - flask_env=dev # flask_env=prod
    volumes:
      - ./app:/app
    ports:
      - "8080:8080"
    networks:
      - neo4j_network
Then, for your Neo4j driver URL (in your code), make sure to use bolt://host.docker.internal:7687.
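For example, a sketch of the driver setup with that URL (based on the question's own code, just with the host swapped):

from neo4j import GraphDatabase

url = "bolt://host.docker.internal:7687"
baseNeo4j = GraphDatabase.driver(url, encrypted=False)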
I'm currently putting my Django project into production with Docker and Celery.
For this I followed a blog post online.
Now I've stumbled over an error which I can't seem to fix.
When running the containers with SQLite everything works just fine,
but with PostgreSQL I get this error:
celery-beat_1 | [2020-03-20 14:32:00,052: INFO/MainProcess] Scheduler: Sending due task check_routers_online (Router.tasks.check_routers_online)
celery_1 | [2020-03-20 14:32:00,066: INFO/MainProcess] Received task: Router.tasks.check_routers_online[d4cece14-1d20-43ae-8712-1ad48ce79208]
celery_1 | [2020-03-20 14:32:00,085: ERROR/ForkPoolWorker-1] Task Router.tasks.check_routers_online[d4cece14-1d20-43ae-8712-1ad48ce79208] raised unexpected: OperationalError('no such table: Router_router')
celery_1 | Traceback (most recent call last):
celery_1 | File "/usr/local/lib/python3.7/site-packages/django/db/backends/utils.py", line 86, in _execute
celery_1 | return self.cursor.execute(sql, params)
celery_1 | File "/usr/local/lib/python3.7/site-packages/django/db/backends/sqlite3/base.py", line 396, in execute
celery_1 | return Database.Cursor.execute(self, query, params)
celery_1 | sqlite3.OperationalError: no such table: Router_router
celery_1 |
celery_1 | The above exception was the direct cause of the following exception:
celery_1 |
celery_1 | Traceback (most recent call last):
celery_1 | File "/usr/local/lib/python3.7/site-packages/celery/app/trace.py", line 385, in trace_task
celery_1 | R = retval = fun(*args, **kwargs)
celery_1 | File "/usr/local/lib/python3.7/site-packages/celery/app/trace.py", line 650, in __protected_call__
celery_1 | return self.run(*args, **kwargs)
celery_1 | File "/home/app/web/Router/tasks.py", line 14, in check_routers_online
celery_1 | for router in routers:
celery_1 | File "/usr/local/lib/python3.7/site-packages/django/db/models/query.py", line 276, in __iter__
celery_1 | self._fetch_all()
celery_1 | File "/usr/local/lib/python3.7/site-packages/django/db/models/query.py", line 1261, in _fetch_all
celery_1 | self._result_cache = list(self._iterable_class(self))
celery_1 | File "/usr/local/lib/python3.7/site-packages/django/db/models/query.py", line 57, in __iter__
celery_1 | results = compiler.execute_sql(chunked_fetch=self.chunked_fetch, chunk_size=self.chunk_size)
celery_1 | File "/usr/local/lib/python3.7/site-packages/django/db/models/sql/compiler.py", line 1151, in execute_sql
celery_1 | cursor.execute(sql, params)
celery_1 | File "/usr/local/lib/python3.7/site-packages/django/db/backends/utils.py", line 100, in execute
celery_1 | return super().execute(sql, params)
celery_1 | File "/usr/local/lib/python3.7/site-packages/django/db/backends/utils.py", line 68, in execute
celery_1 | return self._execute_with_wrappers(sql, params, many=False, executor=self._execute)
celery_1 | File "/usr/local/lib/python3.7/site-packages/django/db/backends/utils.py", line 77, in _execute_with_wrappers
celery_1 | return executor(sql, params, many, context)
celery_1 | File "/usr/local/lib/python3.7/site-packages/django/db/backends/utils.py", line 86, in _execute
celery_1 | return self.cursor.execute(sql, params)
celery_1 | File "/usr/local/lib/python3.7/site-packages/django/db/utils.py", line 90, in __exit__
celery_1 | raise dj_exc_value.with_traceback(traceback) from exc_value
celery_1 | File "/usr/local/lib/python3.7/site-packages/django/db/backends/utils.py", line 86, in _execute
celery_1 | return self.cursor.execute(sql, params)
celery_1 | File "/usr/local/lib/python3.7/site-packages/django/db/backends/sqlite3/base.py", line 396, in execute
celery_1 | return Database.Cursor.execute(self, query, params)
celery_1 | django.db.utils.OperationalError: no such table: Router_router
This is my docker-compose file:
version: '3.3'
services:
  db:
    image: postgres:12.0-alpine
    volumes:
      - postgres_data:/var/lib/postgresql/data/
    environment:
      - POSTGRES_USER=***
      - POSTGRES_PASSWORD=*****
      - POSTGRES_DB=***
  redis:
    image: "redis:alpine"
  web:
    container_name: inventurliste_app
    build: .
    command: gunicorn Inventur.wsgi:application --bind 0.0.0.0:8000
    volumes:
      - static_volume:/home/app/web/staticfiles
    expose:
      - 8000
    env_file:
      - .env
  nginx:
    container_name: inventurliste_nginx
    build: ./nginx
    volumes:
      - static_volume:/home/app/web/staticfiles
    ports:
      - 1337:80
    depends_on:
      - web
      - db
  celery:
    build: .
    command: celery -A Inventur worker -l info
    depends_on:
      - redis
      - db
  celery-beat:
    build: .
    command: celery -A Inventur beat -l info
    depends_on:
      - redis
      - db
volumes:
  static_volume:
  postgres_data:
celery.py
import os
from celery import Celery
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'Inventur.settings')
app = Celery('Inventur')
app.config_from_object('django.conf:settings', namespace='CELERY')
app.autodiscover_tasks()
The Celery part in my settings.py:
CELERY_BROKER_URL = 'redis://redis:6379'
CELERY_RESULT_BACKEND = 'redis://redis:6379'
CELERY_ACCEPT_CONTENT = ['application/json']
CELERY_TASK_SERIALIZER = 'json'
CELERY_RESULT_SERIALIZER = 'json'
CELERY_BEAT_SCHEDULE = {
    'check_routers_online': {
        'task': 'Router.tasks.check_routers_online',
        'schedule': crontab()  # execute every minute
    },
    'refresh_all_ports': {
        'task': 'Switch.tasks.refresh_all_ports',
        'schedule': crontab(minute="0", hour="*/1")  # execute every hour
    }
}
The database part in settings.py:
DATABASES = {
    "default": {
        "ENGINE": os.environ.get("SQL_ENGINE", "django.db.backends.sqlite3"),
        "NAME": os.environ.get("SQL_DATABASE", os.path.join(BASE_DIR, "db.sqlite3")),
        "USER": os.environ.get("SQL_USER", "user"),
        "PASSWORD": os.environ.get("SQL_PASSWORD", "password"),
        "HOST": os.environ.get("SQL_HOST", "localhost"),
        "PORT": os.environ.get("SQL_PORT", "5432"),
    },
}
.env
DEBUG=0
SECRET_KEY=******
DJANGO_ALLOWED_HOSTS=***
SQL_ENGINE=django.db.backends.postgresql
SQL_DATABASE=**
SQL_USER=***
SQL_PASSWORD=***
SQL_HOST=db
SQL_PORT=5432
makemigrations tells me that there are no changes, and migrate doesn't help either.
I can't let this run on SQLite, so I hope someone can help me with this.
EDIT: added the database part of settings.py and the .env file.
Oh, and Django itself works just fine; there's no database problem with Postgres.
Just Celery seems to have a problem.
Well, it seems like I fixed it.
I had to add the .env file to the celery service too.
Updated docker-compose:
version: '3.3'
services:
  db:
    image: postgres:12.0-alpine
    volumes:
      - postgres_data:/var/lib/postgresql/data/
    environment:
      - POSTGRES_USER=***
      - POSTGRES_PASSWORD=*****
      - POSTGRES_DB=***
  redis:
    image: "redis:alpine"
  web:
    container_name: inventurliste_app
    build: .
    command: gunicorn Inventur.wsgi:application --bind 0.0.0.0:8000
    volumes:
      - static_volume:/home/app/web/staticfiles
    expose:
      - 8000
    env_file:
      - .env
  nginx:
    container_name: inventurliste_nginx
    build: ./nginx
    volumes:
      - static_volume:/home/app/web/staticfiles
    ports:
      - 1337:80
    depends_on:
      - web
      - db
  celery:
    build: .
    command: celery -A Inventur worker -l info
    depends_on:
      - redis
      - db
    env_file: # here i added the .env
      - .env
  celery-beat:
    build: .
    command: celery -A Inventur beat -l info
    depends_on:
      - redis
      - db
    env_file: # and here i had to add it too
      - .env
volumes:
  static_volume:
  postgres_data:
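This fix lines up with the DATABASES block shown earlier: without the .env file, SQL_ENGINE and the other SQL_* variables are unset inside the celery containers, so os.environ.get falls back to the SQLite defaults, which is why the traceback goes through django/db/backends/sqlite3. A minimal illustration of that fallback (not project code, just the mechanism):

import os

# With the .env loaded: SQL_ENGINE=django.db.backends.postgresql -> Postgres backend.
# Without it, the default kicks in:
engine = os.environ.get("SQL_ENGINE", "django.db.backends.sqlite3")
print(engine)  # prints "django.db.backends.sqlite3" when the variable is missing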