I created a single HDFS node in VM ( hadoop.master, IP:) 192.168.12.52. The file etc/hadoop/core-site.xmlhas the following configuration for namenode:
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://master.hadoop:9000/</value>
</property>
</configuration>
I want to read a file with HDFS on my local physical desktop. For this, this is my code, which I saved in a file with the name hdfs_read.py:
from hdfs import InsecureClient
client = InsecureClient('http://192.168.12.52:9000')
with client.read('/opt/hadoop/LICENSE.txt') as reader:
features = reader.read()
print(features)
Now when I run it, I get the following timeout error:
$ python3 hdfs_read.py
Traceback (most recent call last):
File "/usr/lib/python3/dist-packages/urllib3/connection.py", line 137, in _new_conn
(self.host, self.port), self.timeout, **extra_kw)
File "/usr/lib/python3/dist-packages/urllib3/util/connection.py", line 91, in create_connection
raise err
File "/usr/lib/python3/dist-packages/urllib3/util/connection.py", line 81, in create_connection
sock.connect(sa)
OSError: [Errno 113] No route to host
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/lib/python3/dist-packages/urllib3/connectionpool.py", line 560, in urlopen
body=body, headers=headers)
File "/usr/lib/python3/dist-packages/urllib3/connectionpool.py", line 354, in _make_request
conn.request(method, url, **httplib_request_kw)
File "/usr/lib/python3.6/http/client.py", line 1239, in request
self._send_request(method, url, body, headers, encode_chunked)
File "/usr/lib/python3.6/http/client.py", line 1285, in _send_request
self.endheaders(body, encode_chunked=encode_chunked)
File "/usr/lib/python3.6/http/client.py", line 1234, in endheaders
self._send_output(message_body, encode_chunked=encode_chunked)
File "/usr/lib/python3.6/http/client.py", line 1026, in _send_output
self.send(msg)
File "/usr/lib/python3.6/http/client.py", line 964, in send
self.connect()
File "/usr/lib/python3/dist-packages/urllib3/connection.py", line 162, in connect
conn = self._new_conn()
File "/usr/lib/python3/dist-packages/urllib3/connection.py", line 146, in _new_conn
self, "Failed to establish a new connection: %s" % e)
requests.packages.urllib3.exceptions.NewConnectionError: <requests.packages.urllib3.connection.HTTPConnection object at 0x7f2d88cef2b0>: Failed to establish a new connection: [Errno 113] No route to host
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/lib/python3/dist-packages/requests/adapters.py", line 376, in send
timeout=timeout
File "/usr/lib/python3/dist-packages/urllib3/connectionpool.py", line 610, in urlopen
_stacktrace=sys.exc_info()[2])
File "/usr/lib/python3/dist-packages/urllib3/util/retry.py", line 273, in increment
raise MaxRetryError(_pool, url, error or ResponseError(cause))
requests.packages.urllib3.exceptions.MaxRetryError: HTTPConnectionPool(host='192.168.12.52', port=9000): Max retries exceeded with url: /webhdfs/v1/home/edhuser/testdata.txt?user.name=embs&offset=0&op=OPEN (Caused by NewConnectionError('<requests.packages.urllib3.connection.HTTPConnection object at 0x7f2d88cef2b0>: Failed to establish a new connection: [Errno 113] No route to host',))
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "hdfs_read_local.py", line 3, in <module>
with client.read('/home/edhuser/testdata.txt') as reader:
File "/usr/lib/python3.6/contextlib.py", line 81, in __enter__
return next(self.gen)
File "/home/embs/.local/lib/python3.6/site-packages/hdfs/client.py", line 678, in read
buffersize=buffer_size,
File "/home/embs/.local/lib/python3.6/site-packages/hdfs/client.py", line 118, in api_handler
raise err
File "/home/embs/.local/lib/python3.6/site-packages/hdfs/client.py", line 107, in api_handler
**self.kwargs
File "/home/embs/.local/lib/python3.6/site-packages/hdfs/client.py", line 207, in _request
**kwargs
File "/usr/lib/python3/dist-packages/requests/sessions.py", line 468, in request
resp = self.send(prep, **send_kwargs)
File "/usr/lib/python3/dist-packages/requests/sessions.py", line 576, in send
r = adapter.send(request, **kwargs)
File "/usr/lib/python3/dist-packages/requests/adapters.py", line 437, in send
raise ConnectionError(e, request=request)
requests.exceptions.ConnectionError: HTTPConnectionPool(host='192.168.12.52', port=9000): Max retries exceeded with url: /webhdfs/v1/home/edhuser/testdata.txt?user.name=embs&offset=0&op=OPEN (Caused by NewConnectionError('<requests.packages.urllib3.connection.HTTPConnection object at 0x7f2d88cef2b0>: Failed to establish a new connection: [Errno 113] No route to host',))
Error in sys.excepthook:
Traceback (most recent call last):
File "/usr/lib/python3/dist-packages/apport_python_hook.py", line 63, in apport_excepthook
from apport.fileutils import likely_packaged, get_recent_crashes
File "/usr/lib/python3/dist-packages/apport/__init__.py", line 5, in <module>
from apport.report import Report
File "/usr/lib/python3/dist-packages/apport/report.py", line 30, in <module>
import apport.fileutils
File "/usr/lib/python3/dist-packages/apport/fileutils.py", line 23, in <module>
from apport.packaging_impl import impl as packaging
File "/usr/lib/python3/dist-packages/apport/packaging_impl.py", line 23, in <module>
import apt
File "/usr/lib/python3/dist-packages/apt/__init__.py", line 23, in <module>
import apt_pkg
ModuleNotFoundError: No module named 'apt_pkg'
Original exception was:
Traceback (most recent call last):
File "/usr/lib/python3/dist-packages/urllib3/connection.py", line 137, in _new_conn
(self.host, self.port), self.timeout, **extra_kw)
File "/usr/lib/python3/dist-packages/urllib3/util/connection.py", line 91, in create_connection
raise err
File "/usr/lib/python3/dist-packages/urllib3/util/connection.py", line 81, in create_connection
sock.connect(sa)
OSError: [Errno 113] No route to host
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/lib/python3/dist-packages/urllib3/connectionpool.py", line 560, in urlopen
body=body, headers=headers)
File "/usr/lib/python3/dist-packages/urllib3/connectionpool.py", line 354, in _make_request
conn.request(method, url, **httplib_request_kw)
File "/usr/lib/python3.6/http/client.py", line 1239, in request
self._send_request(method, url, body, headers, encode_chunked)
File "/usr/lib/python3.6/http/client.py", line 1285, in _send_request
self.endheaders(body, encode_chunked=encode_chunked)
File "/usr/lib/python3.6/http/client.py", line 1234, in endheaders
self._send_output(message_body, encode_chunked=encode_chunked)
File "/usr/lib/python3.6/http/client.py", line 1026, in _send_output
self.send(msg)
File "/usr/lib/python3.6/http/client.py", line 964, in send
self.connect()
File "/usr/lib/python3/dist-packages/urllib3/connection.py", line 162, in connect
conn = self._new_conn()
File "/usr/lib/python3/dist-packages/urllib3/connection.py", line 146, in _new_conn
self, "Failed to establish a new connection: %s" % e)
requests.packages.urllib3.exceptions.NewConnectionError: <requests.packages.urllib3.connection.HTTPConnection object at 0x7f2d88cef2b0>: Failed to establish a new connection: [Errno 113] No route to host
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/lib/python3/dist-packages/requests/adapters.py", line 376, in send
timeout=timeout
File "/usr/lib/python3/dist-packages/urllib3/connectionpool.py", line 610, in urlopen
_stacktrace=sys.exc_info()[2])
File "/usr/lib/python3/dist-packages/urllib3/util/retry.py", line 273, in increment
raise MaxRetryError(_pool, url, error or ResponseError(cause))
requests.packages.urllib3.exceptions.MaxRetryError: HTTPConnectionPool(host='192.168.12.52', port=9000): Max retries exceeded with url: /webhdfs/v1/home/edhuser/testdata.txt?user.name=embs&offset=0&op=OPEN (Caused by NewConnectionError('<requests.packages.urllib3.connection.HTTPConnection object at 0x7f2d88cef2b0>: Failed to establish a new connection: [Errno 113] No route to host',))
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "hdfs_read.py", line 3, in <module>
with client.read('/home/edhuser/testdata.txt') as reader:
File "/usr/lib/python3.6/contextlib.py", line 81, in __enter__
return next(self.gen)
File "/home/embs/.local/lib/python3.6/site-packages/hdfs/client.py", line 678, in read
buffersize=buffer_size,
File "/home/embs/.local/lib/python3.6/site-packages/hdfs/client.py", line 118, in api_handler
raise err
File "/home/embs/.local/lib/python3.6/site-packages/hdfs/client.py", line 107, in api_handler
**self.kwargs
File "/home/embs/.local/lib/python3.6/site-packages/hdfs/client.py", line 207, in _request
**kwargs
File "/usr/lib/python3/dist-packages/requests/sessions.py", line 468, in request
resp = self.send(prep, **send_kwargs)
File "/usr/lib/python3/dist-packages/requests/sessions.py", line 576, in send
r = adapter.send(request, **kwargs)
File "/usr/lib/python3/dist-packages/requests/adapters.py", line 437, in send
raise ConnectionError(e, request=request)
requests.exceptions.ConnectionError: HTTPConnectionPool(host='192.168.12.52', port=9000): Max retries exceeded with url: /webhdfs/v1/home/edhuser/testdata.txt?user.name=embs&offset=0&op=OPEN (Caused by NewConnectionError('<requests.packages.urllib3.connection.HTTPConnection object at 0x7f2d88cef2b0>: Failed to establish a new connection: [Errno 113] No route to host',))
? ? , , , core-site.xml, , 9000 . 50070, 8020, 8048, hadoop , . client = InsecureClient('http://192.168.12.52:9000'), client = InsecureClient('hdfs://192.168.12.52:9000') , , client = InsecureClient('file:///192.168.12.52:9000'), - ? .
HDFS , :

, , , (/opt/hadoop/README.txt). , , , hadoop, /opt/hadoop:
$ ls /opt/hadoop/
bin lib read_from_hdfs.py write_to_hdfs_2.py
connect_to_hdfs.py libexec README.txt write_to_hdfs3.py
etc LICENSE.txt sbin write_to_hdfs.py
hdfs_read_write.py logs share
include NOTICE.txt test_storage
, HDFS , , , HDFS , hdfs dfs -get /test_storage/ ./ , . namenode, :
$ls /opt/volume/namenode/current/
edits_0000000000000000001-0000000000000000002
edits_0000000000000000003-0000000000000000010
edits_0000000000000000011-0000000000000000012
edits_0000000000000000013-0000000000000000015
edits_0000000000000000016-0000000000000000023
edits_0000000000000000024-0000000000000000025
edits_0000000000000000026-0000000000000000032
edits_0000000000000000033-0000000000000000033
edits_0000000000000000034-0000000000000000035
edits_0000000000000000036-0000000000000000037
edits_0000000000000000038-0000000000000000039
edits_0000000000000000040-0000000000000000041
edits_0000000000000000042-0000000000000000043
edits_0000000000000000044-0000000000000000045
edits_0000000000000000046-0000000000000000047
edits_0000000000000000048-0000000000000000049
edits_0000000000000000050-0000000000000000051
edits_0000000000000000052-0000000000000000053
edits_0000000000000000054-0000000000000000055
edits_0000000000000000056-0000000000000000057
edits_0000000000000000058-0000000000000000059
edits_0000000000000000060-0000000000000000061
edits_0000000000000000062-0000000000000000063
edits_0000000000000000064-0000000000000000065
edits_0000000000000000066-0000000000000000067
edits_0000000000000000068-0000000000000000070
edits_0000000000000000071-0000000000000000072
edits_0000000000000000073-0000000000000000074
edits_0000000000000000075-0000000000000000076
edits_0000000000000000077-0000000000000000078
edits_inprogress_0000000000000000079
fsimage_0000000000000000076
fsimage_0000000000000000076.md5
fsimage_0000000000000000078
fsimage_0000000000000000078.md5
seen_txid
VERSION
, , ?
EDIT: 50070 (.. client = InsecureClient('http://192.168.12.52:50070')) :
$ python3 hdfs_read_local.py
Traceback (most recent call last):
File "hdfs_read.py", line 3, in <module>
with client.read('/opt/hadoop/LICENSE.txt') as reader:
File "/usr/lib/python3.6/contextlib.py", line 81, in __enter__
return next(self.gen)
File "/home/embs/.local/lib/python3.6/site-packages/hdfs/client.py", line 678, in read
buffersize=buffer_size,
File "/home/embs/.local/lib/python3.6/site-packages/hdfs/client.py", line 112, in api_handler
raise err
File "/home/embs/.local/lib/python3.6/site-packages/hdfs/client.py", line 107, in api_handler
**self.kwargs
File "/home/embs/.local/lib/python3.6/site-packages/hdfs/client.py", line 210, in _request
_on_error(response)
File "/home/embs/.local/lib/python3.6/site-packages/hdfs/client.py", line 50, in _on_error
raise HdfsError(message, exception=exception)
hdfs.util.HdfsError: File /opt/hadoop/LICENSE.txt not found.
EDIT2: /opt/hadoop/LICENSE.txt
/test_storage/LICENSE.txt, , , HDFS, Iand, python script, :
Traceback (most recent call last):
File "/usr/lib/python3/dist-packages/urllib3/connection.py", line 137, in _new_conn
(self.host, self.port), self.timeout, **extra_kw)
File "/usr/lib/python3/dist-packages/urllib3/util/connection.py", line 91, in create_connection
raise err
File "/usr/lib/python3/dist-packages/urllib3/util/connection.py", line 81, in create_connection
sock.connect(sa)
OSError: [Errno 113] No route to host
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/lib/python3/dist-packages/urllib3/connectionpool.py", line 560, in urlopen
body=body, headers=headers)
File "/usr/lib/python3/dist-packages/urllib3/connectionpool.py", line 354, in _make_request
conn.request(method, url, **httplib_request_kw)
File "/usr/lib/python3.6/http/client.py", line 1239, in request
self._send_request(method, url, body, headers, encode_chunked)
File "/usr/lib/python3.6/http/client.py", line 1285, in _send_request
self.endheaders(body, encode_chunked=encode_chunked)
File "/usr/lib/python3.6/http/client.py", line 1234, in endheaders
self._send_output(message_body, encode_chunked=encode_chunked)
File "/usr/lib/python3.6/http/client.py", line 1026, in _send_output
self.send(msg)
File "/usr/lib/python3.6/http/client.py", line 964, in send
self.connect()
File "/usr/lib/python3/dist-packages/urllib3/connection.py", line 162, in connect
conn = self._new_conn()
File "/usr/lib/python3/dist-packages/urllib3/connection.py", line 146, in _new_conn
self, "Failed to establish a new connection: %s" % e)
requests.packages.urllib3.exceptions.NewConnectionError: <requests.packages.urllib3.connection.HTTPConnection object at 0x7f2e87867400>: Failed to establish a new connection: [Errno 113] No route to host
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/lib/python3/dist-packages/requests/adapters.py", line 376, in send
timeout=timeout
File "/usr/lib/python3/dist-packages/urllib3/connectionpool.py", line 610, in urlopen
_stacktrace=sys.exc_info()[2])
File "/usr/lib/python3/dist-packages/urllib3/util/retry.py", line 273, in increment
raise MaxRetryError(_pool, url, error or ResponseError(cause))
requests.packages.urllib3.exceptions.MaxRetryError: HTTPConnectionPool(host='pr2.embs', port=50075): Max retries exceeded with url: /webhdfs/v1/test_storage/LICENSE.txt?op=OPEN&user.name=embs&namenoderpcaddress=192.168.12.52:9000&offset=0 (Caused by NewConnectionError('<requests.packages.urllib3.connection.HTTPConnection object at 0x7f2e87867400>: Failed to establish a new connection: [Errno 113] No route to host',))
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "hdfs_read_local.py", line 3, in <module>
with client.read('/test_storage/LICENSE.txt') as reader:
File "/usr/lib/python3.6/contextlib.py", line 81, in __enter__
return next(self.gen)
File "/home/embs/.local/lib/python3.6/site-packages/hdfs/client.py", line 678, in read
buffersize=buffer_size,
File "/home/embs/.local/lib/python3.6/site-packages/hdfs/client.py", line 118, in api_handler
raise err
File "/home/embs/.local/lib/python3.6/site-packages/hdfs/client.py", line 107, in api_handler
**self.kwargs
File "/home/embs/.local/lib/python3.6/site-packages/hdfs/client.py", line 207, in _request
**kwargs
File "/usr/lib/python3/dist-packages/requests/sessions.py", line 468, in request
resp = self.send(prep, **send_kwargs)
File "/usr/lib/python3/dist-packages/requests/sessions.py", line 597, in send
history = [resp for resp in gen] if allow_redirects else []
File "/usr/lib/python3/dist-packages/requests/sessions.py", line 597, in <listcomp>
history = [resp for resp in gen] if allow_redirects else []
File "/usr/lib/python3/dist-packages/requests/sessions.py", line 195, in resolve_redirects
**adapter_kwargs
File "/usr/lib/python3/dist-packages/requests/sessions.py", line 576, in send
r = adapter.send(request, **kwargs)
File "/usr/lib/python3/dist-packages/requests/adapters.py", line 437, in send
raise ConnectionError(e, request=request)
requests.exceptions.ConnectionError: HTTPConnectionPool(host='pr2.embs', port=50075): Max retries exceeded with url: /webhdfs/v1/test_storage/LICENSE.txt?op=OPEN&user.name=embs&namenoderpcaddress=192.168.12.52:9000&offset=0 (Caused by NewConnectionError('<requests.packages.urllib3.connection.HTTPConnection object at 0x7f2e87867400>: Failed to establish a new connection: [Errno 113] No route to host',))