I set up a pseudo-distributed cluster on an Alibaba Cloud ECS instance (8 cores, 64 GB) using a docker-compose template.
When I try to upload a file to HDFS from Python, an error occurs.
The code is:
from hdfs import Client

# hdfs_host / hdfs_port are defined elsewhere and point at the NameNode's WebHDFS endpoint
client = Client(url="http://" + hdfs_host + ":" + hdfs_port)
client.upload(hdfs_path=hdfs_path, local_path=item, cleanup=True, overwrite=True)
The error output is:
datanode2
50075
ERROR:root:datanode2
ERROR:root:50075
ERROR:hdfs.client:Error while uploading. Attempting cleanup.
Traceback (most recent call last):
File "C:\Users\Yingchao Ha\AppData\Local\Programs\Python\Python39\lib\site-packages\urllib3\connection.py", line 176, in _new_conn
conn = connection.create_connection(
File "C:\Users\Yingchao Ha\AppData\Local\Programs\Python\Python39\lib\site-packages\urllib3\util\connection.py", line 96, in create_connection
raise err
File "C:\Users\Yingchao Ha\AppData\Local\Programs\Python\Python39\lib\site-packages\urllib3\util\connection.py", line 86, in create_connection
sock.connect(sa)
TimeoutError: [WinError 10060] A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond.
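Before the analysis below, here is a minimal reachability check I can run from the Windows client (the hostnames and ports are taken from the error output above and are assumptions about my setup; adjust them if your mapping differs):

import socket

# Check whether the NameNode and the DataNode WebHDFS ports answer from the client machine.
# The upload only works if the DataNode port (50075) is reachable, not just the NameNode's.
for host, port in [("namenode", 50070), ("datanode2", 50075)]:
    s = socket.socket()
    s.settimeout(5)
    try:
        s.connect((host, port))
        print(host, port, "reachable")
    except OSError as exc:
        print(host, port, "NOT reachable:", exc)
    finally:
        s.close()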
Problem analysis:
Based on solutions I found online, the NameNode is reachable but the DataNodes are not. One fix I would like to try is adding the following to hdfs-site.xml:
<property>
<name>dfs.datanode.use.datanode.hostname</name>
<value>true</value>
</property>
However, the cluster was set up with docker-compose, so I would like to know how to modify hdfs-site.xml in this setup.
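My guess (unverified) is that the bde2020 images generate hdfs-site.xml from HDFS_CONF_* variables in hadoop.env at container start, with dots mapped to single underscores and dashes to triple underscores, so the property above would become an extra line like this; is that the right way to do it?

HDFS_CONF_dfs_datanode_use_datanode_hostname=true
# possibly also the client-side counterpart (dfs.client.use.datanode.hostname) -- a guess on my part
HDFS_CONF_dfs_client_use_datanode_hostname=true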
The hadoop.env file is as follows:
CORE_CONF_fs_defaultFS=hdfs://namenode:9000
CORE_CONF_hadoop_http_staticuser_user=root
CORE_CONF_hadoop_proxyuser_hive_hosts=*
CORE_CONF_hadoop_proxyuser_hive_groups=*
CORE_CONF_hadoop_proxyuser_hbase_hosts=*
CORE_CONF_hadoop_proxyuser_hbase_groups=*
HDFS_CONF_dfs_webhdfs_enabled=true
HDFS_CONF_dfs_permissions_enabled=false
HDFS_CONF_dfs_namenode_datanode_registration_ip___hostname___check=false
YARN_CONF_yarn_log___aggregation___enable=true
YARN_CONF_yarn_resourcemanager_recovery_enabled=true
YARN_CONF_yarn_resourcemanager_store_class=org.apache.hadoop.yarn.server.resourcemanager.recovery.FileSystemRMStateStore
YARN_CONF_yarn_resourcemanager_fs_state___store_uri=/rmstate
YARN_CONF_yarn_nodemanager_remote___app___log___dir=/app-logs
YARN_CONF_yarn_log_server_url=http://historyserver:8188/applicationhistory/logs/
YARN_CONF_yarn_timeline___service_enabled=true
YARN_CONF_yarn_timeline___service_generic___application___history_enabled=true
YARN_CONF_yarn_resourcemanager_system___metrics___publisher_enabled=true
YARN_CONF_yarn_resourcemanager_hostname=resourcemanager
YARN_CONF_yarn_timeline___service_hostname=historyserver
YARN_CONF_yarn_resourcemanager_address=resourcemanager:8032
YARN_CONF_yarn_resourcemanager_scheduler_address=resourcemanager:8030
YARN_CONF_yarn_resourcemanager_resource___tracker_address=resourcemanager:8031
YARN_CONF_yarn_timeline___service_enabled=true
YARN_CONF_yarn_timeline___service_generic___application___history_enabled=true
YARN_CONF_yarn_timeline___service_hostname=historyserver
YARN_CONF_mapreduce_map_output_compress=true
YARN_CONF_mapred_map_output_compress_codec=org.apache.hadoop.io.compress.SnappyCodec
YARN_CONF_yarn_nodemanager_resource_memory___mb=16384
YARN_CONF_yarn_nodemanager_resource_cpu___vcores=8
YARN_CONF_yarn_nodemanager_disk___health___checker_max___disk___utilization___per___disk___percentage=98.5
YARN_CONF_yarn_nodemanager_remote___app___log___dir=/app-logs
YARN_CONF_yarn_nodemanager_aux___services=mapreduce_shuffle
MAPRED_CONF_mapreduce_framework_name=yarn
MAPRED_CONF_mapred_child_java_opts=-Xmx4096m
MAPRED_CONF_mapreduce_map_memory_mb=4096
MAPRED_CONF_mapreduce_reduce_memory_mb=8192
MAPRED_CONF_mapreduce_map_java_opts=-Xmx3072m
MAPRED_CONF_mapreduce_reduce_java_opts=-Xmx6144m
MAPRED_CONF_yarn_app_mapreduce_am_env=HADOOP_MAPRED_HOME=/opt/hadoop-2.7.4/
MAPRED_CONF_mapreduce_map_env=HADOOP_MAPRED_HOME=/opt/hadoop-2.7.4/
MAPRED_CONF_mapreduce_reduce_env=HADOOP_MAPRED_HOME=/opt/hadoop-2.7.4/
HIVE_SITE_CONF_javax_jdo_option_ConnectionURL=jdbc:postgresql://hive-metastore-postgresql:5432/metastore
HIVE_SITE_CONF_javax_jdo_option_ConnectionDriverName=org.postgresql.Driver
HIVE_SITE_CONF_javax_jdo_option_ConnectionUserName=hive
HIVE_SITE_CONF_javax_jdo_option_ConnectionPassword=hive
HIVE_SITE_CONF_datanucleus_autoCreateSchema=false
HIVE_SITE_CONF_hive_metastore_uris=thrift://hive-metastore:9083
HIVE_SITE_CONF_hbase_zookeeper_quorum=zoo1:2181,zoo2:2181,zoo3:2181
The relevant parts of docker-compose.yml are as follows:
services:
namenode:
image: bde2020/hadoop-namenode:2.0.0-hadoop2.7.4-java8
container_name: namenode
hostname: namenode
volumes:
- ./data/hadoop/namenode:/hadoop/dfs/name
environment:
CLUSTER_NAME: "hadoop"
env_file:
- ./hadoop.env
- ./datanode1.env
- ./datanode2.env
- ./datanode3.env
ports:
- 50070:50070
- 9000:9000
- 8020:8020
networks:
default:
ipv4_address: 172.23.0.31
resourcemanager:
image: bde2020/hadoop-resourcemanager:2.0.0-hadoop2.7.4-java8
container_name: resourcemanager
hostname: resourcemanager
depends_on:
- namenode
- datanode1
- datanode2
- datanode3
env_file:
- ./hadoop.env
- ./datanode1.env
- ./datanode2.env
- ./datanode3.env
environment:
SERVICE_PRECONDITION: "namenode:50070 datanode1:50075 datanode2:50075 datanode3:50075"
ports:
- 8088:8088
- 8030:8030
- 8031:8031
- 8032:8032
- 8033:8033
networks:
default:
ipv4_address: 172.23.0.44
historyserver:
image: bde2020/hadoop-historyserver:2.0.0-hadoop2.7.4-java8
container_name: historyserver
hostname: historyserver
depends_on:
- namenode
- datanode1
- datanode2
- datanode3
env_file:
- ./hadoop.env
- ./datanode1.env
- ./datanode2.env
- ./datanode3.env
environment:
SERVICE_PRECONDITION: "namenode:50070 datanode1:50075 datanode2:50075 datanode3:50075 resourcemanager:8088"
ports:
- 8188:8188
networks:
default:
ipv4_address: 172.23.0.33
nodemanager1:
image: bde2020/hadoop-nodemanager:2.0.0-hadoop2.7.4-java8
container_name: nodemanager1
hostname: nodemanager1
depends_on:
- namenode
- datanode1
- datanode2
- datanode3
env_file:
- ./hadoop.env
- ./datanode1.env
- ./datanode2.env
- ./datanode3.env
ports:
- 8142:8042
- 8140:8040
- 41655:46655
environment:
SERVICE_PRECONDITION: "namenode:50070 datanode1:50075 datanode2:50075 datanode3:50075 resourcemanager:8088"
networks:
default:
ipv4_address: 172.23.0.41
nodemanager2:
image: bde2020/hadoop-nodemanager:2.0.0-hadoop2.7.4-java8
container_name: nodemanager2
hostname: nodemanager2
depends_on:
- namenode
- datanode1
- datanode2
- datanode3
env_file:
- ./hadoop.env
- ./datanode1.env
- ./datanode2.env
- ./datanode3.env
ports:
- 8242:8042
- 8240:8040
- 42655:46655
environment:
SERVICE_PRECONDITION: "namenode:50070 datanode1:50075 datanode2:50075 datanode3:50075 resourcemanager:8088"
networks:
default:
ipv4_address: 172.23.0.42
nodemanager3:
image: bde2020/hadoop-nodemanager:2.0.0-hadoop2.7.4-java8
container_name: nodemanager3
hostname: nodemanager3
depends_on:
- namenode
- datanode1
- datanode2
- datanode3
env_file:
- ./hadoop.env
- ./datanode1.env
- ./datanode2.env
- ./datanode3.env
ports:
- 8342:8042
- 8340:8040
- 43655:46655
environment:
SERVICE_PRECONDITION: "namenode:50070 datanode1:50075 datanode2:50075 datanode3:50075 resourcemanager:8088"
networks:
default:
ipv4_address: 172.23.0.43
datanode1:
image: bde2020/hadoop-datanode:2.0.0-hadoop2.7.4-java8
container_name: datanode1
hostname: datanode1
depends_on:
- namenode
env_file:
- ./hadoop.env
- ./datanode1.env
volumes:
- ./data/hadoop/datanode1:/hadoop/dfs/data
ports:
- 50175:50075
- 50110:50110
environment:
SERVICE_PRECONDITION: "namenode:50070"
networks:
default:
ipv4_address: 172.23.0.35
datanode2:
image: bde2020/hadoop-datanode:2.0.0-hadoop2.7.4-java8
container_name: datanode2
hostname: datanode2
depends_on:
- namenode
env_file:
- ./hadoop.env
- ./datanode2.env
volumes:
- ./data/hadoop/datanode2:/hadoop/dfs/data
ports:
- 50275:50075
- 50210:50210
environment:
SERVICE_PRECONDITION: "namenode:50070"
networks:
default:
ipv4_address: 172.23.0.36
datanode3:
image: bde2020/hadoop-datanode:2.0.0-hadoop2.7.4-java8
container_name: datanode3
hostname: datanode3
depends_on:
- namenode
env_file:
- ./hadoop.env
- ./datanode3.env
volumes:
- ./data/hadoop/datanode3:/hadoop/dfs/data
ports:
- 50375:50075
- 50310:50310
environment:
SERVICE_PRECONDITION: "namenode:50070"
networks:
default:
ipv4_address: 172.23.0.37
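One detail that may matter (my own reading of the files above, not verified): datanode2 publishes its WebHDFS port on host port 50275, but the redirect in the traceback sends the client to datanode2:50075, so even when the hostname resolves via the hosts file the port does not line up. I am also not sure whether publishing 50075 directly is viable, since all three DataNodes would need the same host port, roughly:

datanode2:
  ports:
    - 50075:50075   # hypothetical mapping; would collide with datanode1/datanode3 wanting the same host port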
The local hosts file on my machine has already been updated and all of the web UIs are accessible.
A second question: how should I adjust the YARN parameters to match the current 8-core / 64 GB machine? I changed some of them before, but after restarting it did not seem to help, and I also ran into Spark being unable to obtain resources, which is why I upgraded the machine in the first place.
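For reference, this is the direction I was considering for the YARN settings, written in the same hadoop.env style (the numbers are my own rough guesses for splitting roughly 48 GB across the three NodeManager containers while leaving headroom for HDFS and the OS; I do not know whether they are correct):

# per NodeManager container, three containers on one 8-core / 64 GB host
YARN_CONF_yarn_nodemanager_resource_memory___mb=16384
YARN_CONF_yarn_nodemanager_resource_cpu___vcores=2
YARN_CONF_yarn_scheduler_minimum___allocation___mb=1024
YARN_CONF_yarn_scheduler_maximum___allocation___mb=16384
YARN_CONF_yarn_scheduler_maximum___allocation___vcores=2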