Hadoop install:修订间差异

来自牛奶河Wiki
跳到导航 跳到搜索
无编辑摘要
无编辑摘要
第45行: 第45行:
====Setup====
====Setup====


======# core-site.xml (Common组件)======
=====core-site.xml (Common组件)=====
<configuration>
<configuration>
 
  <property>
 <property>
    <!-- 配置hdfs地址 -->
 
    <name>fs.defaultFS</name>
   <!-- 配置hdfs地址 -->
    <value>hdfs://g2-hdfs-01:9000</value>
 
  </property>
   <name>fs.defaultFS</name>
  <property>
 
    <name>io.file.buffer.size</name>
   <value>hdfs://g2-hdfs-01:9000</value>
    <value>131072</value>
 
  </property>
 </property>
  <property>
 
      <!-- 保存临时文件目录 -->
 <property>
    <name>hadoop.tmp.dir</name>
 
    <value>/u01/hdfs/tmp</value>
   <name>io.file.buffer.size</name>
  </property>
 
</configuration>
   <value>131072</value>
=====hdfs-site.xml (HDFS组件)=====
 
<configuration>
 </property>
  <property>
 
    <!-- 主节点地址 -->
 <property>
    <name>dfs.namenode.http-address</name>
 
    <value>g2-hdfs-01:50070</value>
   <!-- 保存临时文件目录 -->
  </property>
 
  <property>
   <name>hadoop.tmp.dir</name>
      <!-- 第二节点地址 -->
 
    <name>dfs.namenode.secondary.http-address</name>
   <value>/u01/hdfs/tmp</value>
    <value>g2-hdfs-02:50170</value>
 
  </property>
 </property>
  <property>
 
    <name>dfs.namenode.name.dir</name>
</configuration>
    <value>file:/u01/hdfs/dfs/nn</value>
 
  </property>
 
  <property>
======# hdfs-site.xml (HDFS组件)======
    <name>dfs.datanode.data.dir</name>
<configuration>
    <value>file:/u01/hdfs/dfs/dn</value>
 
  </property>
 <property>
  <property>  
 
    <name>dfs.webhdfs.enabled</name>  
   <!-- 主节点地址 -->
    <value>true</value>  
 
  </property>
   <name>dfs.namenode.http-address</name>
  <property>
 
      <!-- 配置false后,无需权限即可生成dfs上的文件 -->
   <value>g2-hdfs-01:50070</value>
    <name>dfs.permissions</name>
 
    <value>false</value>
 </property>
  </property>
 
</configuration>
 <property>
 
-- del
   <!-- 第二节点地址 -->
  <property>
 
      <!-- 备份数为默认值3 -->
   <name>dfs.namenode.secondary.http-address</name>
    <name>dfs.replication</name>
 
    <value>3</value>
   <value>g2-hdfs-02:50170</value>
  </property>
 
  <property>
 </property>
    <name>dfs.blocksize</name>
 
    <value>268435456</value>
 <property>
  </property>
 
  <property>
   <name>dfs.namenode.name.dir</name>
    <name>dfs.namenode.handler.count</name>
 
    <value>100</value>
   <value>file:/u01/hdfs/dfs/nn</value>
  </property>
 
=====mapred-site.xml=====
 </property>
<configuration>
 
  <property>
 <property>
    <name>mapreduce.framework.name</name>
 
    <value>yarn</value>  
   <name>dfs.datanode.data.dir</name>
  </property>
 
</configuration>
   <value>file:/u01/hdfs/dfs/dn</value>
 
-- del
 </property>
  <property>
 
      <name>mapreduce.jobhistory.address</name>
 <property>  
      <value>g2-hdfs-01:10020</value>
 
  </property>
   <name>dfs.webhdfs.enabled</name>  
  <property>
 
      <name>mapreduce.jobhistory.webapp.address</name>
   <value>true</value>  
      <value>g2-hdfs-01:19888</value>
 
  </property>
 </property>
  <property>
 
      <name>mapreduce.application.classpath</name>
 <property>
      <value>$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*:$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*</value>
 
  </property>
   <!-- 配置false后,无需权限即可生成dfs上的文件 -->
=====yarn-site.xml=====
 
<configuration>
   <name>dfs.permissions</name>
  <property>
 
    <name>yarn.resourcemanager.hostname</name>
   <value>false</value>
    <value>g2-hdfs-01</value>
 
  </property>
 </property>
  <property>
 
    <name>yarn.nodemanager.aux-services</name>
</configuration>
    <value>mapreduce_shuffle</value>
 
  </property>
 
  <property>
-- del
    <name>yarn.resourcemanager.webapp.address</name>
 
    <value>g2-hdfs-01:8088</value>
 <property>
  </property>
 
  <property>
   <!-- 备份数为默认值3 -->
    <name>yarn.scheduler.maximum-allocation-mb</name>
 
    <value>32768</value>
   <name>dfs.replication</name>
  </property>
 
  <property>
   <value>3</value>
    <name>yarn.nodemanager.vmem-check-enabled</name>
 
    <value>false</value>
 </property>
  </property>
 
  <property>
 <property>
    <name>yarn.nodemanager.env-whitelist</name>
 
    <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
   <name>dfs.blocksize</name>
  </property>
 
</configuration>
   <value>268435456</value>
  <property>
 
    <name>yarn.resourcemanager.webapp.address</name>
 </property>
    <value>hadoop01/192.168.44.5:8088</value>
 
    <description>配置外网只需要替换外网ip为真实ip,否则默认为 localhost:8088</description>
 <property>
  </property>
 
   <name>dfs.namenode.handler.count</name>
yarn.resourcemanager.hostname
 
指定ResourceManager所在的主机名(调度、Web管理界面等地址默认由它派生);不配的话,NodeManager无法注册到ResourceManager,Active Nodes始终为0
   <value>100</value>
yarn.scheduler.maximum-allocation-mb
 
单个容器(Container)可申请的最大内存,单位MB,默认8192MB
 </property>
yarn.nodemanager.aux-services
 
reducer获取数据的方式
 
yarn.nodemanager.vmem-check-enabled
======# mapred-site.xml======
false = 忽略虚拟内存的检查
<configuration>
 
 <property>
 
   <name>mapreduce.framework.name</name>
 
   <value>yarn</value>  
 
 </property>
 
</configuration>
 
 
-- del
 
 <property>
 
    <name>mapreduce.jobhistory.address</name>
 
    <value>g2-hdfs-01:10020</value>
 
 </property>
 
 <property>
 
    <name>mapreduce.jobhistory.webapp.address</name>
 
    <value>g2-hdfs-01:19888</value>
 
 </property>
 
 <property>
 
    <name>mapreduce.application.classpath</name>
 
   <value>$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*:$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*</value>
 
 </property>
 
 
======# yarn-site.xml======
<configuration>
 
 <property>
 
   <name>yarn.resourcemanager.hostname</name>
 
   <value>g2-hdfs-01</value>
 
 </property>
 
 <property>
 
   <name>yarn.nodemanager.aux-services</name>  
 
   <value>mapreduce_shuffle</value>
 
 </property>
 
 <property>
 
   <name>yarn.resourcemanager.webapp.address</name>
 
   <value>g2-hdfs-01:8088</value>
 
 </property>
 
 <property>
 
   <name>yarn.scheduler.maximum-allocation-mb</name>
 
   <value>32768</value>
 
 </property>
 
 <property>
 
   <name>yarn.nodemanager.vmem-check-enabled</name>
 
   <value>false</value>
 
 </property>
 
 <property>
 
   <name>yarn.nodemanager.env-whitelist</name>
 
  <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
 
 </property>
 
</configuration>
 
 
 
yarn.resourcemanager.hostname
 
指定ResourceManager所在的主机名(调度、Web管理界面等地址默认由它派生);不配的话,NodeManager无法注册到ResourceManager,Active Nodes始终为0
 
yarn.scheduler.maximum-allocation-mb
 
单个容器(Container)可申请的最大内存,单位MB,默认8192MB
 
yarn.nodemanager.aux-services
 
reducer获取数据的方式
 
yarn.nodemanager.vmem-check-enabled
 
false = 忽略虚拟内存的检查
 
 
 
 <property>
 
   <name>yarn.resourcemanager.webapp.address</name>
 
   <value>hadoop01/192.168.44.5:8088</value>
 
   <description>配置外网只需要替换外网ip为真实ip,否则默认为localhost:8088</description>
 
 </property>





2023年2月12日 (日) 10:18的版本

ENV

USER

groupadd hadoop -g 1001

useradd hdfs -g hadoop -u 1001

Java

/usr/bin/java -> /etc/alternatives/java -> /usr/java/jdk1.8.0_221-amd64/jre/bin/java

#  /opt/hadoop-3.3.0

ln -s /opt/hadoop-3.3.0 /opt/hadoop

# .bash_profile

# hadoop, 20201010, Adam

export HADOOP_HOME=/opt/hadoop

export PATH=$PATH:$HADOOP_HOME/bin

export PATH=$PATH:$HADOOP_HOME/sbin

export HADOOP_CONF_DIR=${HADOOP_HOME}/etc/hadoop

Hadoop 配置

配置 Hadoop 环境脚本文件中的 JAVA_HOME 参数

# Hadoop 以守护进程方式运行,读取不到 /etc/profile 里面配置的 JAVA_HOME 路径,需要在下列 env 脚本中显式设置

# /opt/hadoop/etc/hadoop/

# hadoop-env.sh, mapred-env.sh, yarn-env.sh

cp hadoop-env.sh hadoop-env.sh.20210409

cp mapred-env.sh mapred-env.sh.20210409

cp yarn-env.sh yarn-env.sh.20210409

echo '

# hdfs, 20210409, Adam

export JAVA_HOME=/usr/java/jdk1.8.0_361' | tee -a hadoop-env.sh mapred-env.sh yarn-env.sh > /dev/null

Setup

core-site.xml (Common组件)
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://g2-hdfs-01:9000</value>
  </property>
  <property>
    <name>io.file.buffer.size</name>
    <value>131072</value>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/u01/hdfs/tmp</value>
  </property>
</configuration>
hdfs-site.xml (HDFS组件)
<configuration>
  <property>
    <name>dfs.namenode.http-address</name>
    <value>g2-hdfs-01:50070</value>
  </property>
  <property>
    <name>dfs.namenode.secondary.http-address</name>
    <value>g2-hdfs-02:50170</value>
  </property>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>file:/u01/hdfs/dfs/nn</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>file:/u01/hdfs/dfs/dn</value>
  </property>
  <property> 
    <name>dfs.webhdfs.enabled</name> 
    <value>true</value> 
  </property>
  <property>
    <name>dfs.permissions</name>
    <value>false</value>
  </property>
</configuration>

-- del
  <property>
    <name>dfs.replication</name>
    <value>3</value>
  </property>
  <property>
    <name>dfs.blocksize</name>
    <value>268435456</value>
  </property>
  <property>
    <name>dfs.namenode.handler.count</name>
    <value>100</value>
  </property>
mapred-site.xml
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value> 
  </property>
</configuration>

-- del
  <property>
     <name>mapreduce.jobhistory.address</name>
     <value>g2-hdfs-01:10020</value>
  </property>
  <property>
     <name>mapreduce.jobhistory.webapp.address</name>
     <value>g2-hdfs-01:19888</value>
  </property>
  <property>
     <name>mapreduce.application.classpath</name>
     <value>$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*:$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*</value>
  </property>
yarn-site.xml
<configuration>
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>g2-hdfs-01</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services</name>  
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address</name>
    <value>g2-hdfs-01:8088</value>
  </property>
  <property>
    <name>yarn.scheduler.maximum-allocation-mb</name>
    <value>32768</value>
  </property>
  <property>
    <name>yarn.nodemanager.vmem-check-enabled</name>
    <value>false</value>
  </property>
  <property>
    <name>yarn.nodemanager.env-whitelist</name>
    <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
  </property>
</configuration>
  <property>
    <name>yarn.resourcemanager.webapp.address</name>
    <value>hadoop01/192.168.44.5:8088</value>
    <description>配置外网只需要替换外网ip为真实ip,否则默认为 localhost:8088</description>
  </property>

yarn.resourcemanager.hostname
指定ResourceManager所在的主机名(调度、Web管理界面等地址默认由它派生);不配的话,NodeManager无法注册到ResourceManager,Active Nodes始终为0
yarn.scheduler.maximum-allocation-mb
单个容器(Container)可申请的最大内存,单位MB,默认8192MB
yarn.nodemanager.aux-services
reducer获取数据的方式
yarn.nodemanager.vmem-check-enabled
false = 忽略虚拟内存的检查


# workers