2013年5月14日 星期二

[記事] Hadoop / Hbase installation

屬於阿宅世界的技術文章,想看的再點開,切莫自誤 !



Prerequisites
JDK
SSHD

Environment

10.0.0.1 h01
10.0.0.2 h02
10.0.0.3 h03
10.0.0.4 h04
10.0.0.5 h05
h01 - hadoop master name node
h02 - hadoop secondary name node
h03, h04, h05 - hadoop data node

Hadoop in Single Node

# hadoop connects to every host by ssh by default
# set up passwordless login
shell> ssh-keygen -t dsa -P '' -f ~/.ssh/id_dsa
shell> cat ~/.ssh/id_dsa.pub >> ~/.ssh/authorized_keys

shell> mkdir /home
shell> cd /home
shell> wget http://apache.cdpa.nsysu.edu.tw/hadoop/common/hadoop-1.1.2/hadoop-1.1.2.tar.gz
shell> tar zxvf hadoop-1.1.2.tar.gz
shell> mv hadoop-1.1.2 hadoop
shell> cd hadoop

shell> mkdir -p /var/log/hadoop
shell> vi conf/hadoop-env.sh
export JAVA_HOME=/usr/lib/jvm/java-6-openjdk
export HADOOP_LOG_DIR=/var/log/hadoop


shell> mkdir /tmp/hadoop
shell> vi conf/core-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<!-- Put site-specific property overrides in this file. -->

<configuration>
  <property>
    <name>fs.default.name</name>
    <value>hdfs://h01:9000</value>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/tmp/hadoop</value>
  </property>
</configuration>


shell> mkdir -p /home/hadoop/dfs/name
shell> mkdir -p /home/hadoop/dfs/data
shell> vi conf/hdfs-site.xml

<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<!-- Put site-specific property overrides in this file. -->

<configuration>
  <property>
    <!-- Local storage of name node -->
    <name>dfs.name.dir</name>
    <value>/home/hadoop/dfs/name</value>
  </property>
  <property>
    <!-- Local storage of data node -->
    <name>dfs.data.dir</name>
    <value>/home/hadoop/dfs/data</value>
  </property>

  <property>
    <name>dfs.webhdfs.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.support.append</name>
    <value>true</value>
  </property>
</configuration>


shell> mkdir -p /home/hadoop/mapred/system
shell> vi conf/mapred-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<!-- Put site-specific property overrides in this file. -->

<configuration>
  <property>
    <!-- Host of JobTracker -->
    <name>mapred.job.tracker</name>
    <value>h01:9001</value>
  </property>
  <property>
    <!-- Path of the HDFS where the MapReduce framework stores system files -->
    <name>mapred.system.dir</name>
    <value>/home/hadoop/mapred/system</value>
  </property>
</configuration>


shell> bin/hadoop namenode -format
shell> bin/start-all.sh
shell> netstat -altp
# You should see:
# Name node listens on 50070
# Data nodes listen on 50075
# Secondary name node listens on 50090
# JobTracker listens on 50030
# TaskTracker listens on 50060

shell> bin/stop-all.sh

Hadoop in cluster

shell> scp ~/.ssh/authorized_keys h02:~/.ssh/authorized_keys
shell> scp ~/.ssh/authorized_keys h03:~/.ssh/authorized_keys
shell> scp ~/.ssh/authorized_keys h04:~/.ssh/authorized_keys
shell> scp ~/.ssh/authorized_keys h05:~/.ssh/authorized_keys

shell> cd /home/hadoop

# Add the hostname of the SecondaryNameNode
shell> vi conf/masters
h02

# Add the hostnames of the DataNodes/TaskTrackers
shell> vi conf/slaves
h03
h04
h05


shell> rm -rf /home/hadoop/dfs/name/*
shell> rm -rf /home/hadoop/dfs/data/*

shell> cd ..
shell> tar cvf my_hadoop.tar hadoop
shell> scp my_hadoop.tar h02:/home/
shell> scp my_hadoop.tar h03:/home/
shell> scp my_hadoop.tar h04:/home/
shell> scp my_hadoop.tar h05:/home/

# ssh to h02, h03, h04, h05  and extract my_hadoop.tar in /home and add ip/hostname to /etc/hosts
# make sure not using "_" in the hostname in /etc/hosts, or it will cause something strange = =!

# switch back to h01 and start hadoop
shell> bin/hadoop namenode -format
shell> bin/start-all.sh

# now you should see something in http://h01:50070/ through browser

Using HDFS by FUSE

# Install prerequisites
shell> sudo apt-get install ant subversion gcc g++ gawk libtool automake make libfuse-dev

# Export (BASH) environment variables
shell> vi ~/.bash_profile
export HADOOP_HOME=/home/hadoop
export HBASE_HOME=/home/hbase
export FORREST_HOME=/home/forrest
export JAVA_HOME=/usr/lib/jvm/java-6-openjdk
export PATH=$PATH:$HADOOP_HOME/bin:$FORREST_HOME/bin
export OS_ARCH=amd64
export OS_BIT=64
export LD_LIBRARY_PATH=/usr/lib:/usr/local/lib:$HADOOP_HOME/build/libhdfs:/usr/lib/jvm/java-6-openjdk/jre/lib/amd64/server


shell> cd /home/
shell> svn co http://svn.apache.org/repos/asf/forrest/trunk

shell> cd /home/hadoop

shell> ant compile-c++-libhdfs -Dlibhdfs=1 -Dislibhdfs=1

shell> ln -s $HADOOP_HOME/c++/Linux-amd64-64/lib/ $HADOOP_HOME/build/libhdfs

shell> vi src/c++/task-controller/configure.ac
# comment out the AC_SYS_LARGEFILE line

shell> cd /home/forrest/main
shell> ./build.sh

shell> cd /home/hadoop
shell> ant package -Dforrest.home=$FORREST_HOME
shell> ant compile-contrib -Dlibhdfs=1 -Dfusedfs=1


# mount the HDFS

shell> cd /home/hadoop/src/contrib/fuse-dfs/src
shell> ./fuse_dfs_wrapper.sh dfs://h01:9000 /mnt

shell> ls /mnt

shell> df
Filesystem           1K-blocks      Used Available Use% Mounted on

fuse_dfs             296878080         0 296878080   0% /mnt

HBASE

shell> cd /home
shell> wget http://ftp.tc.edu.tw/pub/Apache/hbase/stable/hbase-0.94.7.tar.gz
shell> tar zxvf hbase-0.94.7.tar.gz
shell> mv hbase-0.94.7 hbase
shell> cd hbase
shell> vi conf/hbase-env.sh
export HBASE_HOME=/home/hbase
export JAVA_HOME=/usr/lib/jvm/java-6-openjdk


shell> vi conf/hbase-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<configuration>
  <property>
    <name>hbase.rootdir</name>
    <value>hdfs://h01:9000/home/hbase</value>
  </property>
  <property>
    <name>hbase.cluster.distributed</name>
    <value>true</value>
  </property>
  <property>
    <name>hbase.master</name>
    <value>h01:60000</value>
  </property>
  <property>
    <name>hbase.zookeeper.property.dataDir</name>
    <value>/home/hbase/zookeeper</value>
  </property>
  <property>
    <name>hbase.zookeeper.quorum</name>
    <value>h01</value>
  </property>
</configuration>


shell> vi conf/regionservers
h03
h04
h05


# replace the hadoop core lib bundled with hbase by the one from our hadoop,
# so the client and server versions match
shell> rm /home/hbase/lib/hadoop-core-*.jar
shell> cp /home/hadoop/hadoop-core-1.1.2.jar /home/hbase/lib/

shell> bin/start-hbase.sh
shell> bin/hbase shell

shell> bin/stop-hbase.sh

0 意見:

張貼留言