Installing XGBoost on a YARN cluster
Install the basic build dependencies with yum
yum install gcc-c++ fuse-devel git hadoop-libhdfs-devel
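If the HDFS-enabled build later complains about a missing libhdfs, first confirm that these packages really installed (a quick check using the package names above):
rpm -q gcc-c++ fuse-devel git hadoop-libhdfs-devel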
Configure the environment variables
export MVN_HOME=/opt/soft/apache-maven-3.6.0
export PATH=$MVN_HOME/bin:$PATH
export XGB_HOME=/home/download
export PATH=$XGB_HOME:$PATH
export HDFS_LIB_PATH=${XGB_HOME}/xgboost-packages/libhdfs
export LD_LIBRARY_PATH=${XGB_HOME}/xgboost-packages/lib64:$JAVA_HOME/jre/lib/amd64/server:${XGB_HOME}/xgboost-packages/libhdfs:$LD_LIBRARY_PATH
export HADOOP_HOME=/opt/cloudera/parcels/CDH/lib/hadoop
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=/opt/cloudera/parcels/CDH/lib/hadoop-hdfs
export HADOOP_MAPRED_HOME=/opt/cloudera/parcels/CDH/lib/hadoop-yarn
export HADOOP_YARN_HOME=$HADOOP_MAPRED_HOME
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
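Before building, it is worth a quick sanity check that the paths referenced above actually exist (a minimal sketch, assuming the layout used in these exports):
echo ${JAVA_HOME}          # must be set, since LD_LIBRARY_PATH references it
ls ${HDFS_LIB_PATH}        # LD_LIBRARY_PATH expects libhdfs.so here
ls ${HADOOP_CONF_DIR}      # should contain core-site.xml and hdfs-site.xml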
Clone and build xgboost
mkdir -p xgboost-packages/lib64
cd xgboost-packages
git clone --recursive https://github.com/dmlc/xgboost
cd xgboost
cp make/config.mk ./
vim config.mk
uncomment and set the HADOOP_HOME line, e.g. HADOOP_HOME = /usr/lib/hadoop
mkdir build
cd build
cmake .. -DUSE_HDFS=ON   # enable HDFS support
make -j4
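After the build finishes, the shared library should end up under lib/ in the xgboost checkout (i.e. ../lib/ relative to build/); checking its linkage is a quick way to confirm that HDFS support was compiled in (assuming libhdfs was linked dynamically):
ls ../lib/libxgboost.so
ldd ../lib/libxgboost.so | grep -i hdfs   # should list libhdfs.so if -DUSE_HDFS=ON took effect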
Install the Python package into the target environment
cd ../python-package/ && python setup.py install
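A quick check that the package landed in the intended environment (assuming python here is the same interpreter that ran setup.py):
python -c "import xgboost as xgb; print(xgb.__version__)"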
Install the JVM package into the local Maven repository
cd ../jvm-packages/ && mvn install:install-file -Dfile=xgboost4j-spark-0.83-jar-with-dependencies.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j-spark -Dversion=0.83 -Dpackaging=jar
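To confirm the artifact was installed under the coordinates above, list the local Maven repository (path assumes the default ~/.m2 location):
ls ~/.m2/repository/ml/dmlc/xgboost4j-spark/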
Please credit the source when reposting; this article is licensed under CC 4.0.