400-800-0824
info@ymatrix.cn
400-800-0824
info@ymatrix.cn
400-800-0824
info@ymatrix.cn
400-800-0824
info@ymatrix.cn
400-800-0824
info@ymatrix.cn
YMatrix 文档
关于 YMatrix
标准集群部署
数据写入
数据迁移
数据查询
运维监控
参考指南
工具指南
数据类型
存储引擎
执行引擎
系统配置参数
SQL 参考
常见问题(FAQ)
新架构 FAQ
集群部署 FAQ
SQL 查询 FAQ
MatrixGate FAQ
运维 FAQ
监控告警 FAQ
PXF FAQ
PLPython FAQ
性能 FAQ
# Point the environment at the Hadoop 2.9.2 installation.
export HADOOP_HOME=/data/hadoop-2.9.2
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
# Add the Hadoop client and daemon scripts to PATH.
# NOTE: the original line appended $PATH twice and ended with a stray ':',
# which implicitly adds the current directory to PATH — a security hazard.
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin

# Create the NameNode, DataNode and temp directories.
# -p: succeed if they already exist and create parent dirs as needed.
mkdir -p /data/hadoop-2.9.2/name
mkdir -p /data/hadoop-2.9.2/data
mkdir -p /data/hadoop-2.9.2/tmp
## 参数配置
# Inspect hadoop-env.sh, then set JAVA_HOME in it to the local JDK
# (Hadoop's daemon scripts read JAVA_HOME from this file, not from the
# login shell — presumably why the doc sets it here; verify on your build).
cat hadoop-env.sh
export JAVA_HOME=/data/jdk1.8
vim $HADOOP_CONF_DIR/core-site.xml
<configuration>
  <property>
    <!-- Default filesystem URI: the NameNode RPC endpoint. -->
    <name>fs.defaultFS</name>
    <value>hdfs://matrixdb01:9000</value>
  </property>
  <property>
    <!-- Hosts from which the mxadmin proxy user may connect; required for PXF. -->
    <name>hadoop.proxyuser.mxadmin.hosts</name>
    <value>*</value>
  </property>
  <property>
    <!-- Groups the mxadmin user is allowed to impersonate; must be set. -->
    <name>hadoop.proxyuser.mxadmin.groups</name>
    <value>*</value>
  </property>
  <property>
    <!-- Users the mxadmin user is allowed to impersonate; must be set. -->
    <name>hadoop.proxyuser.mxadmin.users</name>
    <value>*</value>
  </property>
  <property>
    <!-- Base directory for Hadoop temporary files. -->
    <name>hadoop.tmp.dir</name>
    <value>/data/hadoop-2.9.2/tmp</value>
  </property>
</configuration>
vim $HADOOP_CONF_DIR/hdfs-site.xml
<configuration>
  <property>
    <!-- Number of block replicas; 3 matches the three-node cluster. -->
    <name>dfs.replication</name>
    <value>3</value>
  </property>
  <property>
    <!-- NameNode metadata directory (modern name for the deprecated dfs.name.dir). -->
    <name>dfs.namenode.name.dir</name>
    <value>/data/hadoop-2.9.2/name</value>
  </property>
  <property>
    <!-- DataNode block storage directory. The original used "hadoop.data.dir",
         which is not a recognized property — DataNodes would silently fall back
         to ${hadoop.tmp.dir}/dfs/data. -->
    <name>dfs.datanode.data.dir</name>
    <value>/data/hadoop-2.9.2/data</value>
  </property>
  <property>
    <!-- Bind the NameNode RPC service on all interfaces so remote PXF hosts can reach it. -->
    <name>dfs.namenode.rpc-bind-host</name>
    <value>0.0.0.0</value>
  </property>
</configuration>
vim $HADOOP_CONF_DIR/mapred-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<!-- Run MapReduce jobs on YARN instead of the local job runner. -->
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
</configuration>
vim $HADOOP_CONF_DIR/yarn-site.xml
<?xml version="1.0"?>
<configuration>
<!-- Site specific YARN configuration properties -->
<property>
<!-- Host running the YARN ResourceManager. -->
<name>yarn.resourcemanager.hostname</name>
<value>matrixdb01</value>
</property>
<property>
<!-- Auxiliary shuffle service NodeManagers need for MapReduce. -->
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
</configuration>
# List every worker host (DataNode/NodeManager), one hostname per line.
vim $HADOOP_CONF_DIR/slaves
matrixdb01
matrixdb02
matrixdb03
# Copy the fully-configured installation to the other cluster nodes.
scp -r /data/hadoop-2.9.2 matrixdb02:/data/
scp -r /data/hadoop-2.9.2 matrixdb03:/data/
[mxadmin@matrixdb01 hadoop]$ start-all.sh
This script is Deprecated. Instead use start-dfs.sh and start-yarn.sh
Starting namenodes on [matrixdb01]
matrixdb01: starting namenode, logging to /data/hadoop-2.9.2/logs/hadoop-mxadmin-namenode-matrixdb01.out
matrixdb03: starting datanode, logging to /data/hadoop-2.9.2/logs/hadoop-mxadmin-datanode-matrixdb03.out
matrixdb01: starting datanode, logging to /data/hadoop-2.9.2/logs/hadoop-mxadmin-datanode-matrixdb01.out
matrixdb02: starting datanode, logging to /data/hadoop-2.9.2/logs/hadoop-mxadmin-datanode-matrixdb02.out
Starting secondary namenodes [0.0.0.0]
0.0.0.0: starting secondarynamenode, logging to /data/hadoop-2.9.2/logs/hadoop-mxadmin-secondarynamenode-matrixdb01.out
starting yarn daemons
resourcemanager running as process 10127. Stop it first.
matrixdb03: starting nodemanager, logging to /data/hadoop-2.9.2/logs/yarn-mxadmin-nodemanager-matrixdb03.out
matrixdb02: starting nodemanager, logging to /data/hadoop-2.9.2/logs/yarn-mxadmin-nodemanager-matrixdb02.out
matrixdb01: starting nodemanager, logging to /data/hadoop-2.9.2/logs/yarn-mxadmin-nodemanager-matrixdb01.out
[mxadmin@matrixdb01 hadoop]$ jps
11889 NameNode
13010 Jps
12484 NodeManager
12247 SecondaryNameNode
12058 DataNode
10127 ResourceManager
创建文件夹:
[mxadmin@matrixdb01 hadoop]$ hdfs dfs -mkdir /greenplum
[mxadmin@matrixdb01 data]$ hdfs dfs -mkdir /greenplum/pxf_examples
[mxadmin@matrixdb01 data]$ hdfs dfs -ls /
drwxr-xr-x - mxadmin supergroup 0 2020-12-30 11:14 /greenplum
[mxadmin@matrixdb01 data]$ hdfs dfs -ls /greenplum/
drwxr-xr-x - mxadmin supergroup 0 2020-12-30 11:14 /greenplum/pxf_examples
[mxadmin@matrixdb01 data]$ echo 'Prague,Jan,101,4875.33
Rome,Mar,87,1557.39
Bangalore,May,317,8936.99
Beijing,Jul,411,11600.67' > pxf_hdfs_simple.txt
hdfs dfs -put /data/pxf_hdfs_simple.txt /greenplum/pxf_examples/
[mxadmin@matrixdb01 data]$ hdfs dfs -cat /greenplum/pxf_examples/pxf_hdfs_simple.txt
Prague,Jan,101,4875.33
Rome,Mar,87,1557.39
Bangalore,May,317,8936.99
Beijing,Jul,411,11600.67
PXF 默认有一个 Default Server,存放在 $PXF_CONF/servers/default
# Create a named PXF server configuration and seed it with the Hadoop
# client config files PXF needs to reach HDFS.
mkdir $PXF_CONF/servers/single_hdfs
cp $HADOOP_CONF_DIR/core-site.xml $PXF_CONF/servers/single_hdfs/
cp $HADOOP_CONF_DIR/hdfs-site.xml $PXF_CONF/servers/single_hdfs/
# Replicate the server config to the same absolute path on every segment host.
# (The original "matrixdb02:pwd" had its backticks stripped during HTML
# extraction and would copy to a file literally named "pwd".)
scp -r single_hdfs matrixdb02:"$(pwd)"/
scp -r single_hdfs matrixdb03:"$(pwd)"/
createdb pxfhdfs
pxfhdfs=# CREATE EXTENSION pxf_fdw ;
-- Foreign server backed by the PXF server config directory named 'single_hdfs'.
CREATE SERVER hdfs_svr FOREIGN DATA WRAPPER hdfs_pxf_fdw OPTIONS ( config 'single_hdfs' );
-- Map the mxadmin database role to the server (no remote credentials required here).
CREATE USER MAPPING FOR mxadmin SERVER hdfs_svr;
-- Columns must match the comma-delimited field order of pxf_hdfs_simple.txt.
CREATE FOREIGN TABLE pxf_hdfs_table (location text, month text,
num_orders int, total_sales float8) SERVER hdfs_svr OPTIONS ( resource '/greenplum/pxf_examples/pxf_hdfs_simple.txt', format 'text',
delimiter ',');
pxfhdfs=# SELECT * FROM pxf_hdfs_table ;
location | month | num_orders | total_sales
-----------+-------+------------+-------------
Prague | Jan | 101 | 4875.33
Rome | Mar | 87 | 1557.39
Bangalore | May | 317 | 8936.99
Beijing | Jul | 411 | 11600.67
(4 rows)
确保 HDFS 目录已创建
-- Writable example: 'resource' points at an HDFS *directory*; each INSERT
-- writes new delimited files under it.
CREATE FOREIGN TABLE pxf_hdfsdir_table (location text, month
text, num_orders int, total_sales float8) SERVER hdfs_svr OPTIONS (resource '/greenplum/pxf_dir_examples', format 'text', delimiter ',');
pxfhdfs=# INSERT INTO pxf_hdfsdir_table SELECT * FROM pxf_hdfs_table ;
INSERT 0 4
pxfhdfs=# SELECT COUNT(*) FROM pxf_hdfsdir_table ;
count
-------
4
(1 row)