5.3 HBase 的安装与Shell操作
1 HBase的安装
-
下载安装包 http://archive.cloudera.com/cdh5/cdh/5/hbase-1.2.0-cdh5.7.0.tar.gz
-
配置伪分布式环境
-
环境变量配置
export HBASE_HOME=/root/bigdata/hbase
export PATH=$HBASE_HOME/bin:$PATH
-
配置hbase-env.sh
export JAVA_HOME=/root/bigdata/jdk
export HBASE_MANAGES_ZK=false  # 使用HBase自带的ZooKeeper时设为true,使用独立部署的ZooKeeper时设为false
-
配置hbase-site.xml
<configuration>
  <property>
    <name>hbase.rootdir</name>
    <value>hdfs://hadoop-master:9000/hbase</value>
  </property>
  <property>
    <name>hbase.cluster.distributed</name>
    <value>true</value>
  </property>
  <property>
    <name>hbase.master</name>
    <value>hadoop-master:60000</value>
  </property>
  <property>
    <name>hbase.zookeeper.quorum</name>
    <value>hadoop-master:2181</value>
  </property>
  <property>
    <name>hbase.zookeeper.property.clientPort</name>
    <value>2181</value>
  </property>
  <property>
    <name>hbase.zookeeper.property.dataDir</name>
    <value>/root/bigdata/zookeeper-3.4.14/dataDir</value>
  </property>
  <property>
    <name>hbase.unsafe.stream.capability.enforce</name>
    <value>false</value>
  </property>
</configuration>
-
启动hbase(启动hbase之前要保证hadoop集群已经启动;由于HBASE_MANAGES_ZK=false,还需要先启动独立部署的ZooKeeper)
$HBASE_HOME/bin/start-hbase.sh
-
输入hbase shell(进入shell命令行)
-
2 HBase shell
- HBase DDL 和 DML 命令
名称 | 命令表达式 |
---|---|
创建表 | create '表名', '列族名1','列族名2','列族名n' |
添加记录 | put '表名','行名','列族名:列名','值' |
查看记录 | get '表名','行名' |
查看表中的记录总数 | count '表名' |
删除记录 | delete '表名', '行名','列族名:列名' |
删除一张表 | 第一步 disable '表名' 第二步 drop '表名' |
查看所有记录 | scan '表名' |
查看指定表指定列所有数据 | scan '表名' ,{COLUMNS=>'列族名:列名'} |
更新记录 | 重写覆盖:对同一行、同一列再次执行 put,新值成为最新版本 |
- 连接集群
hbase shell
- 创建表
create 'user','base_info'
- 删除表
disable 'user'
drop 'user'
- 创建名称空间
create_namespace 'test'
- 展示现有名称空间
list_namespace
- 创建表的时候添加namespace
create 'test:user','base_info'
- 显示某个名称空间下有哪些表
list_namespace_tables 'test'
-
插入数据
put '表名','rowkey的值','列族:列标识符','值'
put 'user','rowkey_10','base_info:username','Tom'
put 'user','rowkey_10','base_info:birthday','2014-07-10'
put 'user','rowkey_10','base_info:sex','1'
put 'user','rowkey_10','base_info:address','Tokyo'
put 'user','rowkey_16','base_info:username','Mike'
put 'user','rowkey_16','base_info:birthday','2014-07-10'
put 'user','rowkey_16','base_info:sex','1'
put 'user','rowkey_16','base_info:address','beijing'
put 'user','rowkey_22','base_info:username','Jerry'
put 'user','rowkey_22','base_info:birthday','2014-07-10'
put 'user','rowkey_22','base_info:sex','1'
put 'user','rowkey_22','base_info:address','Newyork'
put 'user','rowkey_24','base_info:username','Nico'
put 'user','rowkey_24','base_info:birthday','2014-07-10'
put 'user','rowkey_24','base_info:sex','1'
put 'user','rowkey_24','base_info:address','shanghai'
put 'user','rowkey_25','base_info:username','Rose'
put 'user','rowkey_25','base_info:birthday','2014-07-10'
put 'user','rowkey_25','base_info:sex','1'
put 'user','rowkey_25','base_info:address','Soul'
- 查询表中的所有数据
scan 'user'
#HBase中一般存储数据量都很大 很少使用全表查询 scan会加上一些条件限制
- Scan查询中添加限制条件
scan '名称空间:表名', {
COLUMNS => ['列族名1', '列族名2'], LIMIT => 10, STARTROW => '起始的rowkey'} # 通过COLUMNS LIMIT STARTROW 等条件缩小查询范围
#LIMIT=>2 限制输出两行
scan 'user' ,{
COLUMNS =>['base_info'],LIMIT=>2}
## 返回结果
ROW COLUMN+CELL
rowkey_10 column=base_info:address, timestamp=1558323139732, value=Tokyo
rowkey_10 column=base_info:birthday, timestamp=1558323139636, value=2014-07-10
rowkey_10 column=base_info:sex, timestamp=1558323139678, value=1
rowkey_10 column=base_info:username, timestamp=1558323918953, value=Tom4
rowkey_16 column=base_info:address, timestamp=1558323139963, value=beijing
rowkey_16 column=base_info:birthday, timestamp=1558323139866, value=2014-07-10
rowkey_16 column=base_info:sex, timestamp=1558323139907, value=1
#STARTROW 限制起始的Rowkey
scan 'user' ,{
COLUMNS =>['base_info'],LIMIT=>2,STARTROW=>'rowkey_16'}
#返回结果:
ROW COLUMN+CELL
rowkey_16 column=base_info:address, timestamp=1558323139963, value=beijing
rowkey_16 column=base_info:birthday, timestamp=1558323139866, value=2014-07-10
rowkey_16 column=base_info:sex, timestamp=1558323139907, value=1
rowkey_22 column=base_info:address, timestamp=1558323140188, value=Newyork
rowkey_22 column=base_info:birthday, timestamp=1558323140107, value=2014-07-10
rowkey_22 column=base_info:sex, timestamp=1558323140143, value=1
rowkey_22 column=base_info:username, timestamp=1558323140036, value=Jerry
-
scan查询添加过滤器
- ROWPREFIXFILTER rowkey 前缀过滤器
scan 'user', { ROWPREFIXFILTER=>'rowkey_22'}
#显示结果
ROW COLUMN+CELL
rowkey_22 column=base_info:address, timestamp=1558323140188, value=Newyork
rowkey_22 column=base_info:birthday, timestamp=1558323140107, value=2014-07-10
rowkey_22 column=base_info:sex, timestamp=1558323140143, value=1
rowkey_22 column=base_info:username, timestamp=1558323140036, value=Jerry
1 row(s)
Took 0.0120 seconds
-
查询某个rowkey的数据
get 'user','rowkey_16'
- 查询某个列族(或列族中某个列)的数据
get 'user','rowkey_16','base_info'
get 'user','rowkey_16','base_info:username'
get 'user', 'rowkey_16', {
COLUMN => ['base_info:username','base_info:sex']}
- 删除表中的数据
delete 'user', 'rowkey_16', 'base_info:username'
- 清空数据
truncate 'user'
- 操作列族(第一条命令添加列族 f2,第二条命令删除列族 f2)
alter 'user', NAME => 'f2'
alter 'user', 'delete' => 'f2'
-
HBase 是追加型数据库,会保留同一单元格的多个版本数据
desc 'user'
Table user is ENABLED
user
COLUMN FAMILIES DESCRIPTION
{NAME => 'base_info', VERSIONS => '1', EVICT_BLOCKS_ON_CLOSE => 'false', NEW_VERSION_B… HE_DATA_ON_WRITE => 'false', DATA_BLOCK_ENCODING => 'NONE', TTL => 'FOREVER', MI… ER => 'NONE', CACHE_INDEX_ON_WRITE => 'false', IN_MEMORY => 'false', CACHE_BLOOM… se', COMPRESSION => 'NONE', BLOCKCACHE => 'false', BLOCKSIZE => '65536'}
#(以上输出因终端宽度被截断,"…" 处省略了部分属性)
- VERSIONS=>'1' 说明最多可以显示一个版本
- 修改数据
put 'user','rowkey_10','base_info:username','Tom'
- 指定显示多个版本
get 'user','rowkey_10',{ COLUMN=>'base_info:username',VERSIONS=>2}
- 修改可以显示的版本数量
alter 'user',NAME=>'base_info',VERSIONS=>10
-
通过时间戳查询
- 通过TIMERANGE 指定时间范围
scan 'user',{ COLUMNS => 'base_info', TIMERANGE => [1558323139732, 1558323139866]}
get 'user','rowkey_10',{ COLUMN=>'base_info:username',VERSIONS=>2,TIMERANGE => [1558323904130, 1558323918954]}
- 通过时间戳过滤器 指定具体时间戳的值
scan 'user',{ FILTER => 'TimestampsFilter (1558323139732, 1558323139866)'}
get 'user','rowkey_10',{ COLUMN=>'base_info:username',VERSIONS=>2,FILTER => 'TimestampsFilter (1558323904130, 1558323918954)'}
- 获取最近多个版本的数据
get 'user','rowkey_10',{ COLUMN=>'base_info:username',VERSIONS=>10}
#返回结果如下
COLUMN CELL
base_info:username timestamp=1558323918953, value=Tom4
base_info:username timestamp=1558323904133, value=Tom3
base_info:username timestamp=1558323758696, value=Tom2
base_info:username timestamp=1558323139575, value=Tom
- 通过指定时间戳获取不同版本的数据
get 'user','rowkey_10',{ COLUMN=>'base_info:username',TIMESTAMP=>1558323904133}
#返回结果如下
COLUMN CELL
base_info:username timestamp=1558323904133, value=Tom3
get 'user','rowkey_10',{ COLUMN=>'base_info:username',TIMESTAMP=>1558323918953}
#返回结果如下
COLUMN CELL
base_info:username timestamp=1558323918953, value=Tom4
-
命令表