create table myhive.test_map(
id int, name string, members map<string,string>, age int
)
row format delimited
fields terminated by ','
COLLECTION ITEMS TERMINATED BY '#'
MAP KEYS TERMINATED BY ':';
常用查询语句
# 查询全部
select * from myhive.test_map;
# 查询father、mother这两个map的key
select id, name, members['father'] father, members['mother'] mother, age from myhive.test_map;
# 查询全部map的key,使用map_keys函数,结果是array类型
select id, name, map_keys(members) as relation from myhive.test_map;
# 查询全部map的value,使用map_values函数,结果是array类型
select id, name, map_values(members) as relation from myhive.test_map;
# 查询map类型的KV对数量
select id,name,size(members) num from myhive.test_map;
# 查询map的key中有brother的数据
select * from myhive.test_map where array_contains(map_keys(members), 'brother');
create table myhive.test_struct(
id string, info struct<name:string, age:int>
)
row format delimited
fields terminated by '#'
COLLECTION ITEMS TERMINATED BY ':';
常用查询
select * from hive_struct;
# 直接使用列名.子列名 即可从struct中取出子列查询
select ip, info.name from hive_struct;
11、数据查询
查询语句基本语法如下(跟普通数据库sql查询基本一样): SELECT [ALL | DISTINCT]select_expr, select_expr, ... FROM table_reference [WHERE where_condition] [GROUP BYcol_list] [HAVING where_condition] [ORDER BYcol_list] [CLUSTER BYcol_list | [DISTRIBUTE BY col_list] [SORT BY col_list] ] [LIMIT number]
# 排序查询
SELECT * FROM orders WHERE useraddress like '%广东%' ORDER BY totalmoney DESC LIMIT 1;
# 分组查询
SELECT userid, AVG(totalmoney) AS avg_money FROM itheima.orders GROUP BY userid HAVING avg_money > 10000;
# Join连接查询
SELECT o.orderid, o.userid, u.username, o.totalmoney, o.useraddress, o.paytime FROM itheima.orders o LEFT JOIN itheima.users u ON o.userid = u.userid;
# RLIKE查询(支持正则)
SELECT * FROM itheima.orders WHERE useraddress RLIKE '.*广东.*';
SELECT * FROM itheima.orders WHERE userphone RLIKEE '188\\S{4}0\\S{3}';
# UNION查询
SELECT t_id FROM itheima.course WHERE t_id = '周杰轮'
UNION ALL
SELECT t_id FROM itheima.course WHERE t_id = '王力鸿'
12、数据抽样
基于随机分桶抽样语法: SELECT ... FROM tbl TABLESAMPLE(BUCKET x OUT OF y ON(colname | rand()))
SELECT * FROM orders TABLESAMPLE(BUCKET 1 OUT OF 10 ON rand());
基于数据块抽样语法(每一次抽样的结果都一致): SELECT ... FROM tbl TABLESAMPLE(num ROWS | num PERCENT | num(K|M|G));