• student表

    idname
    1zhangsan
    2lisi
    3wangwu
  • teacher表

    idname
    1teacher_zhaoliu
    2teacher_wangqi

基本查询

全表查询

select * from student;
select * from teacher;

特定列查询

select id from student;
select name from teacher;

列别名

select id as a, name as b from student;

算数运算符:可以对结果集合进行算数运算以及位运算

select id, id + 1 from student;
select id, id * 2 from student;

op

逻辑运算符: and or not

select id from student where id = 1 and name = 'zhangsan';
select id from student where id = 1 or id = 2;
select id from student where not id = 4;

常用函数

select max(id) from student;  # 最大值
select min(id) from teacher;  # 最小值
select count(*) from student; # 计数
select avg(id) from student; # 平均

限制结果行数,设置偏移量

select * from student limit 2;
select * from student limit 2 offset 1;

Where 条件

常用where

select * from student where id < 3;
select * from student where id = 2;
select * from student where id > 1;

between

select * from student where id between 1 and 3; # 1, 2, 3

is null

select * from student where id is null;

in: 查询是in括号内的条目

select * from student where id in(1, 3); # 1, 3

Like 通配

% 表示0个或n个字符

select name from student where name like 'zhang%';

_ 表示通配一个字符

select name from student where name like 'zhangsa_';

RLike可以通过正则来匹配

# 查找姓名中有字母n的
select name from student where name rlike '[n]'; # zhangsan, wangwu

Group by 分组统计

select id, avg(id) from student group by id;

having: 只能用于group by分组统计语句。

select id, avg(id) from student group by id having id > 1;

Join 连接

内连接,

# 找出student表和teacher表中id相同的id与name
select s.id, s.name, t.id, t.name from student as s join teacher as t on s.id = t.id;

join

左外连接: 返回join左边表中符合where子句的所有记录

select s.id, s.name, t.id, t.name from student as s LEFT join teacher as t on s.id = t.id;

leftjoin

右外连接: 返回join右边表中符合where子句的所有记录

select s.id, s.name, t.id, t.name from student as s RIGHT join teacher as t on s.id = t.id;

rightjoin

全外连接: 返回所有表中符合where语句条件的所有记录,如果有字段无符合条件的值,则用NULL代替

select s.id, s.name, t.id, t.name from student as s FULL join teacher as t on s.id = t.id;

fulljoin

笛卡尔积

select s.id, s.name, t.id, t.name from student as s, teacher as t;

dkr 注:如果提示err 则需要关闭严格模式

set hive.strict.checks.cartesian.product=false;
set hive.mapred.mode=nonstrict;

排序

order by:对结果排序

select * from student order by name asc;      # 名称升序
select * from student order by id desc;       # id降序
select * from student order by name, id desc; # 多关键字降序

sort by:每个Reducer内部进行排序,但对最终结果集合来说不是排序

set mapreduce.job.reduces=2; # 设置两个reducer
# set mapreduce.job.reduces; # 查看reducer个数
select * from student sort by id desc;

sortby 可以看到结果集并非降序

distribute by: 类似MapReduce中的partition,进行分区,需结合sort by使用

cluster by: 当distribute by和sort by字段相同时,可以使用cluster by

导出表至HDFS

export table tmp to '/user/hive/exp/tmp';

从HDFS导入表

导入的表不能事先存在

import table ipt from '/user/hive/exp/tmp';
# import external table exipt from '/user/hive/exp/tmp'; # 导入进外部表
select * from ipt;

exptimpt

从HDFS导入分区

导出分区表

export table part to '/user/hive/exp/part';

导入分区

import table part1 partition(dt='2022-05-01') from '/user/hive/exp/part';

import table part2 partition(dt='2022-05-02') from '/user/hive/exp/part';

part

导出查询结果

将查询结果导出至本地

insert overwrite local directory '/root/exp' row format delimited fields terminated by '\t' select * from tmp;

local

将查询结果导出至HDFS(没有local)

insert overwrite directory '/user/hive/exp.txt' row format delimited fields terminated by '\t' select * from tmp;

将Hive Shell的查询结果保存到本地

hive -e 'select * from curd.tmp;' > /root/exp.txt

exp