第十六题:同时在线问题
create table sql1_16 (
id int,
stt string,
edt string
)
row format delimited
fields terminated by ',';
load data local inpath '/home/homedata/sql_1/sql1_16.txt' into table sql1_16;
id stt edt
1001,2021-06-14 12:12:12,2021-06-14 18:12:12
1003,2021-06-14 13:12:12,2021-06-14 16:12:12
1004,2021-06-14 13:15:12,2021-06-14 20:12:12
1002,2021-06-14 15:12:12,2021-06-14 16:12:12
1005,2021-06-14 15:18:12,2021-06-14 20:12:12
1001,2021-06-14 20:12:12,2021-06-14 23:12:12
1006,2021-06-14 21:12:12,2021-06-14 23:15:12
1007,2021-06-14 22:12:12,2021-06-14 23:10:12
计算出平台最高峰同时在线的主播人数。将开始时间和结束时间分开,变成两条数据,进行累加即可。
-- 计算出平台最高峰同时在线的主播人数。
with t1 as (
select id,stt,1 num from sql1_16
union all
select id,edt,-1 num from sql1_16
),t2 as (
select *,sum(num) over ( order by stt) n from t1
)
select max(n) from t2;
第十七题:车辆每天上传N条数据,要求获得每日最后一条数据,请用至少三种方式写出来,并说明三种方式的区别
create table sql1_17 (
vin string,
pkgts string,
value string
)
row format delimited
fields terminated by ',';
load data local inpath '/home/homedata/sql_1/sql1_17.txt' into table sql1_17;
vin pkgts value
1111111111111111,2022-06-01 16:54:57,60.0
1111111111111111,2022-06-01 07:40:27,6.0
2222222222222222,2022-06-01 19:40:27,7.0
1111111111111111,2022-06-26 12:16:45,64.0
2222222222222222,2022-06-05 23:00:42,18.0
2222222222222222,2022-06-05 09:10:17,44.0
2222222222222222,2022-06-05 13:00:25,19.0
2222222222222222,2022-06-13 16:14:04,43.0
2222222222222222,2022-06-13 07:21:08,40.0
-- 获得每日最后一条数据,请用至少三种方式写出来,并说明三种方式的区别
方式一 group by + exists
with t1 as
( select max(pkgts) max from sql1_17 group by substr(pkgts,1,10) )
select * from sql1_17 where exists (select * from t1 where pkgts = t1.max);
方式二: 窗口函数row_number
with t1 as ( select *,row_number() over (partition by substr(pkgts,1,10) order by pkgts desc ) num from sql1_17 )
select vin, pkgts, value from t1 where num =1;
方式三: 窗口函数first_value
with t1 as
( select *,first_value(pkgts) over (partition by substr(pkgts,1,10) order by pkgts desc)pkgts1 from sql1_17 )
select vin, pkgts, value from t1 where pkgts = pkgts1;
第十八题:打折日期交叉问题
create table sql1_18(
brand string,
stt string,
edt string
)
row format delimited
fields terminated by ",";
load data local inpath "/home/homedata/sql_1/sql1_18.txt" into table sql1_18;
brand stt edt
oppo,2021-06-05,2021-06-09
oppo,2021-06-11,2021-06-21
vivo,2021-06-05,2021-06-15
vivo,2021-06-09,2021-06-21
redmi,2021-06-05,2021-06-21
redmi,2021-06-09,2021-06-15
redmi,2021-06-17,2021-06-26
huawei,2021-06-05,2021-06-26
huawei,2021-06-09,2021-06-15
huawei,2021-06-17,2021-06-21
这道题目主要的难点就是在于日期的交叉,现在有两个思路,第一种是(F-A+1)+(H-G+1),第二种表示是(B-A)+(C-B)+(D-C)+(E-D)+(F-E)+1+(H-G)+1
不论是哪一个思路都需要将开始日期和结束日期分开,然后进行汇总,如下展示:
with t1 as (
select brand,stt,1 status from sql1_18
union all
select brand,edt,-1 status from sql1_18
)select *,sum(status) over (partition by brand order by stt ) gs from t1;
第一种思路的解法:
首先需要获取出每一段的起始和结束时间,将数据中的status=gs=1 和 gs=0的数据筛选出来。同时将起始时间和结束时间相结合
with t1 as (
select brand,stt,1 status from sql1_18
union all
select brand,edt,-1 status from sql1_18
),t2 as (
select *,sum(status) over (partition by brand order by stt ) gs from t1
)
select brand,stt,`if`(status = 1,lead(stt,1,null) over (partition by brand order by stt),null) edt from t2 where status = gs or gs = 0;
然后将其中有null的数据排除,进行日期的相减即可
with t1 as (
select brand,stt,1 status from sql1_18
union all
select brand,edt,-1 status from sql1_18
),t2 as (
select *,sum(status) over (partition by brand order by stt ) gs from t1
),t3 as (
select brand,stt,`if`(status = 1,lead(stt,1,null) over (partition by brand order by stt),null) edt from t2 where status = gs or gs = 0
)
select brand,sum(datediff(edt,stt)+1) day from t3 where edt is not null group by brand;
第二种思路的解法:
思路:
SQL打折日期交叉问题_sql题各品牌打折总天数-CSDN博客
with t1 as (
select brand,stt,1 status from sql1_18
union all
select brand,edt,-1 status from sql1_18
),t2 as (
select *,sum(status) over (partition by brand order by stt ) gs from t1
),t3 as (
select *,`if`(gs!=0,lead(stt,1,stt) over (partition by brand order by stt),stt ) ett from t2
),t4 as (
select *,datediff(ett,stt) days from t3
)
select brand,sum(days)+count(`if`(gs=0,1,null)) tds from t4 group by brand;
第十九题:删除重复数据,name列相同的为重复数据
CREATE TABLE sql1_19 (
`id` int,
`name` string
) ;
INSERT INTO sql1_19 VALUES ('1', 'A')
,('2', 'A')
,('3', 'A')
,('4', 'B')
,('5', 'B')
,('6', 'C')
,('7', 'B')
,('8', 'B')
,('9', 'B')
,('10', 'E')
,('11', 'E')
,('12', 'E');
select * from sql1_19;
删除name相同的数据,重复数据只保留一条id最大的
select max(id) id ,name from sql1_19 group by name;
第二十题:差值分组问题
create table sql1_20(
id int,
ts string
)
row format delimited
fields terminated by ',';
load data local inpath '/home/homedata/sql_1/sql1_20.txt' into table sql1_20;
select * from sql1_20;
id ts(秒)
1001,17523641234
1001,17523641256
1002,17523641278
1001,17523641334
1002,17523641434
1001,17523641534
1001,17523641544
1002,17523641634
1001,17523641638
1001,17523641654
每两条数据间隔60秒以内则为同一组——sum + over(order)时窗口大小会逐渐变大,让每一组开始的数据为1 ,同组为0 ,用1隔开所有的组。
with t1 as (
SELECT id ,ts ,ts-lag(ts,1,ts) OVER (PARTITION BY id order BY ts ) seconds FROM sql1_20
) ,t2 as (
select id, ts,`if`(seconds <60 ,0,1) groupa from t1
)
select id,ts,sum(groupa)over (partition by id order by ts)+1 groups from t2;