一、连续登录问题
问题:1)、每个用户连续登录最大天数
2)、连续登录大于三天的用户数
分析:本质都是计算用户连续登录天数
方案一:利用排序窗口
select a.user_id
,a.date_rslt
,count(1) as cnt
from (
select
t.user_id
,t.login_time
,date_sub(login_time, num) as date_rslt
from (
select
user_id
,login_time
,row_number() over(partition by user_id order by login_time) as num
from login_log
) t
) a
group by a.user_id,a.date_rslt
方案二、增量加全量
连续访问天数v_days(最新flag值为1,则v_days累加,否则为0)
历史最大访问天数max_days (从max_days、v_days中取最大值)
select
coaleasce(h.user_id,i.user_id) as user_id,
if(i.user_id is not null,v_days+1,0) as v_days,
greatest(max_days,if(i.user_id is not null,v_days+1,0)) as max_days
from
history_ds h
full join
log_time i
扩展1:连续登录,中间间隔1天也算
select user_id
,group_id
,count(login_date) as continuous_login_days
from (
select
user_id
,login_date
,sum(if(date_diff>1,1,0)) over(partition by user_id order by login_date rows between unboundedpreceding and current row) as group_id
from (
select
user_id
,login_date
,datediff(login_date,last_login_date) as date_diff
from (
select
user_id
,login_date
,lag(login_date,1,'1970-01-01') over(partition by user_id order by login_date) as last_login_date
from test_login
)t1
)t2
)t3
group by user_id
,group_id;
扩展2:断点排序
连续日期的数据对应的值发生变化,重新排序
select
a,
b,
row_number() over( partition by b,a_diff order by a) as c
from
(
select
a,
b,
a-num as a_diff
from
(
select
a,
b,
row_number() over( partition by b order by a ) as num
from t1
)tmp1
)tmp2
order by a;
大数据面试题之SQL题_大数据sql面试题-CSDN博客