首先是建一个表存每个用户的最大时间窗。这里用到的方法是用窗口函数lead得到每个start_time下一个时间,然后用datediff得到两个时间的天数差,这里记得要给结果+1.然后用max得到最大时间窗。
with temp as
(select dc.uid, max(dc.diff) as days_window from
(select uid,
start_time,
datediff(lead(start_time) over (partition by uid order by start_time), start_time)+1 as diff
from exam_record
where year(start_time)=2021) as dc
group by dc.uid
having count(distinct date_format(start_time, "%Y-%m-%d"))>=2),
然后再开一个新表temp2存每日平均。
temp2 as
(select uid,
count(start_time) / (datediff(max(start_time), min(start_time))+1) as day_avg
from exam_record
where year(start_time)=2021
group by uid)
最后把两个表连接再简单计算一下就OK了。
select uid, days_window,
round(days_window * day_avg,2) as avg_exam_cnt
from temp left join temp2 using(uid)
order by days_window desc, avg_exam_cnt desc
以下是完整版
with temp as
(select dc.uid, max(dc.diff) as days_window from
(select uid,
start_time,
datediff(lead(start_time) over (partition by uid order by start_time), start_time)+1 as diff
from exam_record
where year(start_time)=2021) as dc
group by dc.uid
having count(distinct date_format(start_time, "%Y-%m-%d"))>=2),
temp2 as
(select uid,
count(start_time) / (datediff(max(start_time), min(start_time))+1) as day_avg
from exam_record
where year(start_time)=2021
group by uid)
select uid, days_window,
round(days_window * day_avg,2) as avg_exam_cnt
from temp left join temp2 using(uid)
order by days_window desc, avg_exam_cnt desc