# 查询在2021年各用户作答试卷的日期、及每天作答的试卷数目
select uid, date_format(start_time, '%Y%m%d') start_date, count(start_time) fin_num
from exam_record
where year(start_time)='2021'
group by uid, date_format(start_time, '%Y%m%d')
order by uid,start_date

# 查询在2021年各用户作答试卷的日期及其下一次作答日期、还有每天作答的试卷数目
select uid, start_date, lead(start_date,1)over(partition by uid order by start_date) next_date, fin_num
from (
    select uid, date_format(start_time, '%Y%m%d') start_date, count(start_time) fin_num
    from exam_record
    where year(start_time)='2021'
    group by uid, date_format(start_time, '%Y%m%d')
    order by uid,start_date
) k1
# -----------完整代码--------------
with k2 as
(
    select uid, start_date, lead(start_date,1)over(partition by uid order by start_date) next_date, fin_num
    from (
        select uid, date_format(start_time, '%Y%m%d') start_date, count(start_time) fin_num
        from exam_record
        where year(start_time)='2021'
        group by uid, date_format(start_time, '%Y%m%d')
        order by uid,start_date
    ) k1
)
select uid, max(datediff(next_date, start_date)+1) days_window, 
round(sum(fin_num)/(datediff(max(start_date),min(start_date))+1)*max(datediff(next_date, start_date)+1),2) avg_exam_cnt
from k2
group by uid
having max(start_date)-min(start_date)>0
order by days_window desc, avg_exam_cnt desc;

坑1:使用start_time作试卷作答记录计数,未提交试卷也算作答试卷了;

坑2:不要用max(start_date)-min(start_date)去计算日期差,会有小误差,应当使用datediff()函数来计算两者之间日期差;

【积累】

为了简化代码,把常需使用的子查询用with 表名 as 储存为临时表,方便直接调用;

group by即分组的基础上,可以用聚合函数进行简单的数字运算,然后将其查询为新字段值;

lead(字段名,n)over()窗口函数可以用来生成新列以装入后移n行的值;