- 分组:按用户分组
- 范围:2021年至少有两天作答过试卷
-
where year(start_time) = '2021'
-
group by uid
-
having max_diff_days > 1
- 计算最大时间窗days_window:
-
max(datediff(next_start_time, start_time))+1
- 根据上述式子,需要找出下次作答时间next_start_time
-
lead(start_time) over (partition by uid order by start_time) as next_start_time
- 计算平均作答试卷套数
-
round(exam_cnt / max_diff_days * days_window, 2) as avg_exam_cnt
- 根据以上式子需统计此用户作答的总试卷数
-
count(start_time) as exam_cnt
- 还有计算该用户最早最晚作答相差天数:
-
datediff(max(start_time), min(start_time))+1 as max_diff_days
整理后完整代码如下:
SELECT uid, days_window,
ROUND(exam_cnt / max_diff_days * days_window, 2) AS avg_exam_cnt
FROM (
SELECT uid,
MAX(DATEDIFF(next_start_time, start_time)) + 1 AS days_window,
COUNT(start_time) AS exam_cnt,
DATEDIFF(MAX(start_time), MIN(start_time)) + 1 AS max_diff_days
FROM (
SELECT uid, start_time,
LEAD(start_time) OVER (PARTITION BY uid ORDER BY start_time ASC) AS next_start_time
FROM exam_record
WHERE YEAR(start_time) = '2021'
) t1
GROUP BY uid
HAVING max_diff_days > 1
) t2
ORDER BY days_window DESC, avg_exam_cnt DESC