参照别人的的,想要分析自己的 错误; 重新编写之后发现错误在于 自己对于GROUP BY 理解还是不够到位,(老生常谈 SELCET 所有选项必须依赖)
# WITH t_exam_record_stat as #(别人的思路改写)
# (
# SELECT uid,
# count(start_time) as exam_cnt, -- 此人作答的总试卷数
# DATEDIFF(max(start_time), min(start_time))+1 as diff_days, -- 最早一次作答和最晚一次作答的相差天数
# max(DATEDIFF(next_start_time, start_time))+1 as days_window -- 两次作答的最大时间窗
# FROM (
# SELECT uid, exam_id, start_time,
# lead(start_time) over(partition by uid ORDER BY start_time) as next_start_time -- 将连续的下次作答时间拼上
# FROM exam_record
# WHERE year(start_time)=2021
# ) as t_exam_record_lead
# GROUP BY uid
# )
WITH t_exam_record_stat as # (自己的查漏补缺)
(
SELECT uid
,MAX(TIMESTAMPDIFF(DAY,last_day,start_day)+1) as days_window -- 两次作答的最大时间窗
,count(start_day) as exam_cnt -- 此人作答的总试卷数
,DATEDIFF(max(start_day), min(start_day))+1 as diff_days -- 最早一次作答和最晚一次作答的相差天数
FROM
(
SELECT uid, date(start_time) as start_day
,LAG(date(start_time),1) over(PARTITION by uid ORDER BY date(start_time) ) as last_day
# 连续两次作答的时间
,start_time
FROM exam_record
WHERE YEAR(start_time) =2021
) a
GROUP BY uid
)
# SELECT *
# FROM t_exam_record_stat
SELECT uid, days_window, round(days_window*exam_cnt/diff_days, 2) as avg_exam_cnt
FROM t_exam_record_stat
WHERE diff_days>1
ORDER BY days_window DESC, avg_exam_cnt DESC
自己的,问题是在提交后有一个 测试没有通过,有一条的avg_exam_cnt 和实际有差异 (问题已经通过改写解决)
#
# WITH t1 AS # 要做起始在这个一步就把需要的数据都补齐,以免后面再需要
#(
# SELECT * ,TIMESTAMPDIFF(DAY,last_day,start_day)+1 as days_window
# FROM
# (
# SELECT uid, date(start_time) as start_day
# ,LAG(date(start_time),1) over(PARTITION by uid ORDER BY date(start_time) ) as last_day
# # 连续两次作答的时间
# ,start_time
# FROM exam_record
# WHERE YEAR(start_time) =2021
# ) a
# )
# SELECT *
# FROM
# (
# SELECT uid,MAX(days_window) as days_window
# ,ROUND(count(start_time)/(MAX(start_day)-MIN(start_day)+1) * MAX(days_window),2) as avg_exam_cnt
# FROM t1
# GROUP BY uid
# ) b
# WHERE days_window>=2
# ORDER BY days_window DESC,avg_exam_cnt DESC;
# 理解错题意 找出来时间创建口最大的那个用户(唯一一个人)的信心,题目中要求 窗口>=2即可
# SELECT uid,days_window,avg_exam_cnt
# FROM
# (
# SELECT *
# ,rank() over(ORDER BY days_window DESC) as rk
# FROM
# (
# SELECT uid,MAX(days_window) as days_window
# ,ROUND(count(1)/(MAX(start_day)-MIN(start_day)+1) * MAX(days_window),2) as avg_exam_cnt
# FROM t1
# GROUP BY uid
# ) b
# )c
# WHERE rk = 1