题目拆解为两个部分:
- 哪些用户(6/7级)
- 统计哪些维度的数据(总活跃月份数、2021年活跃天数、2021年试卷作答活跃天数、2021年答题活跃天数)
两个问题分别创建临时表:
# 定位6/7级用户
WITH cnt AS (
SELECT
uid
FROM
user_info
WHERE
level in(6, 7)
),
# 利用UNION ALL联合“试卷表”和“题目表”
# 统计维度均与日期相关,可以通过聚合日期计数
aaa AS (
SELECT
uid,
# “试卷表”和“题目表” 的id分别为9和8开头,可以用来识别来源,解决“试卷作答活跃天数”和
#“答题活跃天数”
exam_id did,
# 得到年月,解决“总活跃月份数”问题
date_format(submit_time, '%Y%m') y_m,
# year=2021,所以将不是2021年的日期记作null,count不计入
case
when year(submit_time) = '2021' then date_format(submit_time, '%Y%m%d')
else null
end as y_d,
score
FROM
exam_record
UNION ALL
SELECT
uid,
question_id did,
date_format(submit_time, '%Y%m') y_m,
case
when year(submit_time) = '2021' then date_format(submit_time, '%Y%m%d')
else null
end as y_d,
score
FROM
practice_record
)
最后一步:
SELECT
c.uid,
count(distinct a.y_m) act_month_total,
# 临时表中非2021年日期是null,不参与计数
count(distinct a.y_d) act_days_2021,
# 同是利用null不参与count计数的方法
count(distinct if(left(a.did, 1) = '9', a.y_d, null)) act_days_2021_exam,
count(distinct if(left(a.did, 1) = '8', a.y_d, null)) act_days_2021_question
FROM
cnt c
LEFT JOIN aaa a USING(uid)
LEFT JOIN exam_record e USING(uid)
LEFT JOIN practice_record p USING(uid)
GROUP BY
c.uid
ORDER BY
act_month_total DESC,
act_days_2021 DESC