题目拆解为两个部分:

  • 哪些用户(6/7级)
  • 统计哪些维度的数据(总活跃月份数、2021年活跃天数、2021年试卷作答活跃天数、2021年答题活跃天数)

两个问题分别创建临时表:

# 定位6/7级用户
WITH cnt AS (
  SELECT
    uid
  FROM
    user_info
  WHERE
    level in(6, 7)
),
# 利用UNION ALL联合“试卷表”和“题目表”
# 统计维度均与日期相关,可以通过聚合日期计数
aaa AS (
  SELECT
    uid,
    # “试卷表”和“题目表” 的id分别为9和8开头,可以用来识别来源,解决“试卷作答活跃天数”和
    #“答题活跃天数”
    exam_id did,
    # 得到年月,解决“总活跃月份数”问题
    date_format(submit_time, '%Y%m') y_m,
    # year=2021,所以将不是2021年的日期记作null,count不计入
    case
      when year(submit_time) = '2021' then date_format(submit_time, '%Y%m%d')
      else null
    end as y_d,
    score
  FROM
    exam_record
  UNION ALL
  SELECT
    uid,
    question_id did,
    date_format(submit_time, '%Y%m') y_m,
    case
      when year(submit_time) = '2021' then date_format(submit_time, '%Y%m%d')
      else null
    end as y_d,
    score
  FROM
    practice_record
)

最后一步:

SELECT
  c.uid,
  count(distinct a.y_m) act_month_total,
  # 临时表中非2021年日期是null,不参与计数
  count(distinct a.y_d) act_days_2021,
  # 同是利用null不参与count计数的方法
  count(distinct if(left(a.did, 1) = '9', a.y_d, null)) act_days_2021_exam,
  count(distinct if(left(a.did, 1) = '8', a.y_d, null)) act_days_2021_question
FROM
  cnt c
  LEFT JOIN aaa a USING(uid)
  LEFT JOIN exam_record e USING(uid)
  LEFT JOIN practice_record p USING(uid)
GROUP BY
  c.uid
ORDER BY
  act_month_total DESC,
  act_days_2021 DESC