WITH t_test_user AS
(
SELECT *
,ROW_NUMBER() OVER(PARTITION BY uid ORDER BY dt) AS ranking
FROM(
SELECT uid, artical_id, DATE(in_time) AS dt
FROM tb_user_log
UNION
SELECT uid, artical_id, DATE(out_time) AS dt
FROM tb_user_log
) data1
)
SELECT user_grade
,ROUND(COUNT(DISTINCT uid)/(SELECT COUNT(DISTINCT uid) FROM tb_user_log), 2) AS ratio
FROM(
SELECT uid
,"新晋用户" AS user_grade
FROM t_test_user
WHERE ranking = 1 AND DATEDIFF((SELECT MAX(dt) FROM t_test_user), dt) < 7
UNION
SELECT uid
,"忠实用户" AS user_grade
FROM t_test_user
WHERE ranking != 1 AND DATEDIFF((SELECT MAX(dt) FROM t_test_user), dt) < 7
UNION
SELECT uid
,"沉睡用户" AS user_grade
FROM t_test_user
WHERE uid NOT IN(
SELECT DISTINCT uid
FROM t_test_user
WHERE DATEDIFF((SELECT MAX(dt) FROM t_test_user), dt) < 7
) AND DATEDIFF((SELECT MAX(dt) FROM t_test_user), dt) < 30
UNION
SELECT uid
,"流失用户" AS user_grade
FROM t_test_user
WHERE uid NOT IN(
SELECT DISTINCT uid
FROM t_test_user
WHERE DATEDIFF((SELECT MAX(dt) FROM t_test_user), dt) < 30
)
) data2
GROUP BY user_grade
ORDER BY ratio DESC
- 以上为首次完成,通过UNION进行合并,当然UNION中每个数据集中存在相同用户的多行样本
SELECT user_grade, ROUND(COUNT(uid) / MAX(user_cnt), 2) as ratio
FROM (
SELECT uid, user_cnt,
CASE
WHEN last_dt_diff >= 30 THEN "流失用户"
WHEN last_dt_diff >= 7 THEN "沉睡用户"
WHEN first_dt_diff < 7 THEN "新晋用户"
ELSE "忠实用户"
END as user_grade
FROM (
SELECT uid, user_cnt,
TIMESTAMPDIFF(DAY,first_dt,cur_dt) as first_dt_diff,
TIMESTAMPDIFF(DAY,last_dt,cur_dt) as last_dt_diff
FROM (
SELECT uid, MIN(DATE(in_time)) as first_dt,
MAX(DATE(out_time)) as last_dt
FROM tb_user_log
GROUP BY uid
) as t_uid_first_last
LEFT JOIN (
SELECT MAX(DATE(out_time)) as cur_dt,
COUNT(DISTINCT uid) as user_cnt
FROM tb_user_log
) as t_overall_info ON 1
) as t_user_info
) as t_user_grade
GROUP BY user_grade
ORDER BY ratio DESC;
- 以上为题解中他人解法,通过构造first_dt_diff与last_dt_diff衡量各用户首次登入与末次登入与当前日期的差值,从而可以直接通过CASE 设置user_grade,后者思想优于前者思想
- 注意first考虑MIN intime,last考虑MAX outtime