WITH t_test_user AS
(
    SELECT *
            ,ROW_NUMBER() OVER(PARTITION BY uid ORDER BY dt) AS ranking
    FROM(
        SELECT uid, artical_id, DATE(in_time) AS dt
                
        FROM tb_user_log
        UNION
        SELECT uid, artical_id, DATE(out_time) AS dt
        FROM tb_user_log
    ) data1

)


SELECT user_grade
        ,ROUND(COUNT(DISTINCT uid)/(SELECT COUNT(DISTINCT uid) FROM tb_user_log), 2) AS ratio
FROM(
    SELECT uid
            ,"新晋用户" AS user_grade
    FROM t_test_user
    WHERE ranking = 1 AND DATEDIFF((SELECT MAX(dt) FROM t_test_user), dt) < 7

    UNION 
    SELECT uid
            ,"忠实用户" AS user_grade
    FROM t_test_user
    WHERE ranking != 1 AND DATEDIFF((SELECT MAX(dt) FROM t_test_user), dt) < 7

    UNION 
    SELECT uid
            ,"沉睡用户" AS user_grade
    FROM t_test_user
    WHERE uid NOT IN(
        SELECT DISTINCT uid
        FROM t_test_user
        WHERE DATEDIFF((SELECT MAX(dt) FROM t_test_user), dt) < 7
    ) AND DATEDIFF((SELECT MAX(dt) FROM t_test_user), dt) < 30

    UNION 
    SELECT uid
            ,"流失用户" AS user_grade
    FROM t_test_user
    WHERE uid NOT IN(
        SELECT DISTINCT uid
        FROM t_test_user
        WHERE DATEDIFF((SELECT MAX(dt) FROM t_test_user), dt) < 30
    ) 
) data2
GROUP BY user_grade
ORDER BY ratio DESC



  • 以上为首次完成,通过UNION进行合并,当然UNION中每个数据集中存在相同用户的多行样本

SELECT user_grade, ROUND(COUNT(uid) / MAX(user_cnt), 2) as ratio
FROM (
    SELECT uid, user_cnt,
        CASE
            WHEN last_dt_diff >= 30 THEN "流失用户"
            WHEN last_dt_diff >= 7 THEN "沉睡用户"
            WHEN first_dt_diff < 7 THEN "新晋用户"
            ELSE "忠实用户"
        END as user_grade
    FROM (
        SELECT uid, user_cnt,
            TIMESTAMPDIFF(DAY,first_dt,cur_dt) as first_dt_diff, 
            TIMESTAMPDIFF(DAY,last_dt,cur_dt) as last_dt_diff
        FROM (
            SELECT uid, MIN(DATE(in_time)) as first_dt,
                MAX(DATE(out_time)) as last_dt
            FROM tb_user_log
            GROUP BY uid
        ) as t_uid_first_last
        LEFT JOIN (
            SELECT MAX(DATE(out_time)) as cur_dt,
                COUNT(DISTINCT uid) as user_cnt
            FROM tb_user_log
        ) as t_overall_info ON 1
    ) as t_user_info
) as t_user_grade
GROUP BY user_grade
ORDER BY ratio DESC;

  • 以上为题解中他人解法,通过构造first_dt_diff与last_dt_diff衡量各用户首次登入与末次登入与当前日期的差值,从而可以直接通过CASE 设置user_grade,后者思想优于前者思想
  • 注意first考虑MIN intime,last考虑MAX outtime