1.首先建立用户活跃度表格,划分用户必须依照活跃度表格
2.用户划分依据
这里我用的是找到每个用户最早的登陆时间记为min_date, 以及最新的登陆时间为max_date进行判断
-- 忠实用户(近7天活跃过且非新晋用户)(非近7天新增):min_date<(今天-6天)AND max_date >=( 今天-6天)
-- 新晋用户(近7天新增): min_date >= (今天-6天)OR max_date >=(今天-6天)【可省略】
-- 沉睡用户(近7天未活跃但更早前活跃过): max_date < (今天-6天) AND min_date>=(今天-29天)
-- 流失用户(近30天未活跃但更早前活跃过): min_date <(今天-29天)【可省略】 OR max_date <(今天-29天)
3.根据提到过的条件,我们就还需要存储今天
用cte进行存储大量变量,得到结果
WITH base AS (
SELECT uid,
MIN(dt) AS min_date,
MAX(dt) AS max_date
FROM (
SELECT uid,
DATE(in_time) AS dt
FROM tb_user_log
UNION
SELECT uid,
DATE(out_time) AS dt
FROM tb_user_log
) AS tmp
GROUP BY uid
),
max_date_cte AS (
SELECT MAX(max_date) AS max_dt
FROM base
),
user_classification AS (
SELECT *,
CASE
WHEN min_date < DATE_SUB((SELECT max_dt FROM max_date_cte), INTERVAL 6 DAY)
AND max_date >= DATE_SUB((SELECT max_dt FROM max_date_cte), INTERVAL 6 DAY)
THEN "忠实用户"
WHEN min_date >= DATE_SUB((SELECT max_dt FROM max_date_cte), INTERVAL 6 DAY)
THEN "新晋用户"
WHEN min_date >= DATE_SUB((SELECT max_dt FROM max_date_cte), INTERVAL 29 DAY)
AND max_date < DATE_SUB((SELECT max_dt FROM max_date_cte), INTERVAL 6 DAY)
THEN "沉睡用户"
WHEN max_date < DATE_SUB((SELECT max_dt FROM max_date_cte), INTERVAL 29 DAY)
THEN "流失用户"
END AS user_grade
FROM base
)
SELECT user_grade,
ROUND(COUNT(*) / (SELECT COUNT(uid) FROM base), 2) AS ratio
FROM user_classification
GROUP BY user_grade
ORDER BY ratio DESC;