WITH t1 AS (
-- 1) 先筛选 2023-01-01 ~ 2023-01-31 的活跃数据,并去重
-- (避免同一用户同一天多条记录影响连续天数统计)
SELECT DISTINCT
fdate,
user_id
FROM tb_dau
WHERE fdate BETWEEN '2023-01-01' AND '2023-01-31'
),
t2 AS (
-- 2) 给每个用户按日期升序编号(行号)
-- 连续日期配合 row_number() 可用于构造“连续段标识”
SELECT
*,
ROW_NUMBER() OVER (
PARTITION BY user_id
ORDER BY fdate
) AS rnk
FROM t1
),
t3 AS (
-- 3) 用“日期 - 行号”的方式识别连续登录区间
-- 连续日期减去对应行号后,会得到相同的 same_day(锚点日期)
-- 然后按 user_id + same_day 分组,统计每段连续天数
SELECT
user_id,
DATE_SUB(fdate, INTERVAL rnk DAY) AS same_day,
COUNT(*) AS consec_day
FROM t2
GROUP BY
user_id,
same_day
)
-- 4) 对每个用户取最长连续登录天数
SELECT
user_id,
MAX(consec_day) AS max_consec_days
FROM t3
GROUP BY user_id;