#去重加筛选
WITH t1 AS (
SELECT DISTINCT fdate, user_id
FROM tb_dau
WHERE fdate BETWEEN '2023-1-1' AND '2023-1-31'
),
/*
ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY fdate) 排序
因为排序是等差的,所以日期减去排序,如果日期也是等差的,结果就会是同一个新的日期。
如,2022-1-10 -3 = 2022-1-7
2022-1-11 -4 = 2022-1-7
2022-1-12 -5 = 2022-1-7
2022-1-20 -6 = 2022-1-14
date_sub(日期,interval 排序 day)
*/
t2 AS (
SELECT user_id, fdate,DATE_SUB(fdate, INTERVAL ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY fdate) day) sub_date
FROM t1
),
#将用户id和新日期分组,统计连续天数个数
t3 As (
select user_id,COUNT(*) as consec_day
from t2
group by user_id,sub_date
)
#max函数求最长连续天数
SELECT user_id,max(consec_day) as max_consec_days from t3 group by user_id;