with t1 as(
    select user_id, count(continday) as consec_days
    from
    (
        select user_id, 
        # data_sub函数
        # 用登陆日期-连续递增的数字(排序的位次),
        # 如果日期连续,就会产生相同的结果
        date_sub(fdate, interval (
            dense_rank() 
            over (partition by user_id order by fdate)
        ) day
        ) continday
        from tb_dau
        where year(fdate) = 2023 and month(fdate) = 1
    ) t2
    # 对计算结果进行分组,相同的在一组,表示连续登录
    group by user_id, continday
)

select user_id, max(consec_days) as max_consec_days
from t1
group by user_id
WITH ordered_logins AS (
    SELECT 
        user_id,
        fdate,
        ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY fdate) AS rn
    FROM 
        tb_dau
    WHERE 
        fdate BETWEEN '2023-01-01' AND '2023-01-31'
),
grouped_logins AS (
    SELECT 
        user_id,
        fdate,
        rn,
        DATE_SUB(fdate, INTERVAL rn DAY) AS grp
    FROM 
        ordered_logins
)
SELECT 
    user_id,
    MAX(consec_days) AS max_consec_days
FROM (
    SELECT 
        user_id,
        grp,
        COUNT(*) AS consec_days
    FROM 
        grouped_logins
    GROUP BY 
        user_id, grp
) AS consecutive
GROUP BY 
    user_id;

两方案逻辑完全一致,只是用 dense_rank() 替代了 ROW_NUMBER()。两者的区别:

  • dense_rank():排名不跳过重复值(但在这个场景中,日期是唯一的,所以和 ROW_NUMBER() 效果相同)。
  • ROW_NUMBER():严格按顺序生成唯一排名。