with t1 as(
select user_id, count(continday) as consec_days
from
(
select user_id,
# data_sub函数
# 用登陆日期-连续递增的数字(排序的位次),
# 如果日期连续,就会产生相同的结果
date_sub(fdate, interval (
dense_rank()
over (partition by user_id order by fdate)
) day
) continday
from tb_dau
where year(fdate) = 2023 and month(fdate) = 1
) t2
# 对计算结果进行分组,相同的在一组,表示连续登录
group by user_id, continday
)
select user_id, max(consec_days) as max_consec_days
from t1
group by user_id
WITH ordered_logins AS (
SELECT
user_id,
fdate,
ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY fdate) AS rn
FROM
tb_dau
WHERE
fdate BETWEEN '2023-01-01' AND '2023-01-31'
),
grouped_logins AS (
SELECT
user_id,
fdate,
rn,
DATE_SUB(fdate, INTERVAL rn DAY) AS grp
FROM
ordered_logins
)
SELECT
user_id,
MAX(consec_days) AS max_consec_days
FROM (
SELECT
user_id,
grp,
COUNT(*) AS consec_days
FROM
grouped_logins
GROUP BY
user_id, grp
) AS consecutive
GROUP BY
user_id;
两方案逻辑完全一致,只是用 dense_rank() 替代了 ROW_NUMBER()。两者的区别:
dense_rank():排名不跳过重复值(但在这个场景中,日期是唯一的,所以和ROW_NUMBER()效果相同)。ROW_NUMBER():严格按顺序生成唯一排名。

京公网安备 11010502036488号