# 根据题目含义可知,要根据用户活跃天数来分类用户,使用分组查询和窗口函数
# 将用户行为日志每条记录的in_time和out_time提取作为两条记录
with t as(
select uid, in_time hy_time
from tb_user_log
union
select uid, out_time
from tb_user_log
)
# 统计用户分级情况及各类用户所占比例
select user_grade, round(count(*)/sum(count(*))over(),2) ratio
from (
# 根据用户活跃情况来分类用户
select uid, case
when datediff(date(max(today)), date(min(hy_time))) <= 6 then '新晋用户'
when datediff(date(max(today)), date(max(hy_time))) <= 6 then '忠实用户'
when datediff(date(max(today)), date(max(hy_time))) <= 29 then '沉睡用户'
else '流失用户'
end as user_grade
from (
# 对表t使用窗口函数dense_rank()over()按uid分组来排名
select uid, hy_time, dense_rank()over(partition by uid order by hy_time) rank_t, max(hy_time)over() today
from t
) t1
where rank_t = 1 or rank_t = (
# 当rank_t等于该用户的最大活跃时间的排名时,即该用户的最近活跃时间予以保留
select max(rank_t)
from (
select uid, hy_time, dense_rank()over(partition by uid order by hy_time) rank_t
from t
) t2
where t2.uid = t1.uid
)
group by uid
) k
group by user_grade
order by ratio desc, user_grade;
好笨重的代码,但是凭自己脑子想出来的,我也不算太嫌弃,起码思路还算正确。另外,测试用例中沉睡用户和流失用户的比例一样,如果只按照ratio排序的话,这两类用户的顺序会颠倒不同于期望结果,于是加了一个user_grade排序保证其能通过测试用例。



京公网安备 11010502036488号