# 新晋用户:近7天新增 n7 = 1 # 忠实用户:不是近7天新增,近7天活跃 n7 = 0 & a7 =1 # 沉睡用户:不是近7天新增,近7天不活跃 n7 = 0 & a7 =0 # 流失用户:不是近7天新增,近30天不活跃 n7 = 0 & a30 =0 # 有2个标签: 1,n7, 是不是7日新增,是 = 1,不是=0 # 2,a7, 是不是7日活跃,是 = 1, 不是=0 # 3,a30, 是不是30 日活跃,是 = 1, 不是=0 # 第一步,先看新增情况,用户是不是7日内新增 新增时间 > 今天-6 ----注意不要写反了 # 按照用户id, in_time 顺序排序,然后看他的第一次登陆时间 # ----- FIRST_VALUE(in_time) over (partition by uid order by in_time) # 然后看看首次登陆时间,是不是晚于 今天减去六天,今天减去六天应该是 # ----- DATE_ADD("2021-11-04", INTERVAL -6 DAY) 或者 # ----- DATE_SUB("2021-11-04", INTERVAL 6 DAY) # 然后case when,如果首次登陆时间 晚于7天前,那么 n7 = 1, 否则为0.这样就可以得到 with new_tag as( select uid, (case when (FIRST_VALUE(in_time) over (partition by uid order by in_time)) > DATE_ADD("2021-11-04", INTERVAL -6 DAY) then 1 else 0 end ) as n7 # 是不是近7天新增 from tb_user_log ), # 第二步,看活跃情况, # 联合登入时间和登出时间,得出用户的活跃时间,去重 active_days as( select uid,date(in_time) as active_date from tb_user_log union select uid,date(out_time) as active_date from tb_user_log order by uid, active_date ), # 如果最后一次活跃时间 大于 (今天减去6天), 那么就是7日活跃,a7 = 1, 否则 a7 = 0 # 如果最后一次活跃时间 大于 (今天减去29天), 那么就是30日活跃,a30 = 1, 否则 a7 = 0 active_tag as( select uid, active_date, (case when (FIRST_VALUE(active_date) over (partition by uid order by active_date desc)) > DATE_ADD("2021-11-04", INTERVAL -6 DAY) then 1 else 0 end ) as a7, (case when (FIRST_VALUE(active_date) over (partition by uid order by active_date desc)) > DATE_ADD("2021-11-04", INTERVAL -29 DAY) then 1 else 0 end ) as a30 from active_days ), # 第三步,给用户,附上标签 tagged_user as ( select l.uid,n7,a7,a30, ( case when n7=1 then "新晋用户" when n7=0 and a7=1 then "忠实用户" when n7=0 and a7=0 and a30 = 1 then "沉睡用户" when n7=0 and a30=0 then "流失用户" else "None" end) as user_grade from tb_user_log l left join new_tag on l.uid = new_tag.uid left join active_tag on l.uid =active_tag.uid ), # 第四步,要用户ID,用户标签,去重 distinct_user as( select distinct uid,user_grade from tagged_user ) # 第五步,计算各个类别的占比, 先算各个类别的数量: count(*) over(partition by user_grade) # 再算所有user的数量:(select count(distinct uid) from tb_user_log) select distinct user_grade, round(count(*) over(partition by user_grade) / (select count(distinct uid) from tb_user_log),2) as ratio from distinct_user order by ratio desc