# 新晋用户:近7天新增                      n7 = 1
# 忠实用户:不是近7天新增,近7天活跃          n7 = 0 & a7 =1 
# 沉睡用户:不是近7天新增,近7天不活跃        n7 = 0 & a7 =0
# 流失用户:不是近7天新增,近30天不活跃        n7 = 0 & a30 =0

# 有2个标签: 1,n7, 是不是7日新增,是 = 1,不是=0        
#           2,a7, 是不是7日活跃,是 = 1, 不是=0
#           3,a30, 是不是30 日活跃,是 = 1, 不是=0


# 第一步,先看新增情况,用户是不是7日内新增 新增时间 > 今天-6  ----注意不要写反了
# 按照用户id, in_time 顺序排序,然后看他的第一次登陆时间
# ----- FIRST_VALUE(in_time) over (partition by uid order by in_time)
# 然后看看首次登陆时间,是不是晚于 今天减去六天,今天减去六天应该是 
# ----- DATE_ADD("2021-11-04", INTERVAL -6 DAY) 或者
# ----- DATE_SUB("2021-11-04", INTERVAL 6 DAY)
# 然后case when,如果首次登陆时间 晚于7天前,那么 n7 = 1, 否则为0.这样就可以得到

with new_tag as(
select uid,
        (case when (FIRST_VALUE(in_time) over (partition by uid order by in_time)) >  DATE_ADD("2021-11-04", INTERVAL -6 DAY) then 1 else 0 end ) as n7 # 是不是近7天新增
    from tb_user_log
),

# 第二步,看活跃情况,
# 联合登入时间和登出时间,得出用户的活跃时间,去重
active_days as(
select uid,date(in_time) as active_date from tb_user_log
union 
select uid,date(out_time) as active_date from tb_user_log
order by uid, active_date
),

# 如果最后一次活跃时间 大于 (今天减去6天), 那么就是7日活跃,a7 = 1, 否则 a7 = 0
# 如果最后一次活跃时间 大于 (今天减去29天), 那么就是30日活跃,a30 = 1, 否则 a7 = 0

active_tag as(
select uid, active_date,
       (case when (FIRST_VALUE(active_date) over (partition by uid order by active_date desc)) >  DATE_ADD("2021-11-04", INTERVAL -6 DAY) then 1 else 0 end ) as a7,
       (case when (FIRST_VALUE(active_date) over (partition by uid order by active_date desc)) >  DATE_ADD("2021-11-04", INTERVAL -29 DAY) then 1 else 0 end ) as a30
from active_days
),


# 第三步,给用户,附上标签
tagged_user as (
select l.uid,n7,a7,a30,
       ( case when n7=1 then "新晋用户"
            when n7=0 and a7=1 then "忠实用户"
            when n7=0 and a7=0 and a30 = 1 then "沉睡用户"
            when n7=0 and a30=0 then "流失用户"
            else "None" end) as user_grade
from tb_user_log l
left join new_tag on l.uid = new_tag.uid
left join active_tag on l.uid =active_tag.uid
),

# 第四步,要用户ID,用户标签,去重
distinct_user as(
select distinct uid,user_grade
from tagged_user
)

# 第五步,计算各个类别的占比, 先算各个类别的数量: count(*) over(partition by user_grade)
#                  再算所有user的数量:(select count(distinct uid) from tb_user_log)
 select distinct user_grade,
       round(count(*) over(partition by user_grade) / (select count(distinct uid)
       from tb_user_log),2) as ratio
 from distinct_user
 order by ratio desc