# 思路:
# 1、利用video_id将两表拼起来,保留author,month(start_time),if_follow
# 2、设置新字段cum_cnt,if_follow=1 为1,if_follow=2 为-1,else 为0 用于后续计算当前粉丝
# 3、设置新字段max_date max(start_time) over(partition by month(start_time)) 选取每个月的最大日期,每个月最大日期对应的max_date_sum是截至当月的累计粉丝数
# 4、利用author month(start_time)分组,sum(cum_cnt)计算当月累计粉丝数量,count(*)计算当月总播放量
# 5、设置新字段:max_date_sum,利用sum(cum_cnt) over(partition by author order by author,month(start_time))

select distinct t3.author,t3.month_date,t3.fans_growth_rate,t4.total_fans from 
(
# 第4步:生成每个创作者每个月的涨粉率
select t1.author,t1.month_date,round(sum(t1.cum_cnt)/count(*),3) as fans_growth_rate
 from (
# 第1、2、3步如下
select v1.author,u1.start_time,date_format(u1.start_time,'%Y-%m') as month_date,
max(start_time) over(partition by month(start_time)) as max_date,
(case when if_follow = 1 then 1 
 when if_follow = 2 then -1 else 0 end) as cum_cnt from tb_user_video_log as u1 
 left join tb_video_info as v1 on u1.video_id=v1.video_id
     where year(u1.start_time)=2021) t1 
 group by t1.author,t1.month_date ) t3 
 inner join 
 (
#  第5步,计算每个用户截至当前月的累计粉丝数
#      注意:因为可能存在一个用户在同一天登录多次,存在重复的记录,需要去重
 select t2.author,t2.month_date,
 t2.total_fans from (
     select t5.author,t5.month_date,t5.start_time,t5.max_date,
     sum(t5.cum_cnt) over(partition by t5.author order by t5.month_date)  as total_fans 
     from 
 (
 select v1.author,u1.start_time,date_format(u1.start_time,'%Y-%m') as month_date,
max(u1.start_time) over(partition by v1.author,month(u1.start_time)) as max_date,
(case when if_follow = 1 then 1 
 when if_follow = 2 then -1 else 0 end) as cum_cnt from tb_user_video_log as u1 
 left join tb_video_info as v1 on u1.video_id=v1.video_id
     where year(u1.start_time)=2021) t5  )t2
 where t2.start_time=t2.max_date 
 ) t4 
 on  t3.author=t4.author and t3.month_date = t4.month_date 
 order by t3.author,t4.total_fans