# 题目:找出近一个月发布的视频中热度最高的top3视频。

# 最近播放日期以end_time-结束观看时间为准,假设为T,则最近一个月按[T-29, T]闭区间统计。
# 结果中热度保留为整数,并按热度降序排序。
# 输出结果为video_id, hot_index

# 第一步:确定最后日期:
WITH max_end_date AS (
    SELECT MAX(end_time) AS max_date FROM tb_user_video_log
),

# 第二步,确定近一个月内发布的歌曲

recent_videos AS (
                 SELECT video_id, release_time
                 FROM tb_video_info
                 WHERE release_time BETWEEN (SELECT max_date - INTERVAL 29 DAY FROM max_end_date) 
                                        AND (SELECT max_date FROM max_end_date)
    ),

# 第三步,确定一个月内歌曲的播放情况

song_play_info AS(
                select i.video_id, 
                        start_time,
                        end_time,
                        (case when timestampdiff(second,start_time,end_time) >= duration then 1 
                        else 0 end) if_full, #是否完播
                        if_like, 
                        if_retweet,
                        (case when comment_id is not null then 1 else 0 end) as if_commented 
                from tb_user_video_log l
                join tb_video_info i
                on l.video_id = i.video_id
                where i.video_id in (select video_id from recent_videos) # 确保是最近1个月发布的歌曲
                  and end_time between (SELECT max_date - INTERVAL 29 DAY FROM max_end_date)
                                  AND (SELECT max_date FROM max_end_date)  # 确保是近一个月的播放数据
),
# 第四步,统计情况
# 第三步,对每个指标进行计算,并且限定在最近一个月内
            # 先计算每个视频的完播率。完播率 = 完播的次数/播放总次数 !!!需要加一列,是否完播
            # 点赞数 = sum(if_like)
            # 评论数 = sum(if_commented) 
            # 转发数 = sum(if_retweet)
            # 第四步,新鲜度的计算:新鲜度=1/(最近无播放天数+1) 
            # 最近无播放天数的计算:这个视频最后一次播放日期,和所有视频的最后一个播放日期的差值

 indexes as (
            select video_id,
                sum(if_full)/count(*) as full_play_rate, 
                sum(if_like) as like_cnt,
                sum(if_commented) as comment_cnt,
                sum(if_retweet) as retweet_cnt,
                datediff((select max(end_time) from tb_user_video_log),max(end_time)) as no_play
            from song_play_info
            group by video_id
 )
 # 注意DATEDIFF的精度问题,2022-05-08 21:16 和2022-05-09 21:15使用timestampdiff的话就是0,相差不到一天,使用DATE DIFF就没有这个问题了

select video_id,
round((100 * full_play_rate + 5 * like_cnt + 3 * comment_cnt + 2 * retweet_cnt) 
* (1/(no_play+1)),0) as hot_index
from indexes
order by hot_index desc
limit 3 # 不要忘记这个