# 题目:找出近一个月发布的视频中热度最高的top3视频。 # 最近播放日期以end_time-结束观看时间为准,假设为T,则最近一个月按[T-29, T]闭区间统计。 # 结果中热度保留为整数,并按热度降序排序。 # 输出结果为video_id, hot_index # 第一步:确定最后日期: WITH max_end_date AS ( SELECT MAX(end_time) AS max_date FROM tb_user_video_log ), # 第二步,确定近一个月内发布的歌曲 recent_videos AS ( SELECT video_id, release_time FROM tb_video_info WHERE release_time BETWEEN (SELECT max_date - INTERVAL 29 DAY FROM max_end_date) AND (SELECT max_date FROM max_end_date) ), # 第三步,确定一个月内歌曲的播放情况 song_play_info AS( select i.video_id, start_time, end_time, (case when timestampdiff(second,start_time,end_time) >= duration then 1 else 0 end) if_full, #是否完播 if_like, if_retweet, (case when comment_id is not null then 1 else 0 end) as if_commented from tb_user_video_log l join tb_video_info i on l.video_id = i.video_id where i.video_id in (select video_id from recent_videos) # 确保是最近1个月发布的歌曲 and end_time between (SELECT max_date - INTERVAL 29 DAY FROM max_end_date) AND (SELECT max_date FROM max_end_date) # 确保是近一个月的播放数据 ), # 第四步,统计情况 # 第三步,对每个指标进行计算,并且限定在最近一个月内 # 先计算每个视频的完播率。完播率 = 完播的次数/播放总次数 !!!需要加一列,是否完播 # 点赞数 = sum(if_like) # 评论数 = sum(if_commented) # 转发数 = sum(if_retweet) # 第四步,新鲜度的计算:新鲜度=1/(最近无播放天数+1) # 最近无播放天数的计算:这个视频最后一次播放日期,和所有视频的最后一个播放日期的差值 indexes as ( select video_id, sum(if_full)/count(*) as full_play_rate, sum(if_like) as like_cnt, sum(if_commented) as comment_cnt, sum(if_retweet) as retweet_cnt, datediff((select max(end_time) from tb_user_video_log),max(end_time)) as no_play from song_play_info group by video_id ) # 注意DATEDIFF的精度问题,2022-05-08 21:16 和2022-05-09 21:15使用timestampdiff的话就是0,相差不到一天,使用DATE DIFF就没有这个问题了 select video_id, round((100 * full_play_rate + 5 * like_cnt + 3 * comment_cnt + 2 * retweet_cnt) * (1/(no_play+1)),0) as hot_index from indexes order by hot_index desc limit 3 # 不要忘记这个