-- Step1:找“首发完听用户”(user × song 粒度)
with base_user as (
select
s.artist_id,
s.song_id,
p.user_id,
-- 取首发日最早播放时间(用于划分时段)
min(p.play_start) as first_play_time,
-- 按最早播放时间划分时段
case
when hour(min(p.play_start)) between 6 and 11 then 'morning'
when hour(min(p.play_start)) between 12 and 17 then 'afternoon'
else 'night'
end as time_slot
from
t_song s
join
t_play p on s.song_id = p.song_id
where
-- 条件1:首发当天
date(p.play_start) = date(s.release_time)
-- 条件2:完整播放
and p.play_sec >= s.duration_sec
group by
s.artist_id,
s.song_id,
p.user_id
),
-- Step2:统计基数(分母)
cut as (
select
artist_id,
time_slot,
count(*) as base_user_cnt -- 注意:不是 distinct user,是 user × song
from
base_user
group by
artist_id,
time_slot
),
-- Step3:构造日期(D+1 / D+3 / D+7)
date_ as (
select
artist_id,
song_id,
date(release_time) as release_date,
date_add(date(release_time), interval 1 day) as d1,
date_add(date(release_time), interval 3 day) as d3,
date_add(date(release_time), interval 7 day) as d7
from
t_song
),
-- Step4:判断回流(核心逻辑)
agg as (
select
b.artist_id,
b.time_slot,
-- D+1 回流人数(user × song 维度)
count(distinct case
when date(p.play_start) = d.d1 then concat(b.user_id, '-', b.song_id)
end) as d1_cnt,
-- D+3
count(distinct case
when date(p.play_start) = d.d3 then concat(b.user_id, '-', b.song_id)
end) as d3_cnt,
-- D+7
count(distinct case
when date(p.play_start) = d.d7 then concat(b.user_id, '-', b.song_id)
end) as d7_cnt
from
base_user b
-- 关联歌曲发布时间(拿到 D+N)
join
date_ d
on b.artist_id = d.artist_id
and b.song_id = d.song_id
-- 找该用户的后续播放记录
left join
t_play p
on p.user_id = b.user_id
-- 限定:同一歌手的任意歌曲 + 完整播放
left join
t_song s2
on p.song_id = s2.song_id
and s2.artist_id = b.artist_id
and p.play_sec >= s2.duration_sec
-- 只保留“有效回流记录”
where
s2.song_id is not null
group by
b.artist_id,
b.time_slot
)
-- Step5:计算回流率
select
c.artist_id,
c.time_slot,
c.base_user_cnt,
round(ifnull(a.d1_cnt, 0) / c.base_user_cnt, 2) as d1_rate,
round(ifnull(a.d3_cnt, 0) / c.base_user_cnt, 2) as d3_rate,
round(ifnull(a.d7_cnt, 0) / c.base_user_cnt, 2) as d7_rate
from
cut c
left join
agg a
on c.artist_id = a.artist_id
and c.time_slot = a.time_slot
-- 排序规则
order by
d7_rate desc,
c.base_user_cnt desc,
c.artist_id,
case
when c.time_slot = 'morning' then 1
when c.time_slot = 'afternoon' then 2
else 3
end;