【类型】:时间函数经典题
【场景】:留存率、复购率、
【分类】:时间函数
分析思路
难点:
1.时间函数的应用 2.复购率
(1)统计每个货物用户的购买记录;利用窗口函数根据货号、用户分组按购买时间排序
- [使用] row_number();timestampdiff()
(2)统计零食类商品中复购率top3高的商品
求解代码
在时间的处理上,可能会遇到的问题,关于timestampdiff
错误代码1
#统计零食类商品中复购率top3高的商品
select
product_id,
round(count(distinct if(ranking >= 2,uid,null))/count(distinct uid),3) rate
from(
#统计每个货物用户的购买记录;利用窗口函数根据货号、用户分组按购买时间排序
select
product_id,
uid,
row_number() over(partition by product_id,uid order by event_time) ranking
from tb_order_detail
left join tb_product_info using(product_id)
left join tb_order_overall using (order_id)
where tag='零食'
and status=1
and 90 > timestampdiff(day,event_time,(
select
max(date(event_time))
from tb_order_overall))
) main
group by product_id
order by rate desc,product_id
limit 3
错误代码2
#统计零食类商品中复购率top3高的商品
select
product_id,
round(count(distinct if(ranking >= 2,uid,null))/count(distinct uid),3) rate
from(
#统计每个货物用户的购买记录;利用窗口函数根据货号、用户分组按购买时间排序
select
product_id,
uid,
row_number() over(partition by product_id,uid order by event_time) ranking
from tb_order_detail
left join tb_product_info using(product_id)
left JOIN tb_order_overall using (order_id)
where tag='零食'
and status=1
and 90 > timestampdiff(date(
(select
max(event_time)
from tb_order_overall)),date(event_time))
) main
group by product_id
order by rate desc,product_id
limit 3
不通过案例输出
1 8002|0.500
2 8004|0.500 1 8004|0.500
3 8003|0.333 2 8003|0.333
3 8002|0.000
在时间的处理上,为什么使用timestampdiff()会导致提交不通过,但使用datediff()和date_sub就可以,因为用错了timestampdiff()
正确代码
方法一
with子句
#统计零食类商品中复购率top3高的商品
with
main as(
#统计近90天内购买商品至少两次的人数
select
uid,
product_id,
count(distinct uid) as payusr_num
from tb_product_info
join tb_order_detail using(product_id)
join tb_order_overall using(order_id)
where tag = '零食'
and status=1
and date(event_time) > (
select
date(date_sub(max(event_time),interval 90 day))
from tb_order_overall)
group by product_id,uid having count(*) >= 2
)
,attr as(
#统计近90天内购买商品的总人数
select
product_id,
if(count(distinct uid) = 0,0,count(distinct uid)) as usr_num
from tb_product_info
join tb_order_detail using(product_id)
join tb_order_overall using(order_id)
where tag = '零食'
and status=1
and date(event_time) > (
select
date(date_sub(max(event_time),interval 90 day))
from tb_order_overall)
group by product_id
)
select
product_id,
round(if(payusr_num is null,0,payusr_num/usr_num),3) as repurchase_rate
from main
right join attr using(product_id)
order by repurchase_rate desc
limit 3
使用datediff()
#统计零食类商品中复购率top3高的商品
select
product_id,
round(count(distinct if(ranking >= 2,uid,null))/count(distinct uid),3) rate
from(
#统计每个货物用户的购买记录;利用窗口函数根据货号、用户分组按购买时间排序
#在时间的处理上,这道题不知道为什么不能使用timestampdiff(),会导致提交不通过
#使用datediff()和date_sub就可以
select
product_id,
uid,
row_number() over(partition by product_id,uid order by event_time) ranking
from tb_order_detail
left join tb_product_info using(product_id)
left JOIN tb_order_overall using (order_id)
where tag='零食'
and status=1
and datediff(date(
(select
max(event_time)
from tb_order_overall)),date(event_time)) < 90
) main
group by product_id
order by rate desc,product_id
limit 3
方法二
多表连接
#统计零食类商品中复购率top3高的商品
select
product_id,
round(count(distinct if(ranking >= 2,uid,null))/count(distinct uid),3) rate
from(
#统计每个货物用户的购买记录;利用窗口函数根据货号、用户分组按购买时间排序
#在时间的处理上,这道题不知道为什么不能使用timestampdiff(),会导致提交不通过
#使用datediff()和date_sub就可以
select
product_id,
uid,
row_number() over(partition by product_id,uid order by event_time) ranking
from tb_order_detail
left join tb_product_info using(product_id)
left JOIN tb_order_overall using (order_id)
where tag='零食'
and status=1
and date(event_time) > date(
(select
date_sub(max(event_time),interval 90 day)
from tb_order_overall)
)
) main
group by product_id
order by rate desc,product_id
limit 3
输出结果
运行main表
1 8001|102|1
2 8001|102|2
3 8002|101|1
4 8002|101|2
5 8002|105|1
6 8003|101|1
7 8003|102|1
8 8003|102|2
9 8003|104|1