with sales_part as(
select
product_id,
sum(monthly_quantity) * unit_price as total_sales,
unit_price,
sum(monthly_quantity) as total_quantity,
round(sum(monthly_quantity) * unit_price / 12, 2) as avg_monthly_sales,
max(monthly_quantity) as max_monthly_quantity
from
(
select
products.product_id,
sum(quantity) as monthly_quantity,
unit_price
from
products
left join orders on products.product_id=orders.product_id and year(order_date)=2023
group by
month(order_date), product_id
) as monthly_quantity_orders
group by
product_id
),
age_part as(
select
product_id,
customer_age_group
from
(
select
product_id,
customer_age_group,
row_number() over(partition by product_id order by cnt desc, customer_age_group) as age_rank
from
(
select
product_id,
customer_age_group,
sum(quantity) as cnt
from
(
select
customer_id,
(
case
when customer_age>=1 and customer_age<=10 then "1-10"
when customer_age>=11 and customer_age<=20 then "11-20"
when customer_age>=21 and customer_age<=30 then "21-30"
when customer_age>=31 and customer_age<=40 then "31-40"
when customer_age>=41 and customer_age<=50 then "41-50"
when customer_age>=51 and customer_age<=60 then "51-60"
else "61+"
end
) as customer_age_group
from customers
) as customers_grouped
inner join orders on customers_grouped.customer_id=orders.customer_id
group by
product_id, customer_age_group
) as product_age_cnt
) as ranked_tb
where age_rank=1
)
select
sales_part.product_id,
total_sales,
unit_price,
total_quantity,
avg_monthly_sales,
max_monthly_quantity,
customer_age_group
from
sales_part
inner join age_part on sales_part.product_id=age_part.product_id
order by
total_sales desc, product_id
/*
写在最后
花了将近三个半小时才做出来,开始想直接使用窗口函数(纯窗口不使用group by),但是写到一半发现窗口函数类似给原表添加一个聚合字段,最后还是要group by,而且窗口函数结果作为子查询的话不好对子查询聚合的结果在父查询再次聚合(对于sum这一类容易出现问题),所以改用group by了,只有最后的处理customer_age_group的时候使用了row_number()排名。
对于这个问题我分了两部分分别处理(sales_part和age_part),
1、age_part:
因为需要输出的字段中除了最后一个customer_age_group是对orders inner join customers按照customer_age(需要使用case转换提前一下),product_id分组之后再按product_id使用窗口函数聚合不同customer_age_group进行排名(注意使用row_number)之外,其他的字段都是对orders和products处理就能得到的
2、sales_part:
对orders和products处理就能得到的,但是最好使用group by,除去max_monthly_quantity之外其他部分其实聚合一次就能得到,但是由于聚合两次干脆外层复用monthly_quantity,然后就是注意使用left join(虽然直接join可能也能过)
得到上面两部分直接inner join起来就行了,最后再提醒一下容易出错的地方,就是with tb_name as(...)如果有多个的格式是with t1 as(...),t2 as(...)中间一直报SQL syntax error找了半天......
*/