-- 一开始的想法,不过没有实施

-- 每个日期新用户的次日留存率
-- 从第一天开始,不要最后一天
-- 首先生成一个每天的新用户的表
-- 其次生成一个每天用户登录表,并将其时间减1
-- 最后联立这两个表

-- 第二种想法
select date, case when p is null then round(0.000,3) else p end as p
from
(select distinct date as date from login) as d
left outer join 
(
select first_date, round(count(b.user_id)/count(a.user_id),3) as p
from
(select user_id, min(date) as first_date from login group by user_id) as a
left outer join
(select user_id, date as second_date
from login 
where (user_id, date) in (select user_id, date_add(min(date) ,interval 1 day)  from login group by user_id)) as b
on a.user_id = b.user_id
group by a.first_date) as c
on c.first_date = d.date
order by date;

就是在求总体次日留存率的代码的基础上,进行了分组。

然后,在分组的情况下,只能求出有留存率的日期,此时再联立全部日期临时表,就可以求出来最终结果。