#第一步: 去重,原表数据有个别重复的,这些会影响统计,先做去重筛选工作
SELECT DISTINCT device_id,question_id, result, DATE FROM question_practice_detail
#第二步:按照device_id分组,以date排序,用窗口函数lead,构造一个字段next_day,表示当前日期的后一条记录
SELECT question_id, result, device_id,DATE, lead(DATE,1) over(PARTITION BY device_id ORDER BY DATE) "next_day" FROM (SELECT DISTINCT device_id,question_id, result, DATE FROM question_practice_detail) t
#第三步:判断date和nexy_day是否只差一天,是的话表示该用户第二天有继续答题,赋值1以资鼓励,否则就位0
SELECT *,IF(DATE + INTERVAL 1 DAY = next_day,1,0) "next_day_question" FROM (SELECT question_id, result, device_id,DATE, lead(DATE,1) over(PARTITION BY device_id ORDER BY DATE) "next_day" FROM (SELECT DISTINCT device_id,question_id, result, DATE FROM question_practice_detail) t) tt
#第四步:根据第三步,赋值1的都是第二天答题的用户,这时候可以求平均率了
SELECT ROUND(SUM(next_day_question) / COUNT(*),4) "avg_ret" FROM (SELECT *,IF(DATE + INTERVAL 1 DAY = next_day,1,0) "next_day_question" FROM (SELECT question_id, result, device_id,DATE, lead(DATE,1) over(PARTITION BY device_id ORDER BY DATE) "next_day" FROM (SELECT DISTINCT device_id,question_id, result, DATE FROM question_practice_detail) t) tt) ttt