#第一步:找出user_profile表中浙江大学的数据,并和question_practice_detail连接
#第二步:将第一步生成的表连接question_detail表,构成整体表,以便后面分析,注意加一个过滤条件WHERE qd.difficult_level IS NOT NULL
#第三步:有了步骤二的表,就很容易分析数据了。这里我原本用avg()窗口函数的,但无奈结果都是0.9999,无法精准到1.0000,只能用sum(做对的题目数)除以count(各个难度的题目数量)
SELECT up.device_id, dq.question_id, dq.result FROM (SELECT device_id FROM user_profile WHERE university = "浙江大学") up LEFT JOIN question_practice_detail dq ON up.device_id = dq.device_id
#第二步:将第一步生成的表连接question_detail表,构成整体表,以便后面分析,注意加一个过滤条件WHERE qd.difficult_level IS NOT NULL
SELECT qd.difficult_level, CASE WHEN t.result = "wrong" THEN 0 ELSE 1 END "result" FROM (SELECT up.device_id, dq.question_id, dq.result FROM (SELECT device_id FROM user_profile WHERE university = "浙江大学") up LEFT JOIN question_practice_detail dq ON up.device_id = dq.device_id) t LEFT JOIN question_detail qd ON t.question_id = qd.question_id WHERE qd.difficult_level IS NOT NULL
#第三步:有了步骤二的表,就很容易分析数据了。这里我原本用avg()窗口函数的,但无奈结果都是0.9999,无法精准到1.0000,只能用sum(做对的题目数)除以count(各个难度的题目数量)
SELECT DISTINCT difficult_level, ROUND( SUM(result) over(PARTITION BY difficult_level ) / COUNT(difficult_level) over(PARTITION BY difficult_level ) ,4) "correct_rate" FROM (SELECT qd.difficult_level, CASE WHEN t.result = "wrong" THEN 0 ELSE 1 END "result" FROM (SELECT up.device_id, dq.question_id, dq.result FROM (SELECT device_id FROM user_profile WHERE university = "浙江大学") up LEFT JOIN question_practice_detail dq ON up.device_id = dq.device_id) t LEFT JOIN question_detail qd ON t.question_id = qd.question_id WHERE qd.difficult_level IS NOT NULL) tt ORDER BY correct_rate