快速入门

使用 user_profile 表 1-17题

1. 查询多列

-- 创建表 user_profile
drop table if exists user_profile;
CREATE TABLE `user_profile` (
`id` int NOT NULL,
`device_id` int NOT NULL,
`gender` varchar(14) NOT NULL,
`age` int ,
`university` varchar(32) NOT NULL,
`province` varchar(32)  NOT NULL);
INSERT INTO user_profile VALUES(1,2138,'male',21,'北京大学','BeiJing');
INSERT INTO user_profile VALUES(2,3214,'male',null,'复旦大学','Shanghai');
INSERT INTO user_profile VALUES(3,6543,'female',20,'北京大学','BeiJing');
INSERT INTO user_profile VALUES(4,2315,'female',23,'浙江大学','ZheJiang');
INSERT INTO user_profile VALUES(5,5432,'male',25,'山东大学','Shandong');

-- 查询多列
SELECT  device_id,gender,age,university
from user_profile

2. 查询所有列

select * from user_profile;
-- 性能优化更好
SELECT id,device_id,gender,age,university,province from user_profile;

3. 查询结果去重

select DISTINCT university from user_profile;
-- 以 分组 来筛选出 去重的结果
select university from user_profile GROUP BY university;

4. 查询结果限制返回行数

-- 查看 前2个用户 明细 设备ID数据
SELECT device_id from user_profile LIMIT 2
select device_id FROM user_profile WHERE id in(1,2)
select device_id FROM user_profile WHERE id <= 2
select device_id FROM user_profile limit 0,2

5. 将查询后的列重新命名

-- device_id 改名为 user_infos_example
select device_id as user_infos_example from user_profile LIMIT 2

6. 查找学校是北大的学生信息

-- 请你从 用户信息表中 取出 满足条件的数据, 结果 返回设备id 和 学校
select device_id,university from user_profile where university = "北京大学";

7. 查找年龄大于24岁的用户信息

-- 省略了 as (user_profile as u)
SELECT u.device_id,u.gender,u.age,u.university  from user_profile u where age > 24;

8. 查找某个年龄段的用户信息

-- 针对 20岁及以上 且 23岁及以下的 用户 开展分析, 请你取出 满足条件的 设备ID、性别、年龄.
select device_id,gender,age from user_profile where age between 20 and 23 
SELECT device_id,gender,age from user_profile where age>=20 and age<=23
SELECT device_id,gender,age FROM user_profile WHERE age IN (20,21,22,23)

9. 查找除复旦大学的用户信息

-- 查看 除复旦大学以外的 所有用户明细
SELECT device_id,gender,age,university from user_profile 
where university != '复旦大学'
-- where university not like '复旦大学'
-- where university not in ('复旦大学')

10. 用 where 过滤空值练习

-- 取出所有 年龄值不为空的用户的 设备ID,性别,年龄,学校的信息
-- 过滤空值的三种方法: 
-- (1) where 列名 is not null
-- (2) where 列名 != 'null'  比较
-- (3) where 列名 <> 'null'  正则表达式
select device_id,gender,age,university from user_profile WHERE age != 'null'

11. 高级操作符练习(1)

-- 找到 男性 且 GPA在3.5以上(不包括3.5)的 用户
 select device_id,gender,age,university,gpa from user_profile 
 where gender = 'male' and gpa > 3.5 
 -- where gender in ('male') and gpa >3.5

12. 高级操作符练习(2）

-- 学校为 北大 或 GPA在3.7以上(不包括3.7)的 用户 (使用 OR 实现) 
-- 不考虑 索引 和 去重、 device_id 升序处理
select device_id,gender,age,university,gpa from user_profile 
where university = '北京大学' or gpa > 3.7

13. where in 和 not in

-- 找到 学校为北大、复旦和山大的 同学

-- not in 通不过 测试案例
select  device_id,gender,age,university,gpa from user_profile 
where university not in ('浙江大学')

select  device_id,gender,age,university,gpa from user_profile 
where university in ('北京大学', '复旦大学', '山东大学')
-- WHERE university='北京大学' or university='复旦大学' or university='山东大学'

14. 操作符混合运用

-- 找到 gpa在3.5以上(不包括3.5)的山东大学用户 或 gpa在3.8以上(不包括3.8)的复旦大学同学
-- 解法1, 可以 不套括号, and 的优先级 比 or 高
select device_id,gender,age,university,gpa from user_profile WHERE (gpa>3.5 and university = '山东大学') or (gpa>3.8 and university = '复旦大学')
                                            
-- 解法2, union
SELECT device_id, gender, age, university,gpa from user_profile where gpa > 3.8 and university = '复旦大学' UNION
SELECT device_id, gender, age, university,gpa from user_profile where gpa > 3.5 and university = '山东大学'

15. 查看学校名称中含北京的用户

-- 查看 所有大学中带有北京的用户的 信息
-- 匹配串中可包含如下四种通配符:
-- _ 表示 匹配 任意一个字符;
-- % 表示 匹配 0个 或 多个字符;
-- [ ] 表示 匹配 [ ] 中的 任意一个字符 (若要 比较的字符 是连续的, 则可以用 连字符 "-" 表达);
-- [^ ] 表示 不匹配 [ ] 中的 任意一个字符
select device_id,age,university from user_profile where university LIKE '%北京%'

16. 查找GPA 最高值

-- 复旦大学学生 gpa最高值
select MAX(gpa) as gpa from user_profile where university = '复旦大学'
-- 解法2： 排序取 第一个, desc 降序 (即 从大到小)
-- 系统 默认从小到大排序, 即 asc  
select gpa from user_profile where university = '复旦大学' 
order by gpa DESC LIMIT 1 
-- LIMIT 0,1

17. 计算男生人数以及平均GPA

-- round() 返回一个数值, 该数值 是按照指定的小数位数 进行 四舍五入运算的结果.
-- round(avg(gpa),1) 进行 平均数的四舍五入, 保留 1位小数
select COUNT(gender) as male_num,round(avg(gpa),1) as avg_gpa
from user_profile where gender = 'male'

18. 分组计算练习题

用户信息表: user_profile 18-20题

-- 30天内活跃天数字段 (active_days_within_30)
-- 发帖数量字段 (question_cnt)
-- 回答数量字段 (answer_cnt)
drop table if exists user_profile;
CREATE TABLE `user_profile` (
`id` int NOT NULL,
`device_id` int NOT NULL,
`gender` varchar(14) NOT NULL,
`age` int ,
`university` varchar(32) NOT NULL,
`gpa` float,
`active_days_within_30` float,
`question_cnt` float,
`answer_cnt` float
);
INSERT INTO user_profile VALUES(1,2138,'male',21,'北京大学',3.4,7,2,12);
-- 表示:id为1的用户的常用信息为 使用的设备id为2138, 性别为男, 年龄21岁, 北京大学, gpa为3.4, 在过去的30天里面活跃了7天, 发帖数量为2, 回答数量为12
INSERT INTO user_profile VALUES(2,3214,'male',null,'复旦大学',4.0,15,5,25);
INSERT INTO user_profile VALUES(3,6543,'female',20,'北京大学',3.2,12,3,30);
INSERT INTO user_profile VALUES(4,2315,'female',23,'浙江大学',3.6,5,1,2);
INSERT INTO user_profile VALUES(5,5432,'male',25,'山东大学',3.8,20,15,70);
INSERT INTO user_profile VALUES(6,2131,'male',28,'山东大学',3.3,15,7,13);
INSERT INTO user_profile VALUES(7,4321,'male',28,'复旦大学',3.6,9,6,52);

-- 分别计算出 每个学校每种性别的用户数、30天内平均活跃天数 和 平均发帖数量
-- 查询返回结果需要对性别和学校分组, 结果保留1位小数, 1位小数之后的四舍五入
-- 如: 北京大学的男性用户个数为1, 平均活跃天数为7天, 平均发帖量为2
select gender,university,count(gender) as user_num,
avg(active_days_within_30) as avg_active_day,
avg(question_cnt) as avg_question_cnt 
from user_profile 
group by gender,university

19. 分组过滤练习题

-- 取出 平均发贴数低于5的学校 或 平均回帖数小于20的学校
-- 创建了 新的字段 不能用 where, 要用 having
select university,avg(question_cnt) as avg_question_cnt,
avg(answer_cnt) as avg_answer_cnt 
from user_profile 
group by university
having avg(question_cnt) <5 
or avg(answer_cnt) <20

20. 分组排序练习题

-- 看 不同大学的用户 平均发帖情况, 并期望结果 按照 平均发帖情况 进行升序排列
-- order by 默认为升序
select university,avg(answer_cnt) as avg_question_cnt 
from user_profile group by university order by avg_question_cnt

-- 解法2: 不需要单独命名
select university,avg(question_cnt) from user_profile
group by university 
order by avg(question_cnt)

-- 有三类后面 可以加 聚合函数 avg(),sum(),count() 那种
-- 1. select 2. order by 3. having

21. 浙江大学用户题目回答情况

表question_practice_detail + 用户信息表: user_profile 21-22题

id	device_id	question_id	result
1	2138	111	wrong
2	3214	112	wrong
3	3214	113	wrong
4	6543	114	right
5	2315	115	right
6	2315	116	right
7	2315	117	wrong

drop table if exists `user_profile`;
drop table if  exists `question_practice_detail`;
CREATE TABLE `user_profile` (
`device_id` int NOT NULL,
`gender` varchar(14) NOT NULL,
`age` int ,
`university` varchar(32) NOT NULL,
`gpa` float,
`active_days_within_30` int
);
CREATE TABLE `question_practice_detail` (
`device_id` int NOT NULL,
`question_id`int NOT NULL,
`result` varchar(32) NOT NULL
);

INSERT INTO user_profile VALUES(2138,'male',21,'北京大学',3.4,7);
INSERT INTO user_profile VALUES(3214,'male',null,'复旦大学',4.0,15);
INSERT INTO user_profile VALUES(6543,'female',20,'北京大学',3.2,12);
INSERT INTO user_profile VALUES(2315,'female',23,'浙江大学',3.6,5);
INSERT INTO user_profile VALUES(5432,'male',25,'山东大学',3.8,20);
INSERT INTO user_profile VALUES(2131,'male',28,'山东大学',3.3,15);
INSERT INTO user_profile VALUES(4321,'male',28,'复旦大学',3.6,9);
INSERT INTO question_practice_detail VALUES(2138,111,'wrong');
INSERT INTO question_practice_detail VALUES(3214,112,'wrong');
INSERT INTO question_practice_detail VALUES(3214,113,'wrong');
INSERT INTO question_practice_detail VALUES(6543,111,'right');
INSERT INTO question_practice_detail VALUES(2315,115,'right');
INSERT INTO question_practice_detail VALUES(2315,116,'right');
INSERT INTO question_practice_detail VALUES(2315,117,'wrong');
INSERT INTO question_practice_detail VALUES(5432,118,'wrong');
INSERT INTO question_practice_detail VALUES(5432,112,'wrong');
INSERT INTO question_practice_detail VALUES(2131,114,'right');
INSERT INTO question_practice_detail VALUES(5432,113,'wrong');

-- 联表查询
-- 方法 1-1 式: 两张表 联合查询 (join 表1 on 表2) 
select u.device_id,q.question_id,q.result 
from question_practice_detail q join user_profile u
on q.device_id = u.device_id
where university = '浙江大学'

-- 方法 1-2 式: 
-- 省略了as (如: user_profile as up)
select qpd.device_id,qpd.question_id,qpd.result 
from question_practice_detail qpd inner join user_profile up
on up.device_id = qpd.device_id and up.university='浙江大学'

-- 方法2 子查询 (慢一点)
-- 1. 先从 user_profile 画像表 找到 浙江大学的所有学生 device_id
-- select device_id from user_profile where university='浙江大学'
-- 2. 再去 练习明细表 question_practice_detail 筛选出 device_id 在这个列表的记录
-- 3. 用 where in  
select device_id,question_id,result from question_practice_detail
where device_id in (
    select device_id from user_profile
    where university='浙江大学'
)

22. 统计每个学校的答过题的用户平均答题数

-- 链接查询
-- 答题情况明细表 question_practice_detail, 其中 question_id 是 题目编号, result是 答题结果.
-- 查找 每个学校用户的平均答题数目 (说明: 某学校用户平均答题数量 计算方式为 该学校用户答题总次数 除以 答过题的不同用户个数)
-- 你的查询 应返回以下结果(结果 保留4位小数), 结果按照 university 升序排序
-- round(COUNT(question_id) / COUNT(DISTINCT qpd.device_id),4) avg_answer_cnt

SELECT university,COUNT(question_id) / COUNT(DISTINCT qpd.device_id) avg_answer_cnt
FROM question_practice_detail qpd LEFT JOIN user_profile up 
ON up.device_id = qpd.device_id 
GROUP BY university

23. 统计每个学校各难度的用户平均刷题数

表question_practice_detail + 用户信息表: user_profile + 难度表question_detail 23-27题

drop table if  exists `question_detail`;
CREATE TABLE `question_detail` (
`id` int NOT NULL,
`question_id`int NOT NULL,
`difficult_level` varchar(32) NOT NULL
);

INSERT INTO question_detail VALUES(1,111,'hard');
INSERT INTO question_detail VALUES(2,112,'medium');
INSERT INTO question_detail VALUES(3,113,'easy');
INSERT INTO question_detail VALUES(4,115,'easy');
INSERT INTO question_detail VALUES(5,116,'medium');
INSERT INTO question_detail VALUES(6,117,'easy');

-- 计算不同学校、不同难度的用户平均答题量, 你的查询 应返回以下结果(结果在小数点位数保留4位, 4位之后四舍五入)
select university,difficult_level,
ROUND(COUNT(qpd.question_id) / COUNT(DISTINCT qpd.device_id),4) as avg_answer_cnt 
from question_practice_detail qpd
left join user_profile u on u.device_id = qpd.device_id
left join question_detail qd on qpd.question_id = qd.question_id 
group by university,difficult_level

-- 解法2:
SELECT u.university,difficult_level,
COUNT(q.question_id)/COUNT(DISTINCT(q.device_id)) as avg_answer_cnt
FROM user_profile AS u,question_practice_detail AS q,question_detail AS d
WHERE u.device_id = q.device_id and d.question_id = q.question_id
GROUP BY university,difficult_level;

24. 统计每个用户的平均刷题数

-- 计算山东、不同难度的用户 平均答题量, 你的查询 应返回以下结果(结果 在小数点位数 保留4位, 4位之后四舍五入)
-- 解法1
select university,difficult_level,
round(count(qpd.question_id) / count(distinct u.device_id),4) as avg_answer_cnt 
from question_practice_detail qpd 
join user_profile u on qpd.device_id = u.device_id 
join question_detail qd on qpd.question_id = qd.question_id 
where university = '山东大学'
group by university,difficult_level

-- 解法2
select university,difficult_level,
round(count(qpd.question_id) / count(distinct u.device_id),4) as avg_answer_cnt 
from question_practice_detail qpd 
join user_profile u on qpd.device_id = u.device_id 
join question_detail qd on qpd.question_id = qd.question_id
group by university,difficult_level
having university = '山东大学'

25. 查找山东大学或者性别为男生的信息

-- 组合查询
-- 分别查看 学校为山东大学 或者 性别为男性的用户的 device_id、gender、age 和 gpa 数据

-- 注意是 分别查看, 再将 查询结果 拼接到一起
--  or 会去重 而 union all 不会去重
select device_id,gender,age,gpa from user_profile 
where university = '山东大学' 
union all 
select device_id,gender,age,gpa from user_profile
where gender = 'male'

26. 计算 25岁以上和以下的用户数量

-- 组合查询
-- 将 用户 划分为25岁以下 和 25岁及以上两个年龄段, 分别查看 这两个年龄段用户数量
-- 注意: age 为 null 也记为 25岁以下
select '25岁以下' as age_cut,count(device_id)as number 
from user_profile where age<25 or age is null
group by age_cut
union all
select '25岁及以上' as age_cut,count(device_id)as number 
from user_profile where age>=25 
group by age_cut

-- 解法2: if
-- age>=25 就 输出 25岁及以上,否则 归为 25岁以下 
select if(age>=25,'25岁及以上','25岁以下') as age_cut,count(device_id) as number 
from user_profile group by age_cut

-- 解法3: case
-- age>=25 就 输出 25岁及以上,否则 归为 25岁以下 
select (case when age>=25 then '25岁及以上' else '25岁以下' end) as age_cut,
count(device_id) as number 
from user_profile 
group by age_cut

-- 解法3 之 2式
SELECT 
    case 
        when age >= 25 then '25岁及以上'
        else '25岁以下'
    end as age_cut,
    count(device_id) as number
FROM user_profile
GROUP by age_cut;

27. 查看不同年龄段的用户明细

-- 将用户 划分为20岁以下, 20-24岁, 25岁及以上 三个年龄段, 分别查看 不同年龄段用户的明细情况, 请取出 相应数据. (注: 若 年龄为空 请返回 其他.)
select device_id,gender,
(case 
 when age<20 then '20岁以下' 
 when age<25 then '20-24岁' 
 when age>=25 then '25岁及以上' 
 else '其他' 
 end) as age_cut
from user_profile 

-- 解法2: if
select device_id,gender,
if(age<20,'20岁以下',
   if(age>=25,'25岁及以上',
      if(age between 20 and 24,'20-24岁','其他')
     )
  ) as age_cut 
from user_profile

28. 计算用户8月每天的练题数量

表question_practice_detail 和表user_profile 更新字段 question_detail 28-29题

drop table if exists `user_profile`;
drop table if  exists `question_practice_detail`;

CREATE TABLE `user_profile` (
`id` int NOT NULL,
`device_id` int NOT NULL,
`gender` varchar(14) NOT NULL,
`age` int ,
`university` varchar(32) NOT NULL,
`gpa` float,
`active_days_within_30` int ,
`question_cnt` int ,
`answer_cnt` int 
);

CREATE TABLE `question_practice_detail` (
`id` int NOT NULL,
`device_id` int NOT NULL,
`question_id`int NOT NULL,
`result` varchar(32) NOT NULL,
`date` date NOT NULL
);

INSERT INTO user_profile VALUES(1,2138,'male',21,'北京大学',3.4,7,2,12);
INSERT INTO user_profile VALUES(2,3214,'male',null,'复旦大学',4.0,15,5,25);
INSERT INTO user_profile VALUES(3,6543,'female',20,'北京大学',3.2,12,3,30);
INSERT INTO user_profile VALUES(4,2315,'female',23,'浙江大学',3.6,5,1,2);
INSERT INTO user_profile VALUES(5,5432,'male',25,'山东大学',3.8,20,15,70);
INSERT INTO user_profile VALUES(6,2131,'male',28,'山东大学',3.3,15,7,13);
INSERT INTO user_profile VALUES(7,4321,'male',28,'复旦大学',3.6,9,6,52);

INSERT INTO question_practice_detail VALUES(1,2138,111,'wrong','2021-05-03');
INSERT INTO question_practice_detail VALUES(2,3214,112,'wrong','2021-05-09');
INSERT INTO question_practice_detail VALUES(3,3214,113,'wrong','2021-06-15');
INSERT INTO question_practice_detail VALUES(4,6543,111,'right','2021-08-13');
INSERT INTO question_practice_detail VALUES(5,2315,115,'right','2021-08-13');
INSERT INTO question_practice_detail VALUES(6,2315,116,'right','2021-08-14');
INSERT INTO question_practice_detail VALUES(7,2315,117,'wrong','2021-08-15');
INSERT INTO question_practice_detail VALUES(8,3214,112,'wrong','2021-05-09');
INSERT INTO question_practice_detail VALUES(9,3214,113,'wrong','2021-08-15');
INSERT INTO question_practice_detail VALUES(10,6543,111,'right','2021-08-13');
INSERT INTO question_practice_detail VALUES(11,2315,115,'right','2021-08-13');
INSERT INTO question_practice_detail VALUES(12,2315,116,'right','2021-08-14');
INSERT INTO question_practice_detail VALUES(13,2315,117,'wrong','2021-08-15');
INSERT INTO question_practice_detail VALUES(14,3214,112,'wrong','2021-08-16');
INSERT INTO question_practice_detail VALUES(15,3214,113,'wrong','2021-08-18');
INSERT INTO question_practice_detail VALUES(16,6543,111,'right','2021-08-13');

-- 解法1
select Day(date) as day,count(question_id) as question_cnt 
from question_practice_detail 
where date like '2021-08-%'
-- where year(date)=2021 and month(date) = 08
group by day

-- 解法2
select Day(date) as day,count(question_id) as question_cnt 
from question_practice_detail 
group by date
having year(date) = 2021 and month(date) = 08
-- having date between '2021-08-01' and '2021-08-31'

29. 计算用户的平均次日留存率

-- 查看用户 在某天刷题后 第二天还会再来刷题的 平均概率

-- 去重: 需要按照 devece_id,date 去重, 因为 一个人 一天可能 来多次
-- a 为 第一天, b 为 第二天
-- DATE_SUB() 函数 从 日期 减去 指定的时间 间隔; 
-- 代码中 就是 减去一天 到 隔天日期后 还会来的 同一个人, avg() 即为 取平均值 3 / 10 👍

select avg(if(b.device_id is not null,1,0)) as avg_ret
from 
(select distinct device_id,date from question_practice_detail) a
left join (select distinct device_id,date_sub(date,interval 1 day) as date 
from question_practice_detail
) b
on a.device_id = b.device_id and a.date = b.date

-- 解法2: ✔️
-- 1. 用datediff区分第一天和第二天在线的device_id
-- 2. 用 left outer join 做 自表联结
-- 3. 用 distinct q2.device_id,q2.date 做 双重去重, 找到符合条件的 当天在线人数

SELECT count(distinct q2.device_id,q2.date) / 
count(DISTINCT q1.device_id,q1.date) as avg_ret
from question_practice_detail as q1 
left outer join question_practice_detail as q2
on q1.device_id = q2.device_id and DATEDIFF(q2.date,q1.date) = 1

-- 为什么要用 left outer join? 因为我们要 保证 分母的完整性; 
-- 如果只用了 join 则会 根据联结条件 去除 q1表中 不符合条件的行;
-- 则 最后结果 应该会 变成1, 所以要用 外联结.

30. 统计每种性别的人数

表user_submit 30-32题

-- 统计 每个性别的用户 分别有多少 参赛者

-- 1. LOCATE(substr,str): 返回子串 substr 在字符串 str 中第一次出现的位置,
-- 如果 字符substr 在 字符串str 中不存在, 则返回 0;

-- 2. POSITION(substr IN str): 返回子串 substr 在字符串 str 中第一次出现的位置,
-- 如果 字符substr 在 字符串str 中不存在, 与 LOCATE函数 作用相同;

-- 3. LEFT(str, length): 从左边开始截取 str, length 是 截取的长度;

-- 4. RIGHT(str, length): 从右边开始截取 str, length 是 截取的长度;

-- 5. SUBSTRING_INDEX(str, substr, n): 
-- 返回 字符substr 在 str 中 第n次出现位置 之前的字符串;

-- 6. SUBSTRING(str, n, m): 返回 字符串str 从 第n个字符 截取到 第m个字符;

-- 7. REPLACE(str, n, m): 将 字符串str 中的 n字符 替换成 m字符;

-- 8. LENGTH(str): 计算 字符串str的 长度;

drop table if exists user_submit;
CREATE TABLE `user_submit` (
`id` int NOT NULL,
`device_id` int NOT NULL,
`profile` varchar(100) NOT NULL,
`blog_url` varchar(100) NOT NULL
);
INSERT INTO user_submit VALUES(1,2138,'180cm,75kg,27,male','http:/url/bisdgboy777');
INSERT INTO user_submit VALUES(1,3214,'165cm,45kg,26,female','http:/url/dkittycc');
INSERT INTO user_submit VALUES(1,6543,'178cm,65kg,25,male','http:/url/tigaer');
INSERT INTO user_submit VALUES(1,4321,'171cm,55kg,23,female','http:/url/uhsksd');
INSERT INTO user_submit VALUES(1,2131,'168cm,45kg,22,female','http:/url/sysdney');

-- 把 male 和 female 写在 profile 里面, 取出

-- 使用 substring_index 函数 
-- substring_index(str,delim,count) 
-- str为 要处理的字符串, delim为 分隔符, count为 计数
-- 根据逗号 将 profile字段 切分, 性别位于 最后一位, 位置填写 -1
--  即可取出 性别字段, 进行 分组计算 即可得出 所需数据
select substring_index(profile,',',-1) as gender,count(*) as number 
from user_submit group by gender

-- 大佬写的✍ 解法2 
SELECT IF(u.profile LIKE '%female','female','male') as gender,COUNT(*) as number
FROM user_submit u
-- GROUP BY IF(u.profile LIKE '%female','female','male');
GROUP BY gender

31. 提取博客URL中的用户名

-- blog_url字段 中 url字符后的字符串 为 用户个人博客的用户名, 现在运营想要把 用户的个人博客用户字段 提取出 单独记录为 一个新的字段, 请取出 所需数据

-- 解法1: 字符串切割截取
-- substring_index(string, '切割标志', 位置数(负号:从后面开始))
-- 切割位置 为 倒数第一个 /, 所以为 -1
select device_id,substring_index(blog_url,'/',-1) as user_name 
from user_submit

-- 解法2: 替换法  👍
-- replace(string, '被替换部分','替换后的结果')
select device_id,replace(blog_url,'http:/url/','') as user_name 
from user_submit

-- 解法3: 截取法 👍
-- substr(string, start_point, length*可选参数*)
-- length((blog_url)-10) 删除 前面10个长度字符 http:/url/ 
select device_id,substr(blog_url,11,length(blog_url)-10) as user_name 
from user_submit

-- 解法4: 删除法 👍
-- trim('被删除字段' from 列名)
select device_id,trim('http:/url/' from blog_url) as user_name 
from user_submit

32. 截取出年龄

--统计 每个年龄的用户 分别有多少 参赛者

-- 第一次 截取的是 27,male | 1 (从倒数第二个, 开始截取)
-- 第二次 截取才是 27 | 1 
select substring_index(substring_index(profile,',',-2),',',1) as age,
count(device_id) as number 
from user_submit
group by age
-- group by 分组,去重 并 聚合 不用 distinct(count(device_id))

33. 找出每个学校 GPA最低的同学

user_profile 表, 33题

-- 输出结果 按 university升序排序

-- 因为 学校与学生 是 一对多的关系
-- 如果仅用 min 求出 gpa最低的学生, 查询结果中的id 与 学生 不一定是 对应的关系.
-- 这种做法 ❌ 
select device_id,university,min(gpa) as gpa from user_profile 
group by university

-- 解法1 ✔️
-- b表 是 最低gpa, a表 实现对应 device_id
select a.device_id,a.university,a.gpa from user_profile a
join (select device_id,university,min(gpa) as gpa from user_profile 
group by university) b on a.university = b.university and a.gpa = b.gpa
order by a.university

-- 解法1 变式 --> 进阶, 秀 👍 因为 查出的 university 和 gpa 没毛病 ✔️
-- 就是 device_id 不对应 (学校与学生 是 一对多关系)
-- where 以 university 和 gpa 相同 为 条件 --> 找出 对应device_id 
select device_id,university,gpa
from user_profile
where (university,gpa) in (select university,min(gpa) from user_profile group by university)
order by university

-- 解法2 窗口函数 ✔️ (不能直接命名为 rank)
-- 先分组 再排序 然后选出 gpa最低的 
-- row_number() \ rank() \ dense_rank() 升序

select device_id,university,gpa from 
(select device_id,university,gpa,row_number() 
 over (partition by university order by gpa) as rn 
from user_profile 
) as u_min
where u_min.rn = 1

-- 要找最大最小值对应的那条数据的其他字段信息, 通常 窗口函数 取 where rn=1
-- (直接 max() 或者 min() 查询到的 其他字段 是 不对应的)

34. 统计复旦用户 8月练题情况

表question_practice_detail + 用户信息表: user_profile + 难度表question_detail 34题

-- 每个用户 在8月份 练习的总题目数 和 回答正确的题目数情况

-- 用left join 并且 用sum 而不是 count ✔️
-- 在 where里的条件 不受 join方式 影响, 如果用 left join 只要满足 where条件 --> 左表的记录 就会输出, 而不管 on的条件 是否match (即使 关联的右表的列 为 null)
-- group by 和 count一起用的时候, 会把 null值 也统计上

-- 关于 month(date) 为什么不写在 where后面 ❓  因为: 
-- 1. 先执行from, 再执行where, where中的操作 是对 连接好的表的操作; 
-- 2. a 左连接 b,对于 a 有而 b没有的 id, 则 连接好的表中的这些id的 b 相关的属性值为空.

select u.device_id,university,count(question_id) as question_cnt,
sum(if(result = 'right',1,0)) as right_question_cnt 
from user_profile u left join question_practice_detail qpd 
on qpd.device_id = u.device_id and date like '2021-08-%'
-- and month(date) = 8 
where university = '复旦大学'
group by u.device_id

-- 解法2 另一种思路 大开眼界👀 ✔️
-- 在 8月份没答题的 分为 两种情况:
--  1. 在 8月份以外答了题 但是 没在8月份 答题;
--  2. 所有月份 都没有答题, 其他人把 第一种情况 漏掉了
SELECT u.device_id,university,
sum(case when date>='2021-08-01' and date<='2021-08-31' then 1 else 0 end) as question_cnt,
sum(case when date>='2021-08-01' and date<='2021-08-31' and result='right' then 1 else 0 end) as right_question_cnt
-- sum(case when month(date) = 8 then 1 else 0 end) as question_cnt,
-- sum(case when month(date) = 8 and result = 'right' then 1 else 0 end) as right_question_cnt
from user_profile u 
left join question_practice_detail q 
on u.device_id = q.device_id
where university='复旦大学'
group by u.device_id

35. 浙大不同难度题目的正确率

表question_practice_detail + 用户信息表: user_profile + 难度表question_detail

drop table if exists `user_profile`;
drop table if  exists `question_practice_detail`;
drop table if  exists `question_detail`;
CREATE TABLE `user_profile` (
`id` int NOT NULL,
`device_id` int NOT NULL,
`gender` varchar(14) NOT NULL,
`age` int ,
`university` varchar(32) NOT NULL,
`gpa` float,
`active_days_within_30` int ,
`question_cnt` int ,
`answer_cnt` int 
);
CREATE TABLE `question_practice_detail` (
`id` int NOT NULL,
`device_id` int NOT NULL,
`question_id`int NOT NULL,
`result` varchar(32) NOT NULL,
`date` date NOT NULL
);
CREATE TABLE `question_detail` (
`id` int NOT NULL,
`question_id`int NOT NULL,
`difficult_level` varchar(32) NOT NULL
);

INSERT INTO user_profile VALUES(1,2138,'male',21,'北京大学',3.4,7,2,12);
INSERT INTO user_profile VALUES(2,3214,'male',null,'复旦大学',4.0,15,5,25);
INSERT INTO user_profile VALUES(3,6543,'female',20,'北京大学',3.2,12,3,30);
INSERT INTO user_profile VALUES(4,2315,'female',23,'浙江大学',3.6,5,1,2);
INSERT INTO user_profile VALUES(5,5432,'male',25,'山东大学',3.8,20,15,70);
INSERT INTO user_profile VALUES(6,2131,'male',28,'山东大学',3.3,15,7,13);
INSERT INTO user_profile VALUES(7,4321,'male',28,'复旦大学',3.6,9,6,52);
INSERT INTO question_practice_detail VALUES(1,2138,111,'wrong','2021-05-03');
INSERT INTO question_practice_detail VALUES(2,3214,112,'wrong','2021-05-09');
INSERT INTO question_practice_detail VALUES(3,3214,113,'wrong','2021-06-15');
INSERT INTO question_practice_detail VALUES(4,6543,111,'right','2021-08-13');
INSERT INTO question_practice_detail VALUES(5,2315,115,'right','2021-08-13');
INSERT INTO question_practice_detail VALUES(6,2315,116,'right','2021-08-14');
INSERT INTO question_practice_detail VALUES(7,2315,117,'wrong','2021-08-15');
INSERT INTO question_practice_detail VALUES(8,3214,112,'wrong','2021-05-09');
INSERT INTO question_practice_detail VALUES(9,3214,113,'wrong','2021-08-15');
INSERT INTO question_practice_detail VALUES(10,6543,111,'right','2021-08-13');
INSERT INTO question_practice_detail VALUES(11,2315,115,'right','2021-08-13');
INSERT INTO question_practice_detail VALUES(12,2315,116,'right','2021-08-14');
INSERT INTO question_practice_detail VALUES(13,2315,117,'wrong','2021-08-15');
INSERT INTO question_practice_detail VALUES(14,3214,112,'wrong','2021-08-16');
INSERT INTO question_practice_detail VALUES(15,3214,113,'wrong','2021-08-18');
INSERT INTO question_practice_detail VALUES(16,6543,111,'right','2021-08-13');
INSERT INTO question_detail VALUES(1,111,'hard');
INSERT INTO question_detail VALUES(2,112,'medium');
INSERT INTO question_detail VALUES(3,113,'easy');
INSERT INTO question_detail VALUES(4,115,'easy');
INSERT INTO question_detail VALUES(5,116,'medium');
INSERT INTO question_detail VALUES(6,117,'easy');

-- 取出相应数据, 并按照 准确率 升序输出
-- 解法1 ✔️
select difficult_level, 
-- sum(if(result = "right", 1, 0)) / count(u.answer_cnt) as correct_rate 
sum(case when qpd.result = "right" then 1 else 0 end) 
/ count(u.answer_cnt) as correct_rate
from user_profile u inner join question_practice_detail qpd on 
u.device_id = qpd.device_id
inner join question_detail qd on qd.question_id = qpd.question_id
where university = '浙江大学' 
group by difficult_level
order by correct_rate

-- 解法2 ✔️
-- 1. 做个三表关联
-- 2. 按问题难度分组
-- 3. sum() 嵌套if() 计算出 正确的 答题数量, 再除以 答题总数
select 
    d.difficult_level,
    sum(if(result = 'right', 1, 0)) / count(*) as correct_rate
from 
    user_profile u, question_practice_detail qp, question_detail d
where 
    u.university = '浙江大学' 
    and u.device_id = qp.device_id
    and qp.question_id = d.question_id
group by d.difficult_level
order by correct_rate

36. 查找后排序

-- 取出用户信息表中的 用户年龄
select device_id,age from user_profile order by age

37. 查找后多列排序

-- 取出用户信息表中的 年龄 和 gpa数据, 并先按照 gpa升序 排序，再按照 年龄升序 排序输出
select device_id,gpa,age from user_profile order by gpa,age

38. 查找后降序排列

-- 取出用户信息表中 对应的数据, 并先按照 gpa、年龄 降序排序 输出

-- order by 对后所有 排序字段 有效, 升序 和 降序 要给 每个字段 单独设定
-- 排序函数 不需要 where 限定
select device_id,gpa,age from user_profile order by gpa desc,age desc

39. 21年8月份练题总数

-- 2021年8月份 所有练习过题目的总用户数 和 练习过题目的总次数
select count(distinct device_id) as did_cnt, count(question_id) as question_cnt 
from question_practice_detail where date like '2021-08%'
-- where date >= '2021-08-01' and date <= '2021-08-31';

issues

order by 和 group by ❓

-- 需要分组的时候 要用 聚合函数 结合 group by, 
-- 实现 重新排序 用order by; 一般要遵循 先分组后排序

牛客网_在线编程-刷题SQL-快速入门

快速入门

1. 查询 多列

2. 查询 所有列

3. 查询 结果去重

4. 查询结果 限制返回行数

5. 将 查询后的列 重新命名

6. 查找 学校是北大的学生信息

7. 查找 年龄大于24岁的 用户信息

8. 查找 某个年龄段的用户信息

9. 查找 除复旦大学的用户信息

10. 用 where 过滤空值 练习

11. 高级操作符 练习(1)

12. 高级操作符 练习(2）

13. where in 和 not in

14. 操作符 混合运用

15. 查看 学校名称中含北京的 用户

16. 查找GPA 最高值

17. 计算 男生人数 以及 平均GPA

18. 分组计算 练习题

19. 分组过滤 练习题

20. 分组排序 练习题

21. 浙江大学用户 题目回答情况

22. 统计 每个学校的答过题的用户 平均答题数

23. 统计 每个学校 各难度的用户 平均刷题数

24. 统计 每个用户的 平均刷题数

25. 查找 山东大学 或者 性别为男生的信息

26. 计算 25岁以上 和 以下的用户数量

27. 查看 不同年龄段的 用户明细

28. 计算 用户8月 每天的练题数量

29. 计算 用户的 平均次日留存率

30. 统计 每种性别的 人数

31. 提取 博客URL中的 用户名

32. 截取出 年龄

33. 找出 每个学校 GPA最低的同学

34. 统计 复旦用户 8月练题情况

35. 浙大 不同难度题目的 正确率

36. 查找后 排序

37. 查找后 多列排序

38. 查找后 降序排列

39. 21年8月份 练题总数