SQL突击战——员工薪水中位数
569. 员工薪水中位数
#SQL架构
Create table If Not Exists Employee (id int, company varchar(255), salary int)
Truncate table Employee
insert into Employee (id, company, salary) values ('1', 'A', '2341')
insert into Employee (id, company, salary) values ('2', 'A', '341')
insert into Employee (id, company, salary) values ('3', 'A', '15')
insert into Employee (id, company, salary) values ('4', 'A', '15314')
insert into Employee (id, company, salary) values ('5', 'A', '451')
insert into Employee (id, company, salary) values ('6', 'A', '513')
insert into Employee (id, company, salary) values ('7', 'B', '15')
insert into Employee (id, company, salary) values ('8', 'B', '13')
insert into Employee (id, company, salary) values ('9', 'B', '1154')
insert into Employee (id, company, salary) values ('10', 'B', '1345')
insert into Employee (id, company, salary) values ('11', 'B', '1221')
insert into Employee (id, company, salary) values ('12', 'B', '234')
insert into Employee (id, company, salary) values ('13', 'C', '2345')
insert into Employee (id, company, salary) values ('14', 'C', '2645')
insert into Employee (id, company, salary) values ('15', 'C', '2645')
insert into Employee (id, company, salary) values ('16', 'C', '2652')
insert into Employee (id, company, salary) values ('17', 'C', '65')
Employee 表包含所有员工。Employee 表有三列:员工Id,公司名和薪水。
+-----+------------+--------+
|Id | Company | Salary |
+-----+------------+--------+
|1 | A | 2341 |
|2 | A | 341 |
|3 | A | 15 |
|4 | A | 15314 |
|5 | A | 451 |
|6 | A | 513 |
|7 | B | 15 |
|8 | B | 13 |
|9 | B | 1154 |
|10 | B | 1345 |
|11 | B | 1221 |
|12 | B | 234 |
|13 | C | 2345 |
|14 | C | 2645 |
|15 | C | 2645 |
|16 | C | 2652 |
|17 | C | 65 |
+-----+------------+--------+
请编写SQL查询来查找每个公司的薪水中位数。挑战点:你是否可以在不使用任何内置的SQL函数的情况下解决此问题。
+-----+------------+--------+
|Id | Company | Salary |
+-----+------------+--------+
|5 | A | 451 |
|6 | A | 513 |
|12 | B | 234 |
|9 | B | 1154 |
|14 | C | 2645 |
+-----+------------+--------+
来源:力扣(LeetCode)
链接:https://leetcode-cn.com/problems/median-employee-salary
著作权归领扣网络所有。商业转载请联系官方授权,非商业转载请注明出处。
题解
中位数定义:所有观察值高低排序后找出正中间的一个作为中位数。如果观察值有偶数个,通常取最中间的两个数值的平均数作为中位数,题目的要求是取中间两位。
因此需要中间量为一个公司有多少名员工,每个员工在这个公司的薪资排名,因此可以用一个子查询,获取一个形如
+-----+------------+--------+--------+--------+
|Id | Company | Salary | Rank | count |
+-----+------------+--------+--------+--------+
|1 | A | 2341 | | 6 |
|2 | A | 341 | 6
|3 | A | 15 | 6
|4 | A | 15314 |
|5 | A | 451 |
|6 | A | 513 |
|7 | B | 15 |
|8 | B | 13 |
|9 | B | 1154 |
|10 | B | 1345 |
|11 | B | 1221 |
|12 | B | 234 |
|13 | C | 2345 |
|14 | C | 2645 |
|15 | C | 2645 |
|16 | C | 2652 |
|17 | C | 65 |
+-----+------------+--------+
这样的一个子表,然后寻找Rank等于(count+1)/2或(count+2)/2的数据(可以用in也可以用or)。
# Write your MySQL query statement below
select Id,Company,Salary from
(
select Id,Company,Salary,
count(Id) over (partition by Company) CompanyCount,
row_number() over (partition by Company order by Salary) companyRank
from Employee
) tmp where tmp.companyRank in (floor((tmp.CompanyCount+1)/2),floor((tmp.CompanyCount+2)/2));
题目扩展
如果对于偶数个数据,取中位数变成中间两个的均值,可以在这个查询的基础上再次group by然后用avg即可获得
# Write your MySQL query statement below
select Id,Company,avg(Salary) Salary from
(
select Id,Company,Salary from
(
select Id,Company,Salary,
count(Id) over (partition by Company) CompanyCount,
row_number() over (partition by Company order by Salary) companyRank
from Employee
) tmp where tmp.companyRank in (floor((tmp.CompanyCount+1)/2),floor((tmp.CompanyCount+2)/2))
)
tmp2 group by tmp2.Company;
语法笔记
floor
向下取整,如floor(3.8)=3,floor(3.2)=3
partition by 和 group by
都是进行分组操作,不同点在于group by返回的是分组后进行的聚合操作,如sum,avg,而partition by实际上就是在执行完select之后,在所得结果集之上进行partition,能够在保留全部数据的基础上,只对其中某些字段做分组排序(类似excel中的操作),而group by则只保留参与分组的字段和聚合函数的结果(类似excel中的pivot)。
order by
用于排序,desc 降序,asc 升序
row_number(),rank(),dense_rank()
row_number():按照查询记录生成行号,配合order by可以实现排序,序号不会重复
rank(): 值相同时序号相同,根据记录条数类推
dense_rank():值相同时序号相同,按照序号类推