使用sql进行数据汇总

image-20211207120208142

2-报表项目关系图


基本用法

代码:

1
2
3
4
5
6
7
8
9
# 查询每个客户`ID`, `company_name`, `contact_name`, `contact_title`, `city`, 
# 和 `country`,并按国家名排序
select company_name,
contact_name,
contact_title,
city,
country
from customers
order by country;

结果:image-20211207120440486


连接查询

代码:

1
2
3
4
5
6
7
8
#  列出所有提供了4种以上不同商品的供应商列表,
# 所需字段:`supplier_id`, `supplier_id`, 和 `supplier_id` (提供的商品种类数量)
select s.supplier_id,
company_name,
count(*) as prodects_count
from suppliers as s
join products p on s.supplier_id = p.supplier_id
group by company_name, s.supplier_id;

结果:image-20211207120529120


代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
#  提取订单编号为10250的订单详情,
# 显示如下信息:`product_name`, `quantity`, `unit_price` ( `order_items` 表),
# `discount` ,`order_date` 按商品名字排序
select product_name,
quantity,
oi.unit_price,
discount,
order_date
from orders as o
join order_items oi
on o.order_id = oi.order_id
join products p
on oi.product_id = p.product_id
where o.order_id = 10250;

结果:image-20211207120543574


代码:

1
2
3
4
5
6
7
8
9
10
11
# 提供订单编号为10248的相关信息,
# 包括product_name, unit_price (在 `order_items` 表中),
# quantity(数量), company_name(供应商公司名字 ,起别名 `supplier_name`)
select product_name,
oi.unit_price,
oi.quantity,
company_name as su_name
from order_items as oi
join suppliers as s on oi.product_id = s.supplier_id
join products p on s.supplier_id = p.supplier_id
where order_id = 10248;

结果:image-20211207120614620


代码:

1
2
3
4
5
6
7
8
9
10
11
# 提取每件商品的详细信息,包括  商品名称(`product_name`),
# 供应商的公司名称 (`company_name`,在 `suppliers` 表中), 类别名称 `category_name`,
# 商品单价`unit_price`, 和商品的单位`quantity per unit`。
select p.product_name,
s.company_name,
c.category_name,
p.unit_price,
p.quantity_per_unit
from products as p
join suppliers s on p.supplier_id = s.supplier_id
join categories c on p.category_id = c.category_id;

结果:


使用聚合

代码:

1
2
3
4
5
# 在销售报表中,我们需要计算订单的总付款额。
select sum(unit_price * quantity)
from orders as o
join order_items oi on o.order_id = oi.order_id
where o.order_id = 10250;

结果:image-20211207120656016


代码:

1
2
3
4
5
6
7
8
# 每个商品的折扣都存储在 `order_items` 表的`discount` 列中
# 例如,“ 0.20”折扣意味着客户支付原始价格的“ 1-0.2 = 0.8”
# 在上面的代码中添加第二个名为`total_price_after_discount`的列,计算打折后的商品价格
select sum(unit_price * quantity),
sum(unit_price * quantity * (1 - discount)) as dis
from orders as o
join order_items oi on o.order_id = oi.order_id
where o.order_id = 10250;

结果:image-20211207120720666


代码:

1
2
3
4
5
6
7
8
9
# 统计多个订单的总金额
select o.customer_id,
c.company_name,
SUM(unit_price * quantity) AS total_price
from orders as o
join customers c on o.customer_id = c.customer_id
join order_items oi on o.order_id = oi.order_id
where o.ship_country = 'France'
group by o.order_id, c.company_name;

结果:image-20211207120728878


结合count()

代码:

1
2
3
4
5
6
7
8
9
10
11
# 统计每个员工处理的订单总数
# 结果包含员工ID`employee_id`,姓名`first_name` 和 `last_name`,处理的订单总数(别名 `orders_count`)
select e.employee_id,
first_name,
last_name,
count(*) as order_count
from orders as o
join employees e on o.employee_id = e.employee_id
group by e.employee_id, e.first_name, e.last_name
order by e.employee_id;

结果:image-20211207120749793


代码:

1
2
3
4
5
6
7
8
9
# 统计每个类别中的库存产品值多少钱? 
# 显示三列:`category_id`, `category_name`, 和 `category_total_value`
# 如何计算库存商品总价:`SUM(unit_price * units_in_stock)`。
select c.category_id,
c.category_name,
SUM(unit_price * units_in_stock) AS category_total_value
from categories as c
join products p on c.category_id = p.category_id
group by c.category_id, c.category_name;

结果:image-20211207120804583


代码:

1
2
3
4
5
6
7
# 了解每个员工的业绩:计算每个员工的订单数量
select c.customer_id,
c.company_name,
count(*) as c_
from customers as c
join orders o on c.customer_id = o.customer_id
group by c.customer_id, company_name;

结果:image-20211207120828621


代码:

1
2
3
4
5
6
7
8
# 计算每个客户的下订单数
# 结果包含:用户id、用户公司名称、订单数量(`customer_id`, `company_name`, `orders_count` )
select c.customer_id,
company_name,
count(*)
from orders as o
join customers c on o.customer_id = c.customer_id
group by c.customer_id, c.company_name;

结果:image-20211207121053115


代码:

1
2
3
4
5
6
7
8
9
10
# 我们要统计发货到不同国家/地区的订单数量以及已经发货的订单数量
SELECT
ship_country,
COUNT(*) AS all_orders,
COUNT(shipped_date) AS shipped_orders
FROM orders
GROUP BY ship_country;
# COUNT(*)将计算ship_country中的所有订单
# COUNT(shipped_date)将仅计算shipped_date列值不为NULL的行
# 在我们的数据库中,shipped_date 列中的NULL表示尚未发货,COUNT(shipped_date)仅计算已经发货的订单

结果:image-20211207200703588


代码:

1
2
3
4
5
# 查询订单运送到西班牙的客户数量(去重)别名`number_of_companies`
SELECT
COUNT(DISTINCT customer_id) AS number_of_companies
FROM orders
WHERE ship_country = 'Spain';

结果:image-20211207200815006

注意:

image-20211207200827266


case when 和 group by 数据分组

基本用法

代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
# 需求:我们要在报表中显示每种产品的库存量
# 但我们不想简单地将“ units_in_stock”列放在报表中。
# 报表中只需要一个总体级别,例如低,高
SELECT product_id,
product_name,
units_in_stock,
CASE
WHEN units_in_stock > 100 THEN 'high'
WHEN units_in_stock > 50 THEN 'moderate'
WHEN units_in_stock > 0 THEN 'low'
WHEN units_in_stock = 0 THEN 'none'
END AS availability
FROM products;

结果:image-20211207201053346


代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
# 需求: 创建一个报表,统计员工的经验水平
# 显示字段:`first_name`, `last_name`, `hire_date`, 和 `experience`
# 经验字段(`experience` ):
# 'junior' 2014年1月1日以后雇用的员工
# 'middle' 在2013年1月1日之后至2014年1月1日之前雇用的员工
# 'senior' 2013年1月1日或之前雇用的员工
select first_name,
last_name,
case
WHEN hire_date > '2014-01-01' THEN 'junior'
WHEN hire_date > '2013-01-01' THEN 'middle'
WHEN hire_date <= '2013-01-01' THEN 'senior'
end as experience
from employees;

结果:image-20211207201109287


结合else

代码:

1
2
3
4
5
6
7
8
9
10
11
# 我们的商店要针对北美地区的用户做促销活动:任何运送到北美地区(美国,加拿大) 的包裹免运费,其他地方10.0.。
# 创建报表,查询订单编号为10720~10730 活动后的运费价格
select order_id,
customer_id,
ship_country,
case
when ship_country = 'USA' or ship_country = 'Canada' then 0.0
else 10.0
end
from orders
where order_id BETWEEN 10720 AND 10730;

结果:image-20211207201140170


group by中使用case when

代码:

1
2
3
4
5
6
7
8
# 在引入北美地区免运费的促销策略时,我们也想知道运送到北美地区和其它国家地区的订单数量。
SELECT CASE
WHEN ship_country = 'USA' OR ship_country = 'Canada' THEN 0.0
ELSE 10.0
END AS shipping_cost,
COUNT(*) AS order_count
FROM orders
GROUP BY shipping_cost;

结果:image-20211207201226266


代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
# 需求:创建报表统计供应商来自那个大洲
# 报表中包含两个字段:供应商来自哪个大洲(`supplier_continent` )和 供应产品种类数量(`product_count`)
# 供应商来自哪个大洲(`supplier_continent` )包含如下取值:
# 'North America' (供应商来自 'USA' 和 'Canada')
# 'Asia' (供应商来自 'Japan' 和 'Singapore')
# 'Other' (其它国家)
SELECT CASE
WHEN country in ('USA', 'Canada') THEN 'North America'
WHEN country in ('Japan', 'Singapore') THEN 'Asia'
ELSE 'Other'
END AS 'supplier_continent',
COUNT(*) AS 'product_count'
FROM suppliers s
JOIN products p
ON s.supplier_id = p.supplier_id
GROUP BY supplier_continent;

结果:image-20211207201248069


将 case when 和 count 结合

代码:

1
2
3
4
5
6
7
8
9
10
11
#需求:Washington (WA) 是 Northwind的主要运营地区,统计有多少订单是由华盛顿地区的员工处理的,多少订单是有其它地区的员工处理的
#结果字段: `orders_wa_employees` 和 `orders_not_wa_employees`
select COUNT(CASE
WHEN region = 'WA' THEN order_id
END) AS orders_wa_employees,
COUNT(CASE
WHEN region != 'WA' THEN order_id
END) AS orders_not_wa_employees

from employees as e
join orders o on e.employee_id = o.employee_id;

结果:image-20211207201325696


代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
# 需求:创建报表,统计不同类别产品的库存量,将库存量分成两类 >30 和 <=30 两档分别统计数量
# 报表包含三个字段:
# 类别名称 `category_name`
# 库存充足 `high_availability`
# 库存紧张 `low_availability`
SELECT c.category_name,
COUNT(CASE
WHEN units_in_stock > 30 THEN product_id
END) AS high_availability,
COUNT(CASE
WHEN units_in_stock <= 30 THEN product_id
END) AS low_availability
FROM products p
JOIN categories c
ON p.category_id = c.category_id
GROUP BY c.category_id,
c.category_name;

结果:image-20211207201347388


sum 和 count的转换

代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# count
SELECT COUNT(CASE
WHEN region = 'WA' THEN order_id
END) AS orders_wa_employees,
COUNT(CASE
WHEN region != 'WA' THEN order_id
END) AS orders_not_wa_employees
FROM employees e
JOIN orders o
ON e.employee_id = o.employee_id;


# sum
SELECT SUM(CASE
WHEN region = 'WA' THEN 1
END) AS orders_wa_employees,
SUM(CASE
WHEN region != 'WA' THEN 1
END) AS orders_not_wa_employees
FROM employees e
JOIN orders o
ON e.employee_id = o.employee_id;

结果:image-20211207201441068


sum中使用case when进行计算

代码:

1
2
3
4
5
6
7
8
9
10
11
# 需求:我们现在要统计每个订单的总付款额以及非素食产品的总付款额。
# 注:非素食产品的产品ID ( `category_id`) 是 6 和 8
select o.order_id,
sum(oi.quantity * oi.unit_price * (1 - oi.discount)) as total_price,
sum(case
when p.category_id in (6, 8) then oi.quantity * oi.unit_price * (1 - oi.discount)
else 0 end) AS non_vegetarian_price
from orders as o
join order_items oi on o.order_id = oi.order_id
join products p on oi.product_id = p.product_id
group by o.order_id;

结果:image-20211207201548507


代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
# 需求:输出报表,统计不同供应商供应商品的总库存量,以及高价值商品的库存量(单价超过40定义为高价值)
# 结果显示四列:
# 供应商ID `supplier_id`
# 供应商公司名 `company_name`
# 由该供应商提供的总库存 `all_units`
# 由该供应商提供的高价值商品库存 `expensive_units`
SELECT
s.supplier_id,
s.company_name,
SUM(units_in_stock) AS all_units,
SUM(CASE
WHEN unit_price > 40.0 THEN units_in_stock
ELSE 0
END) AS expensive_units
FROM products p
JOIN suppliers s
ON p.supplier_id = s.supplier_id
GROUP BY s.supplier_id,
s.company_name;

结果:image-20211207203640473


综合练习

代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#三道练习题

# 作业1:创建报表来为每种商品添加价格标签,
# 贵、中等、便宜,结果包含如下字段:`product_id`, `product_name`, `unit_price`, 和 `price_level`
# 价格等级 `price_level` 的取值说明:
# 'expensive' 单价高于100的产品
# 'average' 单价高于40但不超过100的产品
# 'cheap' 其他产品
select product_id,
product_name,
unit_price,
case
when unit_price > 100 THEN 'expensive'
when unit_price > 40 THEN 'average'
else 'cheap'
end
from products
group by product_id,
product_name,
unit_price;

# 作业2:制作报表统计所有订单的总价(不计任何折扣)对它们进行分类。
# 包含一下字段:(order_id, total_price(折扣前), price_group )
# 字段 price_group 取值说明:
# 总价超过2000美元
# 'average',总价在$ 600到$ 2,000之间,包括两端
# 'low' 总价低于$ 600
select o.order_id,
sum(product_id * unit_price) as total_price,
case
when sum(product_id * unit_price) > 2000 then 'average'
when sum(product_id * unit_price) <= 2000 and sum(product_id * unit_price) >= 600 then 'average'
else 'low'
end as 'price_group'
from orders as o
join order_items oi on o.order_id = oi.order_id
group by order_id;

# 作业3:需求:统计所有订单的运费,将运费高低分为三档
# 报表中包含三个字段:
# `low_freight` `freight`值小于“ 40.0”的订单数
# `avg_freight` `freight`值大于或等于“ 40.0”但小于“ 80.0”的订单数
# `high_freight ` `freight`值大于或等于“ 80.0”的订单数

SELECT COUNT(CASE WHEN freight >= 80.0 THEN order_id END) AS high_freight,
COUNT(CASE WHEN freight < 40.0 THEN order_id END) AS low_freight,
COUNT(CASE WHEN freight >= 40.0 AND freight < 80.0 THEN order_id END) AS avg_freight
FROM orders;

结果:略


使用 with(CTE) 公用表达式

基本用法

代码:

1
2
3
4
5
6
7
8
9
10
# 需求: 统计每个类别中的平均产品数量`avg_product_count`.是多少?
# 通过CTE实现
with category_count AS (
select category_id,
count(product_id) as count_
from products
group by category_id
)
SELECT avg(count_) as avg_
FROM category_count;

结果:image-20211208092808538


代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
# 查找来自加拿大的每个客户的平均订单价值
WITH order_total_prices AS (
SELECT o.order_id,
o.customer_id,
SUM(unit_price * quantity) AS total_price
FROM orders o
JOIN order_items oi
ON o.order_id = oi.order_id
GROUP BY o.order_id, o.customer_id
)
SELECT c.customer_id,
c.company_name,
AVG(total_price) AS avg_total_price
FROM order_total_prices OTP
JOIN customers c
ON OTP.customer_id = c.customer_id
WHERE c.country = 'Canada'
GROUP BY c.customer_id, c.company_name;

结果:image-20211208103607010


代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
# 需求:创建报表,统计华盛顿地区(WA)每位员工所处理订单的订单平均价格
# 报表中包含如下列:
# `employee_id`, `first_name`, `last_name`, 和 `avg_total_price`
# 员工ID,名字,姓氏,平均订单总价
# 提示:
# 通过CTE计算所有订单的总价格,并将处理该订单的员工ID一起返回
# 在外部查询中,将CTE与`employees`表JOIN起来,计算平均值,显示所有所需信息,并按地区过滤结果
with name AS (
# your CTE
SELECT o.order_id,
employee_id,
sum(unit_price * quantity) as price
FROM orders as o
join order_items oi on o.order_id = oi.order_id
group by o.order_id, employee_id
)
SELECT e.employee_id,
e.first_name,
e.last_name,
avg(price) as avg_
FROM name t
join employees e on t.employee_id = e.employee_id
WHERE e.region = 'WA'
group by e.employee_id,
e.first_name,
e.last_name;

结果:image-20211208103634104


代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# 需求:创建报表,统计运输到不同国家/地区的订单中,不同商品的平均数量
# 报表包含`ship_country`和`avg_distinct_item_count`列,并按数量降序排列
with name AS (
select o.order_id,
ship_country,
count(distinct product_id) as dis_
from orders as o
join order_items oi on o.order_id = oi.order_id
group by o.order_id, o.ship_country
)
SELECT ship_country,
avg(dis_) as avg_
FROM name
group by ship_country
order by avg_ desc;

结果:image-20211208103657117


代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
# 需求:创建报表,统计每位员工2016年所有订单中处理的平均物品数量
# 每一个订单中有多个订单明细项在 `order_items` 表中记录
# `order_items` 表中的`quantity` 字段记录了订单中每件商品购买的商品数量
# 订单中的物品数量用订单明细表中的`quantity` 求和进行计算
# 报表中包含如下列:
# `first_name`, `last_name`, `avg_item_count`
# 员工名字,员工姓氏,订单平均物品数量
with order_ AS (
# your CTE
select o.order_id,
o.employee_id,
o.order_date,
sum(quantity) as sum_
from orders as o
join order_items oi on o.order_id = oi.order_id
WHERE o.order_date >= '2016-01-01'
AND o.order_date < '2017-01-01'

group by o.order_id, o.employee_id
)
SELECT first_name,
last_name,
avg(sum_) as avg_
FROM order_
join employees e on order_.employee_id = e.employee_id
group by e.employee_id;

结果:image-20211208121904126


代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# 我们还可以将多级聚合与自定义分类结合在一起
# 需求:通过用户下单数量对用户分三组
# 订单数少于10的客户
# 订单数为10–20的客户
# 订单数超过20的客户
with customer as (
select customer_id,
CASE
WHEN COUNT(o.order_id) > 20
THEN 'more than 20'
WHEN COUNT(o.order_id) <= 20 AND COUNT(o.order_id) >= 10
THEN 'between 10 and 20'
ELSE 'less than 10'
END AS order_count_cat
from orders as o
group by customer_id
)
select order_count_cat,
count(customer_id) as count_
from customer
group by order_count_cat;

结果:image-20211208103727040


与 case when 组合使用

代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# 需求:创建报表统计高价值和低价值客户的数量
# 客户在折扣前支付的所有订单的总价大于 $20,000 ,则将该客户视为“高价值”
# 否则,将它们视为“低值”
# 报表中包含两个字段
# category ( 'high-value' 或 'low-value'), `customer_count`
# 类别(“高价值”或“低价值”)和 “用户数量”
with name AS (
select o.customer_id,
sum(unit_price * quantity) as sum_,
case
when sum(unit_price * quantity) > 20000 THEN 'high-value'
ELSE 'low-value'
end as category
from orders as o
join order_items oi
on o.order_id = oi.order_id
group by o.customer_id
)
SELECT category,
count(customer_id) as number_
FROM name
group by category
order by category;

结果:image-20211208110453477


代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# 我们还可以将多级聚合与自定义分类结合在一起
# 需求:通过用户下单数量对用户分三组
# 订单数少于10的客户
# 订单数为10–20的客户
# 订单数超过20的客户
with customer as (
select customer_id,
CASE
WHEN COUNT(o.order_id) > 20
THEN 'more than 20'
WHEN COUNT(o.order_id) <= 20 AND COUNT(o.order_id) >= 10
THEN 'between 10 and 20'
ELSE 'less than 10'
END AS order_count_cat
from orders as o
group by customer_id
)
select order_count_cat,
count(customer_id) as count_
from customer
group by order_count_cat;

结果:image-20211208122047872


三层聚合

with子句必须在引用的select语句 之前定义 ,同级with关键字 只能使用一次,多个只能 用逗号分割;最后一个with 子句与下面的查询之间不能有逗号,只通过右括号分割, with 子句的查询必须用括号括起来。

如果定义了with子句,但其后没有跟select查询,则会报错!

前面的with子句定义的查询在后面的with子句中可以使用。但是一个with子句内部不能嵌套with子句!

代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
# 创建报表查找所有处理的订单数量大于平均水平的员工
with order_count_employees as (
select employee_id,
count(order_id) as order_count
from orders
group by employee_id),

avg_order_count as (
select avg(order_count) as avg_order_count
from order_count_employees
)
select employee_id,
order_count,
avg_order_count
from order_count_employees,
avg_order_count
where order_count > avg_order_count;

结果:image-20211208122222300


计算多指标

基础用法

代码:

1
2
3
4
5
6
7
8
9
10
11
12
# 需求:创建报表计算2016年的绩效,计算每位员工处理的订单的总数和总收入
# 报表包含如下字段:`first_name` 员工的名字,`last_name` 员工姓氏,`order_count` 员工处理的订单总数,`order_revenue` 订单处理的订单总收入
select first_name,
last_name,
count(o.order_id) as order_count,
sum(unit_price * quantity) as order_revenue
from employees as e
left join orders o on e.employee_id = o.employee_id
left join order_items oi on o.order_id = oi.order_id
where o.order_date >= '2016-01-01'
and o.order_date < '2017-01-01'
group by o.order_id, first_name, last_name;

结果:image-20211208173831052


自定义指标

代码:

1
2
3
4
5
6
7
8
# 创建报表:对于每个类别,请显示库存产品的数量(即“ units_in_stock> 0”的产品)和未库存的产品数量。
# 该报告应包含三列:`category_name`,`products_in_stock`,`products_not_in_stock`
select category_name,
sum(case when units_in_stock > 0 then 1 else 0 end) as products_in_stock,
sum(case when units_in_stock = 0 then 1 else 0 end) as products_not_in_stock
from categories
join products p on categories.category_id = p.category_id
group by p.category_id;

结果:image-20211208173853993


计算百分比

代码:

1
2
3
4
5
6
7
8
# 需求:统计所有订单中已经出货的百分比
select count(case
when shipped_date is not null then order_id end) as not_null,
count(order_id) as all_,
concat(round(count(case
when shipped_date is not null then order_id end)
/ count(order_id) * 100, 2), '%') as ratio
from orders;

结果:image-20211208174032866


代码:

1
2
3
4
5
6
7
# 需求:统计缺货商品的百分比
# 显示三列:`count_discontinued`,`count_all`和`percentage_discontinued` 保留两位有效数字
select count(product_id) as count_all,
sum(case when discontinued = TRUE then 1 else 0 end) as count_discontinued,
sum(case when discontinued = FALSE then 1 else 0 end) as percentage_discontinued,
round(sum(case when discontinued = TRUE then 1 else 0 end) / count(product_id) * 100, 2) as percentage
from products;

结果:image-20211208174107118


代码:

1
2
3
4
5
6
7
8
# 按产品类别分组统计每类商品的缺货比例
select category_name,
count(case when discontinued is TRUE then product_id end) as count_,
count(product_id) as count_all,
round(count(case when discontinued is TRUE then product_id end) / count(product_id) * 100, 2)
from products p
join categories c on p.category_id = c.category_id
group by c.category_id, category_name;

结果:image-20211208174156869


统计总量并计算占比

代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
# 统计2016年7月下订单的客户以及每个客户的消费金额占2016年7月总销售金额的占比
with temp AS (
SELECT sum(unit_price * quantity) as total_sales
FROM orders as o
join order_items oi on o.order_id = oi.order_id
WHERE order_date >= '2016-07-01'
AND order_date < '2016-08-01'
)
SELECT c.customer_id,
SUM(quantity * unit_price) AS revenue,
ROUND(SUM(quantity * unit_price) / temp.total_sales * 100, 2) AS revenue
FROM temp,customers as c
join orders o2 on c.customer_id = o2.customer_id
join order_items i on o2.order_id = i.order_id
WHERE order_date >= '2016-07-01'
AND order_date < '2016-08-01'
group by c.customer_id, c.company_name,temp.total_sales;

结果:image-20211208174233413


分组对比

按行比较

代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
# 需求:统计运往北美地区和除北美地区外的订单数量。
with orders_by_group as (
select
order_id,
case
when ship_country in ('usa', 'canada', 'mexico')
then 'north america'
else 'other'
end as shipping_continent
from orders
)
select
shipping_continent,
count(order_id) as order_count
from orders_by_group
group by shipping_continent;

结果:image-20211208174403933


代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# 需求:统计“ WA ”地区的员工和所有其他员工处理的订单总量
# 结果显示两列:`employee_region`(员工所在区域,“ WA”或“ Not WA”)和`order_count`。
with name AS (
select o.order_id,
case when region='WA' then 'WA'
else 'Not WA' end as e_region
from employees as e
join orders o on e.employee_id = o.employee_id
)
SELECT
e_region,
count(e_region) as number_
FROM name
group by e_region;

结果:image-20211208174436755


代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
# 需求:统计法国客户和其他国家/地区的客户消费的总金额(折扣后)
# 结果显示两列:`customer_country`(“ France”或“其他”)和 `discount_revenue`(四舍五入到小数点后两位
with temp AS (
select o.order_id,
case when country ='France' then 'France' else 'Other' end as customer_country

from orders as o
join customers c on o.customer_id = c.customer_id
)
SELECT
customer_country,
round(sum(quantity*unit_price* (1 - discount)), 2) as discount_revenue
FROM temp
join order_items on temp.order_id=order_items.order_id
group by customer_country;

结果:image-20211208174501522


按列比较

代码:

1
2
3
4
5
6
7
8
9
10
11
# 统计已经发货的订单数量和尚未发货的订单数量
select
count(case
when shipped_date is not null
then order_id
end) as orders_shipped,
count(case
when shipped_date is null
then order_id
end) as orders_pending
from orders;

结果:image-20211208174534465


代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# 需求:统计运往加拿大的订单中的全价出售的订单数量和打折订单的订单数量
# 显示两列:`full_price_count` 和 `discounted_price_count`
select
sum(case
when discount = 0
then 1
end) as full_price_count,
sum(case
when discount != 0
then 1
end) as discounted_price_count
from orders o
join order_items oi
on o.order_id = oi.order_id
where ship_country = 'canada';

结果:image-20211208174707133


多列对比

代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
# 需求:统计每位员工处理的运往德语地区的订单总金额和运往其它国家的订单总金额,"dach_orders"(运往德国,奥地利或瑞士的订单),"other_orders"(运往所有其他国家的订单)。
# 结果显示以下列:`employee_id`,`first_name`,`last_name`,`dach_orders`,`other_orders`
# 员工ID,员工名字,员工姓氏,运往德语地区的订单数量,运往其他国家的订单数量
select
e.employee_id,
e.first_name,
e.last_name,
sum(case
when ship_country in ('switzerland', 'germany', 'austria')
then quantity * unit_price
else 0
end) as dach_orders,
sum(case
when ship_country not in ('switzerland', 'germany', 'austria')
then quantity * unit_price
else 0
end) as other_orders
from orders o
join order_items oi
on o.order_id = oi.order_id
join employees e
on o.employee_id = e.employee_id
group by e.employee_id,
e.first_name,
e.last_name;

结果:image-20211208174829874


占比对比

代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# 需求:统计ID为1和2的员工处理的订单中,不同收货国家的订单金额,
# 占发往该国的总订单金额百分比(折扣前)保留两位有效数字。查询结果显示3列:
# 订单要运送到的国家/地区(`ship_country`),`percentage_employee_1`-ID为1的员工所产生的收入占,
# `percentage_employee_2`-ID为2的员工所产生的收入占比
with revenue_employee_1 as (
select
ship_country,
sum(case
when employee_id = 1
then unit_price * quantity
else 0
end) as employee_revenue,
sum(unit_price * quantity) as total_revenue
from orders o
join order_items oi
on o.order_id = oi.order_id
group by ship_country),
revenue_employee_2 as (
select
ship_country,
sum(case
when employee_id = 2
then unit_price * quantity
else 0
end) as employee_revenue,
sum(unit_price * quantity) as total_revenue
from orders o
join order_items oi
on o.order_id = oi.order_id
group by ship_country)
select
re_1.ship_country,
round(re_1.employee_revenue / re_1.total_revenue * 100, 2) as percentage_employee_1,
round(re_2.employee_revenue / re_2.total_revenue * 100, 2) as percentage_employee_2
from revenue_employee_1 re_1
join revenue_employee_2 re_2
on re_1.ship_country = re_2.ship_country;

结果:image-20211208175608551


代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# 统计每一个员工处理的,收货地为德国和美国的订单中,已发货的订单占已比。
with germany_orders as (
select
employee_id,
count(case
when shipped_date is not null
then order_id
end) as count_shipped,
count(order_id) as count_all
from orders o
where o.ship_country = 'germany'
group by employee_id
),
usa_orders as (
select
employee_id,
count(case
when shipped_date is not null
then order_id
end) as count_shipped,
count(order_id) as count_all
from orders o
where o.ship_country = 'usa'
group by employee_id
)
select
e.first_name,
e.last_name,
round(ge_or.count_shipped / ge_or.count_all * 100, 2) as germany_perc,
round(us_or.count_shipped / us_or.count_all * 100, 2) as usa_perc
from germany_orders ge_or
join usa_orders us_or
on ge_or.employee_id = us_or.employee_id
join employees e
on ge_or.employee_id = e.employee_id
or us_or.employee_id = e.employee_id;

结果:image-20211208175039518

使用sql进行数据汇总

image-20211207120208142

2-报表项目关系图


基本用法

代码:

1
2
3
4
5
6
7
8
9
# 查询每个客户`ID`, `company_name`, `contact_name`, `contact_title`, `city`, 
# 和 `country`,并按国家名排序
select company_name,
contact_name,
contact_title,
city,
country
from customers
order by country;

结果:image-20211207120440486


连接查询

代码:

1
2
3
4
5
6
7
8
#  列出所有提供了4种以上不同商品的供应商列表,
# 所需字段:`supplier_id`, `supplier_id`, 和 `supplier_id` (提供的商品种类数量)
select s.supplier_id,
company_name,
count(*) as prodects_count
from suppliers as s
join products p on s.supplier_id = p.supplier_id
group by company_name, s.supplier_id;

结果:image-20211207120529120


代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
#  提取订单编号为10250的订单详情,
# 显示如下信息:`product_name`, `quantity`, `unit_price` ( `order_items` 表),
# `discount` ,`order_date` 按商品名字排序
select product_name,
quantity,
oi.unit_price,
discount,
order_date
from orders as o
join order_items oi
on o.order_id = oi.order_id
join products p
on oi.product_id = p.product_id
where o.order_id = 10250;

结果:image-20211207120543574


代码:

1
2
3
4
5
6
7
8
9
10
11
# 提供订单编号为10248的相关信息,
# 包括product_name, unit_price (在 `order_items` 表中),
# quantity(数量), company_name(供应商公司名字 ,起别名 `supplier_name`)
select product_name,
oi.unit_price,
oi.quantity,
company_name as su_name
from order_items as oi
join suppliers as s on oi.product_id = s.supplier_id
join products p on s.supplier_id = p.supplier_id
where order_id = 10248;

结果:image-20211207120614620


代码:

1
2
3
4
5
6
7
8
9
10
11
# 提取每件商品的详细信息,包括  商品名称(`product_name`),
# 供应商的公司名称 (`company_name`,在 `suppliers` 表中), 类别名称 `category_name`,
# 商品单价`unit_price`, 和商品的单位`quantity per unit`。
select p.product_name,
s.company_name,
c.category_name,
p.unit_price,
p.quantity_per_unit
from products as p
join suppliers s on p.supplier_id = s.supplier_id
join categories c on p.category_id = c.category_id;

结果:


使用聚合

代码:

1
2
3
4
5
# 在销售报表中,我们需要计算订单的总付款额。
select sum(unit_price * quantity)
from orders as o
join order_items oi on o.order_id = oi.order_id
where o.order_id = 10250;

结果:image-20211207120656016


代码:

1
2
3
4
5
6
7
8
# 每个商品的折扣都存储在 `order_items` 表的`discount` 列中
# 例如,“ 0.20”折扣意味着客户支付原始价格的“ 1-0.2 = 0.8”
# 在上面的代码中添加第二个名为`total_price_after_discount`的列,计算打折后的商品价格
select sum(unit_price * quantity),
sum(unit_price * quantity * (1 - discount)) as dis
from orders as o
join order_items oi on o.order_id = oi.order_id
where o.order_id = 10250;

结果:image-20211207120720666


代码:

1
2
3
4
5
6
7
8
9
# 统计多个订单的总金额
select o.customer_id,
c.company_name,
SUM(unit_price * quantity) AS total_price
from orders as o
join customers c on o.customer_id = c.customer_id
join order_items oi on o.order_id = oi.order_id
where o.ship_country = 'France'
group by o.order_id, c.company_name;

结果:image-20211207120728878


结合count()

代码:

1
2
3
4
5
6
7
8
9
10
11
# 统计每个员工处理的订单总数
# 结果包含员工ID`employee_id`,姓名`first_name` 和 `last_name`,处理的订单总数(别名 `orders_count`)
select e.employee_id,
first_name,
last_name,
count(*) as order_count
from orders as o
join employees e on o.employee_id = e.employee_id
group by e.employee_id, e.first_name, e.last_name
order by e.employee_id;

结果:image-20211207120749793


代码:

1
2
3
4
5
6
7
8
9
# 统计每个类别中的库存产品值多少钱? 
# 显示三列:`category_id`, `category_name`, 和 `category_total_value`
# 如何计算库存商品总价:`SUM(unit_price * units_in_stock)`。
select c.category_id,
c.category_name,
SUM(unit_price * units_in_stock) AS category_total_value
from categories as c
join products p on c.category_id = p.category_id
group by c.category_id, c.category_name;

结果:image-20211207120804583


代码:

1
2
3
4
5
6
7
# 了解每个员工的业绩:计算每个员工的订单数量
select c.customer_id,
c.company_name,
count(*) as c_
from customers as c
join orders o on c.customer_id = o.customer_id
group by c.customer_id, company_name;

结果:image-20211207120828621


代码:

1
2
3
4
5
6
7
8
# 计算每个客户的下订单数
# 结果包含:用户id、用户公司名称、订单数量(`customer_id`, `company_name`, `orders_count` )
select c.customer_id,
company_name,
count(*)
from orders as o
join customers c on o.customer_id = c.customer_id
group by c.customer_id, c.company_name;

结果:image-20211207121053115


代码:

1
2
3
4
5
6
7
8
9
10
# 我们要统计发货到不同国家/地区的订单数量以及已经发货的订单数量
SELECT
ship_country,
COUNT(*) AS all_orders,
COUNT(shipped_date) AS shipped_orders
FROM orders
GROUP BY ship_country;
# COUNT(*)将计算ship_country中的所有订单
# COUNT(shipped_date)将仅计算shipped_date列值不为NULL的行
# 在我们的数据库中,shipped_date 列中的NULL表示尚未发货,COUNT(shipped_date)仅计算已经发货的订单

结果:image-20211207200703588


代码:

1
2
3
4
5
# 查询订单运送到西班牙的客户数量(去重)别名`number_of_companies`
SELECT
COUNT(DISTINCT customer_id) AS number_of_companies
FROM orders
WHERE ship_country = 'Spain';

结果:image-20211207200815006

注意:

image-20211207200827266


case when 和 group by 数据分组

基本用法

代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
# 需求:我们要在报表中显示每种产品的库存量
# 但我们不想简单地将“ units_in_stock”列放在报表中。
# 报表中只需要一个总体级别,例如低,高
SELECT product_id,
product_name,
units_in_stock,
CASE
WHEN units_in_stock > 100 THEN 'high'
WHEN units_in_stock > 50 THEN 'moderate'
WHEN units_in_stock > 0 THEN 'low'
WHEN units_in_stock = 0 THEN 'none'
END AS availability
FROM products;

结果:image-20211207201053346


代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
# 需求: 创建一个报表,统计员工的经验水平
# 显示字段:`first_name`, `last_name`, `hire_date`, 和 `experience`
# 经验字段(`experience` ):
# 'junior' 2014年1月1日以后雇用的员工
# 'middle' 在2013年1月1日之后至2014年1月1日之前雇用的员工
# 'senior' 2013年1月1日或之前雇用的员工
select first_name,
last_name,
case
WHEN hire_date > '2014-01-01' THEN 'junior'
WHEN hire_date > '2013-01-01' THEN 'middle'
WHEN hire_date <= '2013-01-01' THEN 'senior'
end as experience
from employees;

结果:image-20211207201109287


结合else

代码:

1
2
3
4
5
6
7
8
9
10
11
# 我们的商店要针对北美地区的用户做促销活动:任何运送到北美地区(美国,加拿大) 的包裹免运费,其他地方10.0.。
# 创建报表,查询订单编号为10720~10730 活动后的运费价格
select order_id,
customer_id,
ship_country,
case
when ship_country = 'USA' or ship_country = 'Canada' then 0.0
else 10.0
end
from orders
where order_id BETWEEN 10720 AND 10730;

结果:image-20211207201140170


group by中使用case when

代码:

1
2
3
4
5
6
7
8
# 在引入北美地区免运费的促销策略时,我们也想知道运送到北美地区和其它国家地区的订单数量。
SELECT CASE
WHEN ship_country = 'USA' OR ship_country = 'Canada' THEN 0.0
ELSE 10.0
END AS shipping_cost,
COUNT(*) AS order_count
FROM orders
GROUP BY shipping_cost;

结果:image-20211207201226266


代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
# 需求:创建报表统计供应商来自那个大洲
# 报表中包含两个字段:供应商来自哪个大洲(`supplier_continent` )和 供应产品种类数量(`product_count`)
# 供应商来自哪个大洲(`supplier_continent` )包含如下取值:
# 'North America' (供应商来自 'USA' 和 'Canada')
# 'Asia' (供应商来自 'Japan' 和 'Singapore')
# 'Other' (其它国家)
SELECT CASE
WHEN country in ('USA', 'Canada') THEN 'North America'
WHEN country in ('Japan', 'Singapore') THEN 'Asia'
ELSE 'Other'
END AS 'supplier_continent',
COUNT(*) AS 'product_count'
FROM suppliers s
JOIN products p
ON s.supplier_id = p.supplier_id
GROUP BY supplier_continent;

结果:image-20211207201248069


将 case when 和 count 结合

代码:

1
2
3
4
5
6
7
8
9
10
11
#需求:Washington (WA) 是 Northwind的主要运营地区,统计有多少订单是由华盛顿地区的员工处理的,多少订单是有其它地区的员工处理的
#结果字段: `orders_wa_employees` 和 `orders_not_wa_employees`
select COUNT(CASE
WHEN region = 'WA' THEN order_id
END) AS orders_wa_employees,
COUNT(CASE
WHEN region != 'WA' THEN order_id
END) AS orders_not_wa_employees

from employees as e
join orders o on e.employee_id = o.employee_id;

结果:image-20211207201325696


代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
# 需求:创建报表,统计不同类别产品的库存量,将库存量分成两类 >30 和 <=30 两档分别统计数量
# 报表包含三个字段:
# 类别名称 `category_name`
# 库存充足 `high_availability`
# 库存紧张 `low_availability`
SELECT c.category_name,
COUNT(CASE
WHEN units_in_stock > 30 THEN product_id
END) AS high_availability,
COUNT(CASE
WHEN units_in_stock <= 30 THEN product_id
END) AS low_availability
FROM products p
JOIN categories c
ON p.category_id = c.category_id
GROUP BY c.category_id,
c.category_name;

结果:image-20211207201347388


sum 和 count的转换

代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# count
SELECT COUNT(CASE
WHEN region = 'WA' THEN order_id
END) AS orders_wa_employees,
COUNT(CASE
WHEN region != 'WA' THEN order_id
END) AS orders_not_wa_employees
FROM employees e
JOIN orders o
ON e.employee_id = o.employee_id;


# sum
SELECT SUM(CASE
WHEN region = 'WA' THEN 1
END) AS orders_wa_employees,
SUM(CASE
WHEN region != 'WA' THEN 1
END) AS orders_not_wa_employees
FROM employees e
JOIN orders o
ON e.employee_id = o.employee_id;

结果:image-20211207201441068


sum中使用case when进行计算

代码:

1
2
3
4
5
6
7
8
9
10
11
# 需求:我们现在要统计每个订单的总付款额以及非素食产品的总付款额。
# 注:非素食产品的产品ID ( `category_id`) 是 6 和 8
select o.order_id,
sum(oi.quantity * oi.unit_price * (1 - oi.discount)) as total_price,
sum(case
when p.category_id in (6, 8) then oi.quantity * oi.unit_price * (1 - oi.discount)
else 0 end) AS non_vegetarian_price
from orders as o
join order_items oi on o.order_id = oi.order_id
join products p on oi.product_id = p.product_id
group by o.order_id;

结果:image-20211207201548507


代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
# 需求:输出报表,统计不同供应商供应商品的总库存量,以及高价值商品的库存量(单价超过40定义为高价值)
# 结果显示四列:
# 供应商ID `supplier_id`
# 供应商公司名 `company_name`
# 由该供应商提供的总库存 `all_units`
# 由该供应商提供的高价值商品库存 `expensive_units`
SELECT
s.supplier_id,
s.company_name,
SUM(units_in_stock) AS all_units,
SUM(CASE
WHEN unit_price > 40.0 THEN units_in_stock
ELSE 0
END) AS expensive_units
FROM products p
JOIN suppliers s
ON p.supplier_id = s.supplier_id
GROUP BY s.supplier_id,
s.company_name;

结果:image-20211207203640473


综合练习

代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#三道练习题

# 作业1:创建报表来为每种商品添加价格标签,
# 贵、中等、便宜,结果包含如下字段:`product_id`, `product_name`, `unit_price`, 和 `price_level`
# 价格等级 `price_level` 的取值说明:
# 'expensive' 单价高于100的产品
# 'average' 单价高于40但不超过100的产品
# 'cheap' 其他产品
select product_id,
product_name,
unit_price,
case
when unit_price > 100 THEN 'expensive'
when unit_price > 40 THEN 'average'
else 'cheap'
end
from products
group by product_id,
product_name,
unit_price;

# 作业2:制作报表统计所有订单的总价(不计任何折扣)对它们进行分类。
# 包含一下字段:(order_id, total_price(折扣前), price_group )
# 字段 price_group 取值说明:
# 总价超过2000美元
# 'average',总价在$ 600到$ 2,000之间,包括两端
# 'low' 总价低于$ 600
select o.order_id,
sum(product_id * unit_price) as total_price,
case
when sum(product_id * unit_price) > 2000 then 'average'
when sum(product_id * unit_price) <= 2000 and sum(product_id * unit_price) >= 600 then 'average'
else 'low'
end as 'price_group'
from orders as o
join order_items oi on o.order_id = oi.order_id
group by order_id;

# 作业3:需求:统计所有订单的运费,将运费高低分为三档
# 报表中包含三个字段:
# `low_freight` `freight`值小于“ 40.0”的订单数
# `avg_freight` `freight`值大于或等于“ 40.0”但小于“ 80.0”的订单数
# `high_freight ` `freight`值大于或等于“ 80.0”的订单数

SELECT COUNT(CASE WHEN freight >= 80.0 THEN order_id END) AS high_freight,
COUNT(CASE WHEN freight < 40.0 THEN order_id END) AS low_freight,
COUNT(CASE WHEN freight >= 40.0 AND freight < 80.0 THEN order_id END) AS avg_freight
FROM orders;

结果:略


使用 with(CTE) 公用表达式

基本用法

代码:

1
2
3
4
5
6
7
8
9
10
# 需求: 统计每个类别中的平均产品数量`avg_product_count`.是多少?
# 通过CTE实现
with category_count AS (
select category_id,
count(product_id) as count_
from products
group by category_id
)
SELECT avg(count_) as avg_
FROM category_count;

结果:image-20211208092808538


代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
# 查找来自加拿大的每个客户的平均订单价值
WITH order_total_prices AS (
SELECT o.order_id,
o.customer_id,
SUM(unit_price * quantity) AS total_price
FROM orders o
JOIN order_items oi
ON o.order_id = oi.order_id
GROUP BY o.order_id, o.customer_id
)
SELECT c.customer_id,
c.company_name,
AVG(total_price) AS avg_total_price
FROM order_total_prices OTP
JOIN customers c
ON OTP.customer_id = c.customer_id
WHERE c.country = 'Canada'
GROUP BY c.customer_id, c.company_name;

结果:image-20211208103607010


代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
# 需求:创建报表,统计华盛顿地区(WA)每位员工所处理订单的订单平均价格
# 报表中包含如下列:
# `employee_id`, `first_name`, `last_name`, 和 `avg_total_price`
# 员工ID,名字,姓氏,平均订单总价
# 提示:
# 通过CTE计算所有订单的总价格,并将处理该订单的员工ID一起返回
# 在外部查询中,将CTE与`employees`表JOIN起来,计算平均值,显示所有所需信息,并按地区过滤结果
with name AS (
# your CTE
SELECT o.order_id,
employee_id,
sum(unit_price * quantity) as price
FROM orders as o
join order_items oi on o.order_id = oi.order_id
group by o.order_id, employee_id
)
SELECT e.employee_id,
e.first_name,
e.last_name,
avg(price) as avg_
FROM name t
join employees e on t.employee_id = e.employee_id
WHERE e.region = 'WA'
group by e.employee_id,
e.first_name,
e.last_name;

结果:image-20211208103634104


代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# 需求:创建报表,统计运输到不同国家/地区的订单中,不同商品的平均数量
# 报表包含`ship_country`和`avg_distinct_item_count`列,并按数量降序排列
with name AS (
select o.order_id,
ship_country,
count(distinct product_id) as dis_
from orders as o
join order_items oi on o.order_id = oi.order_id
group by o.order_id, o.ship_country
)
SELECT ship_country,
avg(dis_) as avg_
FROM name
group by ship_country
order by avg_ desc;

结果:image-20211208103657117


代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
# 需求:创建报表,统计每位员工2016年所有订单中处理的平均物品数量
# 每一个订单中有多个订单明细项在 `order_items` 表中记录
# `order_items` 表中的`quantity` 字段记录了订单中每件商品购买的商品数量
# 订单中的物品数量用订单明细表中的`quantity` 求和进行计算
# 报表中包含如下列:
# `first_name`, `last_name`, `avg_item_count`
# 员工名字,员工姓氏,订单平均物品数量
with order_ AS (
# your CTE
select o.order_id,
o.employee_id,
o.order_date,
sum(quantity) as sum_
from orders as o
join order_items oi on o.order_id = oi.order_id
WHERE o.order_date >= '2016-01-01'
AND o.order_date < '2017-01-01'

group by o.order_id, o.employee_id
)
SELECT first_name,
last_name,
avg(sum_) as avg_
FROM order_
join employees e on order_.employee_id = e.employee_id
group by e.employee_id;

结果:image-20211208121904126


代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# 我们还可以将多级聚合与自定义分类结合在一起
# 需求:通过用户下单数量对用户分三组
# 订单数少于10的客户
# 订单数为10–20的客户
# 订单数超过20的客户
with customer as (
select customer_id,
CASE
WHEN COUNT(o.order_id) > 20
THEN 'more than 20'
WHEN COUNT(o.order_id) <= 20 AND COUNT(o.order_id) >= 10
THEN 'between 10 and 20'
ELSE 'less than 10'
END AS order_count_cat
from orders as o
group by customer_id
)
select order_count_cat,
count(customer_id) as count_
from customer
group by order_count_cat;

结果:image-20211208103727040


与 case when 组合使用

代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# 需求:创建报表统计高价值和低价值客户的数量
# 客户在折扣前支付的所有订单的总价大于 $20,000 ,则将该客户视为“高价值”
# 否则,将它们视为“低值”
# 报表中包含两个字段
# category ( 'high-value' 或 'low-value'), `customer_count`
# 类别(“高价值”或“低价值”)和 “用户数量”
with name AS (
select o.customer_id,
sum(unit_price * quantity) as sum_,
case
when sum(unit_price * quantity) > 20000 THEN 'high-value'
ELSE 'low-value'
end as category
from orders as o
join order_items oi
on o.order_id = oi.order_id
group by o.customer_id
)
SELECT category,
count(customer_id) as number_
FROM name
group by category
order by category;

结果:image-20211208110453477


代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# 我们还可以将多级聚合与自定义分类结合在一起
# 需求:通过用户下单数量对用户分三组
# 订单数少于10的客户
# 订单数为10–20的客户
# 订单数超过20的客户
with customer as (
select customer_id,
CASE
WHEN COUNT(o.order_id) > 20
THEN 'more than 20'
WHEN COUNT(o.order_id) <= 20 AND COUNT(o.order_id) >= 10
THEN 'between 10 and 20'
ELSE 'less than 10'
END AS order_count_cat
from orders as o
group by customer_id
)
select order_count_cat,
count(customer_id) as count_
from customer
group by order_count_cat;

结果:image-20211208122047872


三层聚合

with子句必须在引用的select语句 之前定义 ,同级with关键字 只能使用一次,多个只能 用逗号分割;最后一个with 子句与下面的查询之间不能有逗号,只通过右括号分割, with 子句的查询必须用括号括起来。

如果定义了with子句,但其后没有跟select查询,则会报错!

前面的with子句定义的查询在后面的with子句中可以使用。但是一个with子句内部不能嵌套with子句!

代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
# 创建报表查找所有处理的订单数量大于平均水平的员工
with order_count_employees as (
select employee_id,
count(order_id) as order_count
from orders
group by employee_id),

avg_order_count as (
select avg(order_count) as avg_order_count
from order_count_employees
)
select employee_id,
order_count,
avg_order_count
from order_count_employees,
avg_order_count
where order_count > avg_order_count;

结果:image-20211208122222300


计算多指标

基础用法

代码:

1
2
3
4
5
6
7
8
9
10
11
12
# 需求:创建报表计算2016年的绩效,计算每位员工处理的订单的总数和总收入
# 报表包含如下字段:`first_name` 员工的名字,`last_name` 员工姓氏,`order_count` 员工处理的订单总数,`order_revenue` 订单处理的订单总收入
select first_name,
last_name,
count(o.order_id) as order_count,
sum(unit_price * quantity) as order_revenue
from employees as e
left join orders o on e.employee_id = o.employee_id
left join order_items oi on o.order_id = oi.order_id
where o.order_date >= '2016-01-01'
and o.order_date < '2017-01-01'
group by o.order_id, first_name, last_name;

结果:image-20211208173831052


自定义指标

代码:

1
2
3
4
5
6
7
8
# 创建报表:对于每个类别,请显示库存产品的数量(即“ units_in_stock> 0”的产品)和未库存的产品数量。
# 该报告应包含三列:`category_name`,`products_in_stock`,`products_not_in_stock`
select category_name,
sum(case when units_in_stock > 0 then 1 else 0 end) as products_in_stock,
sum(case when units_in_stock = 0 then 1 else 0 end) as products_not_in_stock
from categories
join products p on categories.category_id = p.category_id
group by p.category_id;

结果:image-20211208173853993


计算百分比

代码:

1
2
3
4
5
6
7
8
# 需求:统计所有订单中已经出货的百分比
select count(case
when shipped_date is not null then order_id end) as not_null,
count(order_id) as all_,
concat(round(count(case
when shipped_date is not null then order_id end)
/ count(order_id) * 100, 2), '%') as ratio
from orders;

结果:image-20211208174032866


代码:

1
2
3
4
5
6
7
# 需求:统计缺货商品的百分比
# 显示三列:`count_discontinued`,`count_all`和`percentage_discontinued` 保留两位有效数字
select count(product_id) as count_all,
sum(case when discontinued = TRUE then 1 else 0 end) as count_discontinued,
sum(case when discontinued = FALSE then 1 else 0 end) as percentage_discontinued,
round(sum(case when discontinued = TRUE then 1 else 0 end) / count(product_id) * 100, 2) as percentage
from products;

结果:image-20211208174107118


代码:

1
2
3
4
5
6
7
8
# 按产品类别分组统计每类商品的缺货比例
select category_name,
count(case when discontinued is TRUE then product_id end) as count_,
count(product_id) as count_all,
round(count(case when discontinued is TRUE then product_id end) / count(product_id) * 100, 2)
from products p
join categories c on p.category_id = c.category_id
group by c.category_id, category_name;

结果:image-20211208174156869


统计总量并计算占比

代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
# 统计2016年7月下订单的客户以及每个客户的消费金额占2016年7月总销售金额的占比
with temp AS (
SELECT sum(unit_price * quantity) as total_sales
FROM orders as o
join order_items oi on o.order_id = oi.order_id
WHERE order_date >= '2016-07-01'
AND order_date < '2016-08-01'
)
SELECT c.customer_id,
SUM(quantity * unit_price) AS revenue,
ROUND(SUM(quantity * unit_price) / temp.total_sales * 100, 2) AS revenue
FROM temp,customers as c
join orders o2 on c.customer_id = o2.customer_id
join order_items i on o2.order_id = i.order_id
WHERE order_date >= '2016-07-01'
AND order_date < '2016-08-01'
group by c.customer_id, c.company_name,temp.total_sales;

结果:image-20211208174233413


分组对比

按行比较

代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
# 需求:统计运往北美地区和除北美地区外的订单数量。
with orders_by_group as (
select
order_id,
case
when ship_country in ('usa', 'canada', 'mexico')
then 'north america'
else 'other'
end as shipping_continent
from orders
)
select
shipping_continent,
count(order_id) as order_count
from orders_by_group
group by shipping_continent;

结果:image-20211208174403933


代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# 需求:统计“ WA ”地区的员工和所有其他员工处理的订单总量
# 结果显示两列:`employee_region`(员工所在区域,“ WA”或“ Not WA”)和`order_count`。
with name AS (
select o.order_id,
case when region='WA' then 'WA'
else 'Not WA' end as e_region
from employees as e
join orders o on e.employee_id = o.employee_id
)
SELECT
e_region,
count(e_region) as number_
FROM name
group by e_region;

结果:image-20211208174436755


代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
# 需求:统计法国客户和其他国家/地区的客户消费的总金额(折扣后)
# 结果显示两列:`customer_country`(“ France”或“其他”)和 `discount_revenue`(四舍五入到小数点后两位
with temp AS (
select o.order_id,
case when country ='France' then 'France' else 'Other' end as customer_country

from orders as o
join customers c on o.customer_id = c.customer_id
)
SELECT
customer_country,
round(sum(quantity*unit_price* (1 - discount)), 2) as discount_revenue
FROM temp
join order_items on temp.order_id=order_items.order_id
group by customer_country;

结果:image-20211208174501522


按列比较

代码:

1
2
3
4
5
6
7
8
9
10
11
# 统计已经发货的订单数量和尚未发货的订单数量
select
count(case
when shipped_date is not null
then order_id
end) as orders_shipped,
count(case
when shipped_date is null
then order_id
end) as orders_pending
from orders;

结果:image-20211208174534465


代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# 需求:统计运往加拿大的订单中的全价出售的订单数量和打折订单的订单数量
# 显示两列:`full_price_count` 和 `discounted_price_count`
select
sum(case
when discount = 0
then 1
end) as full_price_count,
sum(case
when discount != 0
then 1
end) as discounted_price_count
from orders o
join order_items oi
on o.order_id = oi.order_id
where ship_country = 'canada';

结果:image-20211208174707133


多列对比

代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
# 需求:统计每位员工处理的运往德语地区的订单总金额和运往其它国家的订单总金额,"dach_orders"(运往德国,奥地利或瑞士的订单),"other_orders"(运往所有其他国家的订单)。
# 结果显示以下列:`employee_id`,`first_name`,`last_name`,`dach_orders`,`other_orders`
# 员工ID,员工名字,员工姓氏,运往德语地区的订单数量,运往其他国家的订单数量
select
e.employee_id,
e.first_name,
e.last_name,
sum(case
when ship_country in ('switzerland', 'germany', 'austria')
then quantity * unit_price
else 0
end) as dach_orders,
sum(case
when ship_country not in ('switzerland', 'germany', 'austria')
then quantity * unit_price
else 0
end) as other_orders
from orders o
join order_items oi
on o.order_id = oi.order_id
join employees e
on o.employee_id = e.employee_id
group by e.employee_id,
e.first_name,
e.last_name;

结果:image-20211208174829874


占比对比

代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# 需求:统计ID为1和2的员工处理的订单中,不同收货国家的订单金额,
# 占发往该国的总订单金额百分比(折扣前)保留两位有效数字。查询结果显示3列:
# 订单要运送到的国家/地区(`ship_country`),`percentage_employee_1`-ID为1的员工所产生的收入占,
# `percentage_employee_2`-ID为2的员工所产生的收入占比
with revenue_employee_1 as (
select
ship_country,
sum(case
when employee_id = 1
then unit_price * quantity
else 0
end) as employee_revenue,
sum(unit_price * quantity) as total_revenue
from orders o
join order_items oi
on o.order_id = oi.order_id
group by ship_country),
revenue_employee_2 as (
select
ship_country,
sum(case
when employee_id = 2
then unit_price * quantity
else 0
end) as employee_revenue,
sum(unit_price * quantity) as total_revenue
from orders o
join order_items oi
on o.order_id = oi.order_id
group by ship_country)
select
re_1.ship_country,
round(re_1.employee_revenue / re_1.total_revenue * 100, 2) as percentage_employee_1,
round(re_2.employee_revenue / re_2.total_revenue * 100, 2) as percentage_employee_2
from revenue_employee_1 re_1
join revenue_employee_2 re_2
on re_1.ship_country = re_2.ship_country;

结果:image-20211208175608551


代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# 统计每一个员工处理的,收货地为德国和美国的订单中,已发货的订单占已比。
with germany_orders as (
select
employee_id,
count(case
when shipped_date is not null
then order_id
end) as count_shipped,
count(order_id) as count_all
from orders o
where o.ship_country = 'germany'
group by employee_id
),
usa_orders as (
select
employee_id,
count(case
when shipped_date is not null
then order_id
end) as count_shipped,
count(order_id) as count_all
from orders o
where o.ship_country = 'usa'
group by employee_id
)
select
e.first_name,
e.last_name,
round(ge_or.count_shipped / ge_or.count_all * 100, 2) as germany_perc,
round(us_or.count_shipped / us_or.count_all * 100, 2) as usa_perc
from germany_orders ge_or
join usa_orders us_or
on ge_or.employee_id = us_or.employee_id
join employees e
on ge_or.employee_id = e.employee_id
or us_or.employee_id = e.employee_id;

结果:image-20211208175039518