-- 比较运算符 SELECT*FROM employees WHERE salary >50000; SELECT*FROM employees WHERE department ='IT'; SELECT*FROM employees WHERE age <>30;
-- 逻辑运算符 SELECT*FROM employees WHERE salary >50000AND department ='IT'; SELECT*FROM employees WHERE department ='IT'OR department ='HR'; SELECT*FROM employees WHERENOT department ='IT';
-- BETWEEN SELECT*FROM employees WHERE salary BETWEEN40000AND60000;
-- IN SELECT*FROM employees WHERE department IN ('IT', 'HR', 'Finance');
-- LIKE (模糊匹配) SELECT*FROM employees WHERE name LIKE'John%'; -- 以John开头 SELECT*FROM employees WHERE name LIKE'%son'; -- 以son结尾 SELECT*FROM employees WHERE name LIKE'%oh%'; -- 包含oh
-- IS NULL SELECT*FROM employees WHERE phone ISNULL; SELECT*FROM employees WHERE phone ISNOTNULL;
-- 基本语法 SELECT a.column1, b.column2, ... FROM table_name a JOIN table_name b ON a.common_column = b.common_column;
-- employees -- | id | name | manager_id | -- |----|---------|------------| -- | 1 | Alice | NULL | -- | 2 | Bob | 1 | -- | 3 | Charlie | 1 | -- | 4 | David | 2 |
SELECT e.name AS employee_name, m.name AS manager_name FROM employees e LEFTJOIN employees m ON e.manager_id = m.id;
-- 查询结果 -- | employee_name | manager_name | -- |---------------|--------------| -- | Alice | NULL | -- | Bob | Alice | -- | Charlie | Alice | -- | David | Bob |
注意: Alice 是最高层级的员工,没有经理,所以 manager_name 显示为 NULL。其他员工都能找到对应的经理。
-- 查找同一部门的员工对 SELECT e1.name AS employee1, e2.name AS employee2 FROM employees e1 JOIN employees e2 ON e1.department_id = e2.department_id WHERE e1.id < e2.id; -- 避免重复和自己匹配自己
-- 查找没有下属的员工 SELECT e.name FROM employees e LEFTJOIN employees m ON e.id = m.manager_id WHERE m.id ISNULL;
-- 查找工资比某个特定员工高的所有员工 SELECT e2.name, e2.salary FROM employees e1 JOIN employees e2 ON e2.salary > e1.salary WHERE e1.name ='Alice';
自连接必须使用表别名来区分同一表的不同实例
通常使用 LEFT JOIN 来包含没有匹配的行(如没有经理的员工)
注意避免笛卡尔积,确保有正确的 ON 条件
自连接可能影响性能,大表需要注意优化
17. FULL OUTER JOIN
FULL OUTER JOIN全外连接是SQL中的一种联接操作,它返回两个表中所有符合联接条件的记录,以及不符合联接条件的记录。与INNER JOIN只返回两个表中有匹配记录的行不同
FULL OUTER JOIN会返回两个表中的所有记录,如果在某一表中没有匹配的记录,则该表的列会返回NULL
-- 基本语法 SELECT column1, column2, ... FROM table1 FULLOUTERJOIN table2 ON table1.common_column = table2.common_column;
-- employees -- | id | name | department_id | -- |----|---------|---------------| -- | 1 | Alice | 1 | -- | 2 | Bob | 2 | -- | 3 | Charlie | NULL |
-- 查询每个员工最新的项目 SELECT e.name, p.project_name, p.start_date FROM employees e JOINLATERAL ( SELECT project_name, start_date FROM projects pr WHERE pr.employee_id = e.id ORDERBY start_date DESC LIMIT 1 ) p ONtrue;
-- 查询结果 -- | name | project_name | start_date | -- |-------|--------------|------------| -- | Alice | Project Y | 2023-06-01 | -- | Bob | Project Z | 2022-09-01 |
子查询可以引用外部表 e.id 对每个员工独立执行子查询 ON true 表示无额外联接条件 Alice 的最新项目是 Project Y,Bob 的是 Project Z
常见应用场景
-- 获取每个部门工资最高的3名员工 SELECT d.department_name, e.name, e.salary FROM departments d JOINLATERAL ( SELECT name, salary FROM employees WHERE department_id = d.id ORDERBY salary DESC LIMIT 3 ) e ONtrue;
-- 计算每个客户的最近5次订单总额 SELECT c.name, orders.total FROM customers c JOINLATERAL ( SELECTSUM(amount) as total FROM orders WHERE customer_id = c.id ORDERBY order_date DESC LIMIT 5 ) orders ONtrue;
-- 使用 LEFT JOIN LATERAL 包含没有项目的员工 SELECT e.name, COALESCE(p.project_name, 'No Projects') as project FROM employees e LEFTJOINLATERAL ( SELECT project_name FROM projects pr WHERE pr.employee_id = e.id ORDERBY start_date DESC LIMIT 1 ) p ONtrue;
数据库支持: PostgreSQL 9.3+, MySQL 8.0+ (使用 LATERAL 关键字)
性能: 对左表的每一行都执行子查询,大表时需注意性能
LEFT JOIN LATERAL: 允许子查询返回空结果,类似于 LEFT JOIN
ON true: 当子查询已经包含所有过滤条件时,使用 ON true
替代方案: 某些场景可以用窗口函数或普通 JOIN 替代
数据库兼容性
PostgreSQL: 完全支持 LATERAL
MySQL 8.0+: 支持 LATERAL(之前版本不支持)
SQL Server: 使用 CROSS APPLY 和 OUTER APPLY 代替
Oracle: 使用 CROSS APPLY 和 OUTER APPLY 代替
数据分组
21. GROUP BY 分组
GROUP BY是SQL中的一个子句,用于将查询结果按一个或多个列进行分组。分组操作通常与聚合函数(如COUNT、SUM、AVG、MAX、MIN等)结合使用,以便对每个分组进行汇总计算。GROUP BY允许你将数据按特定的维度进行聚合,进而对这些聚合后的数据进行分析。
-- 统计每个产品的销售次数和总金额 SELECT product_id, COUNT(*) AS sale_count, SUM(quantity * price) AS total_revenue FROM sales GROUPBY product_id;
-- 按多个列分组 SELECT product_id, price, SUM(quantity) AS total_quantity FROM sales GROUPBY product_id, price;
-- 结合WHERE过滤 SELECT product_id, SUM(quantity) AS total_quantity FROM sales WHERE price >10 GROUPBY product_id;
常用聚合函数
-- COUNT: 计数 SELECT department_id, COUNT(*) AS employee_count FROM employees GROUPBY department_id;
-- SUM: 求和 SELECT department_id, SUM(salary) AS total_salary FROM employees GROUPBY department_id;
-- AVG: 平均值 SELECT department_id, AVG(salary) AS avg_salary FROM employees GROUPBY department_id;
-- MAX/MIN: 最大值/最小值 SELECT department_id, MAX(salary) AS max_salary, MIN(salary) AS min_salary FROM employees GROUPBY department_id;
-- 多个聚合函数组合 SELECT department_id, COUNT(*) AS employee_count, AVG(salary) AS avg_salary, MAX(salary) AS max_salary FROM employees GROUPBY department_id;
SELECT中的列: SELECT子句中出现的非聚合列必须在GROUP BY中列出
执行顺序: WHERE → GROUP BY → 聚合函数 → HAVING → SELECT
NULL值处理: NULL值会被分为一组
性能: 在分组列上建立索引可以提高性能
错误示例
-- ❌ 错误: name 不在 GROUP BY 中,也不是聚合函数 SELECT department_id, name, COUNT(*) FROM employees GROUPBY department_id;
-- ✅ 正确: 所有非聚合列都在 GROUP BY 中 SELECT department_id, name, COUNT(*) FROM employees GROUPBY department_id, name;
-- ✅ 正确: 只选择分组列和聚合结果 SELECT department_id, COUNT(*) AS employee_count FROM employees GROUPBY department_id;
-- sales -- | region | product | sale_amount | -- |--------|---------|-------------| -- | North | A | 100 | -- | North | B | 150 | -- | South | A | 200 | -- | South | B | 250 |
-- 生成所有可能的分组组合 SELECT region, product, SUM(sale_amount) AS total_sales FROM sales GROUPBYCUBE (region, product);
-- 查询结果 -- | region | product | total_sales | -- |--------|---------|-------------| -- | North | A | 100 | ← (region, product) -- | North | B | 150 | ← (region, product) -- | North | NULL | 250 | ← (region) -- | South | A | 200 | ← (region, product) -- | South | B | 250 | ← (region, product) -- | South | NULL | 450 | ← (region) -- | NULL | A | 300 | ← (product) -- | NULL | B | 400 | ← (product) -- | NULL | NULL | 700 | ← ()
-- sales -- | region | product | sale_amount | -- |--------|---------|-------------| -- | North | A | 100 | -- | North | B | 150 | -- | South | A | 200 | -- | South | B | 250 |
-- 按区域和产品生成层次汇总 SELECT region, product, SUM(sale_amount) AS total_sales FROM sales GROUPBYROLLUP (region, product);
-- 查询结果 -- | region | product | total_sales | -- |--------|---------|-------------| -- | North | A | 100 | ← (region, product) -- | North | B | 150 | ← (region, product) -- | North | NULL | 250 | ← (region) 小计 -- | South | A | 200 | ← (region, product) -- | South | B | 250 | ← (region, product) -- | South | NULL | 450 | ← (region) 小计 -- | NULL | NULL | 700 | ← () 总计
-- 基本语法 -- WHERE子句中的子查询 SELECT column1, column2 FROM table1 WHERE column1 IN (SELECT column1 FROM table2);
-- FROM子句中的子查询 SELECT* FROM (SELECT column1, column2 FROM table1) AS subquery;
-- SELECT子句中的子查询 SELECT column1, (SELECTMAX(column2) FROM table2) AS max_value FROM table1;
-- employees -- | id | name | salary | department_id | -- |----|---------|--------|---------------| -- | 1 | Alice | 60000 | 1 | -- | 2 | Bob | 55000 | 2 | -- | 3 | Charlie | 70000 | 1 | -- | 4 | David | 50000 | 2 |
-- departments -- | id | name | -- |----|------------| -- | 1 | IT | -- | 2 | HR |
-- WHERE子句:查询工资高于平均工资的员工 SELECT name, salary FROM employees WHERE salary > (SELECTAVG(salary) FROM employees);
-- 查询结果 -- | name | salary | -- |---------|--------| -- | Alice | 60000 | -- | Charlie | 70000 |
-- FROM子句:查询每个部门的平均工资 SELECT d.name, avg_salary FROM departments d JOIN ( SELECT department_id, AVG(salary) AS avg_salary FROM employees GROUPBY department_id ) dept_avg ON d.id = dept_avg.department_id;
-- SELECT子句:显示每个员工及其部门的平均工资 SELECT name, salary, (SELECTAVG(salary) FROM employees e2 WHERE e2.department_id = e1.department_id) AS dept_avg FROM employees e1;
-- IN子句:查询IT部门的员工 SELECT name, salary FROM employees WHERE department_id IN ( SELECT id FROM departments WHERE name ='IT' );
-- EXISTS子句:查询有员工的部门 SELECT name FROM departments d WHEREEXISTS ( SELECT1FROM employees e WHERE e.department_id = d.id );
ANY/ALL 操作符
-- ANY:满足任意一个条件 SELECT name, salary FROM employees WHERE salary >ANY (SELECT salary FROM employees WHERE department_id =2);
-- ALL:满足所有条件 SELECT name, salary FROM employees WHERE salary >ALL (SELECT salary FROM employees WHERE department_id =2);
子查询必须用括号包裹
SELECT子句中的子查询必须返回单个值
FROM子句中的子查询必须有别名
相关子查询会对外部查询的每一行执行一次,性能较差
能用JOIN的地方优先用JOIN,通常性能更好
31. ANY
ANY运算符用于将一个值与子查询返回的一组值进行比较。如果子查询的任何值满足条件,则返回true。
-- 基本形式 SELECT title FROM film WHERE length >ANY( SELECT length FROM film WHERE category_id =1 );
-- 查找每个类别的最大电影时长 SELECTMAX(length) FROM film INNERJOIN film_category USING(film_id) GROUPBY category_id;
-- 查找长度大于等于任何类别最大长度的电影 SELECT title FROM film WHERE length >=ANY( SELECTMAX(length) FROM film INNERJOIN film_category USING(film_id) GROUPBY category_id );
-- 基本形式 SELECT title FROM film WHERE length >ALL( SELECT length FROM film WHERE category_id =1 );
ALL 运算符前面必须有比较运算符:=、!=、>、>=、<、<=
ALL 运算符后面必须跟一个子查询,子查询也必须用括号括起来
-- 查询按电影评级分组的平均长度 SELECT ROUND(AVG(length), 2) AS avg_length FROM film GROUPBY rating ORDERBY avg_length DESC;
-- 查找长度大于所有平均长度的电影 SELECT film_id, title, length FROM film WHERE length >ALL( SELECT ROUND(AVG(length), 2) FROM film GROUPBY rating ) ORDERBY length;
子查询必须返回单列
如果子查询返回空结果集,ALL 运算符始终返回 true
> ALL 等价于”大于最大值”
< ALL 等价于”小于最小值”
!= ALL 等价于 NOT IN
33. EXISTS
EXISTS运算符是一个布尔运算符,用于测试子查询中是否存在行。
-- 基本形式 SELECT column1 FROM table_1 WHEREEXISTS( SELECT1 FROM table_2 WHERE column_2 = table_1.column_1 );
-- 查找至少有一笔付款金额大于11的客户 SELECT first_name, last_name FROM customer c WHEREEXISTS( SELECT1 FROM payment p WHERE p.customer_id = c.customer_id AND amount >11 ) ORDERBY first_name, last_name;
-- NOT EXISTS: 查找没有支付过金额大于11的客户 SELECT first_name, last_name FROM customer c WHERENOTEXISTS( SELECT1 FROM payment p WHERE p.customer_id = c.customer_id AND amount >11 ) ORDERBY first_name, last_name;
-- 基本形式 WITH cte_film AS ( SELECT film_id, title, CASE WHEN length <30THEN'Short' WHEN length <90THEN'Medium' ELSE'Long' ENDAS length FROM film ) SELECT film_id, title, length FROM cte_film WHERE length ='Long' ORDERBY title;
首先,指定 CTE 的名称,后跟可选的列列表
其次,在WITH子句主体内,指定返回结果集的查询
第三,在SELECT、INSERT、UPDATE或DELETE等语句中,将 CTE 用作表或视图
-- 将 CTE 与表连接 WITH cte_rental AS ( SELECT staff_id, COUNT(rental_id) AS rental_count FROM rental GROUPBY staff_id ) SELECT s.staff_id, first_name, last_name, rental_count FROM staff s INNERJOIN cte_rental USING (staff_id);
-- 将 CTE 与窗口函数结合使用 WITH cte_film AS ( SELECT film_id, title, rating, length, RANK() OVER ( PARTITIONBY rating ORDERBY length DESC ) AS length_rank FROM film ) SELECT* FROM cte_film WHERE length_rank =1;
-- 多个 CTE WITH cte1 AS ( SELECT*FROM table1 WHERE condition1 ), cte2 AS ( SELECT*FROM table2 WHERE condition2 ) SELECT* FROM cte1 JOIN cte2 ON cte1.id = cte2.id;
-- 根据产品细分的折扣计算净价 UPDATE product SET net_price = price - price * discount FROM product_segment WHERE product.segment_id = product_segment.id;
-- 使用表别名简化查询 UPDATE product p SET net_price = price - price * discount FROM product_segment s WHERE p.segment_id = s.id;
-- 更新并返回结果 UPDATE product p SET net_price = price - price * discount FROM product_segment s WHERE p.segment_id = s.id RETURNING p.id, p.name, p.price, p.net_price;
MERGEINTO target_table USING source_query ON merge_condition WHEN MATCHED [ANDcondition] THEN {merge_update | merge_delete | DO NOTHING} WHENNOT MATCHED [ANDcondition] THEN {merge_insert | DO NOTHING};
-- 创建示例表 CREATETABLE leads( lead_id SERIAL PRIMARY KEY, name VARCHAR(255) NOTNULL, email VARCHAR(255) NOTNULLUNIQUE, active BOOL NOTNULLDEFAULTTRUE );
CREATETABLE customers( customer_id SERIAL PRIMARY KEY, name VARCHAR(255) NOTNULL, email VARCHAR(255) NOTNULLUNIQUE, active BOOL NOTNULLDEFAULTTRUE );
-- 示例1: 插入不匹配的行 MERGEINTO customers c USING leads l ON c.email = l.email WHENNOT MATCHED THEN INSERT (name, email) VALUES(l.name, l.email);
-- 示例2: 插入和更新 MERGEINTO customers c USING leads l ON c.email = l.email WHENNOT MATCHED THEN INSERT (name, email) VALUES(l.name, l.email) WHEN MATCHED THEN UPDATESET name = l.name, email = l.email;
-- 示例3: 插入、更新和删除 MERGEINTO customers c USING leads l ON c.email = l.email WHENNOT MATCHED THEN INSERT (name, email) VALUES(l.name, l.email) WHEN MATCHED AND l.active =falseTHEN DELETE WHEN MATCHED AND l.active =trueTHEN UPDATESET name = l.name, email = l.email;
-- 获取客户信息(JSON 格式) SELECT info ->'customer'AS customer FROM orders; -- | customer | -- |------------| -- | "John Doe" |
->> 运算符:返回 JSON 对象字段(文本格式)
-- 获取客户信息(文本格式) SELECT info ->>'customer'AS customer FROM orders; -- | customer | -- |--------------| -- | John Doe | -- | Lily Bush | -- | Josh William |
WHERE 子句中使用 JSON 运算符
-- 查找购买 Diaper 的客户 SELECT info ->>'customer'AS customer FROM orders WHERE info ->'items'->>'product'='Diaper';
-- 查找购买 2 个产品的客户 SELECT info ->>'customer'AS customer, info ->'items'->>'product'AS product FROM orders WHERECAST(info ->'items'->>'qty'ASINTEGER) =2;
聚合函数
-- 计算最小、最大、总和、平均数量 SELECT MIN(CAST(info ->'items'->>'qty'ASINTEGER)) AS min_qty, MAX(CAST(info ->'items'->>'qty'ASINTEGER)) AS max_qty, SUM(CAST(info ->'items'->>'qty'ASINTEGER)) AS total_qty, AVG(CAST(info ->'items'->>'qty'ASINTEGER)) AS avg_qty FROM orders; -- | min_qty | max_qty | total_qty | avg_qty | -- |---------|---------|-----------|---------| -- | 1 | 24 | 33 | 8.25 |
JSON 函数
json_each() - 展开 JSON 对象为键值对
SELECT json_each(info) FROM orders;
-- 获取文本格式的键值对 SELECT json_each_text(info) FROM orders;
CASE WHEN condition_1 THEN result_1 WHEN condition_2 THEN result_2 [WHEN ...] [ELSE else_result] END
-- 员工薪资等级分类 SELECT employee_id, name, salary, CASE WHEN salary <40000THEN'Low' WHEN salary >=40000AND salary <70000THEN'Medium' WHEN salary >=70000THEN'High' ENDAS salary_level FROM employees; -- | employee_id | name | salary | salary_level | -- |-------------|---------|--------|--------------| -- | 1 | Alice | 75000 | High | -- | 2 | Bob | 55000 | Medium | -- | 3 | Charlie | 35000 | Low |
-- 订单状态描述 SELECT order_id, status, CASE WHEN status ='pending'THEN'Order received' WHEN status ='processing'THEN'Being prepared' WHEN status ='shipped'THEN'On the way' WHEN status ='delivered'THEN'Completed' ELSE'Unknown status' ENDAS status_description FROM orders;
CASE是一个表达式,可以在 SELECT、WHERE、GROUP BY、HAVING、ORDER BY 等子句中使用
-- 删除特定角色 UPDATE users SET meta_data = jsonb_set( meta_data, '{roles}', (SELECT jsonb_agg(elem) FROM jsonb_array_elements(meta_data->'roles') elem WHERE elem !='"marketing"'::jsonb) ) WHERE id =1;
-- 删除多个元素 UPDATE users SET meta_data = jsonb_set( meta_data, '{roles}', (SELECT jsonb_agg(elem) FROM jsonb_array_elements(meta_data->'roles') elem WHERE elem NOTIN ('"hr"'::jsonb, '"sales"'::jsonb)) ) WHERE id =1;
替换特定位置的元素
-- 替换第一个元素(索引从0开始) UPDATE users SET meta_data = jsonb_set( meta_data, '{roles,0}', '"admin"'::jsonb ) WHERE id =1;
-- 替换第二个元素 UPDATE users SET meta_data = jsonb_set( meta_data, '{roles,1}', '"manager"'::jsonb ) WHERE id =1;
-- 提取第一个电话(JSONB 格式) SELECT name, phones ->0AS phone FROM employees WHERE name ='John Doe'; -- | name | phone | -- |----------|------------------| -- | John Doe | "(408) 555-1111" |
-- 提取第一个电话(文本格式) SELECT name, phones ->>0AS phone FROM employees WHERE name ='John Doe'; -- | name | phone | -- |----------|----------------| -- | John Doe | (408) 555-1111 |
-- 提取最后一个电话 SELECT name, phones ->>-1AS phone FROM employees WHERE name ='Jane Smith'; -- | name | phone | -- |------------|----------------| -- | Jane Smith | (408) 666-3333 |
-- 提取不存在的元素(返回 NULL) SELECT name, phones ->5AS phone FROM employees WHERE name ='John Doe'; -- | name | phone | -- |----------|-------| -- | John Doe | null |
从对象中提取值
-- 创建请求表 CREATETABLE requests( id SERIAL PRIMARY KEY, employee_id INTNOTNULL, request_date DATENOTNULL, data JSONB NOTNULL );
-- 提取当前职位(JSONB 格式) SELECT data ->'current_position'AS current_position FROM requests WHERE employee_id =1; -- | current_position | -- |---------------------| -- | "Software Engineer" |
-- 提取当前职位(文本格式) SELECT data ->>'current_position'AS current_position FROM requests WHERE employee_id =1; -- | current_position | -- |-------------------| -- | Software Engineer |
-- 提取多个键 SELECT employee_id, data ->>'current_position'AS current_position, data ->>'new_position'AS new_position, data ->>'effective_date'AS effective_date FROM requests;
-- 提取不存在的键(返回 NULL) SELECT data ->>'salary'AS salary FROM requests WHERE employee_id =1; -- | salary | -- |--------| -- | null |