问题
数据库死锁如何解决?
答案
核心思路
数据库死锁的解决分为三个层面:预防(Prevention)、检测(Detection)、恢复(Recovery)。重点是从设计和编码阶段预防死锁,辅以数据库自动检测和回滚机制。
一、预防死锁
1. 统一资源访问顺序
原理:打破”循环等待”条件,避免事务间形成等待环路。
-- 错误:不同事务加锁顺序不一致
-- 事务A
BEGIN;
UPDATE account SET balance = balance - 100 WHERE id = 1; -- 锁id=1
UPDATE account SET balance = balance + 100 WHERE id = 2; -- 锁id=2
COMMIT;
-- 事务B(并发执行)
BEGIN;
UPDATE account SET balance = balance - 50 WHERE id = 2; -- 锁id=2
UPDATE account SET balance = balance + 50 WHERE id = 1; -- 等待id=1
COMMIT;
-- 死锁!A等2,B等1
-- 正确:按ID顺序加锁
-- 事务A和事务B都按 id=1 → id=2 的顺序
BEGIN;
UPDATE account SET balance = balance - 100 WHERE id = LEAST(1, 2);
UPDATE account SET balance = balance + 100 WHERE id = GREATEST(1, 2);
COMMIT;
应用代码示例:
public void transfer(int fromId, int toId, BigDecimal amount) {
// 按ID排序,确保加锁顺序一致
int firstId = Math.min(fromId, toId);
int secondId = Math.max(fromId, toId);
jdbcTemplate.update(
"UPDATE account SET balance = balance - ? WHERE id = ?",
fromId == firstId ? amount : amount.negate(), firstId
);
jdbcTemplate.update(
"UPDATE account SET balance = balance + ? WHERE id = ?",
toId == secondId ? amount : amount.negate(), secondId
);
}
2. 避免锁升级
-- 错误:共享锁升级排他锁
BEGIN;
SELECT * FROM account WHERE id = 1 LOCK IN SHARE MODE; -- S锁
-- 业务逻辑...
UPDATE account SET balance = balance - 100 WHERE id = 1; -- 等待升级X锁
COMMIT;
-- 正确:直接加排他锁
BEGIN;
SELECT * FROM account WHERE id = 1 FOR UPDATE; -- 直接X锁
UPDATE account SET balance = balance - 100 WHERE id = 1;
COMMIT;
3. 缩短事务时间
-- 错误:事务包含耗时操作
BEGIN;
SELECT * FROM orders WHERE id = 1 FOR UPDATE;
-- 调用外部支付API(耗时5秒)...
-- 发送邮件通知(耗时2秒)...
UPDATE orders SET status = 'paid' WHERE id = 1;
COMMIT;
-- 正确:事务外完成耗时操作
Order order = queryOrder(1); -- 无锁查询
callPaymentAPI(order); -- 外部操作
sendEmail(order); -- 外部操作
BEGIN;
SELECT * FROM orders WHERE id = 1 FOR UPDATE;
-- 二次校验状态
UPDATE orders SET status = 'paid' WHERE id = 1;
COMMIT;
4. 降低事务隔离级别
-- RR隔离级别:使用Next-Key Lock,容易死锁
SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
-- 改为RC隔离级别:减少Gap Lock
SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
-- 配置文件
[mysqld]
transaction-isolation = READ-COMMITTED
权衡:
- RR级别:防幻读,但死锁风险高
- RC级别:减少死锁,但允许幻读
5. 使用乐观锁替代悲观锁
/**
* 悲观锁:可能死锁
*/
@Transactional
public void updateStockPessimistic(Long productId, int quantity) {
// SELECT FOR UPDATE加锁
Product product = productMapper.selectByIdForUpdate(productId);
if (product.getStock() < quantity) {
throw new BusinessException("库存不足");
}
product.setStock(product.getStock() - quantity);
productMapper.updateById(product);
}
/**
* 乐观锁:避免死锁
*/
@Transactional
public void updateStockOptimistic(Long productId, int quantity) {
Product product = productMapper.selectById(productId); // 无锁读取
if (product.getStock() < quantity) {
throw new BusinessException("库存不足");
}
// 使用版本号更新
int affected = productMapper.updateStockWithVersion(
productId, quantity, product.getVersion()
);
if (affected == 0) {
throw new OptimisticLockException("并发冲突,请重试");
}
}
-- 乐观锁SQL
UPDATE products
SET stock = stock - #{quantity}, version = version + 1
WHERE id = #{id} AND version = #{version} AND stock >= #{quantity}
6. 合理使用索引
-- 错误:无索引导致锁表
UPDATE orders SET status = 'cancelled' WHERE user_id = 100; -- user_id无索引
-- 正确:添加索引减少锁范围
ALTER TABLE orders ADD INDEX idx_user_id (user_id);
UPDATE orders SET status = 'cancelled' WHERE user_id = 100; -- 只锁相关行
二、检测死锁
1. InnoDB自动死锁检测
-- 查看死锁检测配置
SHOW VARIABLES LIKE 'innodb_deadlock_detect';
-- ON:自动检测并回滚(默认)
-- 查看死锁信息
SHOW ENGINE INNODB STATUS\G
-- 输出关键部分
/*
------------------------
LATEST DETECTED DEADLOCK
------------------------
*** (1) TRANSACTION:
TRANSACTION 12345, ACTIVE 10 sec starting index read
LOCK WAIT 2 lock struct(s), heap size 1136, 1 row lock(s)
MySQL thread id 10, query id 500 localhost root updating
UPDATE account SET balance = balance - 100 WHERE id = 1
*** (1) WAITING FOR THIS LOCK TO BE GRANTED:
RECORD LOCKS space id 2 page no 3 n bits 72 index PRIMARY
*** (2) TRANSACTION:
TRANSACTION 12346, ACTIVE 8 sec starting index read
2 lock struct(s), heap size 1136, 1 row lock(s)
UPDATE account SET balance = balance + 100 WHERE id = 2
*** WE ROLL BACK TRANSACTION (1)
*/
2. 监控锁等待
-- 查看当前锁等待
SELECT
r.trx_id AS waiting_trx_id,
r.trx_mysql_thread_id AS waiting_thread,
r.trx_query AS waiting_query,
b.trx_id AS blocking_trx_id,
b.trx_mysql_thread_id AS blocking_thread,
b.trx_query AS blocking_query
FROM information_schema.innodb_lock_waits w
INNER JOIN information_schema.innodb_trx b ON b.trx_id = w.blocking_trx_id
INNER JOIN information_schema.innodb_trx r ON r.trx_id = w.requesting_trx_id;
-- MySQL 8.0+新方法
SELECT
waiting_pid,
waiting_query,
blocking_pid,
blocking_query
FROM sys.innodb_lock_waits;
3. 配置死锁日志
-- 开启错误日志记录死锁
[mysqld]
log_error = /var/log/mysql/error.log
innodb_print_all_deadlocks = ON -- 记录所有死锁到日志
三、恢复死锁
1. 应用层重试机制
@Service
public class AccountService {
private static final int MAX_RETRIES = 3;
@Transactional(rollbackFor = Exception.class)
public void transfer(Long fromId, Long toId, BigDecimal amount) {
int retries = 0;
while (retries < MAX_RETRIES) {
try {
doTransfer(fromId, toId, amount);
return; // 成功则返回
} catch (DeadlockLoserDataAccessException e) {
retries++;
if (retries >= MAX_RETRIES) {
throw new BusinessException("转账失败,请稍后重试");
}
// 随机延迟后重试(避免重试时再次冲突)
sleep(100 + new Random().nextInt(200));
}
}
}
}
2. 设置锁等待超时
-- 设置锁等待超时时间(默认50秒)
SET innodb_lock_wait_timeout = 10; -- 10秒超时
-- 会话级设置
SET SESSION innodb_lock_wait_timeout = 5;
3. 手动处理阻塞
-- 查找阻塞的线程
SELECT * FROM information_schema.processlist WHERE state = 'Locked';
-- 杀掉阻塞的线程
KILL <thread_id>;
四、实际案例与最佳实践
案例1:订单系统死锁优化
// 优化前:死锁频发
@Transactional
public void createOrder(OrderDTO dto) {
// 1. 锁定商品库存
productMapper.updateStock(dto.getProductId(), dto.getQuantity());
// 2. 锁定用户余额
userMapper.updateBalance(dto.getUserId(), dto.getAmount());
// 3. 创建订单
orderMapper.insert(order);
}
// 优化后:统一加锁顺序 + 乐观锁
@Transactional
public void createOrderOptimized(OrderDTO dto) {
// 1. 按ID排序资源
List<LockResource> resources = Arrays.asList(
new LockResource("product", dto.getProductId()),
new LockResource("user", dto.getUserId())
);
Collections.sort(resources);
// 2. 使用乐观锁更新库存
int affected = productMapper.updateStockWithVersion(
dto.getProductId(), dto.getQuantity(), product.getVersion()
);
if (affected == 0) {
throw new OptimisticLockException("库存已变更");
}
// 3. 更新余额
userMapper.updateBalance(dto.getUserId(), dto.getAmount());
// 4. 创建订单
orderMapper.insert(order);
}
案例2:分布式锁避免死锁
@Service
public class DistributedLockService {
@Autowired
private RedisTemplate<String, String> redisTemplate;
public void processWithLock(String resourceId, Runnable task) {
String lockKey = "lock:" + resourceId;
String lockValue = UUID.randomUUID().toString();
try {
// 获取分布式锁(设置过期时间避免死锁)
Boolean success = redisTemplate.opsForValue()
.setIfAbsent(lockKey, lockValue, 10, TimeUnit.SECONDS);
if (Boolean.TRUE.equals(success)) {
task.run(); // 执行业务
} else {
throw new BusinessException("资源被锁定");
}
} finally {
// 释放锁(Lua脚本保证原子性)
releaseLock(lockKey, lockValue);
}
}
}
五、配置优化
[mysqld]
# 死锁检测(默认ON)
innodb_deadlock_detect = ON
# 锁等待超时(秒)
innodb_lock_wait_timeout = 50
# 记录所有死锁到错误日志
innodb_print_all_deadlocks = ON
# 隔离级别(RC减少死锁)
transaction-isolation = READ-COMMITTED
# 并发线程数(减少并发降低死锁概率)
innodb_thread_concurrency = 0 # 0表示不限制
答题总结
数据库死锁的解决策略:
1. 预防为主(设计阶段):
- 统一加锁顺序(破坏循环等待)
- 避免锁升级(直接加排他锁)
- 缩短事务时间(减少持锁时长)
- 使用乐观锁(避免阻塞)
2. 自动检测(数据库):
- InnoDB自动检测死锁并回滚代价最小的事务
- 配置
innodb_deadlock_detect=ON
3. 异常处理(应用层):
- 捕获死锁异常重试
- 设置合理的超时时间
- 记录死锁日志分析优化
面试要点:
- 死锁的四个必要条件:互斥、持有并等待、不可剥夺、循环等待
- 预防死锁的关键是打破循环等待条件
- 生产环境优先使用乐观锁和统一加锁顺序
- InnoDB会自动选择代价最小的事务回滚
实际开发中,应该从业务设计、数据库配置、应用代码三个层面综合考虑,建立完整的死锁预防和处理机制。