fix(ops): 修复同一工牌并行多单的状态错乱
线上观察:管理员手动取消一个僵尸 DISPATCHED 单会引发"越清越多"——
系统顺势派队列首条给仍在工作的保洁员,监听器再用"旧工单残留"机制
尝试取消当前正在执行的工单,该取消走 REQUIRES_NEW 独立事务且吞异常,
最终新单落地、旧单残留,同一设备挂多个非终态工单。

修复两处:
1. DispatchEngineImpl.autoDispatchNext 入口加设备空闲校验:
   若执行人名下还有 DISPATCHED/CONFIRMED/ARRIVED/PAUSED 工单(排除
   completedOrderId),直接早返回,不再派发。所有调用方(保洁/安保
   handleCancelled、asyncCompleteAndDispatchNext、xxl-job 空闲扫描)
   自动受保护。新增 OpsOrderMapper.selectActiveByAssignee。
2. BadgeDeviceStatusEventListener.handleDispatched 移除"残留取消":
   旧逻辑用 REQUIRES_NEW 事务 + 吞异常,是对数据已错乱场景的暴力兜底,
   失败时导致误杀。改为只打 ERROR 告警暴露问题,仅清理 Redis 关联。
   真正的防线在 DispatchEngine 入口。

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -15,9 +15,6 @@ import jakarta.annotation.Resource;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.context.event.EventListener;
|
||||
import org.springframework.stereotype.Component;
|
||||
import org.springframework.transaction.PlatformTransactionManager;
|
||||
import org.springframework.transaction.TransactionDefinition;
|
||||
import org.springframework.transaction.support.TransactionTemplate;
|
||||
|
||||
/**
|
||||
* 工牌设备状态事件监听器
|
||||
@@ -87,9 +84,6 @@ public class BadgeDeviceStatusEventListener {
|
||||
@Resource
|
||||
private OrderLifecycleManager orderLifecycleManager;
|
||||
|
||||
@Resource
|
||||
private PlatformTransactionManager transactionManager;
|
||||
|
||||
/**
|
||||
* 监听工单状态变更事件,同步更新设备工单关联
|
||||
* <p>
|
||||
@@ -180,40 +174,27 @@ public class BadgeDeviceStatusEventListener {
|
||||
|
||||
/**
|
||||
* 处理工单推送状态(首次设置工单关联)
|
||||
* <p>
|
||||
* 若 Redis 里检测到旧 orderId(正常业务不应出现),仅打 ERROR 告警并清理 Redis 关联。
|
||||
* 此前版本会在此处"自动取消旧工单",但那是对"数据已错乱"场景的暴力兜底:
|
||||
* <ul>
|
||||
* <li>取消使用 REQUIRES_NEW 独立事务且吞异常,失败时新单照常落地,旧单残留,形成越清越多</li>
|
||||
* <li>真正的防线应在 DispatchEngine.autoDispatchNext 入口做设备空闲校验</li>
|
||||
* </ul>
|
||||
* 现改为被动告警,暴露问题等待定位,避免误杀保洁员正在执行的任务。
|
||||
*/
|
||||
private void handleDispatched(Long deviceId, Long orderId, OpsOrderDO order) {
|
||||
// 检查并清理旧工单(防止工单切换时状态残留)
|
||||
BadgeDeviceStatusDTO deviceStatus = badgeDeviceStatusService.getBadgeStatus(deviceId);
|
||||
if (deviceStatus != null && deviceStatus.getCurrentOpsOrderId() != null) {
|
||||
Long oldOrderId = deviceStatus.getCurrentOpsOrderId();
|
||||
if (!oldOrderId.equals(orderId)) {
|
||||
log.warn("[BadgeDeviceStatusEventListener] 派发新工单时检测到旧工单残留: " +
|
||||
"deviceId={}, oldOrderId={}, newOrderId={}", deviceId, oldOrderId, orderId);
|
||||
|
||||
// 检查旧工单是否仍在进行中,如果是则先取消
|
||||
OpsOrderDO oldOrder = opsOrderMapper.selectById(oldOrderId);
|
||||
if (oldOrder != null) {
|
||||
WorkOrderStatusEnum oldStatus = WorkOrderStatusEnum.fromStatus(oldOrder.getStatus());
|
||||
if (oldStatus == WorkOrderStatusEnum.DISPATCHED
|
||||
|| oldStatus == WorkOrderStatusEnum.CONFIRMED
|
||||
|| oldStatus == WorkOrderStatusEnum.ARRIVED) {
|
||||
// 旧工单仍在进行,先取消
|
||||
// 使用 REQUIRES_NEW 独立事务,避免内层异常标记外层事务 rollback-only
|
||||
log.warn("[BadgeDeviceStatusEventListener] 取消残留的旧工单: oldOrderId={}", oldOrderId);
|
||||
try {
|
||||
TransactionTemplate txTemplate = new TransactionTemplate(transactionManager);
|
||||
txTemplate.setPropagationBehavior(TransactionDefinition.PROPAGATION_REQUIRES_NEW);
|
||||
txTemplate.executeWithoutResult(status -> {
|
||||
orderLifecycleManager.cancelOrder(oldOrderId, deviceId,
|
||||
OperatorTypeEnum.SYSTEM, "新工单派发,自动取消旧工单");
|
||||
});
|
||||
} catch (Exception e) {
|
||||
log.error("[BadgeDeviceStatusEventListener] 取消旧工单失败: oldOrderId={}", oldOrderId, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
String oldStatus = oldOrder != null ? oldOrder.getStatus() : "NOT_FOUND";
|
||||
log.error("[BadgeDeviceStatusEventListener] 派发新工单时检测到旧工单残留(数据可能已错乱,需人工核查): " +
|
||||
"deviceId={}, oldOrderId={}, oldStatus={}, newOrderId={}",
|
||||
deviceId, oldOrderId, oldStatus, orderId);
|
||||
|
||||
// 确保设备状态清理(无论旧工单是否取消成功)
|
||||
// 清理 Redis 中对旧工单的关联(纯 Redis 操作,不触达状态机)
|
||||
badgeDeviceStatusService.clearCurrentOrder(deviceId);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user