feat(ops): 新增保洁工单超时自动取消 Job + 单元测试(Mockito)
背景:保洁工单偶尔因设备离线/信标丢失导致卡在非终态(如 PENDING 超 12h 没派,
DISPATCHED 超 12h 没确认),靠人工清理成本高。补一个每小时跑的 XXL-Job 扫描关单。
实现:
- CleanOrderAutoCancelJob.scanAndCancel:
* 查询 update_time 距今超 timeoutHours(默认 12h)的 CLEAN 工单
* 状态白名单 = PENDING/QUEUED/DISPATCHED/CONFIRMED/ARRIVED,**排除 PAUSED**
(PAUSED 是 P0 打断的产物,应由 resumeInterruptedOrder 走状态机恢复,
此处若把它 CANCEL,会破坏 P0 完成后的 resume 链路)
* 调用 orderLifecycleManager.cancelOrder 走完整责任链,事件监听器负责
TTS 停播/设备关联回收/审计日志
* cancel 前再 selectById 做乐观校验:若 update_time 已刷新或状态已变
(COMPLETED/CANCELLED/PAUSED),跳过;避免候选装内存到实际 cancel
之间用户刚触达的工单被误杀
* 单单独立 try/catch 隔离,单条失败不断批
* batchSize 限流(默认 200),事件风暴防护
- application.yaml 补默认配置:viewsh.ops.clean.auto-cancel.{timeout-hours, batch-size}
- CleanOrderAutoCancelJobTest 覆盖 6 条不变量:
无候选零计数、全成功、部分失败不中断、乐观锁跳过 stale、终态跳过、PAUSED 跳过
XXL-Job 配置建议:
- JobHandler: cleanOrderAutoCancelJob
- Cron: 0 17 * * * ? (每小时 :17,避开整点尖峰)
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,160 @@
|
||||
package com.viewsh.module.ops.environment.job;
|
||||
|
||||
import cn.hutool.core.collection.CollUtil;
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
import com.viewsh.framework.mybatis.core.query.LambdaQueryWrapperX;
|
||||
import com.viewsh.framework.tenant.core.job.TenantJob;
|
||||
import com.viewsh.module.ops.core.lifecycle.OrderLifecycleManager;
|
||||
import com.viewsh.module.ops.dal.dataobject.workorder.OpsOrderDO;
|
||||
import com.viewsh.module.ops.dal.mysql.workorder.OpsOrderMapper;
|
||||
import com.viewsh.module.ops.enums.OperatorTypeEnum;
|
||||
import com.viewsh.module.ops.enums.WorkOrderStatusEnum;
|
||||
import com.xxl.job.core.handler.annotation.XxlJob;
|
||||
import jakarta.annotation.Resource;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* 保洁工单超时自动取消 Job
|
||||
* <p>
|
||||
* 职责:
|
||||
* 扫描所有保洁类(order_type=CLEAN)非终态工单,
|
||||
* 若最近一次进展(update_time)距今超过阈值(默认 12 小时),
|
||||
* 以 SYSTEM 身份走正常取消流程将其关闭。
|
||||
* <p>
|
||||
* 设计要点:
|
||||
* 1. 时间基准使用 update_time 而非 create_time——任何状态转换/字段更新都会刷新 update_time,
|
||||
* 这样"按最新进展计算超时"才准确:刚被重派的 DISPATCHED 单不会因 create_time 老而被误杀。
|
||||
* 2. 状态白名单 = PENDING / QUEUED / DISPATCHED / CONFIRMED / ARRIVED(不含 PAUSED)。
|
||||
* PAUSED 是 P0 打断的产物,应由 resumeInterruptedOrder 经状态机走 PAUSED → DISPATCHED
|
||||
* 恢复。若此 Job 把 PAUSED 单直接 CANCELLED,P0 完成后的 resume 会在状态机检查
|
||||
* "PAUSED → DISPATCHED" 时因源状态已变为 CANCELLED 而抛 IllegalStateException,
|
||||
* 进而破坏 P0 恢复链路。PAUSED 若真的卡死(P0 也卡),交由人工审核,不自动化。
|
||||
* 3. 取消调用 {@link OrderLifecycleManager#cancelOrder} 走完整责任链:
|
||||
* StateTransitionHandler → QueueSyncHandler → EventPublishHandler
|
||||
* → CleanOrderEventListener.onOrderStateChanged(CANCELLED) 会统一处理
|
||||
* TTS 停播、设备工单关联回收、审计日志。
|
||||
* 4. 单单独立事务 + try/catch 隔离,单条失败不影响批次其余工单。
|
||||
* 5. 单次扫描限 batchSize 条,防止异常堆积时一次性取消过多触发事件风暴;
|
||||
* 未处理完的工单留给下一轮 cron。
|
||||
* 6. cancel 前再做一次乐观校验:重查 update_time 是否仍 <= threshold。
|
||||
* 候选装内存到实际 cancel 之间如果有用户触达(确认/到岗),update_time 会被刷新;
|
||||
* 此时放弃 cancel,避免误杀用户刚触达的工单。
|
||||
* <p>
|
||||
* XXL-Job 配置建议:
|
||||
* - JobHandler: cleanOrderAutoCancelJob
|
||||
* - Cron: 0 17 * * * ? (每小时 :17 触发,避开整点尖峰)
|
||||
*
|
||||
* @author lzh
|
||||
*/
|
||||
@Slf4j
|
||||
@Component
|
||||
public class CleanOrderAutoCancelJob {
|
||||
|
||||
private static final String BUSINESS_TYPE_CLEAN = "CLEAN";
|
||||
private static final String CANCEL_REASON = "超过12小时未处理,系统自动完结";
|
||||
|
||||
@Resource
|
||||
private OpsOrderMapper opsOrderMapper;
|
||||
|
||||
@Resource
|
||||
private OrderLifecycleManager orderLifecycleManager;
|
||||
|
||||
/** 超时时长(小时),update_time 距今超过此值视为卡死 */
|
||||
@Value("${viewsh.ops.clean.auto-cancel.timeout-hours:12}")
|
||||
private int timeoutHours;
|
||||
|
||||
/** 单次最大扫描/取消工单数,防止事件风暴 */
|
||||
@Value("${viewsh.ops.clean.auto-cancel.batch-size:200}")
|
||||
private int batchSize;
|
||||
|
||||
@XxlJob("cleanOrderAutoCancelJob")
|
||||
@TenantJob
|
||||
public String execute() {
|
||||
try {
|
||||
CancelResult result = scanAndCancel();
|
||||
return StrUtil.format(
|
||||
"保洁工单超时自动取消完成: 扫描 {} 单, 成功 {}, 失败 {}, 跳过 {}, 耗时 {} ms",
|
||||
result.scanned, result.succeeded, result.failed, result.skippedStale, result.durationMs);
|
||||
} catch (Exception e) {
|
||||
log.error("[CleanOrderAutoCancelJob] 执行失败", e);
|
||||
return StrUtil.format("保洁工单超时自动取消失败: {}", e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
public CancelResult scanAndCancel() {
|
||||
long startTime = System.currentTimeMillis();
|
||||
LocalDateTime threshold = LocalDateTime.now().minusHours(timeoutHours);
|
||||
|
||||
log.info("[CleanOrderAutoCancelJob] 开始扫描: timeoutHours={}, threshold={}, batchSize={}",
|
||||
timeoutHours, threshold, batchSize);
|
||||
|
||||
List<OpsOrderDO> candidates = opsOrderMapper.selectList(new LambdaQueryWrapperX<OpsOrderDO>()
|
||||
.eq(OpsOrderDO::getOrderType, BUSINESS_TYPE_CLEAN)
|
||||
.notIn(OpsOrderDO::getStatus,
|
||||
WorkOrderStatusEnum.COMPLETED.getStatus(),
|
||||
WorkOrderStatusEnum.CANCELLED.getStatus(),
|
||||
// PAUSED 交由 resumeInterruptedOrder 经状态机恢复,不在此 Job 自动化处理
|
||||
WorkOrderStatusEnum.PAUSED.getStatus())
|
||||
.le(OpsOrderDO::getUpdateTime, threshold)
|
||||
.orderByAsc(OpsOrderDO::getUpdateTime)
|
||||
.last("LIMIT " + batchSize));
|
||||
|
||||
if (CollUtil.isEmpty(candidates)) {
|
||||
log.info("[CleanOrderAutoCancelJob] 无超时工单");
|
||||
return new CancelResult(0, 0, 0, 0, System.currentTimeMillis() - startTime);
|
||||
}
|
||||
|
||||
int succeeded = 0;
|
||||
int failed = 0;
|
||||
int skippedStale = 0;
|
||||
|
||||
for (OpsOrderDO order : candidates) {
|
||||
Long orderId = order.getId();
|
||||
try {
|
||||
// 乐观校验:候选装内存→实际 cancel 之间,用户可能已触达工单刷新 update_time。
|
||||
// 重查一次确认仍超时,避免把用户刚点过的工单一并 cancel 掉。
|
||||
OpsOrderDO fresh = opsOrderMapper.selectById(orderId);
|
||||
if (fresh == null
|
||||
|| WorkOrderStatusEnum.COMPLETED.getStatus().equals(fresh.getStatus())
|
||||
|| WorkOrderStatusEnum.CANCELLED.getStatus().equals(fresh.getStatus())
|
||||
|| WorkOrderStatusEnum.PAUSED.getStatus().equals(fresh.getStatus())
|
||||
|| fresh.getUpdateTime() == null
|
||||
|| fresh.getUpdateTime().isAfter(threshold)) {
|
||||
skippedStale++;
|
||||
log.info("[CleanOrderAutoCancelJob] 并发触达/状态已变,跳过: orderId={}, snapshotStatus={}, latestStatus={}, latestUpdateTime={}",
|
||||
orderId, order.getStatus(),
|
||||
fresh != null ? fresh.getStatus() : "NOT_FOUND",
|
||||
fresh != null ? fresh.getUpdateTime() : null);
|
||||
continue;
|
||||
}
|
||||
|
||||
orderLifecycleManager.cancelOrder(
|
||||
orderId,
|
||||
null,
|
||||
OperatorTypeEnum.SYSTEM,
|
||||
CANCEL_REASON);
|
||||
succeeded++;
|
||||
log.info("[CleanOrderAutoCancelJob] 自动取消成功: orderId={}, orderCode={}, status={}, updateTime={}",
|
||||
orderId, order.getOrderCode(), order.getStatus(), order.getUpdateTime());
|
||||
} catch (Exception e) {
|
||||
failed++;
|
||||
log.warn("[CleanOrderAutoCancelJob] 自动取消失败: orderId={}, orderCode={}, status={}, error={}",
|
||||
orderId, order.getOrderCode(), order.getStatus(), e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
long duration = System.currentTimeMillis() - startTime;
|
||||
log.info("[CleanOrderAutoCancelJob] 扫描完成: 扫描 {} 单, 成功 {}, 失败 {}, 跳过 {}, 耗时 {} ms",
|
||||
candidates.size(), succeeded, failed, skippedStale, duration);
|
||||
|
||||
return new CancelResult(candidates.size(), succeeded, failed, skippedStale, duration);
|
||||
}
|
||||
|
||||
public record CancelResult(int scanned, int succeeded, int failed, int skippedStale, long durationMs) {
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,198 @@
|
||||
package com.viewsh.module.ops.environment.job;
|
||||
|
||||
import com.viewsh.module.ops.core.lifecycle.OrderLifecycleManager;
|
||||
import com.viewsh.module.ops.dal.dataobject.workorder.OpsOrderDO;
|
||||
import com.viewsh.module.ops.dal.mysql.workorder.OpsOrderMapper;
|
||||
import com.viewsh.module.ops.enums.OperatorTypeEnum;
|
||||
import com.viewsh.module.ops.enums.WorkOrderStatusEnum;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.extension.ExtendWith;
|
||||
import org.mockito.InjectMocks;
|
||||
import org.mockito.Mock;
|
||||
import org.mockito.junit.jupiter.MockitoExtension;
|
||||
import org.springframework.test.util.ReflectionTestUtils;
|
||||
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.mockito.ArgumentMatchers.any;
|
||||
import static org.mockito.ArgumentMatchers.anyLong;
|
||||
import static org.mockito.ArgumentMatchers.eq;
|
||||
import static org.mockito.Mockito.doAnswer;
|
||||
import static org.mockito.Mockito.never;
|
||||
import static org.mockito.Mockito.times;
|
||||
import static org.mockito.Mockito.verify;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
/**
|
||||
* 验证 CleanOrderAutoCancelJob 的五条不变量:
|
||||
* <ol>
|
||||
* <li>无候选 → 返回零结果,不触发取消</li>
|
||||
* <li>正常批次 → 依次 cancel,成功计数正确</li>
|
||||
* <li>单条失败不中断其余 → try/catch 隔离</li>
|
||||
* <li>候选到 cancel 间被用户触达 → 乐观锁跳过(避免误杀)</li>
|
||||
* <li>候选到 cancel 间状态变为终态/PAUSED → 跳过</li>
|
||||
* </ol>
|
||||
*/
|
||||
@ExtendWith(MockitoExtension.class)
|
||||
class CleanOrderAutoCancelJobTest {
|
||||
|
||||
@Mock
|
||||
private OpsOrderMapper opsOrderMapper;
|
||||
@Mock
|
||||
private OrderLifecycleManager orderLifecycleManager;
|
||||
|
||||
@InjectMocks
|
||||
private CleanOrderAutoCancelJob job;
|
||||
|
||||
@BeforeEach
|
||||
void setUp() {
|
||||
ReflectionTestUtils.setField(job, "timeoutHours", 12);
|
||||
ReflectionTestUtils.setField(job, "batchSize", 200);
|
||||
}
|
||||
|
||||
@Test
|
||||
void scanAndCancel_whenNoCandidates_shouldReturnZeroCounts() {
|
||||
when(opsOrderMapper.selectList(any(com.viewsh.framework.mybatis.core.query.LambdaQueryWrapperX.class)))
|
||||
.thenReturn(Collections.emptyList());
|
||||
|
||||
CleanOrderAutoCancelJob.CancelResult result = job.scanAndCancel();
|
||||
|
||||
assertEquals(0, result.scanned());
|
||||
assertEquals(0, result.succeeded());
|
||||
assertEquals(0, result.failed());
|
||||
assertEquals(0, result.skippedStale());
|
||||
verify(orderLifecycleManager, never()).cancelOrder(anyLong(), any(), any(), any());
|
||||
}
|
||||
|
||||
@Test
|
||||
void scanAndCancel_whenAllCandidatesStillStale_shouldCancelAll() {
|
||||
LocalDateTime staleTime = LocalDateTime.now().minusHours(13);
|
||||
OpsOrderDO a = stale(101L, "WO-101", WorkOrderStatusEnum.DISPATCHED, staleTime);
|
||||
OpsOrderDO b = stale(102L, "WO-102", WorkOrderStatusEnum.CONFIRMED, staleTime);
|
||||
OpsOrderDO c = stale(103L, "WO-103", WorkOrderStatusEnum.ARRIVED, staleTime);
|
||||
|
||||
when(opsOrderMapper.selectList(any(com.viewsh.framework.mybatis.core.query.LambdaQueryWrapperX.class)))
|
||||
.thenReturn(List.of(a, b, c));
|
||||
// Fresh fetch confirms all three are still stale
|
||||
when(opsOrderMapper.selectById(101L)).thenReturn(a);
|
||||
when(opsOrderMapper.selectById(102L)).thenReturn(b);
|
||||
when(opsOrderMapper.selectById(103L)).thenReturn(c);
|
||||
|
||||
CleanOrderAutoCancelJob.CancelResult result = job.scanAndCancel();
|
||||
|
||||
assertEquals(3, result.scanned());
|
||||
assertEquals(3, result.succeeded());
|
||||
assertEquals(0, result.failed());
|
||||
assertEquals(0, result.skippedStale());
|
||||
verify(orderLifecycleManager, times(3))
|
||||
.cancelOrder(anyLong(), eq(null), eq(OperatorTypeEnum.SYSTEM), any());
|
||||
}
|
||||
|
||||
@Test
|
||||
void scanAndCancel_whenOneCancelThrows_shouldNotAbortBatch() {
|
||||
LocalDateTime staleTime = LocalDateTime.now().minusHours(13);
|
||||
OpsOrderDO a = stale(201L, "WO-201", WorkOrderStatusEnum.DISPATCHED, staleTime);
|
||||
OpsOrderDO b = stale(202L, "WO-202", WorkOrderStatusEnum.CONFIRMED, staleTime);
|
||||
OpsOrderDO c = stale(203L, "WO-203", WorkOrderStatusEnum.ARRIVED, staleTime);
|
||||
|
||||
when(opsOrderMapper.selectList(any(com.viewsh.framework.mybatis.core.query.LambdaQueryWrapperX.class)))
|
||||
.thenReturn(List.of(a, b, c));
|
||||
when(opsOrderMapper.selectById(201L)).thenReturn(a);
|
||||
when(opsOrderMapper.selectById(202L)).thenReturn(b);
|
||||
when(opsOrderMapper.selectById(203L)).thenReturn(c);
|
||||
// 第二条取消抛异常,不应影响第一、第三条。
|
||||
// 不能用 doThrow(...).when(mock).cancelOrder(eq(202L), ...)——strict stubs 会把"201L 调用和 202L 存根不匹配"判成错配。
|
||||
// 改用 doAnswer 按 orderId 路由,覆盖所有 cancel 调用。
|
||||
doAnswer(invocation -> {
|
||||
Long orderId = invocation.getArgument(0);
|
||||
if (orderId != null && orderId == 202L) {
|
||||
throw new IllegalStateException("状态机非法转换");
|
||||
}
|
||||
return null;
|
||||
}).when(orderLifecycleManager).cancelOrder(anyLong(), any(), any(), any());
|
||||
|
||||
CleanOrderAutoCancelJob.CancelResult result = job.scanAndCancel();
|
||||
|
||||
assertEquals(3, result.scanned());
|
||||
assertEquals(2, result.succeeded());
|
||||
assertEquals(1, result.failed());
|
||||
assertEquals(0, result.skippedStale());
|
||||
verify(orderLifecycleManager).cancelOrder(eq(201L), any(), any(), any());
|
||||
verify(orderLifecycleManager).cancelOrder(eq(202L), any(), any(), any());
|
||||
verify(orderLifecycleManager).cancelOrder(eq(203L), any(), any(), any());
|
||||
}
|
||||
|
||||
@Test
|
||||
void scanAndCancel_whenOrderTouchedBeforeCancel_shouldSkipAsStale() {
|
||||
// 候选装内存时 update_time=13h ago,实际 cancel 前用户刚刚点确认,update_time 刷为"1 分钟前"。
|
||||
// 乐观校验应跳过,避免误杀已被触达的工单。
|
||||
LocalDateTime snapshotUpdate = LocalDateTime.now().minusHours(13);
|
||||
LocalDateTime freshUpdate = LocalDateTime.now().minusMinutes(1);
|
||||
|
||||
OpsOrderDO snapshot = stale(301L, "WO-301", WorkOrderStatusEnum.DISPATCHED, snapshotUpdate);
|
||||
OpsOrderDO fresh = stale(301L, "WO-301", WorkOrderStatusEnum.CONFIRMED, freshUpdate);
|
||||
|
||||
when(opsOrderMapper.selectList(any(com.viewsh.framework.mybatis.core.query.LambdaQueryWrapperX.class)))
|
||||
.thenReturn(List.of(snapshot));
|
||||
when(opsOrderMapper.selectById(301L)).thenReturn(fresh);
|
||||
|
||||
CleanOrderAutoCancelJob.CancelResult result = job.scanAndCancel();
|
||||
|
||||
assertEquals(1, result.scanned());
|
||||
assertEquals(0, result.succeeded());
|
||||
assertEquals(1, result.skippedStale());
|
||||
verify(orderLifecycleManager, never()).cancelOrder(anyLong(), any(), any(), any());
|
||||
}
|
||||
|
||||
@Test
|
||||
void scanAndCancel_whenOrderBecameTerminal_shouldSkip() {
|
||||
// 候选装内存时还是 ARRIVED,实际 cancel 前已被其他路径 forceComplete 为 COMPLETED
|
||||
LocalDateTime staleTime = LocalDateTime.now().minusHours(13);
|
||||
OpsOrderDO snapshot = stale(401L, "WO-401", WorkOrderStatusEnum.ARRIVED, staleTime);
|
||||
OpsOrderDO fresh = stale(401L, "WO-401", WorkOrderStatusEnum.COMPLETED, staleTime);
|
||||
|
||||
when(opsOrderMapper.selectList(any(com.viewsh.framework.mybatis.core.query.LambdaQueryWrapperX.class)))
|
||||
.thenReturn(List.of(snapshot));
|
||||
when(opsOrderMapper.selectById(401L)).thenReturn(fresh);
|
||||
|
||||
CleanOrderAutoCancelJob.CancelResult result = job.scanAndCancel();
|
||||
|
||||
assertEquals(1, result.skippedStale());
|
||||
verify(orderLifecycleManager, never()).cancelOrder(anyLong(), any(), any(), any());
|
||||
}
|
||||
|
||||
@Test
|
||||
void scanAndCancel_whenOrderBecamePaused_shouldSkip() {
|
||||
// 快照是 DISPATCHED,刚被 P0 打断成 PAUSED——此 Job 应放行给 resumeInterruptedOrder
|
||||
LocalDateTime staleTime = LocalDateTime.now().minusHours(13);
|
||||
OpsOrderDO snapshot = stale(501L, "WO-501", WorkOrderStatusEnum.DISPATCHED, staleTime);
|
||||
OpsOrderDO fresh = stale(501L, "WO-501", WorkOrderStatusEnum.PAUSED,
|
||||
LocalDateTime.now().minusHours(14)); // update_time 刚刷新,但仍<=threshold;状态变 PAUSED 就该跳过
|
||||
|
||||
when(opsOrderMapper.selectList(any(com.viewsh.framework.mybatis.core.query.LambdaQueryWrapperX.class)))
|
||||
.thenReturn(List.of(snapshot));
|
||||
when(opsOrderMapper.selectById(501L)).thenReturn(fresh);
|
||||
|
||||
CleanOrderAutoCancelJob.CancelResult result = job.scanAndCancel();
|
||||
|
||||
assertEquals(1, result.skippedStale());
|
||||
verify(orderLifecycleManager, never()).cancelOrder(anyLong(), any(), any(), any());
|
||||
}
|
||||
|
||||
// ==================== Helpers ====================
|
||||
|
||||
private OpsOrderDO stale(Long id, String code, WorkOrderStatusEnum status, LocalDateTime updateTime) {
|
||||
OpsOrderDO order = OpsOrderDO.builder()
|
||||
.id(id)
|
||||
.orderCode(code)
|
||||
.status(status.getStatus())
|
||||
.orderType("CLEAN")
|
||||
.build();
|
||||
order.setUpdateTime(updateTime);
|
||||
return order;
|
||||
}
|
||||
}
|
||||
@@ -146,6 +146,12 @@ viewsh:
|
||||
connect-timeout: 5000
|
||||
read-timeout: 10000
|
||||
max-retry: 2
|
||||
clean:
|
||||
auto-cancel:
|
||||
# 保洁工单 update_time 距今超过此小时数视为卡死,由 CleanOrderAutoCancelJob 自动取消
|
||||
timeout-hours: 12
|
||||
# 单次扫描/取消上限,防止事件风暴;超出的工单留给下一轮 cron
|
||||
batch-size: 200
|
||||
# API 签名配置:外部系统调用开放接口时使用(如安保工单的告警系统)
|
||||
signature:
|
||||
apps:
|
||||
|
||||
Reference in New Issue
Block a user