feat(ops): 新增保洁工单超时自动取消 Job + 单元测试
Some checks failed
Java CI with Maven / build (11) (push) Has been cancelled
Java CI with Maven / build (17) (push) Has been cancelled
Java CI with Maven / build (8) (push) Has been cancelled

背景:保洁工单偶尔因设备离线/信标丢失导致卡在非终态(如 PENDING 超 12h 没派,
DISPATCHED 超 12h 没确认),靠人工清理成本高。补一个每小时跑的 XXL-Job 扫描关单。

实现:
- CleanOrderAutoCancelJob.scanAndCancel:
  * 查询 update_time 距今超 timeoutHours(默认 12h)的 CLEAN 工单
  * 目标状态预期为 PENDING/QUEUED/DISPATCHED/CONFIRMED/ARRIVED(实现上是排除名单:notIn COMPLETED/CANCELLED/PAUSED),**排除 PAUSED**
    (PAUSED 是 P0 打断的产物,应由 resumeInterruptedOrder 走状态机恢复,
    此处若把它 CANCEL,会破坏 P0 完成后的 resume 链路)
  * 调用 orderLifecycleManager.cancelOrder 走完整责任链,事件监听器负责
    TTS 停播/设备关联回收/审计日志
  * cancel 前再 selectById 做乐观校验:若 update_time 已刷新或状态已变
    (COMPLETED/CANCELLED/PAUSED),跳过;避免候选装内存到实际 cancel
    之间用户刚触达的工单被误杀
  * 单单独立 try/catch 隔离,单条失败不断批
  * batchSize 限流(默认 200),事件风暴防护
- application.yaml 补默认配置:viewsh.ops.clean.auto-cancel.{timeout-hours, batch-size}
- CleanOrderAutoCancelJobTest 覆盖 6 条不变量:
  无候选零计数、全成功、部分失败不中断、乐观锁跳过 stale、终态跳过、PAUSED 跳过

XXL-Job 配置建议:
- JobHandler: cleanOrderAutoCancelJob
- Cron: 0 17 * * * ? (每小时 :17,避开整点尖峰)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
lzh
2026-04-20 15:21:33 +08:00
parent ba6f94a279
commit c78759fd52
3 changed files with 364 additions and 0 deletions

View File

@@ -0,0 +1,160 @@
package com.viewsh.module.ops.environment.job;
import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.util.StrUtil;
import com.viewsh.framework.mybatis.core.query.LambdaQueryWrapperX;
import com.viewsh.framework.tenant.core.job.TenantJob;
import com.viewsh.module.ops.core.lifecycle.OrderLifecycleManager;
import com.viewsh.module.ops.dal.dataobject.workorder.OpsOrderDO;
import com.viewsh.module.ops.dal.mysql.workorder.OpsOrderMapper;
import com.viewsh.module.ops.enums.OperatorTypeEnum;
import com.viewsh.module.ops.enums.WorkOrderStatusEnum;
import com.xxl.job.core.handler.annotation.XxlJob;
import jakarta.annotation.Resource;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import java.time.LocalDateTime;
import java.util.List;
/**
 * Scheduled job that auto-cancels cleaning work orders which have stopped making progress.
 * <p>
 * Responsibility: scan work orders with {@code order_type = CLEAN} whose {@code update_time} is
 * older than the configured timeout (12 hours by default) and cancel them as
 * {@link OperatorTypeEnum#SYSTEM} through {@link OrderLifecycleManager#cancelOrder}.
 * <p>
 * Design notes:
 * <ol>
 *   <li>The time base is {@code update_time}, not {@code create_time}, so the timeout is measured
 *       from the most recent recorded change of the order rather than from its creation.</li>
 *   <li>Status filter: the query is an <em>exclusion</em> list — it skips COMPLETED, CANCELLED and
 *       PAUSED. The intended targets are PENDING / QUEUED / DISPATCHED / CONFIRMED / ARRIVED.
 *       NOTE(review): because this is {@code notIn} and not {@code in}, any other status that
 *       exists (or is added later) is also eligible — confirm that is intended.
 *       PAUSED is skipped on purpose: according to the original design notes it is produced by a
 *       P0 interruption and must be restored by {@code resumeInterruptedOrder}; cancelling it here
 *       would break that resume path. A PAUSED order that is truly stuck is left for manual
 *       review.</li>
 *   <li>Cancellation goes through {@link OrderLifecycleManager#cancelOrder}. According to the
 *       original design notes that runs the full handler chain (state transition, queue sync,
 *       event publish) and the CANCELLED event listener takes care of stopping TTS, releasing the
 *       device/order association and writing the audit log — none of that is visible in this
 *       class.</li>
 *   <li>Every order is processed inside its own try/catch, so one failure does not abort the rest
 *       of the batch. Transaction scoping is whatever {@code cancelOrder} provides; this class
 *       declares none.</li>
 *   <li>One run handles at most {@code batchSize} orders (oldest {@code update_time} first) to
 *       avoid an event storm; the remainder is left for the next trigger.</li>
 *   <li>Right before cancelling, the order is re-read and skipped if it disappeared, reached an
 *       excluded status, or its {@code update_time} moved past the threshold. This avoids
 *       cancelling an order a user touched between the scan and the cancel.</li>
 * </ol>
 * <p>
 * Suggested XXL-Job configuration:
 * <ul>
 *   <li>JobHandler: {@code cleanOrderAutoCancelJob}</li>
 *   <li>Cron: {@code 0 17 * * * ?} (minute 17 of every hour, away from the top-of-hour peak)</li>
 * </ul>
 *
 * @author lzh
 */
@Slf4j
@Component
public class CleanOrderAutoCancelJob {

    private static final String BUSINESS_TYPE_CLEAN = "CLEAN";

    /**
     * Cancel-reason template; the placeholder receives the configured timeout in hours so the
     * recorded reason stays truthful when {@code timeout-hours} is not the default 12.
     */
    private static final String CANCEL_REASON_TEMPLATE = "超过{}小时未处理系统自动完结";

    @Resource
    private OpsOrderMapper opsOrderMapper;

    @Resource
    private OrderLifecycleManager orderLifecycleManager;

    /** Timeout in hours: an order whose update_time is older than this is treated as stuck. */
    @Value("${viewsh.ops.clean.auto-cancel.timeout-hours:12}")
    private int timeoutHours;

    /** Maximum number of orders scanned/cancelled per run. */
    @Value("${viewsh.ops.clean.auto-cancel.batch-size:200}")
    private int batchSize;

    /**
     * XXL-Job entry point: runs one scan and renders the outcome as a summary string.
     * <p>
     * Any exception from the scan is logged and turned into a failure message instead of being
     * rethrown. NOTE(review): whether the scheduler then records the run as failed depends on how
     * it interprets the return value — confirm against the XXL-Job version in use.
     *
     * @return human-readable summary of the run, or a failure message
     */
    @XxlJob("cleanOrderAutoCancelJob")
    @TenantJob
    public String execute() {
        try {
            CancelResult result = scanAndCancel();
            return StrUtil.format(
                    "保洁工单超时自动取消完成: 扫描 {} 单, 成功 {}, 失败 {}, 跳过 {}, 耗时 {} ms",
                    result.scanned, result.succeeded, result.failed, result.skippedStale, result.durationMs);
        } catch (Exception e) {
            log.error("[CleanOrderAutoCancelJob] 执行失败", e);
            return StrUtil.format("保洁工单超时自动取消失败: {}", e.getMessage());
        }
    }

    /**
     * Scans for timed-out cleaning orders and cancels them one by one.
     *
     * @return counters for this run (scanned / succeeded / failed / skipped) and the elapsed time
     * @throws IllegalStateException if {@code timeoutHours} or {@code batchSize} is not positive;
     *         a non-positive timeout would put the threshold at or after "now" and make every open
     *         cleaning order eligible for cancellation
     */
    public CancelResult scanAndCancel() {
        long startTime = System.currentTimeMillis();
        if (timeoutHours <= 0 || batchSize <= 0) {
            throw new IllegalStateException(StrUtil.format(
                    "自动取消配置非法: timeoutHours={}, batchSize={}, 两者均须大于 0",
                    timeoutHours, batchSize));
        }

        LocalDateTime threshold = LocalDateTime.now().minusHours(timeoutHours);
        String cancelReason = StrUtil.format(CANCEL_REASON_TEMPLATE, timeoutHours);
        log.info("[CleanOrderAutoCancelJob] 开始扫描: timeoutHours={}, threshold={}, batchSize={}",
                timeoutHours, threshold, batchSize);

        List<OpsOrderDO> candidates = opsOrderMapper.selectList(new LambdaQueryWrapperX<OpsOrderDO>()
                .eq(OpsOrderDO::getOrderType, BUSINESS_TYPE_CLEAN)
                .notIn(OpsOrderDO::getStatus,
                        WorkOrderStatusEnum.COMPLETED.getStatus(),
                        WorkOrderStatusEnum.CANCELLED.getStatus(),
                        // PAUSED is left to resumeInterruptedOrder; this job never touches it.
                        WorkOrderStatusEnum.PAUSED.getStatus())
                .le(OpsOrderDO::getUpdateTime, threshold)
                .orderByAsc(OpsOrderDO::getUpdateTime)
                // batchSize is a validated positive int from configuration, not user input.
                .last("LIMIT " + batchSize));

        if (CollUtil.isEmpty(candidates)) {
            log.info("[CleanOrderAutoCancelJob] 无超时工单");
            return new CancelResult(0, 0, 0, 0, System.currentTimeMillis() - startTime);
        }

        int succeeded = 0;
        int failed = 0;
        int skippedStale = 0;
        for (OpsOrderDO order : candidates) {
            Long orderId = order.getId();
            try {
                // Re-read right before cancelling: the snapshot may be outdated by now.
                OpsOrderDO fresh = opsOrderMapper.selectById(orderId);
                if (!isStillCancellable(fresh, threshold)) {
                    skippedStale++;
                    log.info("[CleanOrderAutoCancelJob] 并发触达/状态已变,跳过: orderId={}, snapshotStatus={}, latestStatus={}, latestUpdateTime={}",
                            orderId, order.getStatus(),
                            fresh != null ? fresh.getStatus() : "NOT_FOUND",
                            fresh != null ? fresh.getUpdateTime() : null);
                    continue;
                }
                // The operator id is null because the cancel is issued as SYSTEM.
                orderLifecycleManager.cancelOrder(
                        orderId,
                        null,
                        OperatorTypeEnum.SYSTEM,
                        cancelReason);
                succeeded++;
                log.info("[CleanOrderAutoCancelJob] 自动取消成功: orderId={}, orderCode={}, status={}, updateTime={}",
                        orderId, order.getOrderCode(), order.getStatus(), order.getUpdateTime());
            } catch (Exception e) {
                // Isolate the failure so the remaining candidates are still processed.
                failed++;
                log.warn("[CleanOrderAutoCancelJob] 自动取消失败: orderId={}, orderCode={}, status={}, error={}",
                        orderId, order.getOrderCode(), order.getStatus(), e.getMessage(), e);
            }
        }

        long duration = System.currentTimeMillis() - startTime;
        log.info("[CleanOrderAutoCancelJob] 扫描完成: 扫描 {} 单, 成功 {}, 失败 {}, 跳过 {}, 耗时 {} ms",
                candidates.size(), succeeded, failed, skippedStale, duration);
        return new CancelResult(candidates.size(), succeeded, failed, skippedStale, duration);
    }

    /**
     * Decides whether a freshly re-read order may still be cancelled.
     *
     * @param fresh     the order as just re-read by id, or {@code null} if it no longer exists
     * @param threshold orders updated after this instant are considered touched and are spared
     * @return {@code true} only if the order exists, is not COMPLETED / CANCELLED / PAUSED, and
     *         has a non-null update time that is not after {@code threshold}
     */
    private static boolean isStillCancellable(OpsOrderDO fresh, LocalDateTime threshold) {
        if (fresh == null || fresh.getUpdateTime() == null) {
            return false;
        }
        boolean excludedStatus =
                WorkOrderStatusEnum.COMPLETED.getStatus().equals(fresh.getStatus())
                        || WorkOrderStatusEnum.CANCELLED.getStatus().equals(fresh.getStatus())
                        || WorkOrderStatusEnum.PAUSED.getStatus().equals(fresh.getStatus());
        return !excludedStatus && !fresh.getUpdateTime().isAfter(threshold);
    }

    /**
     * Outcome of one scan.
     *
     * @param scanned      number of candidate orders returned by the query
     * @param succeeded    number of orders for which {@code cancelOrder} returned normally
     * @param failed       number of orders whose processing threw an exception
     * @param skippedStale number of orders skipped by the re-check before cancel
     * @param durationMs   elapsed wall-clock time of the scan in milliseconds
     */
    public record CancelResult(int scanned, int succeeded, int failed, int skippedStale, long durationMs) {
    }
}

View File

@@ -0,0 +1,198 @@
package com.viewsh.module.ops.environment.job;
import com.viewsh.module.ops.core.lifecycle.OrderLifecycleManager;
import com.viewsh.module.ops.dal.dataobject.workorder.OpsOrderDO;
import com.viewsh.module.ops.dal.mysql.workorder.OpsOrderMapper;
import com.viewsh.module.ops.enums.OperatorTypeEnum;
import com.viewsh.module.ops.enums.WorkOrderStatusEnum;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.InjectMocks;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;
import org.springframework.test.util.ReflectionTestUtils;
import java.time.LocalDateTime;
import java.util.Collections;
import java.util.List;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyLong;
import static org.mockito.ArgumentMatchers.eq;
import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.never;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
/**
 * Unit tests for {@link CleanOrderAutoCancelJob#scanAndCancel()} with mocked collaborators.
 * <p>
 * Invariants covered:
 * <ol>
 *   <li>No candidates → all counters are zero and nothing is cancelled.</li>
 *   <li>Normal batch → every candidate is cancelled and counted as succeeded.</li>
 *   <li>One cancel throws → the remaining candidates are still processed.</li>
 *   <li>Order touched between scan and cancel (update_time refreshed) → skipped.</li>
 *   <li>Order became terminal (COMPLETED) between scan and cancel → skipped.</li>
 *   <li>Order became PAUSED between scan and cancel → skipped.</li>
 *   <li>Order no longer exists at re-check time → skipped.</li>
 * </ol>
 */
@ExtendWith(MockitoExtension.class)
class CleanOrderAutoCancelJobTest {

    @Mock
    private OpsOrderMapper opsOrderMapper;

    @Mock
    private OrderLifecycleManager orderLifecycleManager;

    @InjectMocks
    private CleanOrderAutoCancelJob job;

    @BeforeEach
    void setUp() {
        // @Value fields are not populated outside a Spring context; set the defaults by hand.
        ReflectionTestUtils.setField(job, "timeoutHours", 12);
        ReflectionTestUtils.setField(job, "batchSize", 200);
    }

    @Test
    void scanAndCancel_whenNoCandidates_shouldReturnZeroCounts() {
        stubCandidates();

        CleanOrderAutoCancelJob.CancelResult result = job.scanAndCancel();

        assertEquals(0, result.scanned());
        assertEquals(0, result.succeeded());
        assertEquals(0, result.failed());
        assertEquals(0, result.skippedStale());
        verify(orderLifecycleManager, never()).cancelOrder(anyLong(), any(), any(), any());
    }

    @Test
    void scanAndCancel_whenAllCandidatesStillStale_shouldCancelAll() {
        LocalDateTime staleTime = LocalDateTime.now().minusHours(13);
        OpsOrderDO a = newOrder(101L, "WO-101", WorkOrderStatusEnum.DISPATCHED, staleTime);
        OpsOrderDO b = newOrder(102L, "WO-102", WorkOrderStatusEnum.CONFIRMED, staleTime);
        OpsOrderDO c = newOrder(103L, "WO-103", WorkOrderStatusEnum.ARRIVED, staleTime);
        stubCandidates(a, b, c);
        // The re-check sees exactly the snapshot, i.e. all three are still timed out.
        stubFreshFetch(a, b, c);

        CleanOrderAutoCancelJob.CancelResult result = job.scanAndCancel();

        assertEquals(3, result.scanned());
        assertEquals(3, result.succeeded());
        assertEquals(0, result.failed());
        assertEquals(0, result.skippedStale());
        verify(orderLifecycleManager, times(3))
                .cancelOrder(anyLong(), eq(null), eq(OperatorTypeEnum.SYSTEM), any());
    }

    @Test
    void scanAndCancel_whenOneCancelThrows_shouldNotAbortBatch() {
        LocalDateTime staleTime = LocalDateTime.now().minusHours(13);
        OpsOrderDO a = newOrder(201L, "WO-201", WorkOrderStatusEnum.DISPATCHED, staleTime);
        OpsOrderDO b = newOrder(202L, "WO-202", WorkOrderStatusEnum.CONFIRMED, staleTime);
        OpsOrderDO c = newOrder(203L, "WO-203", WorkOrderStatusEnum.ARRIVED, staleTime);
        stubCandidates(a, b, c);
        stubFreshFetch(a, b, c);

        // The second cancel throws; the first and third must be unaffected.
        // A single doAnswer routed by orderId is used instead of doThrow(...).cancelOrder(eq(202L), ...):
        // under strict stubs, the calls for the other ids would be reported as argument mismatches.
        doAnswer(invocation -> {
            Long orderId = invocation.getArgument(0);
            if (orderId != null && orderId == 202L) {
                throw new IllegalStateException("状态机非法转换");
            }
            return null;
        }).when(orderLifecycleManager).cancelOrder(anyLong(), any(), any(), any());

        CleanOrderAutoCancelJob.CancelResult result = job.scanAndCancel();

        assertEquals(3, result.scanned());
        assertEquals(2, result.succeeded());
        assertEquals(1, result.failed());
        assertEquals(0, result.skippedStale());
        verify(orderLifecycleManager).cancelOrder(eq(201L), any(), any(), any());
        verify(orderLifecycleManager).cancelOrder(eq(202L), any(), any(), any());
        verify(orderLifecycleManager).cancelOrder(eq(203L), any(), any(), any());
    }

    @Test
    void scanAndCancel_whenOrderTouchedBeforeCancel_shouldSkipAsStale() {
        // Snapshot: update_time 13h ago. Before the cancel a user confirmed the order, which moved
        // update_time to one minute ago. The re-check must spare the order.
        LocalDateTime snapshotUpdate = LocalDateTime.now().minusHours(13);
        LocalDateTime freshUpdate = LocalDateTime.now().minusMinutes(1);
        OpsOrderDO snapshot = newOrder(301L, "WO-301", WorkOrderStatusEnum.DISPATCHED, snapshotUpdate);
        OpsOrderDO fresh = newOrder(301L, "WO-301", WorkOrderStatusEnum.CONFIRMED, freshUpdate);
        stubCandidates(snapshot);
        stubFreshFetch(fresh);

        CleanOrderAutoCancelJob.CancelResult result = job.scanAndCancel();

        assertEquals(1, result.scanned());
        assertEquals(0, result.succeeded());
        assertEquals(0, result.failed());
        assertEquals(1, result.skippedStale());
        verify(orderLifecycleManager, never()).cancelOrder(anyLong(), any(), any(), any());
    }

    @Test
    void scanAndCancel_whenOrderBecameTerminal_shouldSkip() {
        // Snapshot is ARRIVED; by the time of the re-check another path has completed the order.
        LocalDateTime staleTime = LocalDateTime.now().minusHours(13);
        OpsOrderDO snapshot = newOrder(401L, "WO-401", WorkOrderStatusEnum.ARRIVED, staleTime);
        OpsOrderDO fresh = newOrder(401L, "WO-401", WorkOrderStatusEnum.COMPLETED, staleTime);
        stubCandidates(snapshot);
        stubFreshFetch(fresh);

        CleanOrderAutoCancelJob.CancelResult result = job.scanAndCancel();

        assertEquals(1, result.scanned());
        assertEquals(0, result.succeeded());
        assertEquals(0, result.failed());
        assertEquals(1, result.skippedStale());
        verify(orderLifecycleManager, never()).cancelOrder(anyLong(), any(), any(), any());
    }

    @Test
    void scanAndCancel_whenOrderBecamePaused_shouldSkip() {
        // Snapshot is DISPATCHED; the re-check sees PAUSED. update_time is deliberately kept beyond
        // the threshold so that only the PAUSED status can be the reason for the skip.
        LocalDateTime staleTime = LocalDateTime.now().minusHours(13);
        OpsOrderDO snapshot = newOrder(501L, "WO-501", WorkOrderStatusEnum.DISPATCHED, staleTime);
        OpsOrderDO fresh = newOrder(501L, "WO-501", WorkOrderStatusEnum.PAUSED, staleTime);
        stubCandidates(snapshot);
        stubFreshFetch(fresh);

        CleanOrderAutoCancelJob.CancelResult result = job.scanAndCancel();

        assertEquals(1, result.scanned());
        assertEquals(0, result.succeeded());
        assertEquals(0, result.failed());
        assertEquals(1, result.skippedStale());
        verify(orderLifecycleManager, never()).cancelOrder(anyLong(), any(), any(), any());
    }

    @Test
    void scanAndCancel_whenOrderDeletedBeforeCancel_shouldSkip() {
        // The candidate was returned by the scan but selectById finds nothing at re-check time.
        LocalDateTime staleTime = LocalDateTime.now().minusHours(13);
        OpsOrderDO snapshot = newOrder(601L, "WO-601", WorkOrderStatusEnum.DISPATCHED, staleTime);
        stubCandidates(snapshot);
        when(opsOrderMapper.selectById(601L)).thenReturn(null);

        CleanOrderAutoCancelJob.CancelResult result = job.scanAndCancel();

        assertEquals(1, result.scanned());
        assertEquals(0, result.succeeded());
        assertEquals(0, result.failed());
        assertEquals(1, result.skippedStale());
        verify(orderLifecycleManager, never()).cancelOrder(anyLong(), any(), any(), any());
    }

    // ==================== Helpers ====================

    /** Makes the candidate query return the given orders (none → empty list). */
    @SuppressWarnings("unchecked") // the wrapper matcher is raw, exactly as a class-literal matcher must be
    private void stubCandidates(OpsOrderDO... candidates) {
        when(opsOrderMapper.selectList(any(com.viewsh.framework.mybatis.core.query.LambdaQueryWrapperX.class)))
                .thenReturn(List.of(candidates));
    }

    /** Makes the by-id re-check return each given order for its own id. */
    private void stubFreshFetch(OpsOrderDO... orders) {
        for (OpsOrderDO order : orders) {
            when(opsOrderMapper.selectById(order.getId())).thenReturn(order);
        }
    }

    /** Builds a CLEAN order with the given id, code, status and update time. */
    private OpsOrderDO newOrder(Long id, String code, WorkOrderStatusEnum status, LocalDateTime updateTime) {
        OpsOrderDO order = OpsOrderDO.builder()
                .id(id)
                .orderCode(code)
                .status(status.getStatus())
                .orderType("CLEAN")
                .build();
        order.setUpdateTime(updateTime);
        return order;
    }
}

View File

@@ -146,6 +146,12 @@ viewsh:
connect-timeout: 5000
read-timeout: 10000
max-retry: 2
clean:
auto-cancel:
# 保洁工单 update_time 距今超过此小时数视为卡死,由 CleanOrderAutoCancelJob 自动取消
timeout-hours: 12
# 单次扫描/取消上限,防止事件风暴;超出的工单留给下一轮 cron
batch-size: 200
# API 签名配置:外部系统调用开放接口时使用(如安保工单的告警系统)
signature:
apps: