From c807bf1fab5b15ba80d40c7383ff849d5d19c43e Mon Sep 17 00:00:00 2001 From: lzh Date: Tue, 28 Apr 2026 17:43:38 +0800 Subject: [PATCH] =?UTF-8?q?fix(ci):=20=E8=A1=A5=203=20=E4=B8=AA=E7=9B=B8?= =?UTF-8?q?=E5=85=B3=E9=9A=90=E6=82=A3=E2=80=94=E2=80=94backup=20=E5=86=99?= =?UTF-8?q?=E6=AD=BB=20core.yml=E3=80=81deploy=20=E6=BC=8F=20export=20IMAG?= =?UTF-8?q?E=5FTAG=E3=80=81NonCPS=20=E8=AF=BB=20env?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 排查 PROD 误伤事故时连带发现 3 个会引发其他错误的位置: 1. backupCurrentDeployment 在远端 cp docker-compose.core.yml.backup 写死了 core 文件名 → release 部署到 .7 时 core.yml 不存在会触发 set -e 退出。改为 cp ${env.COMPOSE_FILE} 并加 [ -f ... ] 检查避免硬失败。 2. deployService 在 ssh 远端命令里 docker compose pull/up 之前没 export IMAGE_TAG, docker compose 会 fallback 到 yml 的 ${IMAGE_TAG:-latest},永远拉到 :latest 镜像 而不是本次构建的版本 tag。这就是 PROD 容器镜像显示 :latest 的根因——本意要拉 master-N-shortSHA 的镜像,但实际拉了 master 早先 push 的 :latest。 修复:注入 export IMAGE_TAG=${env.IMAGE_TAG} + REGISTRY_HOST。 3. getContainerNameForService 是 @NonCPS 函数,里面访问 env.CONTAINER_NAME_SUFFIX 在 NonCPS 上下文下 binding 不一定可达。改成把 suffix 作为参数传入,3 个调用点 全部加上 env.CONTAINER_NAME_SUFFIX 实参。函数纯粹无副作用。 --- Jenkinsfile | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index c31c1442..13d70962 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -411,7 +411,7 @@ pipeline { sh """ echo "📊 Service Status:" ${servicesToCheck.collect { - def container = getContainerNameForService(it) + def container = getContainerNameForService(it, env.CONTAINER_NAME_SUFFIX) "docker inspect --format='${it}: {{.State.Status}} ({{.State.Health.Status}})' ${container} 2>/dev/null || echo '${it}: not found'" }.join('\n ')} """ @@ -767,11 +767,15 @@ def backupCurrentDeployment(def services) { ssh ${sshOpts} root@${env.DEPLOY_HOST} ' cd ${env.DEPLOY_PATH} - # 保存当前 docker-compose 配置 - cp docker-compose.core.yml docker-compose.core.yml.backup-${env.BUILD_NUMBER} - - # 记录当前运行的镜像 - docker compose -f ${env.COMPOSE_FILE} images > deployment-state-${env.BUILD_NUMBER}.txt + # 保存当前 docker-compose 配置(用 ${env.COMPOSE_FILE} 适配多环境,文件不存在则跳过) + if [ -f ${env.COMPOSE_FILE} ]; then + cp ${env.COMPOSE_FILE} ${env.COMPOSE_FILE}.backup-${env.BUILD_NUMBER} + fi + + # 记录当前运行的镜像(compose 文件不存在时跳过) + if [ -f ${env.COMPOSE_FILE} ]; then + docker compose -f ${env.COMPOSE_FILE} images > deployment-state-${env.BUILD_NUMBER}.txt + fi echo "✅ Backup completed: deployment-state-${env.BUILD_NUMBER}.txt" ' @@ -927,7 +931,7 @@ def deployServiceWithTimeout(String service) { // 部署单个服务 def deployService(String service) { - def containerName = getContainerNameForService(service) + def containerName = getContainerNameForService(service, env.CONTAINER_NAME_SUFFIX) def sshOpts = "-o StrictHostKeyChecking=no -o ConnectTimeout=10 -i ${env.SSH_KEY}" echo "" @@ -945,7 +949,11 @@ def deployService(String service) { set -e cd ${env.DEPLOY_PATH} - echo "📥 Pulling ${service}..." + # 关键:注入本次构建的镜像 tag,否则 docker compose 会 fallback 到 yml 的 :latest(拉到 prod 镜像) + export IMAGE_TAG=${env.IMAGE_TAG} + export REGISTRY_HOST=${env.REGISTRY} + + echo "📥 Pulling ${service} (tag=${env.IMAGE_TAG})..." docker compose -f ${env.COMPOSE_FILE} pull ${service} echo "🔄 Restarting ${service}..." @@ -1049,7 +1057,7 @@ def waitForServiceHealthy(String containerName, String serviceName, String sshOp // 检查服务健康(带重试) def checkServiceHealthWithRetry(String service) { - def containerName = getContainerNameForService(service) + def containerName = getContainerNameForService(service, env.CONTAINER_NAME_SUFFIX) def sshOpts = "-o StrictHostKeyChecking=no -o ConnectTimeout=10 -i ${env.SSH_KEY}" retry(3) { @@ -1117,11 +1125,9 @@ def sortServicesByDependency(def services) { } } -// 获取服务对应的容器名称 +// 获取服务对应的容器名称(suffix 由调用者传入,避免 NonCPS 函数依赖 env binding) @NonCPS -def getContainerNameForService(String service) { - // 容器名 suffix 在 Initialize 阶段按分支决定(release/next='-release'、master=''), - // 物理隔离防止 release 部署误伤 prod 同名容器 +def getContainerNameForService(String service, String suffix) { def map = [ 'viewsh-gateway': 'aiot-gateway', 'viewsh-module-system-server': 'aiot-system-server', @@ -1130,7 +1136,7 @@ def getContainerNameForService(String service) { 'viewsh-module-iot-gateway': 'aiot-iot-gateway', 'viewsh-module-ops-server': 'aiot-ops-server' ] - return map.get(service, "aiot-${service}") + (env.CONTAINER_NAME_SUFFIX ?: '') + return map.get(service, "aiot-${service}") + (suffix ?: '') } // 获取服务对应的模块路径