diff --git a/Jenkinsfile b/Jenkinsfile index 2d8ee1f..c967523 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1,6 +1,5 @@ // ============================================ -// AIOT Platform - Jenkins Pipeline (Enterprise Edition) -// 优化版本:错误处理 + 性能优化 + 完善日志 +// AIOT Platform - Jenkins Pipeline (Optimized Edition) // ============================================ pipeline { @@ -14,14 +13,12 @@ pipeline { disableConcurrentBuilds() timeout(time: 90, unit: 'MINUTES') timestamps() - retry(1) // 失败自动重试1次 + retry(1) } environment { // 镜像仓库配置 REGISTRY = 'localhost:5000' - // 注意:IMAGE_TAG 将在 Checkout 阶段动态设置 - // IMAGE_TAG = "${BRANCH_NAME}-${BUILD_NUMBER}-${GIT_COMMIT}" DEPS_IMAGE = "${REGISTRY}/aiot-deps:latest" // 服务配置 @@ -32,42 +29,59 @@ pipeline { DEPLOY_PATH = '/opt/aiot-platform-cloud' SSH_KEY = '/var/jenkins_home/.ssh/id_rsa' - // 性能配置 - MAX_PARALLEL_BUILDS = 2 - BUILD_TIMEOUT = 45 // 单个服务构建超时(分钟) - DEPLOY_TIMEOUT = 10 // 单个服务部署超时(分钟) - HEALTH_CHECK_TIMEOUT = 180 // 健康检查总超时(秒) - HEALTH_CHECK_INTERVAL = 10 // 健康检查间隔(秒) + // 性能配置 - 将动态调整 + BUILD_TIMEOUT = 45 + DEPLOY_TIMEOUT = 10 + HEALTH_CHECK_TIMEOUT = 180 + HEALTH_CHECK_INTERVAL = 10 + + // 【优化1】Maven 缓存配置 + MAVEN_CACHE_VOLUME = 'jenkins-maven-cache' + MAVEN_OPTS = '-Dmaven.repo.local=/var/jenkins_home/.m2/repository' + + // 【优化7】回滚配置 + ROLLBACK_ENABLED = 'true' + KEEP_PREVIOUS_IMAGES = 'true' } stages { stage('Initialize') { steps { script { + // 【优化6】记录开始时间 + env.PIPELINE_START_TIME = System.currentTimeMillis() + echo "==========================================" - echo " AIOT Platform - CI/CD Pipeline" + echo " AIOT Platform - CI/CD Pipeline (Optimized)" echo "==========================================" echo "Branch: ${env.BRANCH_NAME}" echo "Build: #${env.BUILD_NUMBER}" echo "Workspace: ${env.WORKSPACE}" + echo "Start Time: ${new Date()}" echo "==========================================" + + // 【优化2】动态检测系统资源 + detectSystemResources() } } } stage('Checkout') { steps { - retry(3) { - checkout scm - } script { - // 动态设置环境变量(避免在 environment 块中使用 env 变量) + def stageStartTime = System.currentTimeMillis() + + retry(3) { + checkout scm + } + def shortCommit = sh( script: 'git rev-parse --short HEAD', returnStdout: true ).trim() env.IMAGE_TAG = "${env.BRANCH_NAME}-${env.BUILD_NUMBER}-${shortCommit}" + env.PREVIOUS_IMAGE_TAG = getPreviousImageTag() env.GIT_COMMIT_MSG = sh( script: 'git log -1 --pretty=%B', returnStdout: true @@ -75,7 +89,11 @@ pipeline { echo "📦 Commit: ${shortCommit}" echo "📝 Message: ${env.GIT_COMMIT_MSG}" - echo "🏷️ Image Tag: ${env.IMAGE_TAG}" + echo "🏷️ Current Tag: ${env.IMAGE_TAG}" + echo "🔖 Previous Tag: ${env.PREVIOUS_IMAGE_TAG}" + + // 【优化6】记录阶段耗时 + recordStageMetrics('Checkout', stageStartTime) } } } @@ -83,28 +101,29 @@ pipeline { stage('Detect Changes') { steps { script { - // 获取变更文件(只执行一次 git diff) + def stageStartTime = System.currentTimeMillis() + def changedFiles = getChangedFiles() echo "📝 Changed files: ${changedFiles.size()} files" - // 判断需要构建的服务 env.SERVICES_TO_BUILD = detectServicesToBuild(changedFiles) env.DEPS_CHANGED = checkIfDepsChanged(changedFiles) if (env.SERVICES_TO_BUILD.isEmpty()) { echo "⏭️ No changes detected, skipping build" currentBuild.result = 'SUCCESS' - return // 直接跳过后续阶段 + return } echo "🔄 Services to build: ${env.SERVICES_TO_BUILD}" echo "📦 Deps changed: ${env.DEPS_CHANGED}" - // 显示变更的服务 env.SERVICES_TO_BUILD.split(',').each { service -> def module = getModulePathForService(service) echo " - ${service} (${module})" } + + recordStageMetrics('Detect Changes', stageStartTime) } } } @@ -115,11 +134,23 @@ pipeline { } steps { script { + def stageStartTime = System.currentTimeMillis() + echo "🔍 Running pre-build checks..." - // 检查 Docker 是否可用 + // 检查 Docker sh "docker version >/dev/null 2>&1 || { echo '❌ Docker not available'; exit 1; }" + // 【优化1】检查并创建 Maven 缓存卷 + sh """ + if ! docker volume inspect ${env.MAVEN_CACHE_VOLUME} >/dev/null 2>&1; then + echo "📦 Creating Maven cache volume: ${env.MAVEN_CACHE_VOLUME}" + docker volume create ${env.MAVEN_CACHE_VOLUME} + else + echo "✅ Maven cache volume exists: ${env.MAVEN_CACHE_VOLUME}" + fi + """ + // 检查磁盘空间 def diskUsage = sh( script: "df ${env.WORKSPACE} | tail -1 | awk '{print \$5}' | sed 's/%//'", @@ -138,6 +169,7 @@ pipeline { """ echo "✅ Pre-build checks passed" + recordStageMetrics('Pre-build Check', stageStartTime) } } } @@ -151,21 +183,28 @@ pipeline { } steps { script { - echo "📦 Building dependencies base image..." + def stageStartTime = System.currentTimeMillis() + + echo "📦 Building dependencies base image with Maven cache..." timeout(time: 15, unit: 'MINUTES') { + // 【优化1】使用 Maven 缓存卷加速依赖下载 sh """ set -e - echo "Building ${env.DEPS_IMAGE}..." + echo "Building ${env.DEPS_IMAGE} with cache..." docker build \ -f docker/Dockerfile.deps \ -t ${env.DEPS_IMAGE} \ --build-arg BUILDKIT_INLINE_CACHE=1 \ + --build-arg MAVEN_OPTS="${env.MAVEN_OPTS}" \ + -v ${env.MAVEN_CACHE_VOLUME}:/var/jenkins_home/.m2/repository \ . docker push ${env.DEPS_IMAGE} echo "✅ Dependencies image built and pushed" """ } + + recordStageMetrics('Build Dependencies Image', stageStartTime) } } } @@ -176,20 +215,19 @@ pipeline { } steps { script { + def stageStartTime = System.currentTimeMillis() + def servicesToBuild = env.SERVICES_TO_BUILD.split(',') - echo "🔨 Building ${servicesToBuild.size()} services (parallelism: ${MAX_PARALLEL_BUILDS})" + echo "🔨 Building ${servicesToBuild.size()} services (parallelism: ${env.MAX_PARALLEL_BUILDS})" - // 分批并行构建 + // 【优化2】动态并行构建 def buildTasks = [:] - def batchSize = env.MAX_PARALLEL_BUILDS.toInteger() - servicesToBuild.each { service -> buildTasks[service] = { buildServiceWithRetry(service) } } - // 限制并发数 parallel buildTasks echo "✅ All services built successfully" @@ -199,6 +237,8 @@ pipeline { echo "📊 Built images:" ${env.SERVICES_TO_BUILD.split(',').collect { "docker images ${env.REGISTRY}/${it} --format ' {{.Repository}}:{{.Tag}} - {{.Size}}'" }.join('\n ')} """ + + recordStageMetrics('Build Services', stageStartTime) } } } @@ -212,9 +252,9 @@ pipeline { } steps { script { + def stageStartTime = System.currentTimeMillis() + def servicesToDeploy = env.SERVICES_TO_BUILD.split(',') - - // 按依赖顺序排序 def sortedServices = sortServicesByDependency(servicesToDeploy) echo "🚀 Deploying ${sortedServices.size()} services in dependency order" @@ -222,12 +262,30 @@ pipeline { echo " ${index + 1}. ${service}" } - // 串行部署(保证依赖关系) - sortedServices.each { service -> - deployServiceWithTimeout(service) + // 【优化7】部署前备份当前镜像标签 + if (env.ROLLBACK_ENABLED == 'true') { + backupCurrentDeployment(sortedServices) } - echo "🚀 All services deployed successfully!" + try { + // 串行部署(保证依赖关系) + sortedServices.each { service -> + deployServiceWithTimeout(service) + } + + echo "🚀 All services deployed successfully!" + + } catch (Exception e) { + // 【优化7】部署失败时自动回滚 + if (env.ROLLBACK_ENABLED == 'true') { + echo "❌ Deployment failed: ${e.message}" + echo "🔄 Initiating automatic rollback..." + rollbackDeployment(sortedServices) + } + throw e + } + + recordStageMetrics('Deploy', stageStartTime) } } } @@ -241,6 +299,8 @@ pipeline { } steps { script { + def stageStartTime = System.currentTimeMillis() + echo "🏥 Running final health check for all services..." def servicesToCheck = env.SERVICES_TO_BUILD.split(',') @@ -252,10 +312,18 @@ pipeline { } } - // 并行健康检查 - parallel healthCheckTasks - - echo "✅ All services are healthy!" + try { + parallel healthCheckTasks + echo "✅ All services are healthy!" + } catch (Exception e) { + // 【优化7】健康检查失败时回滚 + if (env.ROLLBACK_ENABLED == 'true') { + echo "❌ Health check failed: ${e.message}" + echo "🔄 Initiating automatic rollback..." + rollbackDeployment(servicesToCheck) + } + throw e + } // 显示最终状态 sh """ @@ -265,6 +333,8 @@ pipeline { "docker inspect --format='${it}: {{.State.Status}} ({{.State.Health.Status}})' ${container} 2>/dev/null || echo '${it}: not found'" }.join('\n ')} """ + + recordStageMetrics('Final Health Check', stageStartTime) } } } @@ -273,25 +343,33 @@ pipeline { post { success { script { + // 【优化6】计算总耗时并生成性能报告 + def totalDuration = System.currentTimeMillis() - env.PIPELINE_START_TIME.toLong() + generatePerformanceReport(totalDuration) + echo """ ========================================== ✅ BUILD SUCCESS ========================================== 📦 Services: ${env.SERVICES_TO_BUILD} 🏷️ Tag: ${env.IMAGE_TAG} - ⏱️ Duration: ${currentBuild.durationString} + ⏱️ Duration: ${formatDuration(totalDuration)} ========================================== """ } } failure { script { + def totalDuration = System.currentTimeMillis() - env.PIPELINE_START_TIME.toLong() + echo """ ========================================== ❌ BUILD FAILED ========================================== 📦 Services: ${env.SERVICES_TO_BUILD ?: 'None'} 🏷️ Tag: ${env.IMAGE_TAG ?: 'Unknown'} + 🔖 Rollback Tag: ${env.PREVIOUS_IMAGE_TAG ?: 'N/A'} + ⏱️ Duration: ${formatDuration(totalDuration)} ⚠️ Please check the logs above ========================================== """ @@ -313,8 +391,12 @@ pipeline { script { echo "🧹 Cleaning up..." - // 清理悬空的镜像 - sh "docker image prune -f || true" + // 清理悬空的镜像(但保留带标签的镜像用于回滚) + if (env.KEEP_PREVIOUS_IMAGES == 'true') { + sh "docker image prune -f --filter 'dangling=true' || true" + } else { + sh "docker image prune -f || true" + } // 清理超过30天的构建日志 sh """ @@ -324,6 +406,9 @@ pipeline { echo "📊 Final System Status:" sh 'df -h | grep -E "/$|/var" || true' sh 'docker system df || true' + + // 【优化6】保存性能指标到文件 + archivePerformanceMetrics() } } } @@ -365,12 +450,10 @@ def checkIfDepsChanged(List changedFiles) { // 检测需要构建的服务 @NonCPS def detectServicesToBuild(List changedFiles) { - // 如果是第一次构建或强制全量构建 if (changedFiles.contains('all')) { return env.CORE_SERVICES } - // 检查是否触发了全量构建 def triggerAllFiles = ['pom.xml', 'viewsh-framework', 'viewsh-dependencies', 'Jenkinsfile', 'docker/'] if (triggerAllFiles.any { triggerFile -> changedFiles.any { changedFile -> @@ -380,7 +463,6 @@ def detectServicesToBuild(List changedFiles) { return env.CORE_SERVICES } - // 检测变更的模块 def changedServices = [] def allServices = env.CORE_SERVICES.split(',') @@ -406,6 +488,243 @@ def depsImageExists() { return result == 0 } +// 【优化2】动态检测系统资源并设置并行度 +def detectSystemResources() { + try { + // 获取 CPU 核心数 + def cpuCores = sh( + script: 'nproc 2>/dev/null || echo "2"', + returnStdout: true + ).trim() as int + + // 获取可用内存(GB) + def availableMemory = sh( + script: 'free -g | grep Mem | awk \'{print $7}\'', + returnStdout: true + ).trim() as int + + // 动态计算并行度 + // 规则:每个构建任务需要至少 2GB 内存和 1 个 CPU 核心 + def maxParallelByMemory = Math.max(1, (availableMemory / 2) as int) + def maxParallelByCpu = Math.max(1, cpuCores - 1) // 保留一个核心给系统 + + env.MAX_PARALLEL_BUILDS = Math.min(maxParallelByMemory, maxParallelByCpu).toString() + + echo """ + ======================================== + 📊 System Resources Detected: + ======================================== + CPU Cores: ${cpuCores} + Available Memory: ${availableMemory} GB + Max Parallel Builds: ${env.MAX_PARALLEL_BUILDS} + ======================================== + """ + } catch (Exception e) { + echo "⚠️ Failed to detect system resources: ${e.message}" + echo "Using default parallelism: 2" + env.MAX_PARALLEL_BUILDS = '2' + } +} + +// 【优化6】记录阶段性能指标 +def recordStageMetrics(String stageName, long startTime) { + def duration = System.currentTimeMillis() - startTime + def durationStr = formatDuration(duration) + + if (!env.STAGE_METRICS) { + env.STAGE_METRICS = "" + } + + env.STAGE_METRICS += "${stageName}:${duration}|" + echo "⏱️ Stage '${stageName}' completed in ${durationStr}" +} + +// 【优化6】格式化时长 +@NonCPS +def formatDuration(long milliseconds) { + def seconds = (milliseconds / 1000) as int + def minutes = (seconds / 60) as int + def hours = (minutes / 60) as int + + if (hours > 0) { + return "${hours}h ${minutes % 60}m ${seconds % 60}s" + } else if (minutes > 0) { + return "${minutes}m ${seconds % 60}s" + } else { + return "${seconds}s" + } +} + +// 【优化6】生成性能报告 +def generatePerformanceReport(long totalDuration) { + echo """ + ========================================== + 📊 PERFORMANCE REPORT + ========================================== + Total Duration: ${formatDuration(totalDuration)} + """ + + if (env.STAGE_METRICS) { + echo "Stage Breakdown:" + env.STAGE_METRICS.split('\\|').each { metric -> + if (metric) { + def parts = metric.split(':') + if (parts.size() == 2) { + def stageName = parts[0] + def duration = parts[1] as long + def percentage = (duration * 100 / totalDuration) as int + echo " - ${stageName.padRight(25)}: ${formatDuration(duration).padRight(10)} (${percentage}%)" + } + } + } + } + + echo "==========================================" +} + +// 【优化6】归档性能指标 +def archivePerformanceMetrics() { + try { + def metricsFile = "${env.WORKSPACE}/build-metrics-${env.BUILD_NUMBER}.json" + def metricsData = [ + buildNumber: env.BUILD_NUMBER, + timestamp: new Date().format('yyyy-MM-dd HH:mm:ss'), + branch: env.BRANCH_NAME, + imageTag: env.IMAGE_TAG, + servicesToBuild: env.SERVICES_TO_BUILD, + totalDuration: System.currentTimeMillis() - env.PIPELINE_START_TIME.toLong(), + stages: [:] + ] + + if (env.STAGE_METRICS) { + env.STAGE_METRICS.split('\\|').each { metric -> + if (metric) { + def parts = metric.split(':') + if (parts.size() == 2) { + metricsData.stages[parts[0]] = parts[1] as long + } + } + } + } + + writeJSON file: metricsFile, json: metricsData + archiveArtifacts artifacts: "build-metrics-${env.BUILD_NUMBER}.json", allowEmptyArchive: true + + echo "✅ Performance metrics archived: ${metricsFile}" + } catch (Exception e) { + echo "⚠️ Failed to archive performance metrics: ${e.message}" + } +} + +// 【优化7】获取上一次成功部署的镜像标签 +def getPreviousImageTag() { + try { + // 从部署主机获取当前运行的镜像标签 + def sshOpts = "-o StrictHostKeyChecking=no -o ConnectTimeout=10 -i ${env.SSH_KEY}" + def previousTag = sh( + script: """ + ssh ${sshOpts} root@${env.DEPLOY_HOST} ' + cd ${env.DEPLOY_PATH} + docker compose -f docker-compose.core.yml images --format json | \ + jq -r ".[0].Tag" | head -1 + ' 2>/dev/null || echo "latest" + """, + returnStdout: true + ).trim() + + return previousTag ?: 'latest' + } catch (Exception e) { + echo "⚠️ Failed to get previous image tag: ${e.message}" + return 'latest' + } +} + +// 【优化7】备份当前部署 +def backupCurrentDeployment(def services) { + echo "💾 Backing up current deployment state..." + + def sshOpts = "-o StrictHostKeyChecking=no -o ConnectTimeout=10 -i ${env.SSH_KEY}" + + try { + sh """ + ssh ${sshOpts} root@${env.DEPLOY_HOST} ' + cd ${env.DEPLOY_PATH} + + # 保存当前 docker-compose 配置 + cp docker-compose.core.yml docker-compose.core.yml.backup-${env.BUILD_NUMBER} + + # 记录当前运行的镜像 + docker compose -f docker-compose.core.yml images > deployment-state-${env.BUILD_NUMBER}.txt + + echo "✅ Backup completed: deployment-state-${env.BUILD_NUMBER}.txt" + ' + """ + + echo "✅ Current deployment backed up successfully" + } catch (Exception e) { + echo "⚠️ Failed to backup current deployment: ${e.message}" + } +} + +// 【优化7】回滚部署 +def rollbackDeployment(def services) { + echo """ + ========================================== + 🔄 INITIATING ROLLBACK + ========================================== + Rolling back to: ${env.PREVIOUS_IMAGE_TAG} + Services: ${services.join(', ')} + ========================================== + """ + + def sshOpts = "-o StrictHostKeyChecking=no -o ConnectTimeout=10 -i ${env.SSH_KEY}" + + try { + services.each { service -> + echo "🔄 Rolling back ${service}..." + + sh """ + ssh ${sshOpts} root@${env.DEPLOY_HOST} ' + cd ${env.DEPLOY_PATH} + + # 设置回滚镜像标签 + export IMAGE_TAG=${env.PREVIOUS_IMAGE_TAG} + + # 拉取旧版本镜像 + docker compose -f docker-compose.core.yml pull ${service} + + # 重启服务 + docker compose -f docker-compose.core.yml up -d ${service} + + echo "✅ ${service} rolled back to ${env.PREVIOUS_IMAGE_TAG}" + ' + """ + + // 等待服务启动 + sleep 5 + } + + echo """ + ========================================== + ✅ ROLLBACK COMPLETED + ========================================== + All services have been rolled back to: ${env.PREVIOUS_IMAGE_TAG} + ========================================== + """ + + } catch (Exception e) { + echo """ + ========================================== + ❌ ROLLBACK FAILED + ========================================== + Error: ${e.message} + Manual intervention required! + ========================================== + """ + throw e + } +} + // 构建服务(带重试) def buildServiceWithRetry(String service) { retry(2) { @@ -418,6 +737,7 @@ def buildServiceWithRetry(String service) { // 构建单个服务 def buildService(String service) { def modulePath = getModulePathForService(service) + def buildStartTime = System.currentTimeMillis() echo "" echo "==========================================" @@ -429,17 +749,20 @@ def buildService(String service) { echo "==========================================" try { + // 【优化1】使用 Maven 缓存卷 sh """ set -e set -x - # 构建镜像 + # 构建镜像(使用 Maven 缓存) docker build \\ -f docker/Dockerfile.service \\ --build-arg DEPS_IMAGE=${env.DEPS_IMAGE} \\ --build-arg MODULE_NAME=${modulePath} \\ --build-arg JAR_NAME=${service} \\ --build-arg SKIP_TESTS=true \\ + --build-arg MAVEN_OPTS="${env.MAVEN_OPTS}" \\ + -v ${env.MAVEN_CACHE_VOLUME}:/var/jenkins_home/.m2/repository \\ -t ${env.REGISTRY}/${service}:${env.IMAGE_TAG} \\ -t ${env.REGISTRY}/${service}:latest \\ . @@ -451,7 +774,8 @@ def buildService(String service) { set +x """ - echo "✅ ${service} built and pushed successfully" + def buildDuration = System.currentTimeMillis() - buildStartTime + echo "✅ ${service} built and pushed successfully in ${formatDuration(buildDuration)}" // 获取镜像大小 def imageSize = sh( @@ -550,10 +874,10 @@ def waitForServiceHealthy(String containerName, String serviceName, String sshOp ssh ${sshOpts} root@${env.DEPLOY_HOST} ' set -e - for i in $(seq 1 ${maxAttempts}); do - STATUS=$(docker inspect --format="{{.State.Health.Status}}" ${containerName} 2>/dev/null || echo "starting") + for i in \$(seq 1 ${maxAttempts}); do + STATUS=\$(docker inspect --format="{{.State.Health.Status}}" ${containerName} 2>/dev/null || echo "starting") - case "$STATUS" in + case "\$STATUS" in healthy) echo "✅ ${serviceName} is healthy" exit 0 @@ -565,7 +889,7 @@ def waitForServiceHealthy(String containerName, String serviceName, String sshOp exit 1 ;; starting) - ELAPSED=$((i * ${env.HEALTH_CHECK_INTERVAL})) + ELAPSED=\$((i * ${env.HEALTH_CHECK_INTERVAL})) echo "⏳ ${serviceName} is starting... (\${ELAPSED}s/${env.HEALTH_CHECK_TIMEOUT}s)" ;; *) @@ -600,9 +924,9 @@ def checkServiceHealthWithRetry(String service) { def checkServiceHealth(String containerName, String serviceName, String sshOpts) { sh """ ssh ${sshOpts} root@${env.DEPLOY_HOST} ' - STATUS=$(docker inspect --format="{{.State.Health.Status}}" ${containerName} 2>/dev/null || echo "not_found") + STATUS=\$(docker inspect --format="{{.State.Health.Status}}" ${containerName} 2>/dev/null || echo "not_found") - case "$STATUS" in + case "\$STATUS" in healthy) echo "✅ ${serviceName} is healthy" ;;