chore(ci): 部署加磁盘预检 + 部署后自动清理 Prod 本地镜像与 Registry
Some checks failed
Java CI with Maven / build (11) (push) Has been cancelled
Java CI with Maven / build (17) (push) Has been cancelled
Java CI with Maven / build (8) (push) Has been cancelled

- 新增 Pre-deploy Check:SSH 到 Prod/Registry 读根分区空闲,<5% 直接 fail(规避磁盘满时 sshd 连带崩溃导致的 scp 失败),5~10% 仅告警
- 新增 Cleanup Old Images stage:部署成功后每服务保留最近 3 个镜像
  * Prod 侧调用 scripts/cleanup.sh
  * Registry 侧调用 scripts/registry-cleanup.py + 触发容器内 garbage-collect
- scripts/cleanup.sh:去掉 volume prune 的交互 read(CI 下会卡住),支持 --keep/--prune-volumes/--registry 参数
- scripts/registry-cleanup.py:按 tag 内数字降序保留最新 N 个;覆盖 Docker v2/OCI 多种 manifest Accept;多 tag 指向同一 digest 去重;失败不影响发布

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
lzh
2026-04-24 11:20:37 +08:00
parent acd7a35e1d
commit 8c5c5ef44a
3 changed files with 467 additions and 35 deletions

View File

@@ -2,80 +2,122 @@
# ============================================
# AIOT Platform - cleanup script
# Removes old images, stopped containers and build cache on the
# deployment host to free up disk space.
# ============================================
# Abort as soon as a command fails without being handled explicitly.
set -e
# ---- Default parameters (each can be overridden by a command-line option) ----
# Number of most recent local images kept per service (--keep=N).
KEEP=3
# Unused Docker volumes are pruned only when --prune-volumes is given.
PRUNE_VOLUMES=false
# Repository prefix of the local images to clean up (--registry=HOST).
REGISTRY_HOST="localhost:5000"
#######################################
# Print the command-line help text.
# Globals:   none
# Arguments: none ($0 is used as the program name)
# Outputs:   help text on stdout
#######################################
usage() {
  # The heredoc body stays at column 0 so the help text is emitted exactly
  # as written; the delimiter is unquoted so that $0 expands.
  cat <<EOF
用法: $0 [options]
Options:
--keep=N 每个服务保留最近 N 个本地镜像(默认 3)
--prune-volumes 额外清理未使用的 Docker volume(默认不清,避免误删数据)
--registry=HOST 本地 docker images 仓库前缀(默认 localhost:5000)
-h, --help 帮助
示例:
$0 --keep=3
$0 --keep=2 --prune-volumes
EOF
}
# Parse the command line. Options use the "--name=value" form; each one
# overrides the matching global default (KEEP, PRUNE_VOLUMES, REGISTRY_HOST).
for arg in "$@"; do
  case "$arg" in
    --keep=*) KEEP="${arg#*=}" ;;
    --prune-volumes) PRUNE_VOLUMES=true ;;
    --registry=*) REGISTRY_HOST="${arg#*=}" ;;
    -h | --help)
      usage
      exit 0
      ;;
    *)
      # Diagnostics go to stderr so they never mix with regular output.
      echo "未知参数: $arg" >&2
      usage >&2
      exit 1
      ;;
  esac
done

# KEEP is later used as a row threshold when choosing images to delete, so
# anything other than a positive integer is rejected up front.
if ! [[ "$KEEP" =~ ^[0-9]+$ ]] || [ "$KEEP" -lt 1 ]; then
  echo "❌ --keep 必须为正整数" >&2
  exit 1
fi
# Colour output. The values keep a literal "\033"; the helpers below expand
# it through printf's %b conversion.
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'

#######################################
# Print an informational message prefixed with a green "[INFO]" tag.
# Arguments: $1 - message text
# Outputs:   one line on stdout
# The message is printed through %s, so backslashes in it appear verbatim
# instead of being interpreted as escape sequences.
#######################################
log_info() { printf '%b[INFO]%b %s\n' "$GREEN" "$NC" "$1"; }

#######################################
# Print a warning message prefixed with a yellow "[WARN]" tag.
# Arguments: $1 - message text
# Outputs:   one line on stdout
#######################################
log_warn() { printf '%b[WARN]%b %s\n' "$YELLOW" "$NC" "$1"; }
# Start banner; it records the effective keep count in the log.
log_info "========================================="
log_info "AIOT Platform 清理开始 (keep=${KEEP})"
log_info "========================================="

# Show disk usage before the cleanup.
log_info "当前磁盘使用情况:"
# Report only: grep exits non-zero when no line matches, and that must not
# abort the script under `set -e`.
df -h | grep -E "Filesystem|/$" || true
echo ""
log_info "当前 Docker 磁盘使用:"
docker system df
echo ""

# Remove stopped containers.
log_info "清理停止的容器..."
docker container prune -f

# Remove dangling images.
log_info "清理悬空镜像..."
docker image prune -f
# Remove old image versions: for every service keep the KEEP most recent
# local images and delete the rest.
log_info "清理旧版本镜像(每个服务保留最近 ${KEEP} 个)..."
SERVICES=(
  "viewsh-gateway"
  "viewsh-module-system-server"
  "viewsh-module-infra-server"
  "viewsh-module-iot-server"
  "viewsh-module-iot-gateway"
  "viewsh-module-ops-server"
)

#######################################
# Choose the image IDs that may be deleted.
# Arguments: $1 - number of most recent distinct images to keep (>= 1)
# Inputs:    stdin - "CreatedAt|ID|Tag" rows, newest first
# Outputs:   stdout - image IDs to delete, one per line, each at most once
# Returns:   1 (printing nothing) when $1 is not a positive integer, so a
#            bad threshold can never cause a deletion
# An ID is ranked once however many tags point at it, and an ID that also
# carries the "latest" tag is never printed. Deletion happens by ID, so
# without these two rules an image inside the keep window or tagged
# "latest" could be removed through one of its older tags.
#######################################
select_stale_image_ids() {
  local keep=$1
  if ! [[ "$keep" =~ ^[0-9]+$ ]] || [ "$keep" -lt 1 ]; then
    return 1
  fi
  awk -F'|' -v k="$keep" '
    # Rows tagged "latest" do not count towards k; they only protect the ID.
    $3 == "latest" { protected[$2] = 1; next }
    # Rank every distinct ID once, in input (newest first) order.
    !($2 in ranked) { ranked[$2] = 1; order[++n] = $2 }
    END {
      for (i = k + 1; i <= n; i++)
        if (!(order[i] in protected)) print order[i]
    }
  '
}

for service in "${SERVICES[@]}"; do
  log_info "处理服务: ${service}"
  # CreatedAt is the leading field, so a reverse byte-wise sort puts the
  # newest rows first.
  # NOTE(review): assumes every image reports CreatedAt in the same
  # timezone, so that text order equals time order - confirm.
  mapfile -t ids_to_delete < <(
    docker images "${REGISTRY_HOST}/${service}" \
      --format '{{.CreatedAt}}|{{.ID}}|{{.Tag}}' |
      LC_ALL=C sort -r |
      select_stale_image_ids "$KEEP"
  )
  if [ "${#ids_to_delete[@]}" -eq 0 ]; then
    log_info " └─ 无可清理镜像"
    continue
  fi
  log_info " └─ 删除 ${#ids_to_delete[@]} 个旧镜像"
  # Deliberately best-effort: a failed removal is ignored so that it cannot
  # fail the surrounding deployment.
  docker rmi -f "${ids_to_delete[@]}" 2>/dev/null || true
done
# Prune unused volumes only on explicit request (--prune-volumes), because
# volumes may hold data. There is deliberately no interactive prompt: the
# script runs from CI, where a `read` would wait for input that never comes.
if [ "$PRUNE_VOLUMES" = true ]; then
  log_warn "清理未使用的 volume(--prune-volumes 已启用)"
  docker volume prune -f
else
  log_info "跳过 volume 清理(如需清理请加 --prune-volumes)"
fi
# Prune the build cache, restricted by the "until=24h" filter. Best-effort:
# a failure here must not abort the script under `set -e`.
log_info "清理 Docker 构建缓存(24h 前)..."
docker builder prune -f --filter "until=24h" || true

# Show disk usage after the cleanup.
echo ""
log_info "========================================="
log_info "清理完成"
log_info "========================================="
echo ""
log_info "清理后磁盘使用情况:"
# Report only: a grep that matches nothing must not fail the script.
df -h | grep -E "Filesystem|/$" || true
echo ""
log_info "清理后 Docker 磁盘使用:"
docker system df