#!/usr/bin/env bash
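# Verify that each model's weight files match the sizes in the ModelScope file
# listing; any file whose size differs is re-fetched from hf-mirror.
# Only weight files (safetensors/bin/gguf/pth/pt/onnx) are checked strictly;
# metadata files (.gitattributes/README/configuration.json) legitimately differ
# in size across platforms and are ignored.
# Afterwards .files and SHA256SUMS are regenerated for each model directory.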
# Fail on unset variables and on any failing pipeline stage. -e is not enabled,
# so a single failing command does not abort the whole run.
set -uo pipefail

# Root directory; check() works on one sub-directory per model below it.
readonly OUT=/srv/mirror/models
# Python interpreter used to parse the file-listing JSON.
readonly PY=/home/ecs-user/dlvenv/bin/python
# Base URL of the ModelScope models API (source of the expected file sizes).
readonly MSAPI=https://www.modelscope.cn/api/v1/models
# curl options for weight downloads, kept as one whitespace-separated string:
# 20 s connect timeout, abort when slower than 51200 B/s for 30 s,
# up to 30 retries 3 s apart, and resume partially downloaded files (-C -).
readonly G="--connect-timeout 20 --speed-limit 51200 --speed-time 30 --retry 30 --retry-delay 3 -C -"

#######################################
# Verify one model's local files against the ModelScope file listing,
# re-download mismatching weight files from hf-mirror, then rebuild the
# .files and SHA256SUMS manifests of the model directory.
# Globals:
#   OUT   (read) - root directory; the model lives in "$OUT/<name>"
#   PY    (read) - Python interpreter used to parse the listing JSON
#   MSAPI (read) - ModelScope models API base URL
#   G     (read) - whitespace-separated curl options for weight downloads
# Arguments:
#   $1 - local model directory name under $OUT
#   $2 - repository id, used for both the ModelScope and hf-mirror URLs
# Outputs:
#   Progress/result lines on stdout.
# Returns:
#   0 when every weight file matches the listed size; 1 when the listing
#   could not be obtained, the model directory is missing, or at least one
#   weight file still has the wrong size after the re-download.
#######################################
check() { # name repo
  local name="$1" repo="$2" dir="$OUT/$1" bad=0
  local p s got f tmpdir json tsv
  local -a dl_opts=()
  # Prints "<Path>\t<Size>" for every listing entry whose Type is "blob".
  # The JSON path is passed as argv so nothing is interpolated into the code.
  local list_py='import json, sys
with open(sys.argv[1]) as fh:
    listing = json.load(fh)
for entry in listing["Data"]["Files"]:
    if entry["Type"] == "blob":
        print(entry["Path"] + "\t" + str(entry["Size"]))'

  # G is a plain option string; split it once into an array (no globbing).
  read -r -a dl_opts <<< "$G"

  # Private scratch directory: unpredictable name, and a failed listing request
  # can never fall back to a stale JSON left behind by an earlier run.
  tmpdir=$(mktemp -d) || { echo "[verify] $name: cannot create temp dir"; return 1; }
  json="$tmpdir/files.json"
  tsv="$tmpdir/files.tsv"

  if ! {
    curl -s --max-time 30 "$MSAPI/$repo/repo/files?Revision=master&Recursive=true" -o "$json" &&
      "$PY" -c "$list_py" "$json" > "$tsv" 2> /dev/null
  }; then
    echo "[verify] $name: cannot list"
    rm -rf -- "$tmpdir"
    return 1
  fi

  # Read the listing on fd 3 so commands in the loop cannot consume it via stdin.
  while IFS=$'\t' read -r -u 3 p s; do
    [[ -z "$p" ]] && continue
    # A missing file counts as size 0.
    got=$(stat -c%s -- "$dir/$p" 2> /dev/null || echo 0)
    [[ "$got" == "$s" ]] && continue
    case "$p" in
      *.safetensors | *.bin | *.gguf | *.pth | *.pt | *.onnx)
        echo "  [MISMATCH] $name/$p got=$got want=$s -> refetch"
        # Downloads resume (-C - in G). A file larger than expected cannot be
        # repaired by resuming, so drop it and fetch it from scratch.
        if [[ "$s" =~ ^[0-9]+$ ]] && ((got > s)); then
          rm -f -- "$dir/$p"
        fi
        # Best effort: the size re-check below decides success, not curl's status.
        # --create-dirs lets files in not-yet-existing sub-directories be fetched.
        curl -fL "${dl_opts[@]}" -s --create-dirs -o "$dir/$p" \
          "https://hf-mirror.com/$repo/resolve/main/$p" || true
        got=$(stat -c%s -- "$dir/$p" 2> /dev/null || echo 0)
        if [[ "$got" == "$s" ]]; then
          echo "    fixed"
        else
          echo "    STILL BAD ($got)"
          bad=1
        fi
        ;;
      *) ;; # metadata: ignore cross-platform size diff
    esac
  done 3< "$tsv"
  rm -rf -- "$tmpdir"

  if [[ ! -d "$dir" ]]; then
    echo "[verify] $name: missing directory $dir"
    return 1
  fi

  # Rebuild the manifests: .files is the sorted list of regular files (the
  # manifests themselves and ./.cache excluded); SHA256SUMS has one checksum
  # line per listed file, in the same order.
  (
    cd "$dir" || exit 1
    find . -type f ! -name .files ! -name SHA256SUMS ! -path './.cache/*' -printf '%P\n' | sort > .files
    while IFS= read -r f; do
      sha256sum -- "$f"
    done < .files > SHA256SUMS
  )
  echo "[verify] $name done (bad=$bad, files=$(wc -l < "$dir/.files"))"
  return "$bad"
}

# Models to verify, one "<local directory name> <repository id>" pair per entry.
models=(
  "qwen3-8b-instruct Qwen/Qwen3-8B"
  "qwen3guard Qwen/Qwen3Guard-Gen-8B"
  "qwen3-vl-8b-instruct Qwen/Qwen3-VL-8B-Instruct"
  "qwen3-14b-instruct-awq Qwen/Qwen3-14B-AWQ"
  "qwen3-asr-1.7b Qwen/Qwen3-ASR-1.7B"
)
# Run the check for every pair, in the listed order: the text before the
# first space is the directory name, the rest is the repository id.
for spec in "${models[@]}"; do
  check "${spec%% *}" "${spec#* }"
done
echo "=== verify_models done ==="
