#!/usr/bin/env bash
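# Mirror large-model weights into /srv/mirror/models/<dir>.
# Approach: take the file listing from the ModelScope API (small and fast), fetch the bytes from
#           hf-mirror.com's resolve endpoint (measured at about 25 MB/s), and fall back to
#           ModelScope for files that are missing on HF. Every file is size-checked and downloads
#           are resumable; after a model is fetched, .files and SHA256SUMS are generated.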
set -uo pipefail
OUT=/srv/mirror/models
PY=/home/ecs-user/dlvenv/bin/python
HFBASE=https://hf-mirror.com
MSAPI=https://www.modelscope.cn/api/v1/models
PAR=3
mkdir -p "$OUT"

getfile(){ # repo dir path size
  local repo="$1" dir="$2" path="$3" size="$4" out
  out="$dir/$path"; mkdir -p "$(dirname "$out")"
  if [ -f "$out" ] && [ "$(stat -c%s "$out" 2>/dev/null)" = "$size" ]; then echo "[skip] $path"; return 0; fi
  local G="--connect-timeout 20 --speed-limit 51200 --speed-time 30 --retry 30 --retry-delay 3 -C -"
  if curl -fL $G -s -o "$out" "$HFBASE/$repo/resolve/main/$path"; then :; else
    echo "[hf-miss->ms] $path"
    curl -fL $G -s -o "$out" "$MSAPI/$repo/repo?Revision=master&FilePath=$path" || { echo "[FAIL] $path"; return 1; }
  fi
  local got; got=$(stat -c%s "$out" 2>/dev/null || echo 0)
  if [ "$size" != "0" ] && [ "$got" != "$size" ]; then echo "[SIZE-MISMATCH] $path got=$got want=$size"; return 1; fi
  echo "[ok] $path ($got)"
}

getmodel(){ # repo_id  local_dirname
  local repo="$1" name="$2" dir="$OUT/$2" n=0
  echo "================ [model] $repo -> $dir  $(date +%H:%M:%S) ================"
  mkdir -p "$dir"
  curl -s --max-time 30 "$MSAPI/$repo/repo/files?Revision=master&Recursive=true" -o "/tmp/list_$name.json"
  if ! $PY -c "import json;d=json.load(open('/tmp/list_$name.json'));[print(f['Path']+'\t'+str(f['Size'])) for f in d['Data']['Files'] if f['Type']=='blob']" > "/tmp/list_$name.tsv" 2>/dev/null; then
    echo "  [FAIL] cannot list $repo"; return 1; fi
  echo "  files=$(wc -l < /tmp/list_$name.tsv)  total=$($PY -c "print(round(sum(int(l.split(chr(9))[1]) for l in open('/tmp/list_$name.tsv'))/1e9,1),'GB')")"
  while IFS=$'\t' read -r path size; do
    [ -z "$path" ] && continue
    getfile "$repo" "$dir" "$path" "$size" &
    n=$((n+1)); [ $((n % PAR)) -eq 0 ] && wait
  done < "/tmp/list_$name.tsv"
  wait
  ( cd "$dir" && find . -type f ! -name .files ! -name SHA256SUMS ! -path './.cache/*' -printf '%P\n' | sort > .files )
  ( cd "$dir" && : > SHA256SUMS && while IFS= read -r f; do sha256sum "$f" >> SHA256SUMS; done < .files )
  echo "  [done] $repo  files=$(wc -l < "$dir/.files")  size=$(du -sh --exclude=.cache "$dir" | cut -f1)  $(date +%H:%M:%S)"
}

getmodel Qwen/Qwen3-8B             qwen3-8b-instruct
getmodel Qwen/Qwen3Guard-Gen-8B    qwen3guard
getmodel Qwen/Qwen3-VL-8B-Instruct qwen3-vl-8b-instruct
getmodel Qwen/Qwen3-14B-AWQ        qwen3-14b-instruct-awq
getmodel Qwen/Qwen3-ASR-1.7B       qwen3-asr-1.7b

echo "=== all models done  $(date +%H:%M:%S) ==="
du -sh --exclude=.cache "$OUT"/qwen3* 2>/dev/null
