#!/usr/bin/env bash
# gpu-resume — start a paused vast.ai instance, wait for tunnel, re-wire OpenCode.

# Strict mode: stop on a failing command, on expansion of an unset variable,
# and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# State record that is read below and rewritten once the resume has finished.
STATE_FILE='/var/lib/specker/vast-current.json'
# OpenCode config overwritten by this script, plus the one-time copy kept of it.
OPENCODE_CONFIG='/home/opencode/.config/opencode/opencode.json'
OPENCODE_BACKUP="${OPENCODE_CONFIG}.host-a-backup"
# Local port of the reverse tunnel; a non-empty TUNNEL_PORT from the caller wins.
: "${TUNNEL_PORT:=11440}"
# Look in /root/.local/bin first (presumably where the vastai CLI is installed
# — TODO confirm).
PATH="/root/.local/bin:${PATH}"

# Load the saved instance record and refuse to continue unless it describes a
# paused instance with the fields the rest of the script needs.
[[ -f "$STATE_FILE" ]] || { echo "[!] no state file — nothing to resume" >&2; exit 1; }

# `// empty` turns a missing or null field into an empty string. Plain
# `jq -r '.field'` would print the literal text "null", which would then be
# used below as if it were a real instance id / model name.
INSTANCE_ID=$(jq -r '.instance_id // empty' "$STATE_FILE")
MODEL=$(jq -r '.model // empty' "$STATE_FILE")
# A state file without a "state" field is treated as "running".
CURRENT_STATE=$(jq -r '.state // "running"' "$STATE_FILE")

[[ "$CURRENT_STATE" == "paused" ]] || { echo "[!] state is '$CURRENT_STATE', not 'paused' — nothing to resume" >&2; exit 1; }
[[ -n "$INSTANCE_ID" ]] || { echo "[!] $STATE_FILE has no instance_id — cannot resume" >&2; exit 1; }
[[ -n "$MODEL" ]] || { echo "[!] $STATE_FILE has no model — cannot resume" >&2; exit 1; }

echo "[*] starting vast.ai instance $INSTANCE_ID ..."
# `yes` answers any confirmation prompt. It is attached via process
# substitution instead of a pipe: as a pipeline stage it is always terminated
# by SIGPIPE once vastai exits, and under `set -o pipefail` that non-zero
# status makes the whole pipeline fail, so `set -e` would abort the script
# even though the start itself succeeded.
# The output is captured in full and trimmed afterwards, so vastai is never
# cut off mid-write and its own exit status is the one that gets checked.
START_OUTPUT=$(vastai start instance "$INSTANCE_ID" 2>&1 < <(yes)) || {
  printf '%s\n' "$START_OUTPUT" >&2
  echo "[!] vastai start instance $INSTANCE_ID failed" >&2
  exit 1
}
# Show at most the first five lines; sed reads all of its input, unlike head.
[[ -z "$START_OUTPUT" ]] || sed -n '1,5p' <<<"$START_OUTPUT"

echo "[*] waiting up to 10 min for reverse tunnel on 127.0.0.1:${TUNNEL_PORT} ..."
# (onstart re-runs on start; apt/ollama already installed → fast; just ollama serve + pull-check + autossh)
# Poll every 10 s, first check immediately and last check at the 600 s mark.
TUNNEL_UP=0
for (( i = 0; i <= 60; i++ )); do
  # Sleep before every check except the first, so no time is wasted after the
  # final one and i * 10 is the real elapsed time.
  (( i == 0 )) || sleep 10
  # Capture the listener table first rather than piping into `grep -q`: with
  # pipefail, grep exiting on its first match can make the pipeline report
  # failure. A failing ss simply counts as "not up yet".
  listeners=$(ss -tln 2>/dev/null) || listeners=""
  # -F: match the address literally (dots are not wildcards); the trailing
  # space keeps e.g. port 11440 from matching 114401.
  if grep -qF -- "127.0.0.1:${TUNNEL_PORT} " <<<"$listeners"; then
    TUNNEL_UP=1
    echo "[+] tunnel up after ~$(( i * 10 ))s"
    break
  fi
done
[[ $TUNNEL_UP -eq 1 ]] || { echo "[!] tunnel did not come up in 10 min. Check: vastai logs $INSTANCE_ID --tail 100" >&2; exit 1; }

# verify ollama responsive: probe /api/tags through the tunnel, up to 12 times
# with 5 s between attempts.
OLLAMA_UP=0
for (( attempt = 1; attempt <= 12; attempt++ )); do
  if curl -sS --max-time 5 "http://127.0.0.1:${TUNNEL_PORT}/api/tags" >/dev/null 2>&1; then
    OLLAMA_UP=1
    break
  fi
  sleep 5
done
# The check stays best-effort (the script continues either way), but a backend
# that never answered is reported instead of being passed over silently.
[[ $OLLAMA_UP -eq 1 ]] || echo "[!] warning: ollama did not answer on 127.0.0.1:${TUNNEL_PORT} after 12 attempts — continuing anyway" >&2

# rewrite opencode.json (same as gpu-up)
# Keep a one-time copy of the pre-existing config; never overwrite that copy.
[[ -f "$OPENCODE_BACKUP" ]] || cp -- "$OPENCODE_CONFIG" "$OPENCODE_BACKUP"

# Build the document with jq instead of interpolating into a heredoc, so that
# any quote or backslash in MODEL / TUNNEL_PORT is JSON-escaped and the result
# is always valid JSON. It is rendered into a variable first: if jq fails,
# `set -e` stops the script before the live config has been truncated.
OPENCODE_JSON=$(jq -n --arg model "$MODEL" --arg port "$TUNNEL_PORT" '
{
  "$schema": "https://opencode.ai/config.json",
  model: "vast/\($model)",
  provider: {
    vast: {
      npm: "@ai-sdk/openai-compatible",
      name: "vast.ai GPU (Host B :\($port) ← reverse-tunnel)",
      options: {
        baseURL: "http://127.0.0.1:\($port)/v1",
        apiKey: "ollama",
        timeout: 600000,
        chunkTimeout: 600000
      },
      models: {
        ($model): { name: "\($model) (vast.ai GPU)", tool_call: true, temperature: true, reasoning: true }
      }
    }
  }
}')
printf '%s\n' "$OPENCODE_JSON" > "$OPENCODE_CONFIG"

chown opencode:opencode "$OPENCODE_CONFIG"
chmod 0640 "$OPENCODE_CONFIG"
# Restart the service after the new config is in place.
systemctl restart opencode-server.service

# update state: drop .paused_at, set state to "running" and stamp resumed_at.
# The temp file is created next to the state file so the final mv is a rename
# within one filesystem rather than a copy from /tmp.
TMP=$(mktemp "${STATE_FILE}.XXXXXX")
# Checked explicitly: in `jq … > tmp && mv …` a jq failure is exempt from
# `set -e` (it is not the last command of the && list), so the script would
# go on to report success with the state file still saying "paused".
if ! jq 'del(.paused_at) | . + {state: "running", resumed_at: now | todateiso8601}' "$STATE_FILE" > "$TMP"; then
  rm -f -- "$TMP"
  echo "[!] failed to update $STATE_FILE — it still records the instance as paused" >&2
  exit 1
fi
mv -f -- "$TMP" "$STATE_FILE"

# Closing summary for the operator: which instance and model are now active.
BANNER_RULE="==================================================================="
printf '%s\n' \
  "$BANNER_RULE" \
  "  gpu-resume complete" \
  "    instance:   $INSTANCE_ID" \
  "    model:      $MODEL" \
  "    opencode:   back on vast.ai" \
  "$BANNER_RULE"
