tcga-downloader/tcga_downloader/download.py
yunpeng.zhang a01a59b371
Some checks failed
CI / Lint (push) Failing after 9m32s
CI / Test (3.11) (push) Successful in 6m41s
CI / Test (3.12) (push) Successful in 4m21s
feat: add interactive cli
2026-02-09 13:13:39 +08:00

64 lines
1.9 KiB
Python

from __future__ import annotations
import shutil
import subprocess
from pathlib import Path
from tcga_downloader.logger import get_logger
# Module-level logger shared by the functions in this file; obtained from the
# project's ``get_logger`` helper under the name "download".
logger = get_logger("download")
def build_gdc_command(
    manifest_path: Path, out_dir: Path, processes: int, retries: int, token: str | None = None
) -> list[str]:
    """Assemble the argv list for a ``gdc-client download`` invocation.

    Args:
        manifest_path: Manifest file, passed after ``-m``.
        out_dir: Output directory, passed after ``-d``.
        processes: Value passed after ``--n-processes``.
        retries: Value passed after ``--retry-amount``.
        token: Optional value passed after ``--token``. The flag is left out
            entirely when this is ``None`` or empty.

    Returns:
        The command as a list of strings, suitable for ``subprocess.run``
        without a shell.
    """
    # NOTE(review): gdc-client's help lists ``-t/--token-file`` (a path to a
    # token file) -- confirm whether ``token`` is meant to be a file path.
    flag_values = (
        ("-m", manifest_path),
        ("-d", out_dir),
        ("--n-processes", processes),
        ("--retry-amount", retries),
    )

    argv = ["gdc-client", "download"]
    for flag, value in flag_values:
        argv += [flag, str(value)]

    if not token:
        return argv
    return argv + ["--token", token]
def run_gdc_download(
    manifest_path: Path,
    out_dir: Path,
    processes: int = 4,
    retries: int = 3,
    token: str | None = None,
) -> None:
    """Download the files listed in a manifest by running ``gdc-client``.

    Args:
        manifest_path: Manifest file handed to ``gdc-client`` via ``-m``.
        out_dir: Directory handed to ``gdc-client`` via ``-d``.
        processes: Value for ``--n-processes`` (default 4).
        retries: Value for ``--retry-amount`` (default 3).
        token: Optional credential forwarded after ``--token``; it is masked
            in the logged command line so it never reaches the logs.

    Raises:
        FileNotFoundError: If ``manifest_path`` does not exist.
        RuntimeError: If ``gdc-client`` is not found on ``PATH``, or if it
            exits with a non-zero status (chained from the underlying
            ``subprocess.CalledProcessError``).
    """
    if not manifest_path.exists():
        raise FileNotFoundError(f"Manifest file not found: {manifest_path}")

    if not shutil.which("gdc-client"):
        raise RuntimeError(
            "gdc-client not found in PATH. Install from https://gdc.cancer.gov/access-data/gdc-data-transfer-tool"
        )

    cmd = build_gdc_command(manifest_path, out_dir, processes, retries, token)

    # The real argv carries the credential; log a copy with it masked so the
    # token is not written to log files or CI output.
    loggable_cmd = ["***" if token and arg == token else arg for arg in cmd]

    logger.info("Starting download: manifest=%s, out_dir=%s", manifest_path, out_dir)
    logger.debug("Running command: %s", " ".join(loggable_cmd))

    # Keep the try body to the one call that can raise CalledProcessError.
    try:
        result = subprocess.run(cmd, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        logger.error("Download failed with exit code %d", e.returncode)
        if e.stdout:
            logger.info("gdc-client output:\n%s", e.stdout)
        if e.stderr:
            logger.error("gdc-client error:\n%s", e.stderr)
        raise RuntimeError(f"gdc-client download failed with exit code {e.returncode}") from e

    if result.stdout:
        logger.debug("gdc-client output: %s", result.stdout)
    logger.info("Download completed successfully")