64 lines
1.9 KiB
Python
64 lines
1.9 KiB
Python
from __future__ import annotations
|
|
|
|
import shutil
|
|
import subprocess
|
|
from pathlib import Path
|
|
|
|
from tcga_downloader.logger import get_logger
|
|
|
|
logger = get_logger("download")
|
|
|
|
|
|
def build_gdc_command(
    manifest_path: Path, out_dir: Path, processes: int, retries: int, token: str | None = None
) -> list[str]:
    """Assemble the argument vector for a ``gdc-client download`` invocation.

    Args:
        manifest_path: Manifest file, passed after ``-m``.
        out_dir: Destination directory, passed after ``-d``.
        processes: Value passed after ``--n-processes``.
        retries: Value passed after ``--retry-amount``.
        token: Optional value passed after ``--token``; the option is left
            out entirely when this is ``None`` or empty.

    Returns:
        The command as a list of strings, suitable for ``subprocess.run``.
    """
    # NOTE(review): gdc-client documents this option as --token-file taking a
    # path; confirm that the "--token" spelling used below is accepted.
    valued_options = (
        ("-m", manifest_path),
        ("-d", out_dir),
        ("--n-processes", processes),
        ("--retry-amount", retries),
    )

    argv = ["gdc-client", "download"]
    for flag, value in valued_options:
        argv.append(flag)
        argv.append(str(value))

    if token:
        argv += ["--token", token]
    return argv
|
|
|
|
|
|
def run_gdc_download(
    manifest_path: Path,
    out_dir: Path,
    processes: int = 4,
    retries: int = 3,
    token: str | None = None,
) -> None:
    """Run ``gdc-client download`` for a manifest and wait for it to finish.

    Args:
        manifest_path: Manifest file; must exist.
        out_dir: Directory handed to gdc-client as the download destination.
        processes: Value forwarded as ``--n-processes``.
        retries: Value forwarded as ``--retry-amount``.
        token: Optional value forwarded after ``--token``.

    Raises:
        FileNotFoundError: If ``manifest_path`` does not exist.
        RuntimeError: If ``gdc-client`` is not on PATH, or if it exits with a
            non-zero status (chained from the ``CalledProcessError``).
    """
    if not manifest_path.exists():
        raise FileNotFoundError(f"Manifest file not found: {manifest_path}")

    if not shutil.which("gdc-client"):
        raise RuntimeError(
            "gdc-client not found in PATH. Install from https://gdc.cancer.gov/access-data/gdc-data-transfer-tool"
        )

    cmd = build_gdc_command(manifest_path, out_dir, processes, retries, token)
    logger.info("Starting download: manifest=%s, out_dir=%s", manifest_path, out_dir)

    # Never write the credential to the log: mask whatever follows --token in
    # the logged copy only. The command that is executed is left untouched.
    loggable_cmd = [
        "<redacted>" if index > 0 and cmd[index - 1] == "--token" else arg
        for index, arg in enumerate(cmd)
    ]
    logger.debug("Running command: %s", " ".join(loggable_cmd))

    try:
        result = subprocess.run(cmd, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        logger.error("Download failed with exit code %d", e.returncode)
        # Surface whatever the tool printed so the failure can be diagnosed.
        if e.stdout:
            logger.info("gdc-client output:\n%s", e.stdout)
        if e.stderr:
            logger.error("gdc-client error:\n%s", e.stderr)
        raise RuntimeError(f"gdc-client download failed with exit code {e.returncode}") from e
    else:
        if result.stdout:
            logger.debug("gdc-client output: %s", result.stdout)

    logger.info("Download completed successfully")
|