from __future__ import annotations

import shutil
import subprocess
from pathlib import Path

from tcga_downloader.logger import get_logger

logger = get_logger("download")


def build_gdc_command(
    manifest_path: Path,
    out_dir: Path,
    processes: int,
    retries: int,
    token: str | None = None,
) -> list[str]:
    """Build the argument list for a ``gdc-client download`` invocation.

    Args:
        manifest_path: Manifest passed to ``-m``.
        out_dir: Directory passed to ``-d``.
        processes: Value passed to ``--n-processes``.
        retries: Value passed to ``--retry-amount``.
        token: If truthy, appended verbatim as the value of ``--token``.
            NOTE(review): confirm whether gdc-client expects the token
            itself or a path to a token file here.

    Returns:
        The command as a list of strings, suitable for ``subprocess.run``.
    """
    cmd = [
        "gdc-client",
        "download",
        "-m",
        str(manifest_path),
        "-d",
        str(out_dir),
        "--n-processes",
        str(processes),
        "--retry-amount",
        str(retries),
    ]
    if token:
        cmd.extend(["--token", token])
    return cmd


def _redact_command(cmd: list[str]) -> list[str]:
    """Return a copy of *cmd* with the value following ``--token`` masked."""
    redacted = list(cmd)
    for index, arg in enumerate(redacted[:-1]):
        if arg == "--token":
            redacted[index + 1] = "***"
    return redacted


def run_gdc_download(
    manifest_path: Path,
    out_dir: Path,
    processes: int = 4,
    retries: int = 3,
    token: str | None = None,
) -> None:
    """Run ``gdc-client download`` for a manifest and log its output.

    The child process's stdout/stderr are captured as text and forwarded
    to the module logger rather than printed directly.

    Args:
        manifest_path: Manifest file; must exist.
        out_dir: Directory passed to gdc-client as the download target.
        processes: Value for ``--n-processes``.
        retries: Value for ``--retry-amount``.
        token: Optional value for ``--token``.

    Raises:
        FileNotFoundError: If *manifest_path* does not exist.
        RuntimeError: If ``gdc-client`` is not on PATH, or if it exits
            with a non-zero status.
    """
    if not manifest_path.exists():
        raise FileNotFoundError(f"Manifest file not found: {manifest_path}")

    if not shutil.which("gdc-client"):
        raise RuntimeError(
            "gdc-client not found in PATH. Install from https://gdc.cancer.gov/access-data/gdc-data-transfer-tool"
        )

    cmd = build_gdc_command(manifest_path, out_dir, processes, retries, token)
    logger.info("Starting download: manifest=%s, out_dir=%s", manifest_path, out_dir)
    # Never write the auth token to the log; only the logged copy is masked,
    # the real command passed to subprocess is untouched.
    logger.debug("Running command: %s", " ".join(_redact_command(cmd)))

    try:
        result = subprocess.run(cmd, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        logger.error("Download failed with exit code %d", e.returncode)
        if e.stdout:
            logger.info("gdc-client output:\n%s", e.stdout)
        if e.stderr:
            logger.error("gdc-client error:\n%s", e.stderr)
        raise RuntimeError(f"gdc-client download failed with exit code {e.returncode}") from e

    if result.stdout:
        logger.debug("gdc-client output: %s", result.stdout)
    logger.info("Download completed successfully")