"""Command-line entry point for ``tcga-downloader``.

Exposes three sub-commands:

* ``query``    -- query files for a project/data type and write a manifest.
* ``download`` -- download the files listed in an existing manifest.
* ``run``      -- ``query`` followed by ``download`` of the manifest just written.
"""

from __future__ import annotations

import argparse
from pathlib import Path

from tcga_downloader.download import run_gdc_download
from tcga_downloader.manifest import ManifestRecord, write_manifest
from tcga_downloader.query import query_files


def _add_query_arguments(sub_parser: argparse.ArgumentParser) -> None:
    """Register the options shared by the ``query`` and ``run`` commands."""
    sub_parser.add_argument("--project", required=True)
    sub_parser.add_argument("--data-type", required=True)
    sub_parser.add_argument("--out", required=True)
    sub_parser.add_argument("--format", choices=["tsv", "json"], default="tsv")


def _add_download_arguments(sub_parser: argparse.ArgumentParser) -> None:
    """Register the options shared by the ``download`` and ``run`` commands."""
    sub_parser.add_argument("--out-dir", required=True)
    sub_parser.add_argument("--processes", type=int, default=4)
    sub_parser.add_argument("--retries", type=int, default=3)


def build_parser() -> argparse.ArgumentParser:
    """Build the argument parser for the ``tcga-downloader`` program.

    Returns:
        A parser with the required sub-commands ``query``, ``download`` and
        ``run``; the chosen sub-command name is stored in ``args.command``.
    """
    parser = argparse.ArgumentParser(prog="tcga-downloader")
    sub = parser.add_subparsers(dest="command", required=True)

    query_parser = sub.add_parser("query")
    _add_query_arguments(query_parser)

    download_parser = sub.add_parser("download")
    # Only ``download`` reads an existing manifest; ``run`` reuses ``--out``.
    download_parser.add_argument("--manifest", required=True)
    _add_download_arguments(download_parser)

    run_parser = sub.add_parser("run")
    _add_query_arguments(run_parser)
    _add_download_arguments(run_parser)

    return parser


def _records_from_hits(hits: list[dict]) -> list[ManifestRecord]:
    """Convert raw query hits into ``ManifestRecord`` objects.

    Args:
        hits: Mappings that each provide the keys ``file_id``, ``file_name``,
            ``data_type``, ``data_format``, ``file_size`` and ``md5sum``.

    Returns:
        One ``ManifestRecord`` per hit, in input order. ``file_size`` is
        coerced with ``int()`` and stored as ``size``; ``md5sum`` is stored
        as ``md5``.

    Raises:
        KeyError: If a hit lacks one of the keys listed above.
        ValueError: If ``file_size`` cannot be converted to ``int``.
    """
    return [
        ManifestRecord(
            file_id=hit["file_id"],
            file_name=hit["file_name"],
            data_type=hit["data_type"],
            data_format=hit["data_format"],
            size=int(hit["file_size"]),
            md5=hit["md5sum"],
        )
        for hit in hits
    ]


def main(argv: list[str] | None = None) -> None:
    """Parse the command line and run the selected sub-command.

    Args:
        argv: Argument strings to parse, without the program name. When
            ``None`` (the default) ``argparse`` reads ``sys.argv[1:]``.
    """
    args = build_parser().parse_args(argv)

    # ``run`` performs the query step first, then falls through to download.
    if args.command in {"query", "run"}:
        hits = query_files(args.project, args.data_type)
        records = _records_from_hits(hits)
        write_manifest(records, Path(args.out), fmt=args.format)

    if args.command in {"download", "run"}:
        # ``download`` takes an explicit manifest; ``run`` downloads from the
        # manifest written by the query step above.
        # NOTE(review): with ``run --format json`` the manifest handed to the
        # downloader is JSON -- confirm run_gdc_download accepts that format.
        manifest_path = Path(args.manifest if args.command == "download" else args.out)
        run_gdc_download(manifest_path, Path(args.out_dir), args.processes, args.retries)