From 60e852de843a11546fc11ed5c4262ab7bc27fbe5 Mon Sep 17 00:00:00 2001 From: "yunpeng.zhang" Date: Fri, 16 Jan 2026 14:51:39 +0800 Subject: [PATCH] feat: add CLI entry points --- tcga_downloader/cli.py | 66 ++++++++++++++++++++++++++++++++++++++++++ tests/test_cli.py | 7 +++++ 2 files changed, 73 insertions(+) create mode 100644 tcga_downloader/cli.py create mode 100644 tests/test_cli.py diff --git a/tcga_downloader/cli.py b/tcga_downloader/cli.py new file mode 100644 index 0000000..5760155 --- /dev/null +++ b/tcga_downloader/cli.py @@ -0,0 +1,66 @@ +from __future__ import annotations + +import argparse +from pathlib import Path + +from tcga_downloader.download import run_gdc_download +from tcga_downloader.manifest import ManifestRecord, write_manifest +from tcga_downloader.query import query_files + + +def build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser(prog="tcga-downloader") + sub = parser.add_subparsers(dest="command", required=True) + + q = sub.add_parser("query") + q.add_argument("--project", required=True) + q.add_argument("--data-type", required=True) + q.add_argument("--out", required=True) + q.add_argument("--format", choices=["tsv", "json"], default="tsv") + + d = sub.add_parser("download") + d.add_argument("--manifest", required=True) + d.add_argument("--out-dir", required=True) + d.add_argument("--processes", type=int, default=4) + d.add_argument("--retries", type=int, default=3) + + r = sub.add_parser("run") + r.add_argument("--project", required=True) + r.add_argument("--data-type", required=True) + r.add_argument("--out", required=True) + r.add_argument("--format", choices=["tsv", "json"], default="tsv") + r.add_argument("--out-dir", required=True) + r.add_argument("--processes", type=int, default=4) + r.add_argument("--retries", type=int, default=3) + + return parser + + +def _records_from_hits(hits: list[dict]) -> list[ManifestRecord]: + records = [] + for h in hits: + records.append( + ManifestRecord( + file_id=h["file_id"], + file_name=h["file_name"], + data_type=h["data_type"], + data_format=h["data_format"], + size=int(h["file_size"]), + md5=h["md5sum"], + ) + ) + return records + + +def main() -> None: + parser = build_parser() + args = parser.parse_args() + + if args.command in {"query", "run"}: + hits = query_files(args.project, args.data_type) + records = _records_from_hits(hits) + write_manifest(records, Path(args.out), fmt=args.format) + + if args.command in {"download", "run"}: + manifest_path = Path(args.manifest if args.command == "download" else args.out) + run_gdc_download(manifest_path, Path(args.out_dir), args.processes, args.retries) diff --git a/tests/test_cli.py b/tests/test_cli.py new file mode 100644 index 0000000..4ca812a --- /dev/null +++ b/tests/test_cli.py @@ -0,0 +1,7 @@ +from tcga_downloader.cli import build_parser + + +def test_cli_has_subcommands(): + parser = build_parser() + subparsers = parser._subparsers + assert subparsers is not None