# tcga-downloader/tcga_downloader/cli.py
# 2026-01-16 14:51:39 +08:00
#
# 67 lines
# 2.2 KiB
# Python

from __future__ import annotations
import argparse
from pathlib import Path
from tcga_downloader.download import run_gdc_download
from tcga_downloader.manifest import ManifestRecord, write_manifest
from tcga_downloader.query import query_files
def build_parser() -> argparse.ArgumentParser:
    """Construct the argument parser for the ``tcga-downloader`` CLI.

    The parser requires exactly one sub-command, stored on the parsed
    namespace as ``command``:

    * ``query``: ``--project``, ``--data-type``, ``--out`` (all required)
      and ``--format`` (``tsv`` or ``json``, default ``tsv``).
    * ``download``: ``--manifest`` and ``--out-dir`` (required), plus
      ``--processes`` (int, default 4) and ``--retries`` (int, default 3).
    * ``run``: every ``query`` option followed by ``--out-dir``,
      ``--processes`` and ``--retries``.

    Returns:
        The fully configured top-level parser.
    """

    def add_query_options(cmd: argparse.ArgumentParser) -> None:
        # Options shared by "query" and "run".
        for flag in ("--project", "--data-type", "--out"):
            cmd.add_argument(flag, required=True)
        cmd.add_argument("--format", choices=["tsv", "json"], default="tsv")

    def add_transfer_options(cmd: argparse.ArgumentParser) -> None:
        # Options shared by "download" and "run".
        cmd.add_argument("--out-dir", required=True)
        for flag, fallback in (("--processes", 4), ("--retries", 3)):
            cmd.add_argument(flag, type=int, default=fallback)

    root = argparse.ArgumentParser(prog="tcga-downloader")
    commands = root.add_subparsers(dest="command", required=True)

    add_query_options(commands.add_parser("query"))

    download_cmd = commands.add_parser("download")
    download_cmd.add_argument("--manifest", required=True)
    add_transfer_options(download_cmd)

    run_cmd = commands.add_parser("run")
    add_query_options(run_cmd)
    add_transfer_options(run_cmd)

    return root
def _records_from_hits(hits: list[dict]) -> list[ManifestRecord]:
    """Convert query hits into ``ManifestRecord`` objects.

    Each hit is read by the keys ``file_id``, ``file_name``, ``data_type``,
    ``data_format``, ``file_size`` and ``md5sum``; a hit lacking any of them
    raises ``KeyError``. ``file_size`` is passed through ``int()`` before
    being stored as the record's ``size``.

    Args:
        hits: Hit dictionaries to convert.

    Returns:
        One record per hit, in the same order as ``hits``.
    """
    return [
        ManifestRecord(
            file_id=hit["file_id"],
            file_name=hit["file_name"],
            data_type=hit["data_type"],
            data_format=hit["data_format"],
            size=int(hit["file_size"]),
            md5=hit["md5sum"],
        )
        for hit in hits
    ]
def main(argv: list[str] | None = None) -> None:
    """Run the ``tcga-downloader`` command-line interface.

    Parses the command line and dispatches on the chosen sub-command:

    * ``query``    -- call ``query_files`` for the project/data type and
      write the resulting records to ``--out`` via ``write_manifest``.
    * ``download`` -- pass the manifest given by ``--manifest`` to
      ``run_gdc_download``.
    * ``run``      -- do the ``query`` step, then hand the manifest just
      written to ``--out`` to ``run_gdc_download``.

    Args:
        argv: Argument strings to parse, without the program name. When
            ``None`` (the default) argparse falls back to ``sys.argv[1:]``,
            so calling ``main()`` with no arguments behaves as before.
    """
    args = build_parser().parse_args(argv)

    if args.command in {"query", "run"}:
        hits = query_files(args.project, args.data_type)
        write_manifest(_records_from_hits(hits), Path(args.out), fmt=args.format)

    if args.command in {"download", "run"}:
        # "download" reads the user-supplied manifest; "run" reuses the
        # manifest that the query step above has just written to --out.
        # NOTE(review): "run --format json" hands a JSON manifest to the
        # downloader -- confirm run_gdc_download accepts that format.
        manifest_path = Path(args.manifest if args.command == "download" else args.out)
        run_gdc_download(manifest_path, Path(args.out_dir), args.processes, args.retries)