67 lines
2.2 KiB
Python
67 lines
2.2 KiB
Python
from __future__ import annotations
|
|
|
|
import argparse
|
|
from pathlib import Path
|
|
|
|
from tcga_downloader.download import run_gdc_download
|
|
from tcga_downloader.manifest import ManifestRecord, write_manifest
|
|
from tcga_downloader.query import query_files
|
|
|
|
|
|
def build_parser() -> argparse.ArgumentParser:
    """Construct the ``tcga-downloader`` command-line parser.

    The parser requires exactly one subcommand, stored on the parsed
    namespace as ``command``:

    * ``query``    -- ``--project``, ``--data-type``, ``--out`` (all required)
      and ``--format`` (``tsv`` or ``json``, default ``tsv``).
    * ``download`` -- ``--manifest`` and ``--out-dir`` (both required),
      ``--processes`` (int, default 4) and ``--retries`` (int, default 3).
    * ``run``      -- every ``query`` option followed by ``--out-dir``,
      ``--processes`` and ``--retries``; it takes no ``--manifest``.

    Returns:
        The fully configured top-level parser.
    """

    def add_query_options(cmd: argparse.ArgumentParser) -> None:
        # Options shared by "query" and "run".
        for flag in ("--project", "--data-type", "--out"):
            cmd.add_argument(flag, required=True)
        cmd.add_argument("--format", choices=["tsv", "json"], default="tsv")

    def add_transfer_options(cmd: argparse.ArgumentParser) -> None:
        # Options shared by "download" and "run".
        cmd.add_argument("--out-dir", required=True)
        cmd.add_argument("--processes", type=int, default=4)
        cmd.add_argument("--retries", type=int, default=3)

    root = argparse.ArgumentParser(prog="tcga-downloader")
    commands = root.add_subparsers(dest="command", required=True)

    add_query_options(commands.add_parser("query"))

    download_cmd = commands.add_parser("download")
    download_cmd.add_argument("--manifest", required=True)
    add_transfer_options(download_cmd)

    run_cmd = commands.add_parser("run")
    add_query_options(run_cmd)
    add_transfer_options(run_cmd)

    return root
|
|
|
|
|
|
def _records_from_hits(hits: list[dict]) -> list[ManifestRecord]:
|
|
records = []
|
|
for h in hits:
|
|
records.append(
|
|
ManifestRecord(
|
|
file_id=h["file_id"],
|
|
file_name=h["file_name"],
|
|
data_type=h["data_type"],
|
|
data_format=h["data_format"],
|
|
size=int(h["file_size"]),
|
|
md5=h["md5sum"],
|
|
)
|
|
)
|
|
return records
|
|
|
|
|
|
def main() -> None:
    """Parse the command line and run the selected subcommand.

    * ``query`` and ``run`` call ``query_files`` with the project and data
      type, convert the hits to manifest records, and write them to
      ``--out`` in the chosen ``--format``.
    * ``download`` and ``run`` then call ``run_gdc_download`` with a manifest
      path, ``--out-dir``, ``--processes`` and ``--retries``. ``download``
      uses ``--manifest``; ``run`` reuses the manifest just written to
      ``--out``.
    """
    args = build_parser().parse_args()
    command = args.command

    # "run" is the union of the two phases, so it enables both flags.
    wants_query = command == "query" or command == "run"
    wants_download = command == "download" or command == "run"

    if wants_query:
        manifest_records = _records_from_hits(
            query_files(args.project, args.data_type)
        )
        write_manifest(manifest_records, Path(args.out), fmt=args.format)

    if wants_download:
        if command == "download":
            manifest_source = args.manifest
        else:
            # "run" has no --manifest option; it downloads from its own output.
            manifest_source = args.out
        run_gdc_download(
            Path(manifest_source),
            Path(args.out_dir),
            args.processes,
            args.retries,
        )
|