tcga-downloader/examples/basic_query_download.py
yunpeng.zhang a01a59b371
Some checks failed
CI / Lint (push) Failing after 9m32s
CI / Test (3.11) (push) Successful in 6m41s
CI / Test (3.12) (push) Successful in 4m21s
feat: add interactive cli
2026-02-09 13:13:39 +08:00

49 lines
1.2 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Basic example: Query TCGA data and download files.
"""
from pathlib import Path
from tcga_downloader.download import run_gdc_download
from tcga_downloader.manifest import ManifestRecord, write_manifest
from tcga_downloader.query import query_files
def main():
    """Query one TCGA project, write a manifest, and download the files.

    The steps run in order, each announced on stdout:

    1. Call ``query_files`` for project ``"TCGA-BRCA"`` and data type
       ``"Gene Expression"``.
    2. Turn every hit into a ``ManifestRecord`` and write them to
       ``manifest.tsv`` via ``write_manifest(..., fmt="tsv")``.
    3. Make sure the ``data`` directory exists and hand the manifest to
       ``run_gdc_download``.

    Each hit is indexed by the keys ``file_id``, ``file_name``,
    ``data_type``, ``data_format``, ``file_size`` and ``md5sum``.
    """
    query_project = "TCGA-BRCA"
    data_type = "Gene Expression"
    manifest_path = Path("manifest.tsv")
    data_dir = Path("data")

    # Step 1: look up the matching files.
    print(f"Querying {query_project} for {data_type}...")
    hits = query_files(query_project, data_type)
    print(f"Found {len(hits)} files")

    # Step 2: convert the raw hits into manifest rows and persist them.
    print(f"Writing manifest to {manifest_path}...")
    entries = []
    for hit in hits:
        entry = ManifestRecord(
            file_id=hit["file_id"],
            file_name=hit["file_name"],
            data_type=hit["data_type"],
            data_format=hit["data_format"],
            # Coerced to int before it is stored on the record.
            size=int(hit["file_size"]),
            md5=hit["md5sum"],
        )
        entries.append(entry)
    write_manifest(entries, manifest_path, fmt="tsv")

    # Step 3: download everything listed in the manifest.
    print(f"Downloading files to {data_dir}...")
    # exist_ok=True keeps re-runs from failing when the directory is present.
    data_dir.mkdir(exist_ok=True)
    # NOTE(review): processes/retries presumably set download parallelism and
    # per-file retry count -- confirm against run_gdc_download.
    run_gdc_download(manifest_path, data_dir, processes=4, retries=3)
    print("Download complete!")
# Run the example only when this file is executed as a script, so that
# importing the module does not trigger a query or a download.
if __name__ == "__main__":
    main()