49 lines
1.2 KiB
Python
Executable File
49 lines
1.2 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
|
|
"""
Basic example: query TCGA data, write a manifest, and download the files.
"""
|
|
|
|
from pathlib import Path
|
|
|
|
from tcga_downloader.download import run_gdc_download
|
|
from tcga_downloader.manifest import ManifestRecord, write_manifest
|
|
from tcga_downloader.query import query_files
|
|
|
|
|
|
def main():
    """Query TCGA-BRCA "Gene Expression" files, write a manifest, download them.

    Steps, in order:

    1. Call ``query_files`` for the project and data type.
    2. Turn every hit into a ``ManifestRecord`` and write them to
       ``manifest.tsv`` in TSV format.
    3. Create the ``data`` directory if it does not exist yet and call
       ``run_gdc_download`` on the manifest, passing ``processes=4`` and
       ``retries=3``.

    Progress messages are printed to stdout along the way.
    """
    query_project = "TCGA-BRCA"
    data_type = "Gene Expression"
    manifest_path = Path("manifest.tsv")
    data_dir = Path("data")

    print(f"Querying {query_project} for {data_type}...")
    hits = query_files(query_project, data_type)

    print(f"Found {len(hits)} files")
    print(f"Writing manifest to {manifest_path}...")

    # Convert each query hit into one manifest row; the hit's "file_size"
    # and "md5sum" keys map onto the record's ``size`` and ``md5`` fields.
    manifest_rows = []
    for hit in hits:
        row = ManifestRecord(
            file_id=hit["file_id"],
            file_name=hit["file_name"],
            data_type=hit["data_type"],
            data_format=hit["data_format"],
            size=int(hit["file_size"]),
            md5=hit["md5sum"],
        )
        manifest_rows.append(row)

    write_manifest(manifest_rows, manifest_path, fmt="tsv")

    print(f"Downloading files to {data_dir}...")
    # exist_ok=True keeps re-runs from failing when the directory is present.
    data_dir.mkdir(exist_ok=True)
    run_gdc_download(manifest_path, data_dir, processes=4, retries=3)

    print("Download complete!")
|
|
|
|
|
|
# Run the example only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|