49 lines
1.1 KiB
Python
Executable File
49 lines
1.1 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
|
|
"""
|
|
Example: Query multiple TCGA projects at once.
|
|
"""
|
|
|
|
from pathlib import Path
|
|
|
|
from tcga_downloader.manifest import (
|
|
ManifestRecord,
|
|
format_manifest_stats,
|
|
get_manifest_stats,
|
|
write_manifest,
|
|
)
|
|
from tcga_downloader.query import query_multiple_projects
|
|
|
|
|
|
def main():
    """Build a single TSV manifest spanning several TCGA projects.

    Queries a fixed list of projects for one data type, converts every
    returned hit into a ``ManifestRecord``, writes the records to
    ``multi_project_manifest.tsv`` (a relative path), and prints the
    formatted manifest statistics.
    """
    project_ids = ["TCGA-BRCA", "TCGA-LUAD", "TCGA-COAD"]
    wanted_type = "Gene Expression"
    output_file = Path("multi_project_manifest.tsv")

    joined_ids = ", ".join(project_ids)
    print(f"Querying {len(project_ids)} projects: {joined_ids}...")
    print(f"Data type: {wanted_type}")

    # Each hit is indexed by key; the size is coerced to int before it is
    # stored on the record.
    entries = []
    for hit in query_multiple_projects(project_ids, wanted_type):
        entry = ManifestRecord(
            file_id=hit["file_id"],
            file_name=hit["file_name"],
            data_type=hit["data_type"],
            data_format=hit["data_format"],
            size=int(hit["file_size"]),
            md5=hit["md5sum"],
        )
        entries.append(entry)

    write_manifest(entries, output_file, fmt="tsv")

    summary = get_manifest_stats(entries)
    print("\nManifest Statistics:")
    print(format_manifest_stats(summary))
|
|
|
|
|
|
# Run the example only when this file is executed as a script, so that
# importing the module does not trigger the query.
if __name__ == "__main__":
    main()
|