tcga-downloader/tcga_downloader/query.py
2026-01-16 14:48:12 +08:00

43 lines
1.2 KiB
Python

from __future__ import annotations

import logging

import requests
# URL of the GDC "files" endpoint that query_files() POSTs its search payload to.
GDC_FILES_URL = "https://api.gdc.cancer.gov/files"
def build_filters(project: str, data_type: str) -> dict:
    """Build the filter expression selecting one project and one data type.

    The result is an ``"and"`` node with two ``"in"`` clauses: the first
    restricts ``cases.project.project_id`` to ``[project]`` and the second
    restricts ``data_type`` to ``[data_type]``.
    """
    # (field name, single accepted value) pairs, in the order they are emitted.
    criteria = (
        ("cases.project.project_id", project),
        ("data_type", data_type),
    )
    clauses = [
        {"op": "in", "content": {"field": field_name, "value": [wanted]}}
        for field_name, wanted in criteria
    ]
    return {"op": "and", "content": clauses}
def query_files(project: str, data_type: str, fields: list[str] | None = None, size: int = 1000) -> list[dict]:
    """Query the GDC files endpoint for files of one project and data type.

    Sends a single POST request to ``GDC_FILES_URL`` with the filter built by
    ``build_filters`` and returns the hits from the JSON response.

    Args:
        project: Value matched against ``cases.project.project_id``.
        data_type: Value matched against the ``data_type`` field.
        fields: Names of the fields to request for each hit. When ``None``,
            ``file_id``, ``file_name``, ``data_type``, ``data_format``,
            ``file_size`` and ``md5sum`` are requested.
        size: Maximum number of hits requested in this one call. No further
            pages are fetched.

    Returns:
        The list of hit records, or an empty list when the response carries
        no ``data``/``hits`` entry (or a null one).

    Raises:
        requests.HTTPError: If the server responds with an error status.
        requests.RequestException: If the request itself fails, e.g. on a
            connection error or when the 30-second timeout expires.
    """
    if fields is None:
        fields = ["file_id", "file_name", "data_type", "data_format", "file_size", "md5sum"]
    payload = {
        "filters": build_filters(project, data_type),
        "fields": ",".join(fields),
        "format": "JSON",
        "size": size,
    }
    resp = requests.post(GDC_FILES_URL, json=payload, timeout=30)
    resp.raise_for_status()

    # "data" and "hits" may be missing or null; treat both as "no results"
    # instead of failing on None or returning None to the caller.
    results = resp.json().get("data") or {}
    hits = results.get("hits") or []

    # Only one page of `size` hits is requested. If the API reports more
    # matches than were returned, say so rather than silently handing back a
    # truncated list.
    total = (results.get("pagination") or {}).get("total")
    if isinstance(total, int) and total > len(hits):
        logging.getLogger(__name__).warning(
            "GDC returned %d of %d matching files for project=%r, data_type=%r; "
            "pass a larger `size` to retrieve all of them",
            len(hits),
            total,
            project,
            data_type,
        )
    return hits