feat: add GDC query via REST
This commit is contained in:
parent
e6b2a174c5
commit
7e8497decb
42
tcga_downloader/query.py
Normal file
42
tcga_downloader/query.py
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
GDC_FILES_URL = "https://api.gdc.cancer.gov/files"
|
||||||
|
|
||||||
|
|
||||||
|
def build_filters(project: str, data_type: str) -> dict:
    """Return a filter expression selecting one project and one data type.

    The result is a nested dict with a top-level ``"and"`` operator whose
    content is two ``"in"`` clauses, in this order:

    1. ``cases.project.project_id`` must be in ``[project]``
    2. ``data_type`` must be in ``[data_type]``

    Args:
        project: Project identifier to match (e.g. ``"TCGA-BRCA"``).
        data_type: Data type label to match (e.g. ``"Gene Expression"``).

    Returns:
        A JSON-serialisable dict describing the combined filter.
    """
    # Each pair is (field name, single accepted value); both clauses share
    # the same "in" shape, so they are generated from one template.
    criteria = (
        ("cases.project.project_id", project),
        ("data_type", data_type),
    )
    clauses = [
        {"op": "in", "content": {"field": field_name, "value": [wanted]}}
        for field_name, wanted in criteria
    ]
    return {"op": "and", "content": clauses}
|
||||||
|
|
||||||
|
|
||||||
|
def query_files(project: str, data_type: str, fields: list[str] | None = None, size: int = 1000) -> list[dict]:
    """Query the files endpoint for records matching a project and data type.

    A single POST request is sent to ``GDC_FILES_URL``; no further pages are
    requested, so at most ``size`` hits are returned by this call.

    Args:
        project: Project identifier passed to ``build_filters``.
        data_type: Data type label passed to ``build_filters``.
        fields: Names of the fields to request for each hit. When ``None``,
            a default set (id, name, type, format, size, md5) is used.
        size: Value sent as the ``"size"`` entry of the request payload.

    Returns:
        The list found under ``data -> hits`` in the JSON response, or an
        empty list when either key is absent.

    Raises:
        requests.HTTPError: If the response status indicates an error
            (raised by ``raise_for_status``).
    """
    selected_fields = (
        ["file_id", "file_name", "data_type", "data_format", "file_size", "md5sum"]
        if fields is None
        else fields
    )
    response = requests.post(
        GDC_FILES_URL,
        json={
            "filters": build_filters(project, data_type),
            # The field list is sent as one comma-separated string.
            "fields": ",".join(selected_fields),
            "format": "JSON",
            "size": size,
        },
        timeout=30,
    )
    response.raise_for_status()
    body = response.json()
    return body.get("data", {}).get("hits", [])
|
||||||
7
tests/test_query.py
Normal file
7
tests/test_query.py
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
from tcga_downloader.query import build_filters
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_filters_project_and_type():
    """build_filters combines a project clause and a data-type clause with "and".

    Checks both clauses (field and value), not just the project field name,
    so a regression in the data-type clause is caught as well.
    """
    filters = build_filters(project="TCGA-BRCA", data_type="Gene Expression")
    assert filters["op"] == "and"

    clauses = filters["content"]
    assert len(clauses) == 2
    project_clause, type_clause = clauses

    # Project clause: matches on the case's project id.
    assert project_clause["op"] == "in"
    assert project_clause["content"]["field"] == "cases.project.project_id"
    assert project_clause["content"]["value"] == ["TCGA-BRCA"]

    # Data-type clause: matches on the file's data_type.
    assert type_clause["op"] == "in"
    assert type_clause["content"]["field"] == "data_type"
    assert type_clause["content"]["value"] == ["Gene Expression"]
|
||||||
Loading…
Reference in New Issue
Block a user