from __future__ import annotations from typing import TYPE_CHECKING import questionary import requests if TYPE_CHECKING: pass from tcga_downloader.logger import get_logger logger = get_logger("interactive") GDC_PROJECTS_URL = "https://api.gdc.cancer.gov/projects" GDC_FILES_URL = "https://api.gdc.cancer.gov/files" def fetch_projects(size: int = 1000) -> list[dict]: """ Fetch all available TCGA projects from GDC API. Args: size: Number of projects to return Returns: List of project dictionaries containing project_id, name, etc. """ logger.info("Fetching TCGA projects...") params = {"size": size} try: resp = requests.get(GDC_PROJECTS_URL, params=params, timeout=30) resp.raise_for_status() data = resp.json() projects = data.get("data", {}).get("hits", []) logger.info("Fetched %d projects", len(projects)) return projects except requests.exceptions.RequestException as e: logger.error("Failed to fetch TCGA projects: %s", e) raise def fetch_data_types(project_id: str, size: int = 1000) -> list[str]: """ Fetch available data types for a specific project using GDC Files API facets. Args: project_id: TCGA project ID (e.g., TCGA-BRCA) size: Maximum number of data types to return Returns: List of unique data type names """ logger.info("Fetching data types for project %s...", project_id) payload = { "filters": { "op": "and", "content": [ { "op": "in", "content": {"field": "cases.project.project_id", "value": [project_id]}, } ], }, "size": 0, "facets": "data_type", "format": "JSON", } try: resp = requests.post(GDC_FILES_URL, json=payload, timeout=30) resp.raise_for_status() data = resp.json() aggregations = data.get("data", {}).get("aggregations", {}) data_types = aggregations.get("data_type", {}).get("buckets", []) unique_types = [bucket.get("key") for bucket in data_types if bucket.get("key")] logger.info("Found %d data types", len(unique_types)) return unique_types[:size] except requests.exceptions.RequestException as e: logger.error("Failed to fetch data types for project %s: %s", project_id, e) raise def select_project(projects: list[dict]) -> dict: """ Interactive selection of project from list. Args: projects: List of available projects Returns: Selected project dictionary """ choices = [f"{p['project_id']} - {p.get('name', 'Unknown')}" for p in projects] answer = questionary.select( message="Select a TCGA project:", choices=choices, ).ask() if answer is None: logger.info("No project selected") raise SystemExit(0) selected = None for p in projects: if answer.startswith(p["project_id"]): selected = p break if not selected: logger.warning("Could not parse selection: %s", answer) raise SystemExit(1) return selected def select_data_type(data_types: list[str]) -> str: """ Interactive selection of data type from list. Args: data_types: List of available data types Returns: Selected data type """ if not data_types: logger.warning("No data types available") raise SystemExit(1) answer = questionary.select( message="Select a data type:", choices=data_types, ).ask() if answer is None: logger.info("No data type selected") raise SystemExit(0) return answer def interactive_select() -> tuple[str, str]: """ Run interactive selection workflow: project -> data type. Returns: Tuple of (project_id, data_type) """ print("\n" + "=" * 60) print("TCGA Downloader - Interactive Mode") print("=" * 60 + "\n") projects = fetch_projects() selected_project = select_project(projects) project_id = selected_project["project_id"] data_types = fetch_data_types(project_id) selected_data_type = select_data_type(data_types) print(f"\nSelected project: {selected_project.get('name', project_id)}") print(f"Selected data type: {selected_data_type}\n") return project_id, selected_data_type