from __future__ import annotations

import json
from dataclasses import asdict, dataclass
from pathlib import Path
from typing import Any

from tcga_downloader.logger import get_logger

logger = get_logger("config")

# File suffixes (lower-cased) that are handled by the optional PyYAML dependency.
_YAML_SUFFIXES = frozenset({".yaml", ".yml"})

# Shared by the load and save paths so the install hint stays identical in both.
_YAML_MISSING_MSG = (
    "PyYAML is required for YAML config files. Install with: pip install pyyaml"
)


@dataclass
class QueryConfig:
    """Settings read from the ``query`` section of a config file."""

    project: str  # e.g. "TCGA-BRCA" (the value used by the default template)
    data_type: str  # e.g. "Gene Expression" (the value used by the default template)
    sample_type: str | None = None  # optional; None when absent from the file
    platform: str | None = None  # optional; None when absent from the file
    max_files: int = 1000  # presumably a cap on files per query -- confirm with consumer


@dataclass
class DownloadConfig:
    """Settings read from the ``download`` section of a config file."""

    out_dir: str  # the parser falls back to "./data" when the key is absent
    processes: int = 4  # presumably the number of parallel workers -- confirm
    retries: int = 3  # presumably retry attempts per file -- confirm
    token: str | None = None  # optional; None when absent from the file


@dataclass
class Config:
    """Top-level configuration: both sections plus the global logging options."""

    query: QueryConfig
    download: DownloadConfig
    log_file: str | None = None
    verbose: bool = False


def _import_yaml() -> Any:
    """Return the optional PyYAML module.

    Raises:
        ImportError: If PyYAML is not installed; the message carries an
            install hint and the original error is chained as the cause.
    """
    # Only the import itself is guarded, so an ImportError raised later while
    # reading or writing a document is never mislabelled as "PyYAML missing".
    try:
        import yaml
    except ImportError as e:
        raise ImportError(_YAML_MISSING_MSG) from e
    return yaml


def load_config(path: Path) -> Config:
    """Load configuration from a JSON or YAML file.

    The format is selected from the file suffix (``.json``, ``.yaml`` or
    ``.yml``), compared case-insensitively. Files are read as UTF-8.

    Args:
        path: Location of the config file.

    Returns:
        The parsed ``Config``.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        ImportError: If a YAML file is given but PyYAML is not installed.
        ValueError: If the suffix is unsupported, or the document (or one of
            its sections) is not a mapping.
    """
    if not path.exists():
        raise FileNotFoundError(f"Config file not found: {path}")

    suffix = path.suffix.lower()
    if suffix == ".json":
        # Explicit encoding: the platform default is not UTF-8 everywhere.
        with path.open(encoding="utf-8") as f:
            data = json.load(f)
    elif suffix in _YAML_SUFFIXES:
        yaml = _import_yaml()
        with path.open(encoding="utf-8") as f:
            data = yaml.safe_load(f)
    else:
        raise ValueError(f"Unsupported config format: {suffix}")

    return _parse_config(data)


def _section(data: dict, key: str) -> dict:
    """Return the mapping stored under ``key``, or ``{}`` if absent or null.

    Raises:
        ValueError: If the value is present but is not a mapping.
    """
    section = data.get(key)
    if section is None:
        # Covers both a missing key and an explicit null, e.g. a YAML file
        # containing ``query:`` with every entry below it commented out.
        return {}
    if not isinstance(section, dict):
        raise ValueError(
            f"Config section '{key}' must be a mapping, got {type(section).__name__}"
        )
    return section


def _parse_config(data: dict) -> Config:
    """Build a ``Config`` from the raw mapping read from a config file.

    Missing optional keys fall back to the same defaults the dataclasses
    declare. NOTE: ``project`` and ``data_type`` are not validated here; when
    absent they are stored as ``None``.

    Raises:
        ValueError: If ``data`` or one of its sections is not a mapping. An
            empty YAML document parses to ``None`` and is rejected here.
    """
    if not isinstance(data, dict):
        raise ValueError(
            "Config file must contain a mapping at the top level, "
            f"got {type(data).__name__}"
        )

    query_data = _section(data, "query")
    download_data = _section(data, "download")

    query = QueryConfig(
        project=query_data.get("project"),
        data_type=query_data.get("data_type"),
        sample_type=query_data.get("sample_type"),
        platform=query_data.get("platform"),
        max_files=query_data.get("max_files", 1000),
    )
    download = DownloadConfig(
        out_dir=download_data.get("out_dir", "./data"),
        processes=download_data.get("processes", 4),
        retries=download_data.get("retries", 3),
        token=download_data.get("token"),
    )
    return Config(
        query=query,
        download=download,
        log_file=data.get("log_file"),
        verbose=data.get("verbose", False),
    )


def save_default_config(path: Path) -> None:
    """Save a default configuration template to ``path``.

    The format is selected from the file suffix exactly as in ``load_config``.
    The file is written as UTF-8 and an info message is logged on success.

    Args:
        path: Destination file; an existing file is overwritten.

    Raises:
        ImportError: If a YAML suffix is given but PyYAML is not installed.
            The import is attempted before the file is opened, so no file is
            created in that case.
        ValueError: If the suffix is unsupported.
    """
    config = Config(
        query=QueryConfig(project="TCGA-BRCA", data_type="Gene Expression"),
        download=DownloadConfig(out_dir="./data"),
    )
    payload = _config_to_dict(config)

    suffix = path.suffix.lower()
    if suffix == ".json":
        with path.open("w", encoding="utf-8") as f:
            json.dump(payload, f, indent=2)
    elif suffix in _YAML_SUFFIXES:
        yaml = _import_yaml()
        with path.open("w", encoding="utf-8") as f:
            # safe_dump emits only standard YAML tags, so the result can always
            # be read back by the safe_load call in load_config.
            yaml.safe_dump(payload, f, default_flow_style=False)
    else:
        raise ValueError(f"Unsupported config format: {suffix}")

    logger.info("Default config saved to %s", path)


def _config_to_dict(config: Config) -> dict:
    """Convert a ``Config`` (including its nested sections) to plain dicts.

    Keys follow the dataclass field order: ``query``, ``download``,
    ``log_file``, ``verbose``.
    """
    # asdict recurses into nested dataclasses, so a field added to any of the
    # config classes is serialised without further changes here.
    return asdict(config)