tcga-downloader/tcga_downloader/config.py
yunpeng.zhang a01a59b371
Some checks failed
CI / Lint (push) Failing after 9m32s
CI / Test (3.11) (push) Successful in 6m41s
CI / Test (3.12) (push) Successful in 4m21s
feat: add interactive cli
2026-02-09 13:13:39 +08:00

137 lines
3.7 KiB
Python

from __future__ import annotations
import json
from dataclasses import dataclass
from pathlib import Path
from tcga_downloader.logger import get_logger
# Module-level logger for this module; used to report where the default config was saved.
logger = get_logger("config")
@dataclass
class QueryConfig:
    """Settings describing which files to query.

    ``project`` and ``data_type`` are required positional fields; the
    remaining fields are optional and fall back to the defaults below.
    """

    # Project identifier, e.g. "TCGA-BRCA" (the value used by the default template).
    project: str
    # Data type name, e.g. "Gene Expression" (the value used by the default template).
    data_type: str
    # Optional sample type; None when not specified.
    sample_type: str | None = None
    # Optional platform; None when not specified.
    platform: str | None = None
    # Defaults to 1000. NOTE(review): presumably an upper bound on the number
    # of files returned by a query -- confirm against the caller.
    max_files: int = 1000
@dataclass
class DownloadConfig:
    """Settings for the download step.

    Only ``out_dir`` is required; the other fields have defaults.
    """

    # Output directory, stored as a string (the default template uses "./data").
    out_dir: str
    # Defaults to 4. NOTE(review): presumably the number of parallel download
    # workers -- confirm against the downloader.
    processes: int = 4
    # Defaults to 3. NOTE(review): presumably the retry count per file -- confirm.
    retries: int = 3
    # Optional token; None when not provided.
    # NOTE(review): looks like an access/auth token -- confirm how it is consumed.
    token: str | None = None
@dataclass
class Config:
    """Top-level configuration: a query section, a download section and logging options."""

    # Query section (a QueryConfig instance).
    query: QueryConfig
    # Download section (a DownloadConfig instance).
    download: DownloadConfig
    # Optional log file location as a string; None when unset.
    log_file: str | None = None
    # Verbosity flag; off by default.
    verbose: bool = False
def load_config(path: Path) -> Config:
    """Load configuration from a JSON or YAML file.

    The format is selected from the file suffix (case-insensitive):
    ``.json`` is read with :mod:`json`; ``.yaml``/``.yml`` is read with
    PyYAML's ``safe_load``. Files are read as UTF-8.

    Args:
        path: Location of the config file (a ``Path``; a plain string is
            also accepted).

    Returns:
        The ``Config`` built from the file contents.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        ImportError: If a YAML file is given but PyYAML is not installed.
        ValueError: If the suffix is unsupported, or if the top-level value
            in the file is not a mapping (e.g. an empty YAML file or a
            JSON list).
    """
    path = Path(path)
    if not path.exists():
        raise FileNotFoundError(f"Config file not found: {path}")

    suffix = path.suffix.lower()
    if suffix == ".json":
        with path.open(encoding="utf-8") as f:
            data = json.load(f)
    elif suffix in {".yaml", ".yml"}:
        # Guard only the import: an ImportError raised while reading or
        # parsing the file must not be misreported as "PyYAML missing".
        try:
            import yaml
        except ImportError as e:
            raise ImportError(
                "PyYAML is required for YAML config files. Install with: pip install pyyaml"
            ) from e
        with path.open(encoding="utf-8") as f:
            data = yaml.safe_load(f)
    else:
        raise ValueError(f"Unsupported config format: {suffix}")

    # An empty YAML document parses to None and JSON may hold any value at
    # the top level; fail with a clear message instead of an AttributeError
    # further down.
    if not isinstance(data, dict):
        raise ValueError(
            f"Config file must contain a mapping at the top level, "
            f"got {type(data).__name__}: {path}"
        )
    return _parse_config(data)
def _parse_config(data: dict) -> Config:
    """Build a ``Config`` from an already-decoded mapping.

    Reads the optional ``"query"`` and ``"download"`` sections plus the
    top-level ``"log_file"`` and ``"verbose"`` keys. Keys that are absent
    fall back to the same defaults as the dataclasses (``max_files=1000``,
    ``out_dir="./data"``, ``processes=4``, ``retries=3``, ``verbose=False``);
    ``project`` and ``data_type`` become ``None`` when missing.

    Args:
        data: Decoded config mapping.

    Returns:
        The populated ``Config``.

    Raises:
        ValueError: If the ``"query"`` or ``"download"`` section is present
            but is not a mapping.
    """

    def _section(name: str) -> dict:
        # A key that is present with a null value (e.g. a bare ``query:`` in
        # YAML) yields None from ``data.get``; treat it like a missing section.
        section = data.get(name)
        if section is None:
            return {}
        if not isinstance(section, dict):
            raise ValueError(
                f"Config section '{name}' must be a mapping, "
                f"got {type(section).__name__}"
            )
        return section

    query_data = _section("query")
    download_data = _section("download")

    query = QueryConfig(
        project=query_data.get("project"),
        data_type=query_data.get("data_type"),
        sample_type=query_data.get("sample_type"),
        platform=query_data.get("platform"),
        max_files=query_data.get("max_files", 1000),
    )
    download = DownloadConfig(
        out_dir=download_data.get("out_dir", "./data"),
        processes=download_data.get("processes", 4),
        retries=download_data.get("retries", 3),
        token=download_data.get("token"),
    )
    return Config(
        query=query,
        download=download,
        log_file=data.get("log_file"),
        verbose=data.get("verbose", False),
    )
def save_default_config(path: Path) -> None:
    """Save a default configuration template to ``path``.

    The template uses project ``"TCGA-BRCA"``, data type
    ``"Gene Expression"`` and output directory ``"./data"``; every other
    field takes its dataclass default. The output format is chosen from the
    file suffix (case-insensitive) and the file is written as UTF-8.

    Args:
        path: Destination file (a ``Path``; a plain string is also
            accepted). Must end in ``.json``, ``.yaml`` or ``.yml``.

    Raises:
        ValueError: If the suffix is not one of the supported formats.
        ImportError: If a YAML file is requested but PyYAML is not installed.
    """
    path = Path(path)
    suffix = path.suffix.lower()
    # Reject unsupported formats up front, before building anything.
    if suffix not in {".json", ".yaml", ".yml"}:
        raise ValueError(f"Unsupported config format: {suffix}")

    template = _config_to_dict(
        Config(
            query=QueryConfig(project="TCGA-BRCA", data_type="Gene Expression"),
            download=DownloadConfig(out_dir="./data"),
        )
    )

    if suffix == ".json":
        with path.open("w", encoding="utf-8") as f:
            json.dump(template, f, indent=2)
    else:
        # Guard only the import so that I/O or serialization errors are not
        # mistaken for a missing dependency, and so the target file is never
        # created or truncated when PyYAML is unavailable.
        try:
            import yaml
        except ImportError as e:
            raise ImportError(
                "PyYAML is required for YAML config files. Install with: pip install pyyaml"
            ) from e
        with path.open("w", encoding="utf-8") as f:
            yaml.dump(template, f, default_flow_style=False)

    logger.info("Default config saved to %s", path)
def _config_to_dict(config: Config) -> dict:
"""Convert Config dataclass to dictionary."""
return {
"query": {
"project": config.query.project,
"data_type": config.query.data_type,
"sample_type": config.query.sample_type,
"platform": config.query.platform,
"max_files": config.query.max_files,
},
"download": {
"out_dir": config.download.out_dir,
"processes": config.download.processes,
"retries": config.download.retries,
"token": config.download.token,
},
"log_file": config.log_file,
"verbose": config.verbose,
}