# Configuration dataclasses and JSON/YAML load/save helpers.
from __future__ import annotations
|
|
|
|
import json
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
|
|
from tcga_downloader.logger import get_logger
|
|
|
|
logger = get_logger("config")
|
|
|
|
|
|
@dataclass
class QueryConfig:
    """Values held under the ``query`` section of a configuration.

    ``project`` and ``data_type`` have no default and must be passed to the
    constructor; every other field is optional.
    """

    # Project identifier, e.g. "TCGA-BRCA" (the value used by the default template).
    project: str
    # Data type label, e.g. "Gene Expression" (the value used by the default template).
    data_type: str
    # Optional sample-type value; None when not set.
    sample_type: str | None = None
    # Optional platform value; None when not set.
    platform: str | None = None
    # Defaults to 1000. Presumably an upper bound on the number of files a
    # query returns -- TODO confirm against the code that consumes it.
    max_files: int = 1000
|
|
|
|
|
|
@dataclass
class DownloadConfig:
    """Values held under the ``download`` section of a configuration.

    Only ``out_dir`` is required by the constructor; the other fields have
    defaults.
    """

    # Output directory, stored as a plain string (not a Path).
    out_dir: str
    # Defaults to 4. Presumably the number of worker processes -- TODO confirm
    # against the downloader that reads it.
    processes: int = 4
    # Defaults to 3. Presumably the number of retry attempts -- TODO confirm.
    retries: int = 3
    # Optional token string; None when not set. How it is used is not visible here.
    token: str | None = None
|
|
|
|
|
|
@dataclass
class Config:
    """Top-level configuration: a query section, a download section and
    two logging-related options.
    """

    # The ``query`` section.
    query: QueryConfig
    # The ``download`` section.
    download: DownloadConfig
    # Optional log file location as a string; None when not set.
    log_file: str | None = None
    # Verbosity flag; off by default.
    verbose: bool = False
|
|
|
|
|
|
def load_config(path: Path) -> Config:
    """Load configuration from a JSON or YAML file.

    The format is selected from the file extension (case-insensitive):
    ``.json`` is parsed with :mod:`json`, ``.yaml``/``.yml`` with PyYAML's
    ``safe_load``. The parsed document is then passed to ``_parse_config``.

    Args:
        path: Location of the configuration file.

    Returns:
        The ``Config`` built from the file's contents.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        ImportError: If a YAML file is given but PyYAML is not installed.
        ValueError: If the extension is not ``.json``, ``.yaml`` or ``.yml``.
    """
    if not path.exists():
        raise FileNotFoundError(f"Config file not found: {path}")

    suffix = path.suffix.lower()

    if suffix == ".json":
        # Read as UTF-8 explicitly rather than relying on the platform's
        # locale-dependent default encoding.
        with path.open(encoding="utf-8") as f:
            data = json.load(f)
    elif suffix in {".yaml", ".yml"}:
        # Keep the try body to the import alone so that only a missing
        # PyYAML is reported with the installation hint.
        try:
            import yaml
        except ImportError as e:
            raise ImportError(
                "PyYAML is required for YAML config files. Install with: pip install pyyaml"
            ) from e

        with path.open(encoding="utf-8") as f:
            data = yaml.safe_load(f)
    else:
        raise ValueError(f"Unsupported config format: {suffix}")

    return _parse_config(data)
|
|
|
|
|
|
def _parse_config(data: dict) -> Config:
    """Build a ``Config`` from an already-parsed configuration document.

    Keys that are absent fall back to the defaults written below
    (``max_files`` 1000, ``out_dir`` "./data", ``processes`` 4, ``retries`` 3,
    ``verbose`` False). ``project`` and ``data_type`` have no fallback and
    come through as ``None`` when absent.

    Args:
        data: Top-level mapping of the document. ``None`` (e.g. what a YAML
            loader yields for an empty file) is treated as an empty mapping,
            as is a ``query`` or ``download`` section whose value is ``None``.

    Returns:
        The populated ``Config``.

    Raises:
        ValueError: If the document, or its ``query``/``download`` section,
            is present but is not a mapping.
    """
    if data is None:
        data = {}
    elif not isinstance(data, dict):
        raise ValueError(
            f"Config must be a mapping at the top level, got {type(data).__name__}"
        )

    sections = {}
    for name in ("query", "download"):
        section = data.get(name)
        if section is None:
            # Covers both a missing key and an explicit null section.
            section = {}
        elif not isinstance(section, dict):
            raise ValueError(
                f"Config section '{name}' must be a mapping, got {type(section).__name__}"
            )
        sections[name] = section

    query_data = sections["query"]
    download_data = sections["download"]

    query = QueryConfig(
        project=query_data.get("project"),
        data_type=query_data.get("data_type"),
        sample_type=query_data.get("sample_type"),
        platform=query_data.get("platform"),
        max_files=query_data.get("max_files", 1000),
    )

    download = DownloadConfig(
        out_dir=download_data.get("out_dir", "./data"),
        processes=download_data.get("processes", 4),
        retries=download_data.get("retries", 3),
        token=download_data.get("token"),
    )

    return Config(
        query=query,
        download=download,
        log_file=data.get("log_file"),
        verbose=data.get("verbose", False),
    )
|
|
|
|
|
|
def save_default_config(path: Path) -> None:
    """Save a default configuration template to ``path``.

    The template uses project "TCGA-BRCA", data type "Gene Expression" and
    output directory "./data"; all other fields keep their dataclass
    defaults. The output format follows the file extension
    (case-insensitive): ``.json`` or ``.yaml``/``.yml``.

    Args:
        path: Destination file. It is created or overwritten.

    Raises:
        ValueError: If the extension is not ``.json``, ``.yaml`` or ``.yml``.
        ImportError: If a YAML file is requested but PyYAML is not installed.
    """
    suffix = path.suffix.lower()
    # Reject unsupported formats before doing any other work.
    if suffix not in {".json", ".yaml", ".yml"}:
        raise ValueError(f"Unsupported config format: {suffix}")

    # Build the payload before opening the file, so a failure here cannot
    # leave behind a truncated file.
    payload = _config_to_dict(
        Config(
            query=QueryConfig(project="TCGA-BRCA", data_type="Gene Expression"),
            download=DownloadConfig(out_dir="./data"),
        )
    )

    if suffix == ".json":
        # Explicit UTF-8 so the file is written the same way on every platform.
        with path.open("w", encoding="utf-8") as f:
            json.dump(payload, f, indent=2)
    else:
        # Keep the try body to the import alone so that only a missing
        # PyYAML is reported with the installation hint.
        try:
            import yaml
        except ImportError as e:
            raise ImportError(
                "PyYAML is required for YAML config files. Install with: pip install pyyaml"
            ) from e

        with path.open("w", encoding="utf-8") as f:
            yaml.dump(payload, f, default_flow_style=False)

    logger.info("Default config saved to %s", path)
|
|
|
|
|
|
def _config_to_dict(config: Config) -> dict:
    """Return ``config`` as a plain nested dictionary.

    The result has the keys ``query``, ``download``, ``log_file`` and
    ``verbose`` in that order; ``query`` and ``download`` are themselves
    dictionaries keyed by the field names listed below.
    """
    query_fields = ("project", "data_type", "sample_type", "platform", "max_files")
    download_fields = ("out_dir", "processes", "retries", "token")

    query_section = {name: getattr(config.query, name) for name in query_fields}
    download_section = {
        name: getattr(config.download, name) for name in download_fields
    }

    return {
        "query": query_section,
        "download": download_section,
        "log_file": config.log_file,
        "verbose": config.verbose,
    }
|