63 lines
1.7 KiB
Python
Executable File
63 lines
1.7 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
|
|
"""
|
|
Example: Validate manifest and downloaded files.
|
|
"""
|
|
|
|
from pathlib import Path
|
|
|
|
from tcga_downloader.manifest import (
|
|
load_manifest,
|
|
validate_files_against_manifest,
|
|
validate_manifest,
|
|
)
|
|
|
|
|
|
def _print_truncated(items, limit=10):
    """Print at most *limit* entries of *items* as a bulleted list.

    When *items* holds more than *limit* entries, a trailing line reports
    how many were left out so long listings stay readable.
    """
    for item in items[:limit]:
        print(f" - {item}")
    if len(items) > limit:
        print(f" ... and {len(items) - limit} more")


def main():
    """Validate ``manifest.tsv`` and the downloaded files under ``data``.

    Loads the manifest, reports any structural errors returned by
    ``validate_manifest`` and, when the ``data`` directory exists, reports
    the missing files and checksum errors returned by
    ``validate_files_against_manifest``. File validation is skipped (and not
    counted as a failure) when the data directory does not exist.

    Returns:
        int: ``0`` when no problem was reported, ``1`` when manifest errors,
        missing files or checksum errors were found. Suitable for use as a
        process exit status.
    """
    manifest_path = Path("manifest.tsv")
    data_dir = Path("data")

    print("Loading manifest...")
    records = load_manifest(manifest_path)
    print(f"Loaded {len(records)} records from manifest")

    print("\nValidating manifest structure...")
    errors = validate_manifest(records)
    if errors:
        print("Manifest validation errors found:")
        for error in errors:
            print(f" - {error}")
    else:
        print("Manifest structure is valid!")

    # Track the overall outcome so the caller can turn it into an exit code.
    ok = not errors

    if not data_dir.exists():
        print(f"\nData directory {data_dir} does not exist. Skipping file validation.")
        return 0 if ok else 1

    print("\nValidating downloaded files against manifest...")
    missing, checksum_errors = validate_files_against_manifest(records, data_dir)

    if missing:
        print(f"\nMissing files ({len(missing)}):")
        _print_truncated(missing)

    if checksum_errors:
        print(f"\nChecksum errors ({len(checksum_errors)}):")
        _print_truncated(checksum_errors)

    if missing or checksum_errors:
        # Checksum mismatches need a re-download too, not only missing files.
        print("\nValidation failed. Please re-download missing or corrupted files.")
        ok = False
    else:
        print("\nAll files are present and checksums match!")

    return 0 if ok else 1


if __name__ == "__main__":
    # Propagate the validation outcome to the shell as the exit status.
    raise SystemExit(main())
|