# Source code for lightcurvelynx.utils.data_download

"""Basic functions for downloading data using pooch."""

import logging
from pathlib import Path

import pooch

from lightcurvelynx.utils.io_utils import SquashLogging

# Module-level logger, named after this module so that applications can
# configure or filter its output independently.
logger = logging.getLogger(__name__)
def download_data_file_if_needed(
    data_path,
    data_url,
    force_download=False,
    silent=False,
):
    """Download a data file from a URL and save it to a specified path.

    Parameters
    ----------
    data_path : str or Path
        The path to the data file. This is where the downloaded file
        will be written.
    data_url : str
        The URL to download the data file.
    force_download : bool, optional
        If True, the file will be downloaded even if it already exists.
        Any existing file at ``data_path`` is removed before the download.
        Default is False.
    silent : bool, optional
        If True, suppress print statements from the download process.
        Default is False.

    Returns
    -------
    bool
        True if the file already exists (and no download was forced) or if
        the download was successful, False otherwise.

    Raises
    ------
    ValueError
        If a download is needed but ``data_url`` is None or empty.
    """
    # Start by checking if the file already exists and if we are not forcing a download.
    data_path = Path(data_path)
    if not force_download and data_path.exists():
        logger.info("Data file %s already exists. Skipping download.", data_path)
        return True

    # Check that there is a valid URL for the download. This is done before
    # touching the file system so that a bad call never removes an existing file.
    if not data_url:
        raise ValueError("No URL given for table download.")

    if not silent:
        print(f"Downloading data file from {data_url} to {data_path}")

    # Create the directory in which to save the file if it does not already exist.
    data_path.parent.mkdir(parents=True, exist_ok=True)

    # With known_hash=None, pooch.retrieve() returns an already-present file
    # as-is instead of fetching it again. Remove the old copy so that
    # force_download really triggers a fresh download.
    if force_download and data_path.is_file():
        data_path.unlink()

    # Use pooch to download the data files and extract them to the data directory.
    # We use SquashLogging to suppress the INFO output from pooch, which is not
    # often useful to the user.
    with SquashLogging(logger=pooch.utils.get_logger(), level=logging.WARNING):
        full_path = pooch.retrieve(
            url=data_url,
            known_hash=None,
            fname=data_path.name,
            path=data_path.parent,
        )

    if full_path is None or not Path(full_path).exists():
        logger.error("Data file not downloaded from %s.", data_url)
        return False
    return True