Downloading

Usage

The easiest way to download data is to use the procedure download, which requires a list of (product group, product) tuples. Each example below downloads a single product, since only one tuple is given per example. A list for downloading more than one product in one command would be, for example, sarp_list = [('arc2', 'tif'), ('rfe2', 'africa_tif')].

The possible values of product group and product are defined in the configuration file (see Configuration). They can also be accessed using:

from warsa.precipitation.satellite.products import get_satellite_precipitation_groups

# List every (product group, product) pair defined in the configuration file.
spm = get_satellite_precipitation_groups()
# print() with a single argument gives the same output on Python 2 and
# Python 3; the bare ``print [...]`` statement is a SyntaxError on Python 3.
print([(g, p) for g, p in spm.group_product_names()])

Output:

[('arc2', 'bin'), ('arc2', 'tif'),
('chirps20', 'global_daily_05_tif'), ('chirps20', 'global_daily_25_tif'),
('cmorph', 'v0x_025deg_3hly'), ('cmorph', 'v0x_025deg_daily'), ('cmorph', 'v0x_8km_30min'),
('cmorph', 'v1x_025deg_3hly'), ('cmorph', 'v1x_025deg_daily'), ('cmorph', 'v1x_8km_30min'),
('gpmimerg', '3b_hhr_early'), ('gpmimerg', '3b_hhr_late'), ('gpmimerg', '3b_hhr_v03'),
('gpmimerg', '3b_hhr_v04'), ('gpmimerg', '3b_hhr_v05'), ('gpmimerg', '3b_mo_v03'),
('gpmimerg', '3b_mo_v04'), ('gpmimerg', '3b_mo_v05'), ('gpmimerg', 'gis_3b_daily_v03'),
('gpmimerg', 'gis_3b_daily_v04'), ('gpmimerg', 'gis_3b_daily_v05'),
('gpmimerg', 'gis_3b_hhr_v03'), ('gpmimerg', 'gis_3b_hhr_v04'),
('gpmimerg', 'gis_3b_hhr_v05'), ('gpmimerg', 'gis_3b_mo_v03'),
('gpmimerg', 'gis_3b_mo_v04'), ('gpmimerg', 'gis_3b_mo_v05'),
('rfe2', 'africa_bin'), ('rfe2', 'africa_tif'), ('rfe2', 'asia_bin'),
('trmmnascom', '3b42_v7x_3h_hd5'), ('trmmnascom', '3b42_v7x_3h_hd5z'),
('trmmnascom', '3b42_v7x_daily_bin'), ('trmmnascom', '3b42_v7x_daily_nc4'),
('trmmnascom', '3b42rt_v7x_3h_bin'), ('trmmnascom', '3b42rt_v7x_3h_nc4'),
('trmmopen', '3b40rt_v7x_3h'), ('trmmopen', '3b41rt_v7x_3h'), ('trmmopen', '3b42_v7x_3h'),
('trmmopen', '3b42_v7x_3h_gis'), ('trmmopen', '3b42rt_v7x_3h'),
('trmmopen', '3b42rt_v7x_3h_gis')]

Examples

# Each three-line group below is an independent example that downloads exactly
# one (product group, product) pair. The import is repeated in every group so
# that any single example can be copied and run on its own.

# Example: ('arc2', 'tif')
from warsa.precipitation.satellite.products import download
sarp_list = [('arc2', 'tif')]
download(sarp_list=sarp_list)
# Example: ('arc2', 'bin')
from warsa.precipitation.satellite.products import download
sarp_list = [('arc2', 'bin')]
download(sarp_list=sarp_list)
# Example: ('rfe2', 'africa_tif')
from warsa.precipitation.satellite.products import download
sarp_list = [('rfe2', 'africa_tif')]
download(sarp_list=sarp_list)
# Example: ('rfe2', 'africa_bin')
from warsa.precipitation.satellite.products import download
sarp_list = [('rfe2', 'africa_bin')]
download(sarp_list=sarp_list)
# Example: ('rfe2', 'asia_bin')
from warsa.precipitation.satellite.products import download
sarp_list = [('rfe2', 'asia_bin')]
download(sarp_list=sarp_list)
# Example: ('chirps20', 'global_daily_05_tif')
from warsa.precipitation.satellite.products import download
sarp_list = [('chirps20', 'global_daily_05_tif')]
download(sarp_list=sarp_list)
# Example: ('chirps20', 'global_daily_25_tif')
from warsa.precipitation.satellite.products import download
sarp_list = [('chirps20', 'global_daily_25_tif')]
download(sarp_list=sarp_list)
# Example: ('cmorph', 'v0x_8km_30min')
from warsa.precipitation.satellite.products import download
sarp_list = [('cmorph', 'v0x_8km_30min')]
download(sarp_list=sarp_list)
# Example: ('cmorph', 'v0x_025deg_3hly')
from warsa.precipitation.satellite.products import download
sarp_list = [('cmorph', 'v0x_025deg_3hly')]
download(sarp_list=sarp_list)
# Example: ('cmorph', 'v0x_025deg_daily')
from warsa.precipitation.satellite.products import download
sarp_list = [('cmorph', 'v0x_025deg_daily')]
download(sarp_list=sarp_list)
# Example: ('cmorph', 'v1x_8km_30min')
from warsa.precipitation.satellite.products import download
sarp_list = [('cmorph', 'v1x_8km_30min')]
download(sarp_list=sarp_list)
# Example: ('cmorph', 'v1x_025deg_3hly')
from warsa.precipitation.satellite.products import download
sarp_list = [('cmorph', 'v1x_025deg_3hly')]
download(sarp_list=sarp_list)
# Example: ('cmorph', 'v1x_025deg_daily')
from warsa.precipitation.satellite.products import download
sarp_list = [('cmorph', 'v1x_025deg_daily')]
download(sarp_list=sarp_list)
# Example: ('gpmimerg', '3b_hhr_early')
from warsa.precipitation.satellite.products import download
sarp_list = [('gpmimerg', '3b_hhr_early')]
download(sarp_list=sarp_list)
# Example: ('gpmimerg', '3b_hhr_late')
from warsa.precipitation.satellite.products import download
sarp_list = [('gpmimerg', '3b_hhr_late')]
download(sarp_list=sarp_list)
# Example: ('gpmimerg', '3b_hhr_v03')
from warsa.precipitation.satellite.products import download
sarp_list = [('gpmimerg', '3b_hhr_v03')]
download(sarp_list=sarp_list)
# Example: ('gpmimerg', '3b_hhr_v04')
from warsa.precipitation.satellite.products import download
sarp_list = [('gpmimerg', '3b_hhr_v04')]
download(sarp_list=sarp_list)
# Example: ('gpmimerg', '3b_hhr_v05')
from warsa.precipitation.satellite.products import download
sarp_list = [('gpmimerg', '3b_hhr_v05')]
download(sarp_list=sarp_list)
# Example: ('gpmimerg', '3b_mo_v03')
from warsa.precipitation.satellite.products import download
sarp_list = [('gpmimerg', '3b_mo_v03')]
download(sarp_list=sarp_list)
# Example: ('gpmimerg', '3b_mo_v04')
from warsa.precipitation.satellite.products import download
sarp_list = [('gpmimerg', '3b_mo_v04')]
download(sarp_list=sarp_list)
# Example: ('gpmimerg', '3b_mo_v05')
from warsa.precipitation.satellite.products import download
sarp_list = [('gpmimerg', '3b_mo_v05')]
download(sarp_list=sarp_list)
# Example: ('gpmimerg', 'gis_3b_daily_v03')
from warsa.precipitation.satellite.products import download
sarp_list = [('gpmimerg', 'gis_3b_daily_v03')]
download(sarp_list=sarp_list)
# Example: ('gpmimerg', 'gis_3b_daily_v04')
from warsa.precipitation.satellite.products import download
sarp_list = [('gpmimerg', 'gis_3b_daily_v04')]
download(sarp_list=sarp_list)
# Example: ('gpmimerg', 'gis_3b_daily_v05')
from warsa.precipitation.satellite.products import download
sarp_list = [('gpmimerg', 'gis_3b_daily_v05')]
download(sarp_list=sarp_list)
# Example: ('gpmimerg', 'gis_3b_hhr_v03')
from warsa.precipitation.satellite.products import download
sarp_list = [('gpmimerg', 'gis_3b_hhr_v03')]
download(sarp_list=sarp_list)
# Example: ('gpmimerg', 'gis_3b_hhr_v04')
from warsa.precipitation.satellite.products import download
sarp_list = [('gpmimerg', 'gis_3b_hhr_v04')]
download(sarp_list=sarp_list)
# Example: ('gpmimerg', 'gis_3b_hhr_v05')
from warsa.precipitation.satellite.products import download
sarp_list = [('gpmimerg', 'gis_3b_hhr_v05')]
download(sarp_list=sarp_list)
# Example: ('gpmimerg', 'gis_3b_mo_v03')
from warsa.precipitation.satellite.products import download
sarp_list = [('gpmimerg', 'gis_3b_mo_v03')]
download(sarp_list=sarp_list)
# Example: ('gpmimerg', 'gis_3b_mo_v04')
from warsa.precipitation.satellite.products import download
sarp_list = [('gpmimerg', 'gis_3b_mo_v04')]
download(sarp_list=sarp_list)
# Example: ('gpmimerg', 'gis_3b_mo_v05')
from warsa.precipitation.satellite.products import download
sarp_list = [('gpmimerg', 'gis_3b_mo_v05')]
download(sarp_list=sarp_list)
# Example: ('trmmopen', '3b40rt_v7x_3h')
from warsa.precipitation.satellite.products import download
sarp_list = [('trmmopen', '3b40rt_v7x_3h')]
download(sarp_list=sarp_list)
# Example: ('trmmopen', '3b41rt_v7x_3h')
from warsa.precipitation.satellite.products import download
sarp_list = [('trmmopen', '3b41rt_v7x_3h')]
download(sarp_list=sarp_list)
# Example: ('trmmopen', '3b42_v7x_3h')
from warsa.precipitation.satellite.products import download
sarp_list = [('trmmopen', '3b42_v7x_3h')]
download(sarp_list=sarp_list)
# Example: ('trmmopen', '3b42_v7x_3h_gis')
from warsa.precipitation.satellite.products import download
sarp_list = [('trmmopen', '3b42_v7x_3h_gis')]
download(sarp_list=sarp_list)
# Example: ('trmmopen', '3b42rt_v7x_3h')
from warsa.precipitation.satellite.products import download
sarp_list = [('trmmopen', '3b42rt_v7x_3h')]
download(sarp_list=sarp_list)
# Example: ('trmmopen', '3b42rt_v7x_3h_gis')
from warsa.precipitation.satellite.products import download
sarp_list = [('trmmopen', '3b42rt_v7x_3h_gis')]
download(sarp_list=sarp_list)

General download procedure

The general procedure to download data is:

prod = <<ProductDownloadClass>>(local_directory)
or
prod = <<ProductDownloadClass>>(local_directory, user, password)

where:
    <<ProductDownloadClass>> is a download class
    local_directory:
        directory on a local hard-disk. If not given, the directory will be retrieved
        from the configuration file (see Configuration)
    user and password:
        Only required for some products. The user has to register at the provider's site
        in order to obtain her/his own user/password

prod.download(update=False, verbose=True, begin=None, end=None)

where:
    update:
        True: only files starting after the last downloaded file will be downloaded
        False: all missing files (gaps) will also be downloaded. Files already downloaded will
               not be downloaded again
    verbose:
        True: print messages
    begin:
        starting date and time
    end:
        end date and time

The ARC2 example below shows how to use the download class.

from datetime import datetime
from warsa.precipitation.satellite.arc2.download import ARC2AfricaTifFTP

# Period to fetch: 1 January 2017 up to 6 January 2017.
first_day = datetime(2017, 1, 1)
last_day = datetime(2017, 1, 6)

# The download class is created with the local target directory, then asked
# to fetch the requested period while printing progress messages.
downloader = ARC2AfricaTifFTP('D:/tmp/arc2')
downloader.download(update=False, verbose=True, begin=first_day, end=last_day)

Output:

Downloading from ftp://ftp.cpc.ncep.noaa.gov to D:/tmp/arc2 (2017-11-19 12:03)
africa_arc.20170101.tif.zip; OK; 1.80 seconds
africa_arc.20170102.tif.zip; OK; 1.78 seconds
africa_arc.20170103.tif.zip; OK; 2.12 seconds
africa_arc.20170104.tif.zip; OK; 2.04 seconds
africa_arc.20170105.tif.zip; OK; 2.29 seconds
africa_arc.20170106.tif.zip; OK; 2.04 seconds
Downloading from ftp://ftp.cpc.ncep.noaa.gov to D:/tmp/arc2 finished in 0.3 minutes (2017-11-19 12:04).

Background

Data is downloaded from the provider’s server and saved locally, mirroring the directory structure as found on the provider’s server.

Besides precipitation files, certain products also contain other files, e.g. README.txt. Only precipitation files are downloaded, selected by the prefixes and suffixes pre-established in the code.

The download procedure is either called by the user prior to his/her precipitation analysis, or a server periodically downloads data, keeping the local data up-to-date. The latter is used to operationalize flood forecasting, dam operation, or irrigation management. The download frequency depends on the temporal resolution of the product. The download procedure scans the local directories searching for the last downloaded file, which is overwritten before new files are downloaded. This avoids keeping a possibly corrupted file saved during the previous download procedure, in case that procedure was abruptly interrupted.

It may occur that a data gap from the past is filled later by the provider. Setting update=False ensures that such later-added files are also downloaded. Note that files already downloaded will never be overwritten. Nevertheless, setting update=False will increase the download time. update should therefore be set to True when data is downloaded automatically at a relatively high frequency for operational purposes, and a download with update=False should be run sporadically to fill possible gaps.

New products

All implemented products are derived from the class SatellitePrecipitationFTP, which has the following parameters:

Class constructor

The constructor of the class SatellitePrecipitationFTP requires the following parameters: local_dir, prefix, suffix, dir_lens, ftp_host, ftp_dir, ftp_user=None, ftp_password=None, ftp_timeout=600, and product_subfolder=''. With the exception of the GPM products, which require registration, all parameters but local_dir are defined in the sub-class constructor, for example:

class ARC2BinFTP(SatellitePrecipitationFTP):
    """Download class whose file filter and FTP location are fixed here.

    Only ``local_dir`` is supplied by the caller; every other argument of
    the base-class constructor is hard-coded in ``__init__``.
    """

    def __init__(self, local_dir):
        # Positional order passed to the base class:
        # local_dir, prefix, suffix, dir_lens, ftp_host, ftp_dir
        file_prefix = 'daily_clim.bin.'
        file_suffix = '.gz'
        remote_host = 'ftp.cpc.ncep.noaa.gov'
        remote_dir = '/fews/fewsdata/africa/arc2/bin/'
        # dir_lens is None for this product.
        super(ARC2BinFTP, self).__init__(
            local_dir, file_prefix, file_suffix, None, remote_host, remote_dir)

class CMorphV0x025degDailyFTP(CMorphFTP):
    """Download class with a fixed file filter and remote directory.

    Only ``local_folder`` is supplied by the caller; the remaining arguments
    handed to the ``CMorphFTP`` constructor are hard-coded in ``__init__``.
    """

    def __init__(self, local_folder):
        file_prefix = 'CMORPH_V0.x_RAW_0.25deg-DLY_00Z_'
        # NOTE(review): 'gz' has no leading dot, unlike '.bz2' -- confirm
        # whether this is intentional.
        file_suffixes = ['.bz2', 'gz']
        # Folder-name lengths: 4 and 6 characters (YYYY/YYYYMM).
        folder_lens = [4, 6]
        remote_dir = '/precip/CMORPH_V0.x/RAW/0.25deg-DLY_00Z/'
        super(CMorphV0x025degDailyFTP, self).__init__(
            local_folder, file_prefix, file_suffixes, folder_lens, remote_dir)

class GPMImerg3BHHRFTP(SatellitePrecipitationFTP):
    """Download class that needs FTP credentials and a product sub-folder.

    Unlike the classes that take only a local directory, the caller must
    also pass ``product_subfolder``, ``ftp_user`` and ``ftp_password``.
    """

    def __init__(self, local_dir, product_subfolder, ftp_user, ftp_password):
        # No file-name prefix filter; only files ending in '.HDF5' qualify.
        file_prefix = ''
        file_suffix = '.HDF5'
        # Folder-name lengths: 4, 2 and 2 characters.
        folder_lens = [4, 2, 2]
        remote_host = 'arthurhou.pps.eosdis.nasa.gov'
        remote_dir = '/gpmdata'
        super(GPMImerg3BHHRFTP, self).__init__(
            local_dir, file_prefix, file_suffix, folder_lens,
            remote_host, remote_dir, ftp_user, ftp_password,
            product_subfolder=product_subfolder)

The class ARC2BinFTP defines that only files starting with the prefix 'daily_clim.bin.' and ending with the suffix '.gz' will be downloaded. This product has dir_lens=None. The class CMorphV0x025degDailyFTP (derived from CMorphFTP) defines that files starting with 'CMORPH_V0.x_RAW_0.25deg-DLY_00Z_' and ending with '.bz2' or 'gz' will be downloaded. Its folder structure is YYYY/YYYYMM (dir_lens=[4, 6]). Note that the constructors of ARC2BinFTP and CMorphV0x025degDailyFTP take only the local directory (local_dir and local_folder, respectively) as parameter, while GPMImerg3BHHRFTP takes four parameters: local_dir, product_subfolder, ftp_user, and ftp_password.

Class methods

Two methods must also be defined in classes inheriting from SatellitePrecipitationFTP:

  • get_datetime_from_file_name, used to extract the date and time from the file name
  • get_full_dir_name, used to retrieve the full path for a given date and time

Examples for get_datetime_from_file_name:

def get_datetime_from_file_name(self, filename):
    """Return the date encoded in a file name as a ``datetime``.

    The date is the third dot-separated field counted from the end of the
    name, in ``YYYYMMDD`` form, e.g. ``africa_arc.19830101.tif.zip``
    yields 1983-01-01.
    """
    fields = filename.split('.')
    date_field = fields[-3]
    return datetime.datetime.strptime(date_field, '%Y%m%d')

def get_datetime_from_file_name(self, filename):
    """Return the start date and time encoded in a file name.

    Example name:
    ``B-HHR.MS.MRG.3IMERG.20141107-S000000-E002959.0000.V03D.HDF5``.
    The fifth dot-separated field of the base name holds
    ``<YYYYMMDD>-S<HHMMSS>-E<HHMMSS>``; the date and the ``S`` part (without
    its leading letter) are combined into the returned ``datetime``.
    """
    base_name = os.path.basename(filename)
    stamp_parts = base_name.split('.')[4].split('-')
    date_part = stamp_parts[0]
    # Drop the leading 'S' marker from the start time.
    start_time_part = stamp_parts[1][1:]
    return datetime.datetime.strptime(date_part + start_time_part, '%Y%m%d%H%M%S')

Examples for get_full_dir_name:

def get_full_dir_name(self, dt):
    """Return the local directory for the given date and time.

    Variant for ``self.dir_lens = None``: ``dt`` is not used and the result
    is always ``self.local_dir``.
    """
    flat_dir = self.local_dir
    return flat_dir

def get_full_dir_name(self, dt):
    """Return the local directory for the given date and time.

    Variant for ``self.dir_lens = [4]``: one sub-folder per year, named with
    the year of ``dt``.
    """
    year_folder = str(dt.year)
    return '/'.join([self.local_dir, year_folder])

def get_full_dir_name(self, dt):
    """Return the local directory for the given date and time.

    Variant for ``self.dir_lens = [6]``: a single sub-folder named with the
    year followed by the zero-padded month of ``dt``.
    """
    year_text = str(dt.year)
    month_text = str(dt.month).zfill(2)
    year_month_folder = year_text + month_text
    return '/'.join([self.local_dir, year_month_folder])

def get_full_dir_name(self, dt):
    """Return the local directory for the given date and time.

    Variant for ``self.dir_lens = [4, 2]``: a year sub-folder containing a
    zero-padded month sub-folder.
    """
    year_folder = str(dt.year)
    month_folder = str(dt.month).zfill(2)
    path_parts = [self.local_dir, year_folder, month_folder]
    return '/'.join(path_parts)

def get_full_dir_name(self, dt):
    """Return the local directory for the given date and time.

    Variant for ``self.dir_lens = [4, 3]``: a 4-character year sub-folder
    containing a 3-character, zero-padded day-of-year sub-folder.
    """
    year_folder = str(dt.year)
    # tm_yday is the day of the year (1..366); pad it to three characters.
    day_of_year_folder = str(dt.timetuple().tm_yday).zfill(3)
    return '/'.join([self.local_dir, year_folder, day_of_year_folder])

def get_full_dir_name(self, dt):
    """Return the local directory for the given date and time.

    Variant for ``self.dir_lens = [4, 2, 2]`` when ``product_subfolder`` is
    defined: year, zero-padded month and zero-padded day sub-folders,
    followed by ``self.product_subfolder``.
    """
    year_folder = str(dt.year)
    month_folder = str(dt.month).zfill(2)
    day_folder = str(dt.day).zfill(2)
    path_parts = [self.local_dir, year_folder, month_folder, day_folder,
                  self.product_subfolder]
    return '/'.join(path_parts)