Downloading¶
Usage¶
The easiest way to download data is to use the download procedure, which requires a list of tuples (product group, product). The examples below show how to download individual products, so only one tuple is given in each example. To download more than one product in a single call, pass a longer list, for example sarp_list = [('arc2', 'tif'), ('rfe2', 'africa_tif')].
The possible values of product group and product are defined in the configuration file (see Configuration). They can also be accessed using:
from warsa.precipitation.satellite.products import get_satellite_precipitation_groups
spm = get_satellite_precipitation_groups()
print [(g, p) for g, p in spm.group_product_names()]
Output:
[('arc2', 'bin'), ('arc2', 'tif'),
('chirps20', 'global_daily_05_tif'), ('chirps20', 'global_daily_25_tif'),
('cmorph', 'v0x_025deg_3hly'), ('cmorph', 'v0x_025deg_daily'), ('cmorph', 'v0x_8km_30min'),
('cmorph', 'v1x_025deg_3hly'), ('cmorph', 'v1x_025deg_daily'), ('cmorph', 'v1x_8km_30min'),
('gpmimerg', '3b_hhr_early'), ('gpmimerg', '3b_hhr_late'), ('gpmimerg', '3b_hhr_v03'),
('gpmimerg', '3b_hhr_v04'), ('gpmimerg', '3b_hhr_v05'), ('gpmimerg', '3b_mo_v03'),
('gpmimerg', '3b_mo_v04'), ('gpmimerg', '3b_mo_v05'), ('gpmimerg', 'gis_3b_daily_v03'),
('gpmimerg', 'gis_3b_daily_v04'), ('gpmimerg', 'gis_3b_daily_v05'),
('gpmimerg', 'gis_3b_hhr_v03'), ('gpmimerg', 'gis_3b_hhr_v04'),
('gpmimerg', 'gis_3b_hhr_v05'), ('gpmimerg', 'gis_3b_mo_v03'),
('gpmimerg', 'gis_3b_mo_v04'), ('gpmimerg', 'gis_3b_mo_v05'),
('rfe2', 'africa_bin'), ('rfe2', 'africa_tif'), ('rfe2', 'asia_bin'),
('trmmnascom', '3b42_v7x_3h_hd5'), ('trmmnascom', '3b42_v7x_3h_hd5z'),
('trmmnascom', '3b42_v7x_daily_bin'), ('trmmnascom', '3b42_v7x_daily_nc4'),
('trmmnascom', '3b42rt_v7x_3h_bin'), ('trmmnascom', '3b42rt_v7x_3h_nc4'),
('trmmopen', '3b40rt_v7x_3h'), ('trmmopen', '3b41rt_v7x_3h'), ('trmmopen', '3b42_v7x_3h'),
('trmmopen', '3b42_v7x_3h_gis'), ('trmmopen', '3b42rt_v7x_3h'),
('trmmopen', '3b42rt_v7x_3h_gis')]
Examples¶
from warsa.precipitation.satellite.products import download
sarp_list = [('arc2', 'tif')]
download(sarp_list=sarp_list)
from warsa.precipitation.satellite.products import download
sarp_list = [('arc2', 'bin')]
download(sarp_list=sarp_list)
from warsa.precipitation.satellite.products import download
sarp_list = [('rfe2', 'africa_tif')]
download(sarp_list=sarp_list)
from warsa.precipitation.satellite.products import download
sarp_list = [('rfe2', 'africa_bin')]
download(sarp_list=sarp_list)
from warsa.precipitation.satellite.products import download
sarp_list = [('rfe2', 'asia_bin')]
download(sarp_list=sarp_list)
from warsa.precipitation.satellite.products import download
sarp_list = [('chirps20', 'global_daily_05_tif')]
download(sarp_list=sarp_list)
from warsa.precipitation.satellite.products import download
sarp_list = [('chirps20', 'global_daily_25_tif')]
download(sarp_list=sarp_list)
from warsa.precipitation.satellite.products import download
sarp_list = [('cmorph', 'v0x_8km_30min')]
download(sarp_list=sarp_list)
from warsa.precipitation.satellite.products import download
sarp_list = [('cmorph', 'v0x_025deg_3hly')]
download(sarp_list=sarp_list)
from warsa.precipitation.satellite.products import download
sarp_list = [('cmorph', 'v0x_025deg_daily')]
download(sarp_list=sarp_list)
from warsa.precipitation.satellite.products import download
sarp_list = [('cmorph', 'v1x_8km_30min')]
download(sarp_list=sarp_list)
from warsa.precipitation.satellite.products import download
sarp_list = [('cmorph', 'v1x_025deg_3hly')]
download(sarp_list=sarp_list)
from warsa.precipitation.satellite.products import download
sarp_list = [('cmorph', 'v1x_025deg_daily')]
download(sarp_list=sarp_list)
from warsa.precipitation.satellite.products import download
sarp_list = [('gpmimerg', '3b_hhr_early')]
download(sarp_list=sarp_list)
from warsa.precipitation.satellite.products import download
sarp_list = [('gpmimerg', '3b_hhr_late')]
download(sarp_list=sarp_list)
from warsa.precipitation.satellite.products import download
sarp_list = [('gpmimerg', '3b_hhr_v03')]
download(sarp_list=sarp_list)
from warsa.precipitation.satellite.products import download
sarp_list = [('gpmimerg', '3b_hhr_v04')]
download(sarp_list=sarp_list)
from warsa.precipitation.satellite.products import download
sarp_list = [('gpmimerg', '3b_hhr_v05')]
download(sarp_list=sarp_list)
from warsa.precipitation.satellite.products import download
sarp_list = [('gpmimerg', '3b_mo_v03')]
download(sarp_list=sarp_list)
from warsa.precipitation.satellite.products import download
sarp_list = [('gpmimerg', '3b_mo_v04')]
download(sarp_list=sarp_list)
from warsa.precipitation.satellite.products import download
sarp_list = [('gpmimerg', '3b_mo_v05')]
download(sarp_list=sarp_list)
from warsa.precipitation.satellite.products import download
sarp_list = [('gpmimerg', 'gis_3b_daily_v03')]
download(sarp_list=sarp_list)
from warsa.precipitation.satellite.products import download
sarp_list = [('gpmimerg', 'gis_3b_daily_v04')]
download(sarp_list=sarp_list)
from warsa.precipitation.satellite.products import download
sarp_list = [('gpmimerg', 'gis_3b_daily_v05')]
download(sarp_list=sarp_list)
from warsa.precipitation.satellite.products import download
sarp_list = [('gpmimerg', 'gis_3b_hhr_v03')]
download(sarp_list=sarp_list)
from warsa.precipitation.satellite.products import download
sarp_list = [('gpmimerg', 'gis_3b_hhr_v04')]
download(sarp_list=sarp_list)
from warsa.precipitation.satellite.products import download
sarp_list = [('gpmimerg', 'gis_3b_hhr_v05')]
download(sarp_list=sarp_list)
from warsa.precipitation.satellite.products import download
sarp_list = [('gpmimerg', 'gis_3b_mo_v03')]
download(sarp_list=sarp_list)
from warsa.precipitation.satellite.products import download
sarp_list = [('gpmimerg', 'gis_3b_mo_v04')]
download(sarp_list=sarp_list)
from warsa.precipitation.satellite.products import download
sarp_list = [('gpmimerg', 'gis_3b_mo_v05')]
download(sarp_list=sarp_list)
from warsa.precipitation.satellite.products import download
sarp_list = [('trmmopen', '3b40rt_v7x_3h')]
download(sarp_list=sarp_list)
from warsa.precipitation.satellite.products import download
sarp_list = [('trmmopen', '3b41rt_v7x_3h')]
download(sarp_list=sarp_list)
from warsa.precipitation.satellite.products import download
sarp_list = [('trmmopen', '3b42_v7x_3h')]
download(sarp_list=sarp_list)
from warsa.precipitation.satellite.products import download
sarp_list = [('trmmopen', '3b42_v7x_3h_gis')]
download(sarp_list=sarp_list)
from warsa.precipitation.satellite.products import download
sarp_list = [('trmmopen', '3b42rt_v7x_3h')]
download(sarp_list=sarp_list)
from warsa.precipitation.satellite.products import download
sarp_list = [('trmmopen', '3b42rt_v7x_3h_gis')]
download(sarp_list=sarp_list)
General download procedure¶
The general procedure to download data is:
prod = <<ProductDownloadClass>>(local_directory)
or
prod = <<ProductDownloadClass>>(local_directory, user, password)
where:
<<ProductDownloadClass>> is a download class
local_directory:
directory on a local hard-disk. If not given, the directory will be retrieved
from the configuration file (see Configuration)
user and password:
Only required for some products. The user has to register at the provider's site
in order to obtain her/his own user/password
prod.download( update=False, verbose=True, begin=None, end=None)
where:
update:
True: only files starting after the last downloaded file will be downloaded
False: all missing files (gaps) will be also downloaded. Files already downloaded will
not be downloaded
verbose:
True: print messages
begin:
starting date and time
end:
end date and time
The ARC2 example below shows how to use the download class.
from datetime import datetime
from warsa.precipitation.satellite.arc2.download import ARC2AfricaTifFTP
arc2 = ARC2AfricaTifFTP('D:/tmp/arc2')
arc2.download(update=False, verbose=True, begin=datetime(2017, 1, 1), end=datetime(2017, 1, 6))
Output:
Downloading from ftp://ftp.cpc.ncep.noaa.gov to D:/tmp/arc2 (2017-11-19 12:03)
africa_arc.20170101.tif.zip; OK; 1.80 seconds
africa_arc.20170102.tif.zip; OK; 1.78 seconds
africa_arc.20170103.tif.zip; OK; 2.12 seconds
africa_arc.20170104.tif.zip; OK; 2.04 seconds
africa_arc.20170105.tif.zip; OK; 2.29 seconds
africa_arc.20170106.tif.zip; OK; 2.04 seconds
Downloading from ftp://ftp.cpc.ncep.noaa.gov to D:/tmp/arc2 finished in 0.3 minutes (2017-11-19 12:04).
Background¶
Data is downloaded from the provider’s server and saved locally, mirroring the directory structure as found on the provider’s server.
Besides precipitation files, certain products also contain other files, e.g. README.txt. Only precipitation files are downloaded, selected by the prefixes and suffixes pre-established in the code.
The download procedure is either called by the user prior to his/her precipitation analysis, or a server periodically downloads data, keeping the local data up-to-date. The latter is used to operationalize flood forecasting, dam operation, or irrigation management. The download frequency depends on the temporal resolution of the product. The download procedure scans the local directories searching for the last downloaded file, which is overwritten before new files are downloaded. This prevents a possibly corrupted file, saved during the previous download, from being kept in case that download was abruptly interrupted.
It may occur that a data gap from the past is filled later by the provider. Setting update=False ensures that such later-added files are also downloaded. Note that files already downloaded will never be overwritten. Nevertheless, setting update=False will increase the download time. update should be set to True when data is downloaded automatically at a relatively high frequency for operational purposes; a download with update=False should be run sporadically to fill any gaps.
New products¶
All implemented products are derived from the class SatellitePrecipitationFTP, which has the following parameters:
- local_dir: the local directory to mirror the provider’s product
- prefix: used to filter files from subdirectories
- suffix: used to filter files from subdirectories
- dir_lens: the lengths of the subdirectory names under ftp_dir. For example:
- [4, 2] YYYY/MM.
- Example: ftp://disc2.nascom.nasa.gov/data/opendap/TRMM_L3/TRMM_3B42_Daily.7/1998/01/3B42_Daily.19980101.7.nc4
- [4, 3] YYYY/JJJ (day of year).
- Example: ftp://disc2.nascom.nasa.gov/data/opendap/TRMM_RT/TRMM_3B42RT.7/2000/060/3B42RT.2000030100.7R2.nc4
- [4, 6] YYYY/YYYYMM.
- Example: ftp://ftp.cpc.ncep.noaa.gov/precip/CMORPH_V0.x/RAW/8km-30min/2011/201108/CMORPH_V0.x_RAW_8km-30min_2011080100.gz
- [4, 2, 2] YYYY/MM/DD.
- Example: ftp://trmmopen.gsfc.nasa.gov/trmmdata/GIS/2014/01/01/3B42RT.2000030200.03hr.tif
- ftp_host and ftp_dir: the ftp-address and ftp-directory of the product. For a product file ftp://ftp.cpc.ncep.noaa.gov/fews/fewsdata/africa/arc2/bin/daily_clim.bin.19830101.gz: ftp_host=ftp.cpc.ncep.noaa.gov and ftp_dir=/fews/fewsdata/africa/arc2/bin
- ftp_user and ftp_password are optional. Many products do not require them.
- ftp_timeout: timeout in seconds for blocking operations like the connection attempt (the default is 600 seconds)
- product_subfolder: optional parameter used e.g. in GPM-imerg. Normally the product files are found directly under the date folders (a combination of YYYY, MM, DD, and JJJ) as defined in dir_lens. In the example of a GPM-imerg file (ftp://arthurhou.pps.eosdis.nasa.gov/gpmdata/2014/11/07/imerg/3B-HHR.MS.MRG.3IMERG.20141107-S000000-E002959.0000.V03D.HDF5) the product_subfolder imerg follows year/month/day ([4, 2, 2]). Here product_subfolder='imerg'
Class constructor¶
The constructor of the class SatellitePrecipitationFTP requires the following parameters: local_dir, prefix, suffix, dir_lens, ftp_host, ftp_dir, ftp_user=None, ftp_password=None, ftp_timeout=600, and product_subfolder=''. With the exception of the GPM products, which require registration, all parameters but local_dir are defined in the sub-class constructor, for example:
class ARC2BinFTP(SatellitePrecipitationFTP):
    """Download class for the ARC2 binary product.

    Selects files whose names start with 'daily_clim.bin.' and end with
    '.gz'. dir_lens is None for this product.
    """

    def __init__(self, local_dir):
        # local_dir: local directory that mirrors the provider's product.
        # Positional arguments after local_dir: prefix, suffix, dir_lens,
        # ftp_host, ftp_dir.
        super(ARC2BinFTP, self).__init__(local_dir, 'daily_clim.bin.', '.gz', None,
                                         'ftp.cpc.ncep.noaa.gov', '/fews/fewsdata/africa/arc2/bin/')
class CMorphV0x025degDailyFTP(CMorphFTP):
    """Download class for the CMORPH V0.x RAW 0.25deg daily product.

    Selects files whose names start with
    'CMORPH_V0.x_RAW_0.25deg-DLY_00Z_' and end with '.bz2' or 'gz'.
    dir_lens is [4, 6], i.e. the folder structure is YYYY/YYYYMM.
    """

    def __init__(self, local_folder):
        # local_folder: local directory that mirrors the provider's product.
        # NOTE(review): the ftp host is not passed here; presumably CMorphFTP
        # supplies it -- confirm against the CMorphFTP constructor.
        super(CMorphV0x025degDailyFTP, self).__init__(local_folder, 'CMORPH_V0.x_RAW_0.25deg-DLY_00Z_', ['.bz2', 'gz'],
                                                      [4, 6], '/precip/CMORPH_V0.x/RAW/0.25deg-DLY_00Z/')
class GPMImerg3BHHRFTP(SatellitePrecipitationFTP):
    """Download class for the GPM IMERG 3B-HHR product.

    Selects files ending with '.HDF5' (empty prefix). dir_lens is
    [4, 2, 2], i.e. YYYY/MM/DD, followed by product_subfolder. The
    provider requires registration, so user and password must be given.
    """

    def __init__(self, local_dir, product_subfolder, ftp_user, ftp_password):
        # local_dir: local directory that mirrors the provider's product.
        # product_subfolder: folder below the date folders, e.g. 'imerg'.
        # ftp_user / ftp_password: credentials obtained from the provider.
        super(GPMImerg3BHHRFTP, self).__init__(local_dir, '', '.HDF5', [4, 2, 2], 'arthurhou.pps.eosdis.nasa.gov',
                                               '/gpmdata', ftp_user, ftp_password, product_subfolder=product_subfolder)
The class ARC2BinFTP defines that only files starting with the prefix 'daily_clim.bin.' and ending with the suffix '.gz' will be downloaded. This product has dir_lens=None. The class CMorphV0x025degDailyFTP(CMorphFTP) defines that files starting with 'CMORPH_V0.x_RAW_0.25deg-DLY_00Z_' and ending with '.bz2' or 'gz' will be downloaded. The folder structure is YYYY/YYYYMM. Note that the classes ARC2BinFTP and CMorphV0x025degDailyFTP take only the local directory as parameter, while GPMImerg3BHHRFTP has four parameters: local_dir, product_subfolder, ftp_user, ftp_password.
Class methods¶
Two methods must also be defined in classes inheriting from SatellitePrecipitationFTP:
- get_datetime_from_file_name, used to extract the date and time from the file name
- get_full_dir_name, used to retrieve the full path for a given date and time
Examples for get_datetime_from_file_name:
def get_datetime_from_file_name(self, filename):
    """Return the date encoded in an ARC2 tif file name.

    The date is the third dot-separated field counted from the end of
    the name and has the format YYYYMMDD.
    """
    # filename = africa_arc.19830101.tif.zip
    return datetime.datetime.strptime(filename.split('.')[-3], '%Y%m%d')
def get_datetime_from_file_name(self, filename):
    """Return the start date and time encoded in a GPM IMERG file name.

    The fifth dot-separated field of the base name has the form
    YYYYMMDD-Shhmmss-Ehhmmss; the date and the start time (without the
    leading 'S') are combined and parsed.
    """
    # filename = 3B-HHR.MS.MRG.3IMERG.20141107-S000000-E002959.0000.V03D.HDF5
    s = os.path.basename(filename).split('.')[4].split('-')
    return datetime.datetime.strptime(''.join([s[0], s[1][1:]]), '%Y%m%d%H%M%S')
Examples for get_full_dir_name:
def get_full_dir_name(self, dt):
    """Return the local directory for the date and time dt.

    With dir_lens = None there are no date sub-directories, so dt is
    ignored and the local directory itself is returned.
    """
    # self.dir_lens = None
    return self.local_dir
def get_full_dir_name(self, dt):
    """Return the local directory for the date and time dt.

    With dir_lens = [4] the files are stored in one sub-directory per
    year (YYYY).
    """
    # self.dir_lens = [4]
    return self.local_dir + '/' + str(dt.year)
def get_full_dir_name(self, dt):
    """Return the local directory for the date and time dt.

    With dir_lens = [6] the files are stored in one sub-directory per
    month, named YYYYMM (month zero-padded to two digits).
    """
    # self.dir_lens = [6]
    return '/'.join([self.local_dir, str(dt.year) + str(dt.month).zfill(2)])
def get_full_dir_name(self, dt):
    """Return the local directory for the date and time dt.

    With dir_lens = [4, 2] the folder structure is YYYY/MM (month
    zero-padded to two digits).
    """
    # self.dir_lens = [4, 2]
    return '/'.join([self.local_dir, str(dt.year), str(dt.month).zfill(2)])
def get_full_dir_name(self, dt):
    """Return the local directory for the date and time dt.

    With dir_lens = [4, 3] the folder structure is YYYY/JJJ, where JJJ
    is the day of the year zero-padded to three digits.
    """
    # self.dir_lens = [4, 3]
    # The year folder is required: without it the path would have a single
    # level (JJJ) and days of different years would share one directory.
    return '/'.join([self.local_dir, str(dt.year), str(dt.timetuple().tm_yday).zfill(3)])
def get_full_dir_name(self, dt):
    """Return the local directory for the date and time dt.

    With dir_lens = [4, 6] the folder structure is YYYY/YYYYMM, e.g.
    2011/201108 for August 2011.
    """
    # self.dir_lens = [4, 6]
    return '/'.join([self.local_dir, str(dt.year), str(dt.year) + str(dt.month).zfill(2)])
def get_full_dir_name(self, dt):
    """Return the local directory for the date and time dt.

    With dir_lens = [4, 2, 2] and a product sub-folder the folder
    structure is YYYY/MM/DD/<product_subfolder>, with month and day
    zero-padded to two digits.
    """
    # self.dir_lens [4, 2, 2] in case product_subfolder is defined
    return '/'.join([self.local_dir, str(dt.year), str(dt.month).zfill(2), str(dt.day).zfill(2),
                     self.product_subfolder])