Source code for task_geo.data_sources.noaa.noaa_api_connector

"""Connector to the NOAA API.


Contributors:

The journal article describing GHCN-Daily is:
Menne, M.J., I. Durre, R.S. Vose, B.E. Gleason, and T.G. Houston, 2012:  An overview
of the Global Historical Climatology Network-Daily Database.  Journal of Atmospheric
and Oceanic Technology, 29, 897-910, doi:10.1175/JTECH-D-11-00103.1.

To acknowledge the specific version of the dataset used, please cite:
Menne, M.J., I. Durre, B. Korzeniewski, S. McNeal, K. Thomas, X. Yin, S. Anthony, R. Ray,
R.S. Vose, B.E. Gleason, and T.G. Houston, 2012: Global Historical Climatology Network -
Daily (GHCN-Daily), Version 3.26
NOAA National Climatic Data Center. http://doi.org/10.7289/V5D21VHZ [2020/03/30].
"""

import logging
import os
from datetime import datetime

import pandas as pd
import requests

from task_geo.data_sources.noaa.ftp_connector import download_noaa_files
from task_geo.data_sources.noaa.references import (
    COUNTRY_AND_TERRITORY_CODES, DATA_DIRECTORY, TERRITORY_ACTIVE_STATIONS_MAP, load_dataset)

# Module-level side effect: importing this module configures the root logger
# at DEBUG level and raises the urllib3 logger threshold to WARNING.
logging.basicConfig(level=logging.DEBUG)
logging.getLogger("urllib3").setLevel(logging.WARNING)


# Metrics used when the caller does not provide a `metrics` argument.
# 'PRCP' (precipitation) was previously misspelled as 'PCRP', which never
# matches a column of the retrieved data and was therefore silently dropped.
DEFAULT_METRICS = ['TMAX', 'TMIN', 'TAVG', 'PRCP', 'SNOW', 'SNWD']


def get_stations_by_country(country):
    """Get all stations for a given country code.

    The country is first mapped to its territory codes through
    ``COUNTRY_AND_TERRITORY_CODES``; the stations of every territory found in
    ``TERRITORY_ACTIVE_STATIONS_MAP`` are then concatenated in that order.

    Arguments:
        country(str): Key of ``COUNTRY_AND_TERRITORY_CODES``.

    Returns:
        list[str]: Stations of all the territories of the country. Territories
            without an entry in ``TERRITORY_ACTIVE_STATIONS_MAP`` are skipped,
            so the list may be empty.

    Raises:
        ValueError: If ``country`` is not in ``COUNTRY_AND_TERRITORY_CODES``.
    """
    territory_codes = COUNTRY_AND_TERRITORY_CODES.get(country)
    if territory_codes is None:
        # Exceptions do not apply %-style formatting to their arguments (unlike
        # logging calls), so the message has to be interpolated explicitly.
        raise ValueError(f'Wrong country code {country}')

    stations = []
    for code in territory_codes:
        code_stations = TERRITORY_ACTIVE_STATIONS_MAP.get(code)
        if code_stations is not None:
            stations.extend(code_stations)

    return stations
def get_request_urls(country, start_date, end_date=None, metrics=None):
    """Build the GET request URLs needed to query the data of a country.

    The stations of the country are requested in groups of at most 50, and
    one URL is generated per group.

    Arguments:
        country(str): FIPS Country code
        start_date(datetime)
        end_date(datetime): Defaults to today
        metrics(list[str]): Optional. List of metrics to retrieve, valid values are:
            TMIN: Minimum temperature.
            TMAX: Maximum temperature.
            TAVG: Average of temperature.
            SNOW: Snowfall (mm).
            SNWD: Snow depth (mm).
            PRCP: Precipitation.
            Defaults to ``DEFAULT_METRICS``.

    Returns:
        list[str]: One URL per group of stations. A country without stations
            still yields a single URL with an empty ``stations`` value.
    """
    endpoint = 'https://www.ncei.noaa.gov/access/services/data/v1?dataset=daily-summaries'
    batch_size = 50

    selected_metrics = DEFAULT_METRICS if metrics is None else metrics
    trailing_args = '&format=json' + '&units=metric' + f'&dataTypes={",".join(selected_metrics)}'

    last_day = datetime.now() if end_date is None else end_date
    start = start_date.date().isoformat()
    end = last_day.date().isoformat()

    station_ids = get_stations_by_country(country)

    # Slice the stations in consecutive batches; when there are no stations,
    # keep one empty batch so a single URL is still produced.
    batches = [
        station_ids[offset:offset + batch_size]
        for offset in range(0, len(station_ids), batch_size)
    ] or [[]]

    urls = []
    for batch in batches:
        joined = ','.join(batch)
        urls.append(
            f'{endpoint}&stations={joined}&startDate={start}&endDate={end}{trailing_args}'
        )

    return urls
def get_parse_response(urls, timeout=300):
    """Call every url in ``urls`` and collect the decoded responses and the errors.

    Arguments:
        urls(list[str]): Urls as generated by `get_request_urls`.
        timeout(float or tuple): Seconds to wait for the server, passed as-is
            to ``requests.get``. Without it a stalled connection would block
            the whole retrieval indefinitely.

    Returns:
        tuple[list[dict], list[dict]]:
            The first element is a list with the items of all the successful
            JSON responses, in request order.
            The second element has one dictionary per request answered with an
            HTTP error status, with keys ``url`` and ``error``. ``error`` is
            the decoded JSON body of the response, or its raw text when the
            body is not valid JSON.
    """
    results = []
    errors = []
    total = len(urls)

    for position, url in enumerate(urls, start=1):
        logging.debug('Making request %s / %s', position, total)
        response = requests.get(url, timeout=timeout)

        try:
            response.raise_for_status()
        except requests.exceptions.HTTPError:
            # Error bodies are not guaranteed to be JSON (e.g. an HTML page
            # from a gateway); fall back to the raw text rather than letting
            # a decoding error abort the remaining requests.
            try:
                detail = response.json()
            except ValueError:
                detail = response.text

            errors.append({
                'url': url,
                'error': detail,
            })
            continue

        results.extend(response.json())

    return results, errors
def noaa_api_connector(countries, start_date, end_date=None, metrics=None):
    """Get data from NOAA API.

    Arguments:
        countries(list[str]): List of FIPS country codes to retrieve.
        start_date(datetime)
        end_date(datetime)
        metrics(list[str]): Optional. List of metrics to retrieve, valid values are:
            TMIN: Minimum temperature.
            TMAX: Maximum temperature.
            TAVG: Average of temperature.
            SNOW: Snowfall (mm).
            SNWD: Snow depth (mm).
            PRCP: Precipitation.
            Defaults to ``DEFAULT_METRICS``.

    Returns:
        pandas.DataFrame: One row per retrieved record, with the columns DATE,
            STATION, LATITUDE, LONGITUDE, ELEVATION, NAME, GSN FLAG,
            HCN/CRN FLAG and WMO ID, followed by the requested metrics that
            are present in the retrieved data. When no record could be
            retrieved, an empty DataFrame with only the fixed columns is
            returned.
    """
    # The stations metadata file is needed for the merge below; download the
    # (small) NOAA files when it is not available yet.
    if not os.path.isfile(f'{DATA_DIRECTORY}/stations_metadata.txt'):
        download_noaa_files(large_files=False)

    result = []
    for country in countries:
        logging.info('Requesting data for %s', country)
        urls = get_request_urls(country, start_date, end_date, metrics)
        country_results, errors = get_parse_response(urls)

        # Failed requests are only reported; the data from the successful
        # ones is still used.
        if errors:
            logging.info('The following errors where found during the operation:')
            for error in errors:
                logging.info(error)

        result.extend(country_results)

    columns = [
        'DATE', 'STATION', 'LATITUDE', 'LONGITUDE', 'ELEVATION', 'NAME',
        'GSN FLAG', 'HCN/CRN FLAG', 'WMO ID'
    ]

    if not result:
        # A DataFrame built from no records has no 'STATION' column, so the
        # merge below would fail with an unrelated-looking KeyError.
        return pd.DataFrame(columns=columns)

    data = pd.DataFrame(result)
    stations = load_dataset('stations')
    data = data.merge(stations, how='left', left_on='STATION', right_on='ID')

    del data['ID']
    del data['STATE']

    if metrics is None:
        metrics = DEFAULT_METRICS

    # Only the metrics actually present in the retrieved data can be selected.
    columns.extend([metric for metric in metrics if metric in data.columns])

    return data[columns]