# Source code for task_geo.dataset_builders.nasa.nasa_connector

import itertools

import pandas as pd
import requests

from task_geo.dataset_builders.nasa.references import PARAMETERS


def nasa_data_loc(lat, lon, str_start_date, str_end_date, parms_str,
                  timeout=120):
    """
    Extract data for a single location.

    Sends one GET request to the NASA POWER ``DataAccess.py`` endpoint
    (single point, daily averages, SSE user community) and builds a
    DataFrame from the ``parameter`` mapping of the first feature in the
    JSON response.

    Parameters
    ----------
    lat : string
        Latitude; interpolated verbatim into the query string, so any value
        whose string form the service accepts will work.
    lon : string
        Longitude; interpolated verbatim into the query string.
    str_start_date : string
        Start date, already formatted for the service
        (``nasa_connector`` passes ``YYYYMMDD``).
    str_end_date : string
        End date, same format as ``str_start_date``.
    parms_str : string
        Complete ``parameters=...`` query fragment.
    timeout : float or tuple, optional
        Passed to ``requests.get``. Without it a stalled connection would
        block the caller forever. Defaults to 120 seconds.

    Returns
    -------
    df : pandas.DataFrame
        One column per key of the response's ``parameter`` mapping, plus
        constant ``lon`` and ``lat`` columns. The index is whatever the
        inner keys of that mapping are (presumably date strings -- confirm
        against the API response).

    Raises
    ------
    requests.exceptions.HTTPError
        If the service answers with a 4xx/5xx status.
    requests.exceptions.Timeout
        If the service does not answer within ``timeout``.
    """
    base_url = "https://power.larc.nasa.gov/cgi-bin/v1/DataAccess.py"
    identifier = "identifier=SinglePoint"
    user_community = "userCommunity=SSE"
    temporal_average = "tempAverage=DAILY"
    output_format = "outputList=JSON,ASCII"
    user = "user=anonymous"

    # The query string is assembled by hand (rather than via ``params=``) so
    # the comma-separated lists reach the service exactly as written.
    url = (
        f"{base_url}?request=execute&{identifier}&{parms_str}&"
        f"startDate={str_start_date}&endDate={str_end_date}&"
        f"lat={lat}&lon={lon}&{temporal_average}&{output_format}&"
        f"{user_community}&{user}"
    )

    response = requests.get(url, timeout=timeout)
    # Fail with a clear HTTP error instead of an opaque KeyError/JSON error
    # further down when the service rejects the request.
    response.raise_for_status()
    data_json = response.json()

    df = pd.DataFrame(data_json['features'][0]['properties']['parameter'])
    df['lon'] = lon
    df['lat'] = lat

    return df
def nasa_connector(df_locations, start_date, end_date=None, parms=None):
    """Retrieve meteorologic data from NASA.

    For each distinct location in ``df_locations`` that has both a
    longitude and a latitude, query NASA for the requested time series via
    ``nasa_data_loc`` and concatenate the per-location results.

    Arguments:
    ---------
        df_locations(pandas.DataFrame): Dataset with columns country,
            region, sub_region, lon, and lat. All five columns must be
            present; rows missing lon or lat are skipped.
        start_date(datetime): Start date for the time series
        end_date(datetime): End date for the time series (optional,
            defaults to today)
        parms(list of strings): Desired data; each entry must be a key of
            ``PARAMETERS`` (optional, defaults to every key)

    Return:
    ------
        pandas.DataFrame: The frames returned by ``nasa_data_loc`` stacked
            on top of each other: the retrieved data columns plus lon and
            lat. Country, region and sub_region are NOT added here. If no
            location has usable coordinates, an empty DataFrame is
            returned.
    """
    if parms is None:
        parms = list(PARAMETERS.keys())

    # A location can only be queried when BOTH coordinates are known, so drop
    # rows where either one is missing (the default ``how='any'``).
    df_locations = df_locations.dropna(subset=['lon', 'lat'])

    location_data = ['country', 'region', 'sub_region', 'lon', 'lat']
    locations = df_locations[location_data].drop_duplicates()

    if end_date is None:
        end_date = pd.Timestamp.today()

    # The service expects compact YYYYMMDD dates.
    str_start_date = start_date.strftime('%Y%m%d')
    str_end_date = end_date.strftime('%Y%m%d')

    all_parms = list(
        itertools.chain.from_iterable(PARAMETERS[p] for p in parms)
    )
    parms_str = f"parameters={','.join(all_parms)}"

    frames = [
        nasa_data_loc(row.lat, row.lon, str_start_date, str_end_date, parms_str)
        for row in locations.itertuples(index=False)
    ]

    if not frames:
        # pd.concat raises ValueError on an empty list; return an empty frame
        # instead. Column names assume the service labels its output with the
        # requested parameter codes -- TODO confirm.
        return pd.DataFrame(columns=[*all_parms, 'lon', 'lat'])

    return pd.concat(frames)