Source code for task_geo.data_sources.noaa.noaa_api_formatter

import numpy as np
import pandas as pd

from task_geo.common.country_codes import fips_to_name
from task_geo.data_sources.noaa.noaa_api_connector import DEFAULT_METRICS


[docs]def noaa_api_formatter(raw, metrics=None, country_aggr=False): """Format the output of the NOAA API to the task-geo Data Model. Arguments: raw(pandas.DataFrame):Data to be formatted. metrics(list[str]): Optional.List of metrics requested,valid metric values are: TMIN: Minimum temperature. TMAX: Maximum temperature. TAVG: Average of temperature. SNOW: Snowfall (mm). SNWD: Snow depth (mm). PRCP: Precipitation country_aggr(bool): When True, only an aggregate for each date/country will be returned. Returns: pandas.DataFrame """ if metrics is None: metrics = [metric.lower() for metric in DEFAULT_METRICS if metric in raw.columns] data = raw.copy() data.columns = [column.lower() for column in data.columns] column_order = [ 'latitude', 'longitude', 'elevation', 'country', 'name', 'date', 'station'] column_order.extend(metrics) data.date = pd.to_datetime(data.date) for column in ['tmax', 'tavg', 'tmin']: if column in data.columns: data[column] = data[column].astype(float) if 'snwd' in data.columns: data['snwd'] = data['snwd'].astype(float) / 1000 data.snwd.fillna(0, inplace=True) if 'prcp' in data.columns: data['prcp'] = data['prcp'].astype(float) / 1000 data.prcp.fillna(0, inplace=True) data['country'] = data.station.str.slice(0, 2).apply(fips_to_name) data = data[column_order] if country_aggr: aggregations = {} if 'tmin' in metrics: aggregations['tmin'] = np.min if 'tmax' in metrics: aggregations['tmax'] = np.max agg_columns = list(aggregations.keys()) return data.groupby(['country', 'date'])[agg_columns].aggregate(aggregations).reset_index() return data