# Source code for task_geo.data_sources.covid.spain.es_covid_formatter
import pandas as pd
# Static per-region lookup tables, keyed by the region code found in the
# ``ccaa`` column of the source data.
_REGION_AREA_KM_SQUARED = {
    "CE": 18.5,
    "AR": 47719,
    "CM": 79463,
    "PV": 7234,
    "MC": 11313,
    "AS": 10604,
    "AN": 87268,
    "CL": 94223,
    "CT": 32114,
    "MD": 8028,
    "IB": 4992,
    "GA": 29574,
    "CN": 7447,
    "VC": 23255,
    "RI": 5045,
    "NC": 10391,
    "EX": 41634,
    "ME": 12.3,
    "CB": 5321,
}

_REGION_POPULATION = {
    "CE": 84777,
    "AR": 1319291,
    "CM": 2032863,
    "PV": 2207776,
    "MC": 1493898,
    "AS": 1022800,
    "AN": 8414240,
    "CL": 2399548,
    "CT": 7675217,
    "MD": 6663394,
    "IB": 1149460,
    "GA": 2699499,
    "CN": 2153389,
    "VC": 5003769,
    "RI": 316798,
    "NC": 654214,
    "EX": 1067710,
    "ME": 86487,
    "CB": 581078,
}

_REGION_GDP_PER_CAPITA_EUROS = {
    "CE": 19335,
    "AR": 25540,
    "CM": 17698,
    "PV": 30829,
    "MC": 18520,
    "AS": 21035,
    "AN": 16960,
    "CL": 22289,
    "CT": 27248,
    "MD": 29385,
    "IB": 24393,
    "GA": 20723,
    "CN": 19568,
    "VC": 19964,
    "RI": 25508,
    "NC": 29071,
    "EX": 15394,
    "ME": 16981,
    "CB": 22341,
}

# Source column name (lower-cased) -> output column name.
_COLUMN_NAMES = {
    'ccaa': 'autonomous_community_iso',
    'fecha': 'date',
    'casos': 'cases',
    'hospitalizados': 'hospitalized',
    'uci': 'icu',
    'fallecidos': 'deceased',
    'recuperados': 'recovered',
}

# Counter columns that arrive as running totals and are turned into increments.
_COUNTER_COLUMNS = ['cases', 'hospitalized', 'icu', 'deceased', 'recovered']


def es_covid_formatter(df):
    """Formats data retrieved from https://covid19.isciii.es

    Column names are lower-cased and renamed to English, missing counters are
    replaced with 0, the cumulative counters are converted into per-row
    increments within each region, rows whose region code is not one of the
    known codes are dropped, and static per-region attributes (area,
    population, density, GDP per capita) are added.

    The input frame is not modified; all work happens on a copy.

    Arguments:
        df(pandas.DataFrame): raw data with (case-insensitive) columns
            ``ccaa``, ``fecha``, ``casos``, ``hospitalizados``, ``uci``,
            ``fallecidos`` and ``recuperados``.

    Returns:
        pandas.DataFrame: one row per kept input row, with columns
            ``country``, ``autonomous_community_iso``, ``area_km_squared``,
            ``population``, ``gdp_per_capita_euros``,
            ``density_pop_per_km_squared``, ``date``, ``cases``,
            ``hospitalized``, ``icu``, ``deceased``, ``recovered``.
    """
    # Copy first so the caller's DataFrame keeps its columns and NaN values.
    data = df.copy()
    data.columns = data.columns.str.lower()
    data = data.rename(columns=_COLUMN_NAMES)

    # Replace NaN with 0. Assigning the result back (instead of a chained
    # ``inplace=True`` call on a column) also works under Copy-on-Write.
    data[_COUNTER_COLUMNS] = data[_COUNTER_COLUMNS].fillna(0)

    # NOTE(review): parsing relies on pandas' default format inference;
    # confirm the upstream date format (e.g. day-first) is handled as intended.
    data['date'] = pd.to_datetime(data['date'])

    # Undo cumulative sums: difference each counter with the previous row of
    # the same region (in the order rows appear). The first row of a region
    # has no predecessor, so it keeps its original value.
    increments = data.groupby('autonomous_community_iso')[_COUNTER_COLUMNS].diff()
    data[_COUNTER_COLUMNS] = increments.fillna(data[_COUNTER_COLUMNS])

    # Insert Country
    data.insert(0, 'country', 'Spain')

    # Remove rows that are not a region. This is significant because the last
    # row includes some text. ``.copy()`` makes the filtered frame independent
    # so the column assignments below do not target a slice.
    is_region = data['autonomous_community_iso'].isin(list(_REGION_AREA_KM_SQUARED))
    data = data[is_region].copy()

    # Add Area, Population, Density, GDP
    region_codes = data['autonomous_community_iso']
    data['area_km_squared'] = region_codes.map(_REGION_AREA_KM_SQUARED)
    data['population'] = region_codes.map(_REGION_POPULATION)
    data['density_pop_per_km_squared'] = data['population'] / data['area_km_squared']
    data['gdp_per_capita_euros'] = region_codes.map(_REGION_GDP_PER_CAPITA_EUROS)

    # Reorder Columns
    return data[
        ['country', 'autonomous_community_iso', 'area_km_squared', 'population',
         'gdp_per_capita_euros', 'density_pop_per_km_squared', 'date', 'cases',
         'hospitalized', 'icu', 'deceased', 'recovered']]