Source code for task_geo.data_sources.covid.cds.cds_formatter
import pandas as pd
def cds_formatter(df):
    """Formats data from CoronaDataScraper.

    The input frame is left untouched; a new, normalized frame is returned.
    Columns are renamed to the project naming (``state`` -> ``region``,
    ``county`` -> ``sub_region``, ``growthFactor`` -> ``growth_factor``),
    restricted to a fixed column set (any other column, e.g. ``population``,
    is dropped; any missing one is created empty), dates are parsed, metric
    counts are made integer and rows are ordered by date.

    Arguments:
        df(pandas.DataFrame): Raw data returned from cds_connector.

    Returns:
        pandas.DataFrame

    Raises:
        AssertionError: If any row has a null ``country``.
    """
    # Raised explicitly (rather than via ``assert``) so the check is not
    # stripped when Python runs with ``-O``; the exception type is kept.
    if df.country.isnull().any():
        raise AssertionError("Found rows with a null 'country' value.")

    # No ``del df['population']`` here: it mutated the caller's frame, and
    # the reindex below already discards every column that is not listed.
    df = df.rename(columns={
        'growthFactor': 'growth_factor',
        'state': 'region',
        'county': 'sub_region',
    })
    df = df.reindex(columns=[
        'country', 'region', 'sub_region', 'city', 'lat', 'long',
        'date', 'url', 'aggregate', 'tz', 'cases', 'deaths', 'recovered',
        'active', 'tested', 'growth_factor',
    ])

    df['date'] = pd.to_datetime(df['date'])

    # Missing counts are treated as zero so the columns can be integer typed.
    metrics = ['cases', 'deaths', 'recovered', 'active', 'tested']
    df[metrics] = df[metrics].fillna(0).astype(int)

    # Rows flagged as state-level aggregates but carrying no region are
    # relabelled as country-level aggregates.
    state_without_region = (df['aggregate'] == 'state') & df['region'].isnull()
    df.loc[state_without_region, 'aggregate'] = 'country'

    return df.sort_values(by='date').reset_index(drop=True)