In [1]:
from datetime import datetime
import pandas as pd
import pandas as pd
import io
import requests
import cufflinks as cf
import plotly
import plotly.offline as py
import plotly.graph_objs as go
cf.go_offline()  # put cufflinks in plotly offline mode, so no plotly account is required
py.init_notebook_mode()  # initialise plotly.offline so charts are drawn inline in the notebook (IPython)
In [4]:
# Per-state daily CSV from the COVID19Tracking/covid-tracking-data GitHub repository.
url="https://raw.githubusercontent.com/COVID19Tracking/covid-tracking-data/master/data/states_daily_4pm_et.csv"
data = pd.read_csv(url)
# `date` is read as YYYYMMDD values (e.g. 20200429). Parse the whole column in
# one vectorized call rather than invoking datetime.strptime once per row; the
# resulting `date2` column is the same datetime64 series.
data['date2'] = pd.to_datetime(data['date'].astype(str), format='%Y%m%d')
In [7]:
# Preview the first five rows of the loaded frame.
data.head(n=5)
Out[7]:
date state positive negative pending hospitalizedCurrently hospitalizedCumulative inIcuCurrently inIcuCumulative onVentilatorCurrently ... total totalTestResults posNeg fips deathIncrease hospitalizedIncrease negativeIncrease positiveIncrease totalTestResultsIncrease date2
0 20200429 AK 355.0 18764.0 NaN 14.0 NaN NaN NaN NaN ... 19119.0 19119.0 19119.0 2 0.0 0.0 2026.0 4.0 2030.0 2020-04-29
1 20200429 AL 6842.0 73607.0 NaN NaN 945.0 NaN 335.0 NaN ... 80449.0 80449.0 80449.0 1 3.0 34.0 4467.0 155.0 4622.0 2020-04-29
2 20200429 AR 3192.0 42257.0 NaN 93.0 389.0 NaN NaN 18.0 ... 45449.0 45449.0 45449.0 5 7.0 389.0 4697.0 81.0 4778.0 2020-04-29
3 20200429 AS 0.0 3.0 17.0 NaN NaN NaN NaN NaN ... 20.0 3.0 3.0 60 0.0 0.0 0.0 0.0 0.0 2020-04-29
4 20200429 AZ 7202.0 61611.0 NaN 756.0 1131.0 312.0 NaN 191.0 ... 68813.0 68813.0 68813.0 4 11.0 36.0 1121.0 254.0 1375.0 2020-04-29

5 rows × 26 columns

In [8]:
def _state_pivot(frame, column):
    """Return a wide table of `column`: one row per `date2`, one column per `state`.

    The column is selected *before* aggregating, so only that column is
    averaged. This avoids re-aggregating every column of the frame for each
    metric and avoids the TypeError newer pandas raises when a grouped mean
    meets a non-numeric column.

    Gaps are back-filled: a missing cell takes the value of the next later
    date for the same state. Cells with no later observation stay NaN.

    Parameters
    ----------
    frame : pandas.DataFrame
        Long-format data with at least `state`, `date2` and `column`.
    column : str
        Name of the numeric column to pivot.

    Returns
    -------
    pandas.DataFrame
        Indexed by `date2`, columns are `state` values.
    """
    wide = frame.groupby(['state', 'date2'])[column].mean().unstack('state')
    # DataFrame.bfill() is the non-deprecated spelling of fillna(method='bfill').
    return wide.bfill()


positive = _state_pivot(data, 'positive')
death = _state_pivot(data, 'death')
recovered = _state_pivot(data, 'recovered')
# Day-over-day difference of the positive counts, per state.
new_positives = positive.diff()
In [9]:
# New positives relative to the currently infected count,
# where currently infected = positive - recovered - death.
# Caveat: this does not work for states that are not tracking `recovered` (e.g. CA).
normalized_contagiousness = new_positives / positive.sub(recovered).sub(death)
In [12]:
def plot(target, states, title):
    """Show a line chart with one trace per state.

    Parameters
    ----------
    target : DataFrame-like
        Its index supplies the x values; `target[state]` supplies the y values.
    states : iterable
        Column labels of `target` to draw, one trace each.
    title : str
        Chart title.

    Every trace is added with ``visible="legendonly"``, so the chart opens with
    no lines drawn until a state is clicked in the legend. The figure is shown
    via ``fig.show()``; nothing is returned.
    """
    x_values = target.index
    traces = [
        go.Scatter(
            x=x_values,
            y=target[label].tolist(),
            mode='lines',
            name=label,
            visible="legendonly",
        )
        for label in states
    ]
    fig = go.Figure(data=traces)
    fig.update_layout(title=title)
    fig.show()
In [15]:
# Chart every state column of `positive`. To limit the charts to a subset,
# swap in an explicit list such as the one below.
#states = ['TN', 'FL', 'TX', 'CA', 'NY', 'AR', 'IA', 'NE', 'ND', 'OK', 'SC', 'SD', 'UT', 'WY', 'MI']
states = positive.columns
plot(target=positive, states=states, title='Cumulative Positive Cases')
plot(target=positive.pct_change().mul(100), states=states, title='Percent Daily Change in Positive Cases')
plot(target=positive.diff(), states=states, title='New Cases')
In [ ]: