In [1]:
import pandas as pd
import arrow
import cufflinks as cf
from scipy.optimize import curve_fit
import numpy as np
import plotly.graph_objs as go
import requests

# Put cufflinks into offline mode so figures are produced inside the notebook.
cf.go_offline()

# Lift pandas' display limits so wide/long frames are shown without truncation.
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, None)
In [2]:
# Download the JHU CSSE global confirmed-cases time series and cache it on disk
# under a name that carries today's month and day.
CSV_URL = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'

# A timeout keeps the cell from hanging indefinitely on a stalled connection.
response = requests.get(CSV_URL, timeout=30)
# Fail loudly on 4xx/5xx instead of saving an error page as if it were CSV.
response.raise_for_status()

date = arrow.now().format('MM-DD')
filename = f"coronavirus-{date}.csv"

# `with` guarantees the handle is flushed and closed; the explicit encoding
# matches the UTF-8 default that pd.read_csv uses when reading the file back.
with open(filename, 'w', encoding='utf-8') as csv_file:
    csv_file.write(response.text)
Out[2]:
78271
In [3]:
# Parse the cached CSV (written to `filename` by the download cell) into a DataFrame.
data = pd.read_csv(filename)
In [4]:
# The last day's information is incomplete, so discard the final column.
# NOTE: this rebinds `data` in place, so re-running the cell drops one more
# column each time; re-run the load cell first if you need to repeat it.
data = data.iloc[:, :-1]

# Label of the last remaining column; used later for sorting and filtering.
last = data.columns[-1]
In [5]:
# group countries together
# Rows that share a 'Country/Region' value are summed into one row per country,
# then countries are ordered by the `last` column, largest first.
# numeric_only=True keeps non-numeric columns (e.g. 'Province/State') out of the
# sum explicitly; older pandas dropped them silently, newer versions do not.
data = (data
        .drop(['Lat', 'Long'], axis=1)
        .groupby('Country/Region')
        .sum(numeric_only=True)
        .sort_values(last, ascending=False))
In [6]:
# keep only the countries whose value in the `last` column is above 100
data = data.loc[data[last].gt(100)]
In [7]:
def exp(x, a, b, c):
    """Evaluate the shifted exponential ``a * e**(b * x) + c``.

    Parameters
    ----------
    x : scalar or numpy array
        Point(s) at which to evaluate the model.
    a : float
        Multiplier applied to the exponential term.
    b : float
        Rate inside the exponent.
    c : float
        Constant offset added to the result.

    Returns
    -------
    Same shape as ``x`` (numpy scalar for scalar input).
    """
    growth = np.exp(b * x)
    return c + a * growth
In [8]:
# For every country: turn the per-day totals into day-over-day increments, fit
# the `exp` model to them, and plot the observed increments next to the fitted
# curve extended past the end of the data.
for index, row in data.iterrows():
    # diff() leaves NaN in the first slot, so seed it with the first total.
    # .iloc is used because the Series is labelled by column name, and integer
    # keys on a non-integer index are a deprecated positional fallback.
    diff_row = row.diff()
    diff_row.iloc[0] = row.iloc[0]
    x = np.arange(1, len(diff_row) + 1)

    # Start the fit at the first day whose increment exceeds 2. np.argmax
    # returns 0 when no day qualifies, in which case the full series is kept.
    first_nonzero = np.argmax(np.array(diff_row) > 2)
    x = x[first_nonzero:]
    diff_row = diff_row.iloc[first_nonzero:]
    observed = np.array(diff_row)

    try:
        popt, _ = curve_fit(exp, x, observed, p0=(1, 0.1, 0), maxfev=10000)
    except (RuntimeError, TypeError, ValueError):
        # RuntimeError: the optimiser did not converge within maxfev.
        # TypeError: fewer data points than model parameters.
        # ValueError: invalid input such as NaNs.
        # Anything else (including KeyboardInterrupt) is allowed to propagate.
        print(f'Unable to fit curve for {index}')
        continue

    predicted = exp(x, *popt)

    # Squared Pearson correlation between fitted and observed values, used as
    # the goodness-of-fit figure shown in the plot title.
    r = np.corrcoef(predicted, observed)
    r_2 = (r * r)[0][1]

    # Same starting day as `x`, but running 27 days beyond the last observation.
    future_x = np.arange(first_nonzero + 1, first_nonzero + len(diff_row) + 28)
    future_predict = exp(future_x, *popt)

    layout = go.Layout(
        yaxis=dict(
            # Clamp the y-axis around the observed data so the extrapolated
            # tail of the exponential does not flatten the visible series.
            range=[np.min(diff_row) - 30, np.max(diff_row) * 1.5]
        ),
        title=f"{index} growth R^2 = {r_2:.3f} days since {diff_row.keys()[0]}",
        hovermode="x"
    )

    # Both series get a default integer index, so 'actual' is padded with NaN
    # over the forecast range where only 'predicted' has values.
    comparison = pd.DataFrame(
        {'actual': pd.Series(observed),
         'predicted': pd.Series(future_predict)})
    plot = comparison.iplot(layout=layout)
Unable to fit curve for Italy
/usr/local/lib/python3.7/site-packages/scipy/optimize/minpack.py:787: OptimizeWarning:

Covariance of the parameters could not be estimated

Unable to fit curve for New Zealand
Unable to fit curve for Senegal
In [ ]: