import pandas as pd
import arrow
import cufflinks as cf
from scipy.optimize import curve_fit
import numpy as np
import plotly.graph_objs as go
import requests
# Switch cufflinks to offline plotting and lift pandas' display truncation.
cf.go_offline()
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# Download the CSSE global time series of confirmed COVID-19 cases.
# A timeout keeps the script from hanging forever, and raise_for_status()
# stops an HTTP error page from being saved and parsed as if it were CSV.
response = requests.get('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv', timeout=30)
response.raise_for_status()

# Keep a dated snapshot of the download on disk, then load it from there.
date = arrow.now().format('MM-DD')
filename = f"coronavirus-{date}.csv"
# Explicit UTF-8 so the file round-trips through read_csv (which defaults to
# UTF-8) on every platform; the context manager guarantees the file is
# flushed and closed before it is read back.
with open(filename, 'w', encoding='utf-8') as snapshot:
    snapshot.write(response.text)
data = pd.read_csv(filename)
data = data.iloc[:, :-1]  # last day information is incomplete
last = data.columns[-1]

# group countries together
data = (
    data
    .drop(['Lat', 'Long'], axis=1)
    .groupby('Country/Region')
    # numeric_only=True keeps non-numeric columns (e.g. province/state
    # labels) out of the result; pandas >= 2.0 no longer drops them silently.
    .sum(numeric_only=True)
    .sort_values(last, ascending=False)
)

# only keep countries with more than 100 confirmed cases on the last day
data = data[data[last] > 100]
def exp(x, a, b, c):
    """Evaluate the shifted exponential ``a * e**(b * x) + c``.

    Used as the model function for ``curve_fit``: ``x`` is the independent
    variable (scalar or NumPy array) and ``a``, ``b``, ``c`` are the scale,
    growth rate and vertical offset being fitted.
    """
    growth = np.exp(b * x)
    scaled = a * growth
    return scaled + c
# For every country: fit an exponential to the daily new cases, then plot the
# observed values next to the fitted curve and its extrapolation.
for index, row in data.iterrows():
    # Convert cumulative totals to daily increments. diff() leaves NaN in the
    # first slot, so seed it with the first cumulative value. Use .iloc because
    # the Series is labelled by date strings, not by position.
    diff_row = row.diff()
    diff_row.iloc[0] = row.iloc[0]
    x = np.arange(1, len(diff_row) + 1)

    # Start the fit at the first day with more than 2 new cases. If no day
    # qualifies, argmax returns 0 and the whole series is used.
    first_nonzero = np.argmax(np.asarray(diff_row) > 2)
    x = x[first_nonzero:]
    diff_row = diff_row.iloc[first_nonzero:]
    observed = np.asarray(diff_row)

    try:
        popt, _ = curve_fit(exp, x, observed, p0=(1, 0.1, 0), maxfev=10000)
    except (RuntimeError, ValueError, TypeError):
        # RuntimeError: the optimiser did not converge within maxfev.
        # ValueError: invalid input such as NaNs.
        # TypeError: fewer data points than parameters.
        # Anything else (e.g. KeyboardInterrupt) is allowed to propagate.
        print(f'Unable to fit curve for {index}')
        continue

    # Squared Pearson correlation between the fit and the observations.
    predicted = exp(x, *popt)
    r_2 = np.corrcoef(predicted, observed)[0, 1] ** 2

    # Evaluate the model over the fitted range plus 27 further days
    # (np.arange excludes its end point).
    future_x = np.arange(x[0], x[-1] + 28)
    future_predict = exp(future_x, *popt)

    layout = go.Layout(
        yaxis=dict(
            # Clamp the y-axis around the observed data so that the
            # exponential extrapolation does not flatten the actual curve.
            range=[np.min(diff_row) - 30, np.max(diff_row) * 1.5]
        ),
        title=f"{index} growth R^2 = {r_2:.3f} days since {diff_row.index[0]}",
        hovermode="x"
    )
    # Both series get a default 0..n-1 index, so they line up day by day;
    # 'actual' is shorter and is padded with NaN over the forecast window.
    pd.DataFrame(
        {'actual': pd.Series(observed),
         'predicted': pd.Series(future_predict)}
    ).iplot(layout=layout)