# COVID-19 analysis using Python.
# This script analyses the death rate of the COVID-19 pandemic using Python.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
# Load the Worldometer snapshot table used by the rest of the analysis.
# Only one snapshot file is read: loading several files in a row into the
# same variable would discard every frame but the last, and would fail as
# soon as one of the unused files is missing from the working directory.
SNAPSHOT_CSV_PATH = 'worldometers_snapshots_October11_to_October12.csv'
worldometer_df = pd.read_csv(SNAPSHOT_CSV_PATH)
# Bare expression: displays the frame when run as a notebook cell and has
# no effect when the file is executed as a plain script.
worldometer_df
# Keep only the snapshot rows of a single country, renumbered from zero.
country_name = 'Bangladesh'
is_selected_country = worldometer_df['Country'] == country_name
country_df = worldometer_df[is_selected_country].reset_index(drop=True)
# Notebook-style display of the result.
country_df
# Keep only the rows of one snapshot day.
# The literal is day-first ('%d/%m/%Y'), so '11/10/2020' is 11 October 2020,
# the first day named by the October11_to_October12 snapshot file. Written
# month-first, the same day would parse as 10 November and match no rows.
selected_date = datetime.strptime('11/10/2020', '%d/%m/%Y')
# NOTE(review): assumes the 'Date' column holds 'YYYY-MM-DD' strings; confirm
# against the CSV.
selected_date_iso = selected_date.strftime('%Y-%m-%d')
selected_date_df = worldometer_df.loc[worldometer_df['Date'] == selected_date_iso, :].reset_index(drop=True)
# Notebook-style display of the result.
selected_date_df
# Rows of the most recent snapshot day (12 October 2020, day-first literal),
# renumbered from zero.
last_date = datetime.strptime('12/10/2020', '%d/%m/%Y')
last_date_iso = last_date.strftime('%Y-%m-%d')
is_last_date = worldometer_df['Date'] == last_date_iso
last_date_df = worldometer_df[is_last_date].reset_index(drop=True)
# Notebook-style display of the result.
last_date_df
# Case fatality ratio per row: total deaths divided by total cases.
last_date_df['Case Fatality Ratio'] = last_date_df['Total Deaths'].div(last_date_df['Total Cases'])

# Histogram of the ratio in percent, using bin edges 0, 1, ..., 34.
fatality_percent_all = np.array(last_date_df['Case Fatality Ratio']) * 100
percent_bin_edges = np.arange(35)
plt.figure(figsize=(12, 8))
plt.hist(fatality_percent_all, bins=percent_bin_edges)
plt.title('Histogram of Death Rates for various Countries', fontsize=18)
plt.xlabel('Death Rate (%)', fontsize=16)
plt.ylabel('Number of Countries', fontsize=16)
plt.show()
# Same histogram, restricted to rows with more than min_number_of_cases
# total cases.
min_number_of_cases = 1000
has_enough_cases = last_date_df['Total Cases'] > min_number_of_cases
greatly_affected_df = last_date_df[has_enough_cases]

fatality_percent_affected = np.array(greatly_affected_df['Case Fatality Ratio']) * 100
plt.figure(figsize=(12, 8))
plt.hist(fatality_percent_affected, bins=np.arange(35))
plt.title('Histogram of Death Rates for various Countries', fontsize=18)
plt.xlabel('Death Rate (%)', fontsize=16)
plt.ylabel('Number of Countries', fontsize=16)
plt.show()
# Scatter plot of death rate against testing intensity for the countries
# with more than min_number_of_cases total cases.

# Testing intensity: number of tests performed per confirmed case.
last_date_df['Num Tests per Positive Case'] = last_date_df['Total Tests'] / last_date_df['Total Cases']

# Select the rows above the case threshold after adding the column, so the
# selection includes it.
min_number_of_cases = 1000
greatly_affected_df = last_date_df.loc[last_date_df['Total Cases'] > min_number_of_cases, :]

# Values larger than this are drawn at the limit itself so that a few extreme
# rows do not stretch the x axis.
x_axis_limit = 80

# Plain numpy arrays, indexed by position 0..n-1 in greatly_affected_df order.
death_rate_percent = 100 * np.array(greatly_affected_df['Case Fatality Ratio'])
num_test_per_positive = np.array(greatly_affected_df['Num Tests per Positive Case'])
num_test_per_positive[num_test_per_positive > x_axis_limit] = x_axis_limit
total_num_deaths = np.array(greatly_affected_df['Total Deaths'])
population = np.array(greatly_affected_df['Population'])

plt.figure(figsize=(16, 12))
# Marker size grows with log(population) squared; colour encodes log10 of
# the total number of deaths.
plt.scatter(x=num_test_per_positive, y=death_rate_percent,
            s=0.5 * np.power(np.log(1 + population), 2),
            c=np.log10(1 + total_num_deaths))
plt.colorbar()
plt.ylabel('Death Rate (%)', fontsize=16)
plt.xlabel('Number of Tests per Positive Case', fontsize=16)
plt.title('Death Rate as function of Testing Quality', fontsize=18)
plt.xlim(-1, x_axis_limit + 12)
plt.ylim(-0.2, 17)

# Plot the names of selected countries on top of the figure.
countries_to_display = ['USA', 'Russia', 'Spain', 'Bangladesh', 'Brazil', 'UK', 'Italy', 'France',
                        'Germany', 'India', 'Canada', 'Belgium', 'Mexico', 'Netherlands',
                        'Sweden', 'Portugal', 'UAE', 'Poland', 'Indonesia', 'Romania',
                        'Israel','Thailand','Kyrgyzstan','El Salvador', 'S. Korea',
                        'Denmark', 'Serbia', 'Norway', 'Algeria', 'Bahrain','Slovenia',
                        'Greece','Cuba','Hong Kong','Lithuania', 'Australia', 'Morocco',
                        'Malaysia', 'Nigeria', 'Moldova', 'Ghana', 'Armenia', 'Bolivia',
                        'Iraq', 'Hungary', 'Cameroon', 'Azerbaijan']
plotted_country_names = np.array(greatly_affected_df['Country'])
for country_name in countries_to_display:
    # Look the country up by position: the arrays above are positional,
    # whereas greatly_affected_df keeps the row labels of last_date_df, which
    # stop matching positions once the case filter drops a row.
    country_index = np.flatnonzero(plotted_country_names == country_name)
    if country_index.size == 0:
        # Country is not among the plotted rows; nothing to label.
        continue
    label_x = num_test_per_positive[country_index[0]]
    label_y = death_rate_percent[country_index[0]]
    if np.isnan(label_x) or np.isnan(label_y):
        # No finite point was drawn for this country, so skip its label.
        continue
    plt.text(x=label_x + 0.5, y=label_y + 0.2, s=country_name, fontsize=10)
plt.show()
# Pooled death rate over the rows whose tests-per-positive-case value exceeds
# good_testing_threshold: summed deaths divided by summed cases, in percent.
good_testing_threshold = 300
tests_well = greatly_affected_df['Num Tests per Positive Case'] > good_testing_threshold
good_testing_df = greatly_affected_df[tests_well]
# Notebook-style display of the selected rows.
good_testing_df
pooled_deaths = good_testing_df['Total Deaths'].sum()
pooled_cases = good_testing_df['Total Cases'].sum()
estimated_death_rate_percent = 100 * pooled_deaths / pooled_cases
print('Death Rate only for "good testing countries" is %.2f%s' % (estimated_death_rate_percent, '%'))