Notebook sobre Vacunación contra el Covid19 en Costa Rica y el Mundo¶

elaborada por el Dr. Juan I. Barrios¶

Instituto Algoritmia, Barcelona. España 2021¶

!conda activate geo_env

## Aca se importan las librerias necesarias, la mas importante para esta prueba es PANDAS 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pycountry
from bokeh.io import output_notebook, show, output_file
from bokeh.plotting import figure, output_file, save
from bokeh.models import GeoJSONDataSource, LinearColorMapper, ColorBar
from bokeh.palettes import brewer
from datetime import datetime
from bokeh.models import Slider, HoverTool
import pandas as pd
import geopandas as gpd
sns.set_style('darkgrid')
%matplotlib inline

##Con este comando leemos los datos de la web 
dataframe=pd.read_csv('https://github.com/owid/covid-19-data/blob/master/public/data/owid-covid-data.csv?raw=true', error_bad_lines=False)

# Listados los primeros registros del set de datos
dataframe.tail(5)

## Escogemos las variables especificas que necesitamos 
df=dataframe
dataframe_pais=dataframe[['location','continent','total_vaccinations','people_fully_vaccinated','population']]
dataframe_pais.tail(5)

##  aca agrupamos los registros de acuerdo a las variables location y continente, creamos un nuevo dataset y le decimos al nuevo data set (df_pais) las variables adicionales que vamos a utilizar
dataframe_pais=dataframe_pais.groupby(['location',])['continent', 'total_vaccinations','people_fully_vaccinated','population'].last().reset_index()

C:\Users\Tommy\Anaconda3\envs\geo_env\lib\site-packages\ipykernel_launcher.py:2: FutureWarning: Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.

## aca listamos ese nuevo dataset pero solo los primeros 5 registros (paises con la función (heading) 
dataframe_pais.head()

Creamos las nuevas variables del dataframe "_pais"¶

dataframe_pais['Cobertura']=(dataframe_pais['total_vaccinations']*0.5)/(dataframe_pais['population']*1.25)*100
dataframe_cobertura=dataframe_pais.sort_values('Cobertura',ascending=False)
df1=df[[ 'location', 'date','total_vaccinations',
       'people_vaccinated', 'people_fully_vaccinated', 'new_vaccinations',
       'population',]]
df1.dropna(subset=['new_vaccinations'],inplace=True)
cntry=[]
value=[]
for loc in df1.location.unique():
  cntry.append(loc)
  value.append(df1[df1['location']==loc]['new_vaccinations'].rolling(7, center=False).mean().mean())
df1=pd.DataFrame({'location':cntry,'Average_daily_doses':value})
df_full2=pd.merge(dataframe_cobertura,df1,on='location',how='outer')
df_full=pd.merge(dataframe_cobertura,df1,on='location',how='outer')
df_full['Cobertura']=((df_full['total_vaccinations']*0.5)/df_full['population']*1.25)*100
df_full['Days_70%_vaccination']=(((df_full['population']*0.7)-(df_full['total_vaccinations']*0.5))/(df_full['Average_daily_doses']*0.5))
df_full['Percent_Vaccinated']=((df_full['total_vaccinations']*0.5)/df_full['population'])*100
df_full.sort_values('Days_70%_vaccination',ascending=True,inplace=True)
df_full.dropna()
df_full.sort_values('Days_70%_vaccination') 
pd.reset_option('display.max_rows')
pd.set_option("display.max_rows",215)

C:\Users\Tommy\Anaconda3\envs\geo_env\lib\site-packages\ipykernel_launcher.py:6: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

# Pais de comprobación
# df_full[df_full['location']=='Spain']

Imprimir el reporte general de países "_salida"¶

dataframe_salida = df_full
dataframe_salida.rename(columns = {'location':'PAISES', 'continent':'CONTINENTE',
                          'total_vaccinations':'TOTAL VACUNAS APLICADAS', 
                          'Percent_Vaccinated':'PORCENTAJE POBLACION VACUNADA', 
                          'Cobertura': 'POBLACION +DE 18a CUBIERTA'}, inplace = True)
dataframe_salida.dropna(subset = ["PORCENTAJE POBLACION VACUNADA"], inplace=True)
dataframe_salida=dataframe_salida[['PAISES','CONTINENTE','TOTAL VACUNAS APLICADAS','population', 'POBLACION +DE 18a CUBIERTA', 'PORCENTAJE POBLACION VACUNADA']]                          
dataframe_salida2=dataframe_salida.sort_values('PORCENTAJE POBLACION VACUNADA',ascending=False,inplace=True)
pd.options.display.float_format = '{:,.0f}'.format
dataframe_salida.to_html('../vacunas/salidas/listado_general.html')
dataframe_salida.rename(columns = {'location':'PAISES', 'continent':'CONTINENTE',
                          'TOTAL VACUNAS APLICADAS':'total_vaccinations', 
                          'PORCENTAJE POBLACION VACUNADA':'Percent_Vaccinated', 
                          'POBLACION +DE 18a CUBIERTA':'Cobertura'}, inplace = True)

C:\Users\Tommy\Anaconda3\envs\geo_env\lib\site-packages\ipykernel_launcher.py:8: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
C:\Users\Tommy\AppData\Roaming\Python\Python37\site-packages\pandas\core\frame.py:4446: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,

Acá calculamos el número de días para llegar al 70% con el "_dias"¶

dataframe_dias = df_full
#dataframe_dias.dropna(subset = ["Days_70%_vaccination"], inplace=True)
dataframe_dias.rename(columns = {'PAIS':'PAISES','POBLACION':'population','Days_70%_vaccination':'NUMERO DE DIAS 70%'}, inplace = True)
#                              'total_vaccinations':'TOTAL VACUNAS APLICADAS', 
#                              'Percent_Vaccinated':'PORCENTAJE DE LA POBLACION VACUNADA', 
#                              'Days_70%_vaccination':'NUMERO DE DIAS 70%'}, inplace = True)
dataframe_dias=dataframe_dias[['PAISES', 'CONTINENTE','population','NUMERO DE DIAS 70%']]                          
dataframe_dias2=dataframe_dias.sort_values('NUMERO DE DIAS 70%',ascending=False,inplace=True)
pd.options.display.float_format = '{:,.0f}'.format
dataframe_dias.to_html('../vacunas/salidas/listado_dias.html')
dataframe_dias.rename(columns = {'PAIS':'PAISES', 'POBLACION':'population'}, inplace = True)

C:\Users\Tommy\Anaconda3\envs\geo_env\lib\site-packages\ipykernel_launcher.py:8: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
C:\Users\Tommy\AppData\Roaming\Python\Python37\site-packages\pandas\core\frame.py:4446: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,

Inicia proceso análisis : Países mas avanzados en el proceso de vacunación¶

df_full.columns

Index(['PAISES', 'CONTINENTE', 'TOTAL VACUNAS APLICADAS',
       'people_fully_vaccinated', 'population', 'POBLACION +DE 18a CUBIERTA',
       'Average_daily_doses', 'NUMERO DE DIAS 70%',
       'PORCENTAJE POBLACION VACUNADA'],
      dtype='object')

dataframe_calculos=df_full
dataframe_calculos.dropna
dataframe_calculos_sort=dataframe_calculos.sort_values('POBLACION +DE 18a CUBIERTA',ascending=False)
plt.figure(figsize=(12,8))
plt.title('Los 10 países mas avanzados en el proceso de vacunación')
dataframe_calculos_sort.rename(columns = {'POBLACION +DE 18a CUBIERTA':'Porcentaje de población mayor de 18 años'}, inplace = True)
sns.barplot(x = "PAISES", y = "Porcentaje de población mayor de 18 años", data = dataframe_calculos_sort.head(10))
plt.xticks(rotation=70)
plt.savefig('../vacunas/salidas/los_mejores.jpg')
plt.show()
dataframe_calculos_sort.rename(columns = {'Porcentaje de población mayor de 18 años':'Cobertura'}, inplace = True)

Países mas rezagados con procesos de vacunación activos ¶

dataframe_nulos= dataframe_pais[dataframe_pais['Cobertura'].isnull()]
dataframe_nulos1=dataframe_nulos.head(30)
plt.figure(figsize=(12,8))
plt.title('Los 21 países sin procesos de vacunación')
dataframe_nulos1.rename(columns = {'Cobertura':'Porcentaje de población mayor de 18 años','location':'PAISES' }, inplace = True)
sns.barplot(x = 'PAISES', y = 'Porcentaje de población mayor de 18 años', data = dataframe_nulos1)
plt.xticks(rotation=70)
plt.savefig('../vacunas/salidas/sin_vacuna.jpg')
plt.show()
dataframe_nulos1.rename(columns = {'Porcentaje de población mayor de 18 años':'Cobertura','PAISES':'location'}, inplace = True)

dataframe_calculos.columns

Index(['PAISES', 'CONTINENTE', 'TOTAL VACUNAS APLICADAS',
       'people_fully_vaccinated', 'population', 'POBLACION +DE 18a CUBIERTA',
       'Average_daily_doses', 'NUMERO DE DIAS 70%',
       'PORCENTAJE POBLACION VACUNADA'],
      dtype='object')

dataframe_calculos.dropna
dataframe_calculos_sort=dataframe_calculos.sort_values('POBLACION +DE 18a CUBIERTA',ascending=True)
plt.figure(figsize=(12,8))
plt.title('Los 20 países mas rezagados en el proceso de vacunación  (de los que ya vacunan) ')
dataframe_calculos_sort.rename(columns = {'POBLACION +DE 18a CUBIERTA':'Porcentaje de población mayor de 18 años','location':'PAISES'}, inplace = True)
sns.barplot(x = "PAISES", y = "Porcentaje de población mayor de 18 años", data = dataframe_calculos_sort.head(20))
plt.xticks(rotation=70)
plt.savefig('../vacunas/salidas/los_rezagados.jpg')
plt.show()
dataframe_calculos_sort.rename(columns = {'Porcentaje de población mayor de 18 años':'Cobertura'}, inplace = True)

COBERTURA DE VACUNACIÓN POR CONTINENTE¶

Listando los datos de un contiente específico

dataframe_continente=dataframe_pais.groupby('continent')['total_vaccinations','population','Cobertura'].last().reset_index()
dataframe=dataframe.groupby('location')['total_cases','total_deaths','population'].last().reset_index()
dataframe_continente.sort_values('Cobertura',ascending=True)

C:\Users\Tommy\Anaconda3\envs\geo_env\lib\site-packages\ipykernel_launcher.py:1: FutureWarning: Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.
  """Entry point for launching an IPython kernel.
C:\Users\Tommy\Anaconda3\envs\geo_env\lib\site-packages\ipykernel_launcher.py:2: FutureWarning: Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.

Cobertura de vacunación por Continentes ¶

dataframe_continente.rename(columns = {'Cobertura':'Porcentaje de población mayor de 18 años','continent':'CONTINENTES'}, inplace = True)
plt.figure(figsize=(12,8))
sns.barplot(x = "CONTINENTES", y = "Porcentaje de población mayor de 18 años", data = dataframe_continente.sort_values('Porcentaje de población mayor de 18 años',ascending=True))
plt.xticks(rotation=70)
plt.title('Cobertura de vacunación global por continente')
plt.savefig('../vacunas/salidas/cobertura_continente.jpg')
plt.show()
dataframe_continente.rename(columns = {'Porcentaje de población mayor de 18 años':'Cobertura','CONTINENTES':'continent'}, inplace = True)

Cobertura de vacunación en Norte y Centroamérica¶

Graficando países de Norte y Centro America con respecto a Cobertura¶

## creamos un nuevo dataset para ver los países con las coberturas mas altas en Norte America
dataframe_continente=dataframe_pais.groupby('continent')['total_vaccinations','population','Cobertura'].last().reset_index()
dataframe=dataframe.groupby('location')['total_cases','total_deaths','population'].last().reset_index()
dataframe_continente_america=dataframe_pais[dataframe_pais['continent']=='North America']
dataframe_continente_america2=dataframe_continente_america.sort_values('Cobertura',ascending=True,)

C:\Users\Tommy\Anaconda3\envs\geo_env\lib\site-packages\ipykernel_launcher.py:2: FutureWarning: Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.
  
C:\Users\Tommy\Anaconda3\envs\geo_env\lib\site-packages\ipykernel_launcher.py:3: FutureWarning: Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.
  This is separate from the ipykernel package so we can avoid doing imports until

# Construyendo el grafico
##Quitamos los valores Nan 
plt.figure(figsize=(12,8))
dataframe_continente_america2.dropna(inplace=True)
sns.barplot(x = "location", y = "Cobertura", data = dataframe_continente_america2.head(100))
plt.xticks(rotation=60)
plt.title('America del Norte y Centro America coberturas en vacunación')
plt.savefig('../vacunas/salidas/america.jpg')
plt.show()

Generando un mapa interactivo con BOKEH¶

df_full.rename(columns = {'PAISES':'location','CONTINENTE':'continent',
                          'TOTAL VACUNAS APLICADAS':'total_vaccinations', 
                          'PORCENTAJE DE LA POBLACION VACUNADA':'Percent_Vaccinated'}, inplace = True)
df_full.columns

Index(['location', 'continent', 'total_vaccinations',
       'people_fully_vaccinated', 'population', 'POBLACION +DE 18a CUBIERTA',
       'Average_daily_doses', 'NUMERO DE DIAS 70%',
       'PORCENTAJE POBLACION VACUNADA'],
      dtype='object')

import pandas as pd
import geopandas as gpd
shapefile = '../data/countries_110m/ne_110m_admin_0_countries.shp'

gdf = gpd.read_file(shapefile)[['ADMIN', 'ADM0_A3', 'geometry']]
gdf.columns = ['country', 'country_code', 'geometry']
gdf = gdf.drop(gdf.index[159])
gdf.head()

import pycountry

input_countries = df_full.location.values.tolist()

codes = []
for country in input_countries:
    try:
      codes.append(pycountry.countries.search_fuzzy(country)[0].alpha_3)
    except:
      codes.append('Unknown code')

df_full['country_code']=codes

df_full.columns

Index(['location', 'continent', 'total_vaccinations',
       'people_fully_vaccinated', 'population', 'POBLACION +DE 18a CUBIERTA',
       'Average_daily_doses', 'NUMERO DE DIAS 70%',
       'PORCENTAJE POBLACION VACUNADA', 'country_code'],
      dtype='object')

#Perform left merge to preserve every row in gdf.
df_full.rename(columns = {'PORCENTAJE POBLACION VACUNADA':'Percent_Vaccinated','POBLACION +DE 18a CUBIERTA':'Cobertura'}, inplace = True)
merged = gdf.merge(df_full[['country_code','Cobertura','Percent_Vaccinated']], left_on = 'country_code', right_on = 'country_code', how = 'left')

#Replace NaN values to string 'No data'.
merged.fillna('No data', inplace = True)

import json

#Read data to json
merged_json = json.loads(merged.to_json())

#Convert to str like object
json_data = json.dumps(merged_json)

df2=df[[ 'location', 'date','total_vaccinations',
       'people_vaccinated', 'people_fully_vaccinated', 'new_vaccinations',
       'population',]]
df2['date']=pd.to_datetime(df2['date'])

def mapper(month):
   return month.strftime('%Y-%m') 

df2['Month']=df2['date'].apply(mapper)

C:\Users\Tommy\Anaconda3\envs\geo_env\lib\site-packages\ipykernel_launcher.py:4: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
C:\Users\Tommy\Anaconda3\envs\geo_env\lib\site-packages\ipykernel_launcher.py:9: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':

df2.groupby(['location','Month'])['total_vaccinations','people_fully_vaccinated','population'].last().reset_index()
df2['Cobertura']=(df2['people_fully_vaccinated']/df2['population'])*100
df2['Percent_Vaccinated']=((df2['total_vaccinations']*0.5)/df2['population']*1.25)*100

C:\Users\Tommy\Anaconda3\envs\geo_env\lib\site-packages\ipykernel_launcher.py:1: FutureWarning: Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.
  """Entry point for launching an IPython kernel.
C:\Users\Tommy\Anaconda3\envs\geo_env\lib\site-packages\ipykernel_launcher.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
C:\Users\Tommy\Anaconda3\envs\geo_env\lib\site-packages\ipykernel_launcher.py:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until

#Perform left merge to preserve every row in gdf.
merged = gdf.merge(df_full[['country_code','Cobertura','Percent_Vaccinated']], left_on = 'country_code', right_on = 'country_code', how = 'left')

#Replace NaN values to string 'No data'.
merged.fillna(0, inplace = True)

import json

#Read data to json
merged_json = json.loads(merged.to_json())

#Convert to str like object
json_data = json.dumps(merged_json)

#Input GeoJSON source that contains features for plotting.
geosource = GeoJSONDataSource(geojson = json_data)
#Define a sequential multi-hue color palette.
palette = brewer['YlGnBu'][8]
#Reverse color order so that dark blue is highest obesity.
palette = palette[::-1]
#Instantiate LinearColorMapper that linearly maps numbers in a range, into a sequence of colors.
color_mapper = LinearColorMapper(palette = palette, low = 0, high = 20)
#Define custom tick labels for color bar.
tick_labels = {'0': '0%', '1': '1%', '5':'5%', '10':'10%', '20':'20%', '50':'50%'}
#Add hover tool
hover = HoverTool(tooltips = [ ('Pais/Region','@country'),('Cobertura población mayor de 18 años', '@Cobertura'),('Porcentaje población vacunada','@Percent_Vaccinated')])
#Create color bar. 
color_bar = ColorBar(color_mapper=color_mapper, label_standoff=8,width = 500, height = 20,
border_line_color=None,location = (0,0), orientation = 'horizontal', major_label_overrides = tick_labels)
#Create figure object.
p = figure(title = f'Cobertura de vacunación COVID por paises al {datetime.now().date()}', plot_height = 600 , plot_width = 950, toolbar_location = None,tools=[hover])
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None
#Add patch renderer to figure. 
p.patches('xs','ys', source = geosource,fill_color = {'field' :'Percent_Vaccinated', 'transform' : color_mapper},
          line_color = 'black', line_width = 0.25, fill_alpha = 1)
#Specify figure layout.
p.add_layout(color_bar, 'below')

#Display figure inline in Jupyter Notebook.
output_notebook()

#Display figure.
show(p)
output_file("../vacunas/salidas/mapai.html")
save(p)

'D:\\Machine learning\\mapas\\vacunas\\salidas\\mapai.html'

#  Aplicando algoritmo Kmeans a nuestro dataset 
from sklearn.cluster import KMeans
dataframe_clusters = dataframe_pais
dataframe_clusters = df.reset_index()

inertias = [] 
K = range(1,10) 
dataframe_clusters.fillna(0,inplace=True)  
for k in K: 
    #Crear y ajustar el modelo 
    kmeanModel = KMeans(n_clusters=k).fit(dataframe_clusters.drop(['continent','location','iso_code','date','tests_units'],axis=1)) 
    inertias.append(kmeanModel.inertia_) 
    
plt.plot(K, inertias, 'bx-') 
plt.xlabel('VALORES DE K') 
plt.ylabel('Inertia') 
plt.title('EL METODO DEL CODO USANDO INERTIA' ) 
plt.show()

## El numero de clusters o grupos sera de 4 
kmeans = KMeans(n_clusters = 4, init = 'k-means++', random_state = 42)
y_kmeans = kmeans.fit_predict(dataframe_clusters.drop(['continent','location','iso_code','date','tests_units'],axis=1))
y_kmeans1=y_kmeans+1
cluster = pd.DataFrame(y_kmeans1)
today_sub=dataframe_clusters.drop(['continent','location','iso_code','date','tests_units'],axis=1)
# aca añadimos la variable cluster a nuestro nuevo dataset 
today_sub['cluster'] = cluster
# Promedio de los valores del cluster
kmeans_mean_cluster = pd.DataFrame(round(today_sub.groupby('cluster').mean(),1))

## Listando los valores promedios de las variables utilizadas en cada cluster
kmeans_mean_cluster

## Acá podemos ver el grupo de paises en cada cluster o grupo 
dataframe_clusters_2=dataframe_clusters.copy()
dataframe_clusters_2['cluster']= cluster

##Para listar los paises dentro de cada cluster separados
dataframe_clusters_2[dataframe_clusters_2[('cluster')]==2]

dataframe_clusters_2[dataframe_clusters_2['cluster']==3]

dataframe_clusters_2[dataframe_clusters_2['cluster']==4]

dataframe_clusters_2[dataframe_clusters_2['cluster']==1]

	iso_code	continent	location	date	total_cases	new_cases	new_cases_smoothed	total_deaths	new_deaths	new_deaths_smoothed	...	gdp_per_capita	extreme_poverty	cardiovasc_death_rate	diabetes_prevalence	female_smokers	male_smokers	handwashing_facilities	hospital_beds_per_thousand	life_expectancy	human_development_index
85166	ZWE	Africa	Zimbabwe	2021-04-26	38102.0	16.0	34.714	1560.0	3.0	1.000	...	1899.775	21.4	307.846	1.82	1.6	30.7	36.791	1.7	61.49	0.571
85167	ZWE	Africa	Zimbabwe	2021-04-27	38164.0	62.0	41.286	1565.0	5.0	1.571	...	1899.775	21.4	307.846	1.82	1.6	30.7	36.791	1.7	61.49	0.571
85168	ZWE	Africa	Zimbabwe	2021-04-28	38191.0	27.0	30.143	1565.0	0.0	1.429	...	1899.775	21.4	307.846	1.82	1.6	30.7	36.791	1.7	61.49	0.571
85169	ZWE	Africa	Zimbabwe	2021-04-29	38235.0	44.0	31.000	1567.0	2.0	1.714	...	1899.775	21.4	307.846	1.82	1.6	30.7	36.791	1.7	61.49	0.571
85170	ZWE	Africa	Zimbabwe	2021-04-30	38257.0	22.0	30.286	1567.0	0.0	1.571	...	1899.775	21.4	307.846	1.82	1.6	30.7	36.791	1.7	61.49	0.571

	location	continent	total_vaccinations	people_fully_vaccinated	population
85166	Zimbabwe	Africa	411610.0	57776.0	14862927.0
85167	Zimbabwe	Africa	433939.0	63263.0	14862927.0
85168	Zimbabwe	Africa	458013.0	69992.0	14862927.0
85169	Zimbabwe	Africa	477597.0	76826.0	14862927.0
85170	Zimbabwe	Africa	500342.0	85607.0	14862927.0

	location	continent	total_vaccinations	people_fully_vaccinated	population
0	Afghanistan	Asia	240000.0	NaN	3.892834e+07
1	Africa	None	17827619.0	4876161.0	1.340598e+09
2	Albania	Europe	476903.0	655.0	2.877800e+06
3	Algeria	Africa	75000.0	NaN	4.385104e+07
4	Andorra	Europe	26414.0	4681.0	7.726500e+04

	continent	total_vaccinations	population	Cobertura
1	Asia	506,435	29,825,968	0
5	South America	250,000	28,435,943	0
0	Africa	500,342	14,862,927	1
4	Oceania	5,367	307,150	2
2	Europe	48,748,962	809	29
3	North America	240,159,677	331,002,647	29

	country	country_code	geometry
0	Fiji	FJI	MULTIPOLYGON (((180.00000 -16.06713, 180.00000...
1	United Republic of Tanzania	TZA	POLYGON ((33.90371 -0.95000, 34.07262 -1.05982...
2	Western Sahara	SAH	POLYGON ((-8.66559 27.65643, -8.66512 27.58948...
3	Canada	CAN	MULTIPOLYGON (((-122.84000 49.00000, -122.9742...
4	United States of America	USA	MULTIPOLYGON (((-122.84000 49.00000, -120.0000...

	index	total_cases	new_cases	new_cases_smoothed	total_deaths	new_deaths	new_deaths_smoothed	total_cases_per_million	new_cases_per_million	new_cases_smoothed_per_million	...	gdp_per_capita	extreme_poverty	cardiovasc_death_rate	diabetes_prevalence	female_smokers	male_smokers	handwashing_facilities	hospital_beds_per_thousand	life_expectancy	human_development_index
cluster
1	43,076	394,292	2,724	2,696	10,395	63	62	9,915	73	72	...	17,683	8	238	7	8	23	23	3	70	1
2	83,736	46,359,287	323,819	318,283	1,132,150	6,839	6,747	5,948	42	41	...	15,469	10	233	8	6	35	60	3	73	1
3	4,238	11,280,115	83,231	79,990	187,348	1,119	1,086	2,431	18	17	...	0	0	0	0	0	0	0	0	0	0
4	19,735	5,051,030	37,555	36,685	119,438	741	730	5,646	42	42	...	5,498	6	137	5	1	18	15	1	37	0

	index	iso_code	continent	location	date	total_cases	new_cases	new_cases_smoothed	total_deaths	new_deaths	...	extreme_poverty	cardiovasc_death_rate	diabetes_prevalence	female_smokers	male_smokers	handwashing_facilities	hospital_beds_per_thousand	life_expectancy	human_development_index	cluster
83504	83504	OWID_WRL	0	World	2020-01-22	557	0	0	17	0	...	10	233	9	6	35	60	3	73	1	2
83505	83505	OWID_WRL	0	World	2020-01-23	655	98	0	18	1	...	10	233	9	6	35	60	3	73	1	2
83506	83506	OWID_WRL	0	World	2020-01-24	941	286	0	26	8	...	10	233	9	6	35	60	3	73	1	2
83507	83507	OWID_WRL	0	World	2020-01-25	1,433	492	0	42	16	...	10	233	9	6	35	60	3	73	1	2
83508	83508	OWID_WRL	0	World	2020-01-26	2,118	685	0	56	14	...	10	233	9	6	35	60	3	73	1	2
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
83964	83964	OWID_WRL	0	World	2021-04-26	147,872,402	682,784	824,855	3,120,469	11,156	...	10	233	9	6	35	60	3	73	1	2
83965	83965	OWID_WRL	0	World	2021-04-27	148,716,872	844,470	823,432	3,134,956	14,487	...	10	233	9	6	35	60	3	73	1	2
83966	83966	OWID_WRL	0	World	2021-04-28	149,622,864	905,992	825,721	3,150,675	15,719	...	10	233	9	6	35	60	3	73	1	2
83967	83967	OWID_WRL	0	World	2021-04-29	150,520,466	897,602	825,413	3,165,665	14,990	...	10	233	9	6	35	60	3	73	1	2
83968	83968	OWID_WRL	0	World	2021-04-30	151,399,480	879,014	822,724	3,180,238	14,573	...	10	233	9	6	35	60	3	73	1	2

	index	iso_code	continent	location	date	total_cases	new_cases	new_cases_smoothed	total_deaths	new_deaths	...	extreme_poverty	cardiovasc_death_rate	diabetes_prevalence	female_smokers	male_smokers	handwashing_facilities	hospital_beds_per_thousand	life_expectancy	human_development_index	cluster
4006	4006	OWID_ASI	0	Asia	2020-01-22	556	0	0	17	0	...	0	0	0	0	0	0	0	0	0	3
4007	4007	OWID_ASI	0	Asia	2020-01-23	654	98	0	18	1	...	0	0	0	0	0	0	0	0	0	3
4008	4008	OWID_ASI	0	Asia	2020-01-24	937	283	0	26	8	...	0	0	0	0	0	0	0	0	0	3
4009	4009	OWID_ASI	0	Asia	2020-01-25	1,428	491	0	42	16	...	0	0	0	0	0	0	0	0	0	3
4010	4010	OWID_ASI	0	Asia	2020-01-26	2,105	677	0	56	14	...	0	0	0	0	0	0	0	0	0	3
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
4466	4466	OWID_ASI	0	Asia	2021-04-26	37,496,027	440,992	468,236	499,552	4,483	...	0	0	0	0	0	0	0	0	0	3
4467	4467	OWID_ASI	0	Asia	2021-04-27	37,986,034	490,007	474,154	504,552	5,000	...	0	0	0	0	0	0	0	0	0	3
4468	4468	OWID_ASI	0	Asia	2021-04-28	38,492,711	506,677	479,168	509,870	5,318	...	0	0	0	0	0	0	0	0	0	3
4469	4469	OWID_ASI	0	Asia	2021-04-29	39,007,933	515,222	483,807	515,111	5,241	...	0	0	0	0	0	0	0	0	0	3
4470	4470	OWID_ASI	0	Asia	2021-04-30	39,526,308	518,375	489,099	520,286	5,175	...	0	0	0	0	0	0	0	0	0	3

	index	iso_code	continent	location	date	total_cases	new_cases	new_cases_smoothed	total_deaths	new_deaths	...	extreme_poverty	cardiovasc_death_rate	diabetes_prevalence	female_smokers	male_smokers	handwashing_facilities	hospital_beds_per_thousand	life_expectancy	human_development_index	cluster
432	432	OWID_AFR	0	Africa	2020-02-13	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	4
433	433	OWID_AFR	0	Africa	2020-02-14	1	1	0	0	0	...	0	0	0	0	0	0	0	0	0	4
434	434	OWID_AFR	0	Africa	2020-02-15	1	0	0	0	0	...	0	0	0	0	0	0	0	0	0	4
435	435	OWID_AFR	0	Africa	2020-02-16	1	0	0	0	0	...	0	0	0	0	0	0	0	0	0	4
436	436	OWID_AFR	0	Africa	2020-02-17	1	0	0	0	0	...	0	0	0	0	0	0	0	0	0	4
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
35473	35473	IND	Asia	India	2021-04-26	17,636,186	323,023	330,745	197,894	2,771	...	21	282	10	2	21	60	1	70	1	4
35474	35474	IND	Asia	India	2021-04-27	17,997,113	360,927	340,140	201,187	3,293	...	21	282	10	2	21	60	1	70	1	4
35475	35475	IND	Asia	India	2021-04-28	18,376,421	379,308	349,378	204,832	3,645	...	21	282	10	2	21	60	1	70	1	4
35476	35476	IND	Asia	India	2021-04-29	18,762,976	386,555	357,040	208,330	3,498	...	21	282	10	2	21	60	1	70	1	4
35477	35477	IND	Asia	India	2021-04-30	19,164,969	401,993	364,927	211,853	3,523	...	21	282	10	2	21	60	1	70	1	4

	index	iso_code	continent	location	date	total_cases	new_cases	new_cases_smoothed	total_deaths	new_deaths	...	extreme_poverty	cardiovasc_death_rate	diabetes_prevalence	female_smokers	male_smokers	handwashing_facilities	hospital_beds_per_thousand	life_expectancy	human_development_index	cluster
0	0	AFG	Asia	Afghanistan	2020-02-24	1	1	0	0	0	...	0	597	10	0	0	38	0	65	1	1
1	1	AFG	Asia	Afghanistan	2020-02-25	1	0	0	0	0	...	0	597	10	0	0	38	0	65	1	1
2	2	AFG	Asia	Afghanistan	2020-02-26	1	0	0	0	0	...	0	597	10	0	0	38	0	65	1	1
3	3	AFG	Asia	Afghanistan	2020-02-27	1	0	0	0	0	...	0	597	10	0	0	38	0	65	1	1
4	4	AFG	Asia	Afghanistan	2020-02-28	1	0	0	0	0	...	0	597	10	0	0	38	0	65	1	1
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
85166	85166	ZWE	Africa	Zimbabwe	2021-04-26	38,102	16	35	1,560	3	...	21	308	2	2	31	37	2	61	1	1
85167	85167	ZWE	Africa	Zimbabwe	2021-04-27	38,164	62	41	1,565	5	...	21	308	2	2	31	37	2	61	1	1
85168	85168	ZWE	Africa	Zimbabwe	2021-04-28	38,191	27	30	1,565	0	...	21	308	2	2	31	37	2	61	1	1
85169	85169	ZWE	Africa	Zimbabwe	2021-04-29	38,235	44	31	1,567	2	...	21	308	2	2	31	37	2	61	1	1
85170	85170	ZWE	Africa	Zimbabwe	2021-04-30	38,257	22	30	1,567	0	...	21	308	2	2	31	37	2	61	1	1