import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb
import re
import geopandas
import folium
%matplotlib inline
# Load the raw facility records from the CSV file in the working directory.
df = pd.read_csv('./hospital_data.csv')

# Structural overview: first rows, dimensions, column dtypes, summary stats.
df.head()
df.shape
df.info()
df.describe()

# Distinct values (and their frequencies) of the descriptive columns.
df['Type'].unique()
df['Type'].value_counts()
df['Ownership'].value_counts()
df['Ownership'].unique()
df['Region'].unique()
# Number of rows for every (Region, District) pair.
region_district_df = (
    df.groupby(['Region', 'District'])
    .size()
    .reset_index(name='Count')
)

# Print the districts of each region so that a district recorded under
# several different names can be spotted.
for val in region_district_df['Region'].unique():
    in_region = region_district_df['Region'] == val
    districts = region_district_df.loc[in_region, 'District'].unique()
    print(" Region: {}, Number of Districts: {}\n{}\n".format(val, len(districts), districts))
# Rows whose town is missing.
missing_towns = df[df['Town'].isna()]
missing_towns
missing_towns.shape

# Rows where at least one of the two coordinates is missing.
no_latitude = df['Latitude'].isna()
no_longitude = df['Longitude'].isna()
missing_coordinates = df[no_latitude | no_longitude]
missing_coordinates
missing_coordinates.shape

# Write the incomplete rows to CSV files so that the client can help
# retrieve the missing values.
missing_coordinates.to_csv('missing_coordinates.csv', index=False)
missing_towns.to_csv('missing_towns.csv', index=False)

# Look at the rows of a single district.
df.loc[df['District'] == 'Bosome Freho']
Type:
* District Health Directorate,DHD
* Clinic, clinic
* Health Centre, Centre
* CHPS, CPHS
* Municipal Health Directorate, Municipal Health Directorate
Ownership:
* Government, government
* Private, private
* Islamic, Muslim
Region: Western:
* 'Prestea-Huni Valley','Pretea-Huni Valley'
* 'Nzema East','Nzema East Municipal'
Eastern:
* 'Birim South','Birim south'
* 'Kwaebibirem','Kweabibirem'
* 'New Juaben','New Juaben Municipal'
Brong Ahafo:
* 'Sunyani','Sunyani Municipal'
* 'Techiman','Techiman Municipal'
* 'Wenchi','Wenchi Municipal'
# All cleaning is done on an independent (deep) copy of the original dataframe.
clean_df = df.copy(deep=True)
missing_coordinates
def pascal_to_snake_case(name):
    """Convert a PascalCase name to snake_case.

    An underscore is inserted in front of every uppercase ASCII letter that
    is not the first character, then the whole string is lower-cased, e.g.
    ``"FacilityName"`` -> ``"facility_name"``.
    """
    # Zero-width match: every position except the start that is followed by A-Z.
    with_underscores = re.sub(r'(?<!^)(?=[A-Z])', '_', name)
    return with_underscores.lower()
# Rename every column label to snake_case; the converter is handed to pandas
# directly as the label mapper.
clean_df.rename(columns=pascal_to_snake_case, inplace=True, errors="raise")
# Verify
clean_df.columns.tolist()
# Code
# Replace missing coordinates with available data.
# Each key is a row label of clean_df, each value is [latitude, longitude].
# NOTE(review): the labels are hard-coded — confirm they still point at the
# intended rows whenever the input CSV changes.
data_to_replace = {
    357: [6.466667, -1.433333],
    358: [6.4279761, -1.3352532],
    1651: [6.1695534, -1.1417168],
    2439: [9.9291129, -0.2140466],
    3416: [4.861389, -2.195556],
    3417: [5.033333, -2.266667],
    3663: [5.04348, -2.46458],
    3664: [5.0494868, -2.4820236],
    3669: [4.9743222, -2.4239446],
    3672: [5.1439822, -2.3725535],
    3678: [5.5485504, -0.1992678],
}
for index, (lat, lon) in data_to_replace.items():
    clean_df.loc[index, 'latitude'] = lat
    clean_df.loc[index, 'longitude'] = lon
# Test: shape of the rows that still lack a coordinate.
still_missing = clean_df['latitude'].isna() | clean_df['longitude'].isna()
clean_df[still_missing].shape
# TYPE
# Code
# Map the variant spellings of the facility type onto one canonical spelling.
# The result is assigned back to the column: replace(..., inplace=True) on
# clean_df.type is a chained in-place call on a column selection, which is
# not guaranteed to update clean_df (it does not under pandas Copy-on-Write).
type_fixes = {
    'DHD': 'District Health Directorate',
    'clinic': 'Clinic',
    'Centre': 'Health Centre',
    'CPHS': 'CHPS',
    # NOTE(review): as written this entry maps the value onto itself; the raw
    # variant may differ only in whitespace — confirm against the data.
    'Municipal Health Directorate': 'Municipal Health Directorate',
}
clean_df['type'] = clean_df['type'].replace(type_fixes)
# Test
clean_df['type'].value_counts()
# OWNERSHIP
## Code
# Map the variant spellings of the owner onto one canonical spelling.
# The result is assigned back to the column: replace(..., inplace=True) on
# clean_df.ownership is a chained in-place call on a column selection, which
# is not guaranteed to update clean_df (it does not under pandas
# Copy-on-Write).
ownership_fixes = {
    'government': 'Government',
    'private': 'Private',
    'Muslim': 'Islamic',
}
clean_df['ownership'] = clean_df['ownership'].replace(ownership_fixes)
# Test
clean_df['ownership'].value_counts()
# DISTRICT
# Code
# Map the variant district names onto one canonical name each.
# The result is assigned back to the column: replace(..., inplace=True) on
# clean_df.district is a chained in-place call on a column selection, which
# is not guaranteed to update clean_df (it does not under pandas
# Copy-on-Write).
district_fixes = {
    'Pretea-Huni Valley': 'Prestea-Huni Valley',
    'Nzema East': 'Nzema East Municipal',
    'Birim south': 'Birim South',
    'Kweabibirem': 'Kwaebibirem',
    'New Juaben': 'New Juaben Municipal',
    'Sunyani': 'Sunyani Municipal',
    'Techiman': 'Techiman Municipal',
    'Wenchi': 'Wenchi Municipal',
}
clean_df['district'] = clean_df['district'].replace(district_fixes)

# Test
# Re-list the districts of the three regions whose names were corrected.
error_region_district_df = (
    clean_df.groupby(['region', 'district'])
    .size()
    .reset_index(name='Count')
)
error_regions = ['Eastern', 'Western', 'Brong Ahafo']
for val in error_regions:
    in_region = error_region_district_df['region'] == val
    districts = error_region_district_df.loc[in_region, 'district'].unique()
    print(" Region: {}, Number of Districts: {}\n{}\n".format(val, len(districts), districts))
Convert Region, District, Type, Ownership to Category datatype
# Convert the four descriptive columns to the pandas 'category' dtype in a
# single astype call (one dtype entry per column).
cols = ['region', 'district', 'type', 'ownership']
clean_df = clean_df.astype(dict.fromkeys(cols, 'category'))
clean_df.info()
## Save cleaned (partially) dataframe
clean_df.to_csv('cleaned_hospital_data.csv', index=False)
# Distribution of Clinics Per Region
default_color = sb.color_palette()[0]
plt.figure(figsize=[18, 10])
ax = sb.countplot(data=clean_df, x='region', color=default_color)

# add annotations
n_points = len(clean_df)
cat_counts = clean_df['region'].value_counts()
# current tick locations and labels of the x-axis
locs, labels = ax.get_xticks(), ax.get_xticklabels()
for loc, label in zip(locs, labels):
    # the tick label text is the region name, which indexes the counts
    count = cat_counts[label.get_text()]
    pct_string = '{:0.2f}%'.format(100 * count / n_points)
    # write the percentage just below the top of the bar
    ax.text(loc, count - 30, pct_string, ha='center', color='w', fontsize=15)

# The y-axis label and ticks are removed; each bar carries its percentage.
ax.set_ylabel('')
ax.set_xlabel('Regions', labelpad=10, fontsize=20)
ax.set_title('Distribution of Health Facilities Per Region', fontsize=20)
ax.set_yticks([])
plt.xticks(fontsize=15)
plt.box(on=None)
# One count plot per column; each entry is (column, x-axis label, title).
single_column_plots = [
    ('type', 'Type of Facility',
     'Distribution of the Type of Health Facilities'),
    ('ownership', 'Owner of Facility',
     'Distribution of the Ownership of Health Facilities'),
]
for column, x_label, title in single_column_plots:
    plt.figure(figsize=[20, 10])
    sb.countplot(data=clean_df, x=column, color=default_color)
    plt.xticks(rotation=80, fontsize=15)
    plt.xlabel(x_label, labelpad=20, fontsize=20)
    plt.ylabel('Number of Health Facilities', fontsize=20)
    plt.title(title, fontsize=20)
# Ownership of the health facilities broken down by region: one group of bars
# per region, one bar per owner within each group.
plt.figure(figsize=[25, 10])
ax = sb.countplot(data=clean_df, x='region', hue='ownership')
plt.xticks(rotation=80, fontsize=15)
# The x-axis carries the regions (ownership is the hue), so it is labelled
# as such; the title says "Per Region" to tell this figure apart from the
# plain ownership plot.
plt.xlabel('Regions', labelpad=20, fontsize=20)
plt.ylabel('Number of Health Facilities', fontsize=20)
plt.title('Distribution of the Ownership of Health Facilities Per Region', fontsize=20)
ax.legend(title='Owners of Health Facilities', loc=1, ncol=2)
clean_df
# Build point geometries from the longitude/latitude columns and declare the
# coordinate reference system (EPSG:4326) at construction time, instead of
# assigning the deprecated {'init': ...} dictionary afterwards.
gdf = geopandas.GeoDataFrame(
    clean_df,
    geometry=geopandas.points_from_xy(clean_df.longitude, clean_df.latitude),
    crs='EPSG:4326',
)
regions = geopandas.read_file('./Map_of_Regions_in_Ghana.shx')
ax = regions.plot(figsize=(25, 25), color='whitesmoke', linestyle=':', edgecolor='black')
# Reproject the points to EPSG:32630 before drawing them over the regions.
# NOTE(review): presumably the regions layer is in EPSG:32630 — confirm via
# regions.crs.
gdf.to_crs(epsg=32630).plot(markersize=1, ax=ax, label='Distribution of Health Facilities Across Ghana')
# Use Facilities with coordinates: both latitude and longitude must be present.
gdf = gdf.dropna(subset=['latitude', 'longitude'])
# # Create a map
# import folium
# f = folium.Figure(width=1200, height=1500)
# m = folium.Map(location=[7.9528, -1.0307], tiles='openstreetmap', zoom_start=9)
# # Add points to the map
# for idx, row in gdf.iterrows():
# folium.Marker([row['latitude'], row['longitude']], popup=row['facility_name']).add_to(m)
# # Display the map
# f.add_child(m)
# f
def plot_regional_maps(gdf, coordinates, zoom, region_name):
    """Save an interactive folium map with one marker per facility.

    A marker is placed at (latitude, longitude) for each row of ``gdf``, with
    the row's ``facility_name`` as its popup. The map is written to
    ``"<region_name>_map.html"`` (spaces in the name replaced by underscores).

    Args:
        gdf (Geopandas dataframe): Rows to plot; must provide the columns
            ``latitude``, ``longitude`` and ``facility_name``.
        coordinates (list): ``[latitude, longitude]`` the map is centred on.
            E.g. ``[6.7373876, -1.8975697]``.
        zoom (int): The zoom level to start with. E.g. 2.
        region_name (str): Name of the region; used to build the output
            file name.
    """
    m = folium.Map(location=coordinates, tiles='openstreetmap', zoom_start=zoom)
    # Rows lacking either coordinate are skipped: a marker cannot be placed
    # at a NaN location.
    located = gdf.dropna(subset=['latitude', 'longitude'])
    rows = zip(located['latitude'], located['longitude'], located['facility_name'])
    for lat, lon, facility_name in rows:
        folium.Marker([lat, lon], popup=facility_name).add_to(m)
    m.save("{}_map.html".format(region_name.replace(" ", "_")))
# Map centre ([latitude, longitude]) for each region.
regional_coordinates = {
    'Ashanti': [6.7373876, -1.8975697],
    'Brong Ahafo': [7.5810711, -2.5497912],
    'Central': [5.667745, -1.8332853],
    'Eastern': [6.4576381, -1.0233539],
    'Greater Accra': [5.7452223, -0.4538212],
    'Northern': [9.3328873, -2.2296366],
    'Upper East': [10.7213849, -1.3312247],
    'Upper West': [10.3357329, -2.7159009],
    'Volta': [7.2555048, -0.7223261],
    'Western': [5.8935708, -3.4580734],
}

# Write one HTML map per region, holding only that region's rows.
for region_name, centre in regional_coordinates.items():
    geo_df = gdf[gdf['region'] == region_name]
    plot_regional_maps(
        gdf=geo_df,
        coordinates=centre,
        zoom=9,
        region_name=region_name,
    )
# Embed the saved regional maps. Each IFrame below is a bare expression, so
# it is only rendered when it is the last expression of a notebook cell.
# NOTE(review): these look like they were one notebook cell each — confirm
# the cell boundaries before merging them.
from IPython.display import IFrame
IFrame(src='./Ashanti_map.html',width=800, height=700)
IFrame(src='./Brong_Ahafo_map.html',width=800, height=700)
IFrame(src='./Central_map.html',width=800, height=700)
IFrame(src='./Eastern_map.html',width=800, height=700)
IFrame(src='./Greater_Accra_map.html',width=800, height=700)
IFrame(src='./Northern_map.html',width=800, height=700)
IFrame(src='./Upper_East_map.html',width=800, height=700)
IFrame(src='./Upper_West_map.html',width=800, height=700)
IFrame(src='./Volta_map.html',width=800, height=700)
IFrame(src='./Western_map.html',width=800, height=700)