Fertility Rate in European Countries with R
This is the source from https://notebooks.azure.com/ignatandrei/libraries/EuropeStatData – FertitlityRate
Preload packages¶
Pacman¶
First, install pacman (only if it is not already installed); it is then used to install and load the other packages.
# Install pacman only when it is missing. requireNamespace() checks for the
# package without attaching it, which is enough because pacman is called
# through the pacman:: prefix.
if (!requireNamespace("pacman", quietly = TRUE)) install.packages("pacman")
Load the necessary packages¶
# Install (when missing) and attach every package the notebook relies on.
# stringr is listed as well because it is attached with library() right after.
pacman::p_load(
  "XML", "magrittr", "ggplot2", "RCurl", "rlist", "rvest", "dplyr",
  "devtools", "assertthat", "XLConnect", "tidyr", "stringr"
)
Use the functions in the packages¶
library(stringr)
library(magrittr)
library(XML)
library(RCurl)
library(rlist)
library(ggplot2)
library(rvest)
library(tidyr)
Step 1 download and parse the HTML¶
Downloading from http://ec.europa.eu/eurostat/estat-navtree-portlet-prod/BulkDownloadListing?file=data/demo_r_d3dens.tsv.gz
# Dataset page that is scraped for the bulk-download link.
theUrl <- "http://ec.europa.eu/eurostat/web/products-datasets/-/demo_r_frate3"
content <- read_html(theUrl)

# Collect the href of every <a> tag and keep the ones matching the wildcard
# pattern (glob2rx turns the wildcard into a regular expression).
urlsDownload <- content %>%
  html_nodes(xpath = "//a") %>%
  html_attr("href") %>%
  grep(glob2rx("*BulkDownload*"), ., value = TRUE)
#print(urlsDownload)

# Stop unless exactly one matching link was found.
assert_that(
  length(urlsDownload) == 1,
  msg = paste(
    "urlsDownload must have length 1! real length",
    length(urlsDownload)
  )
)
urlFile <- urlsDownload[1]
print(paste("Downloading from ", urlFile))
Now download the file linked from {{theUrl}} so that we can inspect it
#print(getwd())
# Save the download in the working directory as file.gz.
dest <- file.path(getwd(), "file.gz")
# mode = "wb" writes the file in binary mode; FALSE is spelled out because the
# shorthand F is an ordinary variable that can be reassigned.
download.file(urlFile, dest, mode = "wb", cacheOK = FALSE)
assert_that(file.exists(dest))
Read data¶
# Read the gzip-compressed table. read.table() already returns a data frame;
# fill = TRUE pads rows that have fewer fields than the others.
data <- read.table(gzfile(dest), fill = TRUE, header = TRUE)
# Columns retained later on: the combined key column and the X2015 column.
keeps <- c("unit.geo.time", "X2015")
print(head(data))
Tidying the data¶
Split the first column
# Keep only the columns named in `keeps`, then break the comma-separated key
# column into its two parts: unit and geo.
data <- separate(
  data[keeps],
  col = unit.geo.time,
  into = c("unit", "geo"),
  sep = ","
)
print(head(data))
# Download the NUTS 2013 archive into the working directory as nuts.zip.
theUrlNuts <- "http://ec.europa.eu/eurostat/ramon/documents/nuts/NUTS_2013.zip"
destNuts <- file.path(getwd(), "nuts.zip")
# mode = "wb" writes the file in binary mode; FALSE is spelled out because the
# shorthand F is an ordinary variable that can be reassigned.
download.file(theUrlNuts, destNuts, mode = "wb", cacheOK = FALSE)
assert_that(file.exists(destNuts))
Reading into memory¶
# Extract the archive, then load the first worksheet of the extracted file
# as a data frame.
nuts3 <- destNuts %>%
  unzip() %>%
  readWorksheetFromFile(sheet = 1) %>%
  as.data.frame()
Tidy the data¶
# Restrict the NUTS table to the code, label, level and country-code columns.
keeps <- c("NUTS.CODE", "NUTS.LABEL", "NUTS.LEVEL", "COUNTRY.CODE")
nuts3 <- nuts3[, keeps, drop = FALSE]
print(head(nuts3))
Merging the data with metadata¶
# Join the NUTS metadata to the statistics: NUTS.CODE on one side is matched
# with geo on the other.
allData <- merge(x = nuts3, y = data, by.x = "NUTS.CODE", by.y = "geo")
# Convert the X2015 column to numbers, going through character first.
allData[["Val"]] <- as.numeric(as.character(allData[["X2015"]]))
print(head(allData))
Data for countries¶
Filtering data¶
# Rows with NUTS.LEVEL 0, reduced to the code and value columns.
allCountries <- as.data.frame(
  select(
    filter(allData, NUTS.LEVEL == 0),
    NUTS.CODE, Val
  )
)
print(allCountries)
Show data in graphic form¶
# Show the storage mode of each column; vapply() guarantees a character result.
vapply(allCountries, mode, character(1))
# Draw one point per NUTS.CODE, ordered by decreasing Val, into a PNG file.
png(filename = "allCountries.png", width = 600, height = 600)
# A ggplot object is drawn only when it is printed. The explicit print() makes
# the chart reach the PNG device even when the code is run through source().
print(
  ggplot(allCountries, aes(x = reorder(NUTS.CODE, -Val), y = Val)) +
    geom_point()
)
dev.off()
Image for all countries¶
# Country code selected for the per-country sections that follow.
countryCode <- "RO"
Filtering the data¶
Filtering data for {{countryCode}}
#head(allData)
# Keep the NUTS level 3 rows of the selected country. R names are
# case-sensitive, so the filter has to use countryCode exactly as it is
# defined; the former spelling CountryCode referred to no existing object.
Country <- allData %>%
  filter(COUNTRY.CODE == countryCode) %>%
  filter(NUTS.LEVEL == 3) %>%
  select(NUTS.LABEL, NUTS.CODE, Val)
Country <- as.data.frame(Country)
#print(Country)
Show data in graphic form¶
#sapply(Country, mode)
# Build the output file name from the country code, e.g. CountryRO.png.
# countryCode is spelled exactly as it is defined (R is case-sensitive).
file <- paste0("Country", countryCode, ".png")
print(file)
png(filename = file, width = 600, height = 600)
# A ggplot object is drawn only when it is printed. The explicit print() makes
# the chart reach the PNG device even when the code is run through source().
print(
  ggplot(Country, aes(x = Val, y = reorder(NUTS.LABEL, -Val))) +
    geom_point()
)
dev.off()