Fertility Rate in European Countries with R

This is the source, from https://notebooks.azure.com/ignatandrei/libraries/EuropeStatData – FertilityRate

Preload packages

Pacman

First install pacman, which then installs and loads the other packages in one step

In [1]:
if (!require("pacman")) install.packages("pacman")
              

Loading required package: pacman
              

Load the necessary packages

In [2]:
pacman::p_load('XML', "magrittr","ggplot2", "RCurl", "rlist", "rvest", "dplyr", "devtools","assertthat","XLConnect","tidyr")
              

Attach the packages so their functions are directly available (p_load above already attached the listed ones; stringr is loaded here in addition)

In [3]:
library(stringr)
library(magrittr)
library(XML)
library(RCurl)
library(rlist)
library(ggplot2)
library(rvest)
library(tidyr)
              

Primary data

Download fertility rate for regions / counties

In [4]:
theUrl<-"http://ec.europa.eu/eurostat/web/products-datasets/-/demo_r_frate3"
content <- read_html(theUrl)
urlsDownload <- html_nodes(x = content, xpath = '//a') %>%  # find the <a> tags
                html_attr("href") %>%                       # extract the href attribute
                .[grepl(glob2rx("*BulkDownload*"), .)]      # glob2rx turns the wildcard pattern into a regular expression

#print(urlsDownload)
assert_that(length(urlsDownload) == 1, msg = paste("urlsDownload must have length 1! actual length:", length(urlsDownload)))
urlFile<-urlsDownload[1]
print(paste("Downloading from ",urlFile))
              

TRUE

[1] "Downloading from  http://ec.europa.eu/eurostat/estat-navtree-portlet-prod/BulkDownloadListing?file=data/demo_r_frate3.tsv.gz"
              

Now download the file from the URL found above, so we can inspect it

In [5]:
#print(getwd())
dest<-file.path(getwd(),"file.gz")
download.file(urlFile, dest, mode = "wb", cacheOK = F)  # binary mode
assert_that(file.exists(dest))
              

TRUE

Read data

In [6]:
data <- as.data.frame(read.table(gzfile(dest), fill = TRUE, header = TRUE))
keeps <- c("unit.geo.time", "X2015")
print(head(data))
              

  unit.geo.time X2015 X2014
1         NR,AL  1.67  1.79
2        NR,AL0  1.67  1.76
3         NR,AT  1.50  1.47
4        NR,AT1  1.45  1.43
5       NR,AT11  1.37  1.33
6      NR,AT111  1.39  1.39
              

Tidying the data

Split the first column

In [7]:
data <- data[keeps] %>%
    separate(unit.geo.time, c("unit", "geo"), ",")

print(head(data))
              

  unit   geo X2015
1   NR    AL  1.67
2   NR   AL0  1.67
3   NR    AT  1.50
4   NR   AT1  1.45
5   NR  AT11  1.37
6   NR AT111  1.39
              

Dictionary of metadata

Download dictionary

In [8]:
theUrlNuts<-"http://ec.europa.eu/eurostat/ramon/documents/nuts/NUTS_2013.zip"
destNuts<-file.path(getwd(),"nuts.zip")
download.file(theUrlNuts, destNuts, mode = "wb", cacheOK = F)  # binary mode
assert_that(file.exists(destNuts))
              

TRUE

Reading into memory

In [9]:
nuts3<-as.data.frame(readWorksheetFromFile(unzip(destNuts),sheet = 1))
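Note: unzip() extracts the archive and returns the path(s) of the extracted file(s), so the call above works only while the zip contains a single file. A slightly more defensive sketch (assuming the NUTS spreadsheet is the only .xls file in the archive):

files <- unzip(destNuts)                     # extract everything, get the paths back
xlsFile <- files[grepl("\\.xls", files)][1]  # keep the first Excel file
nuts3 <- as.data.frame(readWorksheetFromFile(xlsFile, sheet = 1))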
              

Tidy the data

In [10]:
keeps <- c("NUTS.CODE", "NUTS.LABEL","NUTS.LEVEL","COUNTRY.CODE")
nuts3<- nuts3[keeps]
print(head(nuts3))
              

  NUTS.CODE                                                     NUTS.LABEL NUTS.LEVEL COUNTRY.CODE
1        BE                                                BELGIQUE-BELGIË          0           BE
2       BE1 RÉGION DE BRUXELLES-CAPITALE / BRUSSELS HOOFDSTEDELIJK GEWEST           1           BE
3      BE10 Région de Bruxelles-Capitale / Brussels Hoofdstedelijk Gewest           2           BE
4     BE100        Arr. de Bruxelles-Capitale / Arr. van Brussel-Hoofdstad           3           BE
5       BE2                                                  VLAAMS GEWEST          1           BE
6      BE21                                                Prov. Antwerpen          2           BE


Merging the data with metadata

In [11]:
allData <- merge(nuts3,data,by.x="NUTS.CODE",by.y="geo")
allData$Val <- with(allData, as.numeric(as.character(X2015)))
print(head(allData))
              

  NUTS.CODE       NUTS.LABEL NUTS.LEVEL COUNTRY.CODE unit X2015  Val
1        AT       ÖSTERREICH          0           AT   NR  1.50 1.50
2       AT1    OSTÖSTERREICH          1           AT   NR  1.45 1.45
3      AT11       Burgenland          2           AT   NR  1.37 1.37
4     AT111 Mittelburgenland          3           AT   NR  1.39 1.39
5     AT112   Nordburgenland          3           AT   NR  1.36 1.36
6     AT113    Südburgenland          3           AT   NR  1.38 1.38
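Why as.numeric(as.character(...)) instead of plain as.numeric()? read.table() typically imports X2015 as a factor (Eurostat files mix numbers with flags), and converting a factor directly returns the internal level indices rather than the values. A small illustration:

f <- factor(c("1.50", "1.45"))
as.numeric(f)                # 2 1  -- the level indices, not the values
as.numeric(as.character(f))  # 1.50 1.45 -- the actual numbers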
              

Data for countries

Filtering data

In [12]:
allCountries <- allData %>%
    filter(NUTS.LEVEL == 0) %>%
    select(NUTS.CODE, Val) %>%
    as.data.frame

print(allCountries)
              

   NUTS.CODE  Val
1         AT 1.50
2         BE 1.70
3         BG 1.53
4         CY 1.33
5         CZ 1.57
6         DE 1.50
7         DK 1.70
8         EE 1.58
9         EL 1.33
10        ES 1.34
11        FI 1.65
12        FR 1.96
13        HR 1.40
14        HU 1.45
15        IE 1.94
16        IT 1.35
17        LT 1.71
18        LU 1.47
19        LV 1.71
20        MT 1.45
21        NL 1.66
22        PL 1.33
23        PT 1.30
24        RO 1.58
25        SE 1.85
26        SI 1.58
27        SK 1.41
28        UK 1.81
              

Show data in graphic form

In [13]:
sapply(allCountries, mode)
png(filename="allCountries.png", width=600, height=600)
ggplot(allCountries, aes(x=reorder(NUTS.CODE,-Val), y=Val))+geom_point()
dev.off()
              

  NUTS.CODE         Val 
"character"   "numeric" 

png: 2

Image for all countries

[Image: allCountries.png – fertility rate per country, sorted descending]

Display data for a chosen country

Choosing the country

In [14]:
countryCode <- "RO"
              

Filtering the data

Filtering data for the chosen country code

In [15]:
#head(allData)
Country <- allData %>%
    filter(COUNTRY.CODE == countryCode) %>%
    filter(NUTS.LEVEL == 3) %>%
    select(NUTS.LABEL, NUTS.CODE, Val)

Country <- as.data.frame(Country)
#print(Country)


Show data in graphic form

In [16]:
#sapply(Country, mode)
file <- paste('Country', countryCode, '.png', sep='')
print(file)
png(filename=file, width=600, height=600)
ggplot(Country, aes(x=Val, y=reorder(NUTS.LABEL,-Val))) + geom_point()
dev.off()
              

Show the graphic for the chosen country

[Image: CountryRO.png – fertility rate per NUTS 3 region]

Download large files with R and RCurl

First, read the documentation. Explore the available options with:

#listCurlOptions()
#getCurlOptionsConstants()
#getCurlOptionTypes(opts = getCurlOptionsConstants())
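For example, to check that a given option name is supported (a quick sketch):

"http.version" %in% listCurlOptions()   # should print TRUE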

Then, this is the code (pretty self-explanatory):

downloadFile <- function(url, file){
    curl <- getCurlHandle()
    # use HTTP 1.0 and print what libcurl is doing
    curlSetOpt(.opts = list(http.version = HTTP_VERSION_1_0, verbose = TRUE),
               curl = curl)

    f <- CFILE(file, mode = "wb")   # open the destination file in binary mode
    a <- curlPerform(url = url, curl = curl, writedata = f@ref, noprogress = FALSE)
    close(f)
}
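A usage sketch, reusing the Eurostat bulk-download URL (urlFile) from the notebook above:

downloadFile(urlFile, file.path(getwd(), "file.gz"))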

GitHub as a repository for a database (SQLite)

In making http://isthistaxilegal.apphb.com I needed a database. I decided on SQLite, since it needs just a file and the DLLs. (Side note: the right DLLs for SQLite and .NET Core are not the original ones, which have a dependency on the .NET Framework, but Microsoft.Data.Sqlite.) But how to deploy this file every time?

Simplest way: at startup, verify whether the file exists and, if not, download it from GitHub.

The code is:

var url ="https://raw.githubusercontent.com/ignatandrei/IsThisTaxiLegal/master/datas/taxis.sqlite3";
using (var client = new HttpClient())
{

    using (var result = await client.GetAsync(url))
    {
        if (result.IsSuccessStatusCode)
        {
            var bytes=await result.Content.ReadAsByteArrayAsync();
            File.WriteAllBytes("taxis.sqlite3",bytes);
        }

    }
}

And the idea is not bad at all: keeping the database on GitHub and downloading it when needed works well.

Interpreting PDF to database (SQLite)

In making http://isthistaxilegal.apphb.com I had to parse PDF files that contain tables, which is a daunting task. I investigated the existing software; the most used one was https://github.com/tabulapdf, yet even it did not come close. The problem was not transforming the PDF into text. The problem was transforming the tables in the PDF into CSV or something similar. One of the reasons this is difficult is stated here: https://stackoverflow.com/questions/31579922/extracting-tables-lines-and-coordinates-from-pdf-using-pdfsharp-c :

The (my) solution
Starting from a PDF reader (iTextSharp) you need to:
1. read the lines (hopefully only vertical and horizontal lines);
2. join the lines (a line of a table could be several lines, for example one per cell);
3. understand where the tables are (sometimes making some hypothesis based on your needs);
4. optionally find the text outside the tables (better to keep all the text) and insert it in paragraphs;
5. Insert text inside the cells of the table

So I tried the old automation solution: Word. Word can read PDF files. Even better, the tables are imported as real tables! So the next task is to transform the Word tables into CSV. Word macro to the rescue:

Sub ExportTables()
    Dim d As Document
    Set d = ActiveDocument
    MsgBox d.Tables.Count
    Dim t As Table
    Dim r As Row
    Dim iRow As Integer
    Dim iCell As Integer
    Dim str As String
    Dim start As Integer
    start = 1
    For Each t In d.Tables

        'str = str + t.ConvertToText(Separator:=wdSeparateByCommas).Text
        Dim nrRows As Integer
        nrRows = t.Rows.Count
        For iRow = start To nrRows

            Set r = t.Rows.Item(iRow)
            Dim nrCells As Integer
            nrCells = r.Cells.Count
            For iCell = 1 To nrCells
                ' strip the cell-end markers (CR, LF, Chr(7)) and separate the cells with |
                str = str & Replace(Replace(Replace(r.Cells.Item(iCell).Range.Text, vbCr, ""), vbLf, ""), Chr(7), "") & "|"
            Next iCell

            str = str & vbCrLf

        Next iRow

        If start = 1 Then start = 2 ' do not read headers for each table
    Next t
    ' write the accumulated rows next to the document
    Open d.Path & "\Cluj.csv" For Output As #1
    Print #1, str
    Close #1
End Sub


Then, to get from CSV into SQLite, I used sqlite3.exe with the following script (sqlite3's default separator is "|", which matches the macro's output):

.open ../taxis.sqlite3
.import ../bucuresti.csv bucuresti
.quit
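If you prefer to stay in R (the language of the notebook above), the same import can be sketched with the RSQLite package; the file name and the pipe separator are assumptions matching the macro's output:

library(RSQLite)

con <- dbConnect(SQLite(), "taxis.sqlite3")          # open (or create) the database file
bucuresti <- read.delim("bucuresti.csv", sep = "|")  # read the pipe-separated export
dbWriteTable(con, "bucuresti", bucuresti)            # create the table and load the rows
dbDisconnect(con)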

And the image shows the flow:

[Image: the flow from PDF through Word and CSV to SQLite]

License plate number from image

One feature to implement is recognizing the license plate number from an image: instead of typing the number, the user takes a picture of the plate and sends it to the application. So I decided to do this. Roxana Beloiu pointed me to https://github.com/openalpr/openalpr, which can recognize the plate number. I had some problems with the application in .NET (the latest releases are better than the source code). However, it was built for .NET 4.6, and my site runs on .NET Core. First idea: mix the two frameworks. No!

Second idea: why not port it from .NET 4.6 to .NET Core? No: too many things are not yet in .NET Core (e.g. Image).

Third idea: why not make a serverless function in Azure?

And this kept me struggling for 4 hours. The problem? The .NET alpr DLL depends on C libraries and other data files that are copied into the bin folder when you reference it from a console application, but not when you reference it from a VS Azure Functions project.

Solution? Old times to the rescue:

1. Make a console application that receives the file name and outputs the plate number.

2. Upload this console application and all the necessary DLLs with Kudu.

3. In the Azure function, receive a base64 string, convert it to bytes, write it to a file, run the console application, delete the file, and send the text back.

Not the most elegant solution, but hey, it works.
