Hands-on Exercise 7

Author

Oh Jia Wen

Published

June 9, 2023

Modified

June 9, 2023

Getting started

  1. Using p_load() of pacman package to load the required libraries
pacman::p_load(scales, viridis, lubridate, ggthemes, gridExtra, readxl, knitr, 
               data.table, CGPfunctions, ggHoriPlot, tidyverse)
  1. Importing Data
# Load the raw event log; later steps read its timestamp, source_country
# and tz columns.
attacks <- read_csv(file = "data/eventlog.csv")
  1. Data Preparation

Step 1: Deriving weekday and hour of day fields

#' Derive weekday and hour-of-day fields for one batch of events
#'
#' @param ts Timestamps to parse with `ymd_hms()`.
#' @param sc Source-country values, carried through unchanged.
#' @param tz Time-zone names; only the first element is used, so the batch
#'   is expected to share a single time zone.
#' @return A `data.table` with columns `source_country`, `wkday` (the result
#'   of `weekdays()`) and `hour` (the result of `hour()`).
make_hr_wkday <- function(ts, sc, tz) {
  # quiet = TRUE keeps ymd_hms() from printing parse messages.
  local_times <- ymd_hms(ts, tz = tz[1], quiet = TRUE)

  data.table(
    source_country = sc,
    wkday = weekdays(local_times),
    hour = hour(local_times)
  )
}

Step 2: Deriving the attacks tibble data frame

# Factor levels for the weekday field, running from Saturday to Sunday.
wkday_levels <- c(
  "Saturday", "Friday", "Thursday", "Wednesday",
  "Tuesday", "Monday", "Sunday"
)

# Process one time zone at a time so make_hr_wkday() receives a single tz
# value per call, then turn the derived fields into factors.
attacks <- attacks %>%
  group_by(tz) %>%
  do(make_hr_wkday(.$timestamp, .$source_country, .$tz)) %>%
  ungroup() %>%
  mutate(
    wkday = factor(wkday, levels = wkday_levels),
    hour = factor(hour, levels = 0:23)
  )

# Preview the first rows of the derived table.
kable(head(attacks))
tz source_country wkday hour
Africa/Cairo BG Saturday 20
Africa/Cairo TW Sunday 6
Africa/Cairo TW Sunday 8
Africa/Cairo CN Sunday 11
Africa/Cairo US Sunday 15
Africa/Cairo CA Monday 11

1. Plotting Calendar Heatmap

# Number of attacks in each weekday/hour cell; rows containing NA are dropped.
grouped <- attacks %>%
  count(wkday, hour) %>%
  ungroup() %>%
  na.omit()

# One tile per weekday/hour cell, shaded by the attack count.
ggplot(data = grouped, mapping = aes(x = hour, y = wkday, fill = n)) +
  geom_tile(color = "white", size = 0.1) +
  theme_tufte(base_family = "Helvetica") +
  coord_equal() +
  scale_fill_gradient(
    name = "# of attacks",
    low = "sky blue",
    high = "dark blue"
  ) +
  labs(
    x = NULL,
    y = NULL,
    title = "Attacks by weekday and time of day"
  ) +
  theme(
    axis.ticks = element_blank(),
    plot.title = element_text(hjust = 0.5),
    legend.title = element_text(size = 8),
    legend.text = element_text(size = 6)
  )

1.1 Plotting Multiple Calendar Heatmaps

Step 1: Deriving attack by country object

To identify the top 4 countries with the highest number of attacks, you need to do the following:

  • count the number of attacks by country,

  • calculate the percentage of attacks by country, and

  • save the results in a tibble data frame.

# Attack count per source country, largest first, with each country's share
# of the total formatted by percent().
attacks_by_country <- attacks %>%
  count(source_country) %>%
  arrange(desc(n)) %>%
  mutate(percent = percent(n / sum(n)))

Step 2: Preparing the tidy data frame

Extract the attack records of the top 4 countries from attacks data frame and save the data in a new tibble data frame (i.e. top4_attacks).

# First four entries of the country column of attacks_by_country.
top4 <- attacks_by_country[["source_country"]][1:4]

# Weekday/hour attack counts for those countries only; source_country becomes
# a factor whose levels follow the order of top4.
top4_attacks <- attacks %>%
  filter(source_country %in% top4) %>%
  count(source_country, wkday, hour) %>%
  ungroup() %>%
  mutate(source_country = factor(source_country, levels = top4)) %>%
  na.omit()

Step 3: Plotting the heatmaps

# Same tile heatmap as the single-panel version, with one facet per country
# laid out in two columns.
ggplot(data = top4_attacks, mapping = aes(x = hour, y = wkday, fill = n)) +
  geom_tile(color = "white", size = 0.1) +
  theme_tufte(base_family = "Helvetica") +
  coord_equal() +
  scale_fill_gradient(
    name = "# of attacks",
    low = "sky blue",
    high = "dark blue"
  ) +
  facet_wrap(~source_country, ncol = 2) +
  labs(
    x = NULL,
    y = NULL,
    title = "Attacks on top 4 countries by weekday and time of day"
  ) +
  theme(
    axis.ticks = element_blank(),
    axis.text.x = element_text(size = 7),
    plot.title = element_text(hjust = 0.5),
    legend.title = element_text(size = 8),
    legend.text = element_text(size = 6)
  )

2. Plotting Cycle Plot

2.1 Importing Data

arrivals_by_air.xlsx will be used for this exercise.

# Load the arrivals workbook; later steps read its `Month-Year` and Vietnam
# columns.
air <- read_excel(path = "data/arrivals_by_air.xlsx")

2.2 Data Wrangling

Step 1: Derive month and year field

# Add two fields derived from `Month-Year`:
#   month - ordered factor labelled with the abbreviated month names
#   year  - calendar year
# NOTE(review): presumably `Month-Year` is read in as a date/date-time
# column - confirm against the workbook.
air <- air |>
  mutate(
    month = factor(
      month(`Month-Year`),
      levels = 1:12,
      labels = month.abb,
      ordered = TRUE
    ),
    year = year(ymd(`Month-Year`))
  )

Step 2: Select the target country

# Keep records from 2010 onwards, retaining only the Vietnam column and the
# derived month and year fields.
Vietnam <- air |>
  filter(year >= 2010) |>
  select(Vietnam, month, year)

Step 3: Compute the average arrivals for each month across years

# Mean of the Vietnam column for each month; used as the per-panel reference
# line in the cycle plot.
hline.data <- summarise(
  group_by(Vietnam, month),
  avgvalue = mean(Vietnam)
)

2.3 Plotting Cycle Plot

# Cycle plot: one panel per month, showing the yearly series for that month
# (black line) against the month's average (red dashed reference line).
ggplot() +
  geom_line(
    data = Vietnam,
    mapping = aes(x = year, y = `Vietnam`, group = month),
    colour = "black"
  ) +
  geom_hline(
    data = hline.data,
    mapping = aes(yintercept = avgvalue),
    linetype = 6,
    colour = "red",
    size = 0.5
  ) +
  facet_grid(~month) +
  labs(title = "Visitor arrivals from Vietnam by air, Jan 2010-Dec 2019") +
  xlab("") +
  ylab("No. of Visitors") +
  # axis.text.x is a theme element; it was previously passed to labs(), where
  # it had no effect on the axis text. Setting it in theme() actually hides
  # the x-axis tick labels in the narrow month panels.
  theme(axis.text.x = element_blank())

3. Plotting slopegraph

3.1 Importing Data

rice.csv will be used for this exercise.

# Load the rice data; the slopegraph below uses its Year, Yield and Country
# columns.
rice <- read_csv(file = "data/rice.csv")

3.2 Plotting slopegraph

# Slopegraph of Yield per Country between the two selected years.
# Year is converted to a factor before filtering; %in% still matches the
# numeric values because the comparison is made on their character forms.
rice %>%
  mutate(Year = factor(Year)) %>%
  filter(Year %in% c(1961, 1980)) %>%
  newggslopegraph(
    Year, Yield, Country,
    # Title typo fixed: "Counties" -> "Countries".
    Title = "Rice Yield of Top 11 Asian Countries",
    SubTitle = "1961-1980",
    Caption = "Prepared by: Oh Jia Wen"
  )

4. Horizon Graph

Do refer to In-class Exercise 07