Segregating Long but Non-Risky Landing Distance from Risky Distance
By: varsha10 • March 7, 2019 • Coursework • 6,879 Words (28 Pages) • 886 Views
Statistical Modelling Assignment 2- FAA Project
Varsha
February 19, 2019
segregating long but non-risky landing distance from risky distance
Packages Required
library(tidyverse) #to visualize, transform, input, tidy and join data
library(dplyr) #data wrangling
library(stringr) #string related functions
library(kableExtra) #to create HTML Table
library(DT) #to preview the data sets
library(lubridate) #to apply the date functions
library(xlsx) #to load excel files
library(ROCR) #to use ROC curves
library(faraway) #to use the ilogit function
Long Landing
Loaded the datasets and performed the initial data cleaning (the detailed steps were carried out in the first assignment).
# Load the two FAA landing spreadsheets and stack them into one data frame.
faa1 <- read.xlsx("FAA1.xls", sheetName = "FAA1")
faa2 <- read.xlsx("FAA2_2.xls", sheetName = "Sheet1")
faa <- bind_rows(faa1, faa2)

# Indices of rows that repeat an earlier row on every column except duration.
check <- faa %>%
  select(-duration) %>%
  duplicated() %>%
  which()

# Guard the removal: with no duplicates `check` is integer(0), and
# faa[-integer(0), ] would select zero rows instead of keeping them all.
if (length(check) > 0) {
  faa <- faa[-check, ]
}

# Keep only rows that pass every range check. A missing duration is let
# through here and imputed below.
faa_check <- faa %>%
  filter(
    duration > 40 | is.na(duration),
    speed_ground >= 30,
    speed_ground <= 140,
    height >= 6,
    distance < 6000
  )
faa <- faa_check

# Impute missing durations with the mean of the observed durations.
faa$duration_corrected <- ifelse(
  is.na(faa$duration),
  mean(faa$duration, na.rm = TRUE),
  faa$duration
)
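Step 1: Create the binary responses long.landing (distance > 2500) and risky.landing (distance > 3000), then drop the continuous duration and distance columns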
# Derive the two binary response factors from landing distance (1 when the
# distance exceeds the threshold, 0 otherwise) and store aircraft as a factor.
faa$long.landing <- factor(as.integer(faa$distance > 2500))
faa$risky.landing <- factor(as.integer(faa$distance > 3000))
faa$aircraft <- as.factor(faa$aircraft)

# The raw duration and distance columns are not carried forward.
faa <- faa[setdiff(names(faa), c("duration", "distance"))]
Step 2: Histogram to show distribution of “long.landing”
# Bar chart of the number of landings in each long.landing class.
ggplot(data = faa, mapping = aes(x = long.landing)) +
  geom_bar()
[pic 1]
Only 12% of the landings are long landings.
# Share of landings in each long.landing class, rounded to two decimals.
round(
  prop.table(table(faa$long.landing)),
  digits = 2
)
##
## 0 1
## 0.88 0.12
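Step 3: Fit a single-predictor logistic regression of long.landing on each variable and tabulate the coefficients, odds ratios and p-values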
# Fit one single-predictor logistic regression of long.landing per candidate
# variable. Passing `data = faa` (rather than `faa$col` inside the formula)
# gives clean term names and lets predict() accept new data; the fitted
# estimates are the same.
mdl_duration <- glm(long.landing ~ duration_corrected,
                    family = "binomial", data = faa)
mdl_speedgrnd <- glm(long.landing ~ speed_ground,
                     family = "binomial", data = faa)
mdl_height <- glm(long.landing ~ height,
                  family = "binomial", data = faa)
mdl_pitch <- glm(long.landing ~ pitch,
                 family = "binomial", data = faa)
mdl_nopasg <- glm(long.landing ~ no_pasg,
                  family = "binomial", data = faa)
mdl_speedair <- glm(long.landing ~ speed_air,
                    family = "binomial", data = faa)
mdl_aircraft <- glm(long.landing ~ aircraft,
                    family = "binomial", data = faa)

# Estimate and p-value from one row of a model's coefficient table.
# Row 1 is the intercept; row 2 is the single predictor's term.
# `coef(summary())` avoids the partial matching that `$coef` relied on.
term_stats <- function(model, row = 2) {
  coef(summary(model))[row, c("Estimate", "Pr(>|z|)")]
}

univariate_stats <- rbind(
  term_stats(mdl_duration),
  term_stats(mdl_speedgrnd),
  term_stats(mdl_height),
  term_stats(mdl_pitch),
  term_stats(mdl_nopasg),
  term_stats(mdl_speedair),
  # NOTE(review): assumes the second factor level of aircraft is Boeing, so
  # row 2 is the Boeing-vs-Airbus contrast -- confirm with levels(faa$aircraft).
  term_stats(mdl_aircraft),
  # Row 1 of the aircraft model is the intercept (log-odds for the baseline
  # aircraft level), not a slope, so it is labelled as such below. Its
  # "odds_ratio" entry is therefore the baseline odds, not an odds ratio.
  term_stats(mdl_aircraft, row = 1)
)
raw_coefficients <- unname(univariate_stats[, "Estimate"])

variable_names <- c(
  "Duration", "Ground Speed", "Height", "Pitch", "No. of Passengers",
  "Air Speed", "Aircraft-Boeing", "Aircraft-Airbus (intercept)"
)
coefficients <- round(raw_coefficients, digits = 3)
# Exponentiate the unrounded estimates; rounding first would compound error.
odds_ratio <- round(exp(raw_coefficients), 3)
p_value <- round(unname(univariate_stats[, "Pr(>|z|)"]), digits = 3)

table_2 <- data.frame(variable_names, coefficients, odds_ratio, p_value)
# Take the sign from the unrounded estimate so that a small positive slope
# that rounds to 0 is not reported as "Negative".
table_2$slope_direction <- ifelse(raw_coefficients > 0, "Positive", "Negative")

# Most significant terms first.
table_2 <- table_2 %>%
  arrange(p_value)
table_2
...