Basics of the ‘trafficSafety’ Package

Bojan Zunar

15 05 2019

Package ‘trafficSafety’ was primarily created for Coursera’s ‘Building R Packages’ course. It contains a number of functions that load, summarise, and visualise data from the US National Highway Traffic Safety Administration’s ‘Fatality Analysis Reporting System’, a nationwide census that provides the American public with yearly data regarding fatal injuries suffered in motor vehicle traffic crashes.

Loading the Dataset

The package comes with three files that list fatalities for the years 2013-2015. They can be loaded with the function fars_read, which returns a tibble. For convenience, filenames can also be generated with the helper function make_filename.

# library(trafficSafety)
library(devtools)
load_all()
#> Loading trafficSafety
getwd()
#> [1] "D:/RStudio/M03-RPackages/trafficSafety/inst/extdata"
# external data bundled with the package
y13 <- system.file("extdata", "accident_2013.csv.bz2", package = "trafficSafety")
y14 <- system.file("extdata", "accident_2014.csv.bz2", package = "trafficSafety")
y15 <- system.file("extdata", "accident_2015.csv.bz2", package = "trafficSafety")

# load the data
fars13 <- fars_read(y13)
head(fars13, 3)
#> # A tibble: 3 x 50
#>   STATE ST_CASE VE_TOTAL VE_FORMS PVH_INVL  PEDS PERNOTMVIT PERMVIT PERSONS
#>   <dbl>   <dbl>    <dbl>    <dbl>    <dbl> <dbl>      <dbl>   <dbl>   <dbl>
#> 1     1   10001        1        1        0     0          0       8       8
#> 2     1   10002        2        2        0     0          0       2       2
#> 3     1   10003        1        1        0     0          0       1       1
#> # ... with 41 more variables: COUNTY <dbl>, CITY <dbl>, DAY <dbl>,
#> #   MONTH <dbl>, YEAR <dbl>, DAY_WEEK <dbl>, HOUR <dbl>, MINUTE <dbl>,
#> #   NHS <dbl>, ROAD_FNC <dbl>, ROUTE <dbl>, TWAY_ID <chr>, TWAY_ID2 <chr>,
#> #   MILEPT <dbl>, LATITUDE <dbl>, LONGITUD <dbl>, SP_JUR <dbl>,
#> #   HARM_EV <dbl>, MAN_COLL <dbl>, RELJCT1 <dbl>, RELJCT2 <dbl>,
#> #   TYP_INT <dbl>, WRK_ZONE <dbl>, REL_ROAD <dbl>, LGT_COND <dbl>,
#> #   WEATHER1 <dbl>, WEATHER2 <dbl>, WEATHER <dbl>, SCH_BUS <dbl>,
#> #   RAIL <chr>, NOT_HOUR <dbl>, NOT_MIN <dbl>, ARR_HOUR <dbl>,
#> #   ARR_MIN <dbl>, HOSP_HR <dbl>, HOSP_MN <dbl>, CF1 <dbl>, CF2 <dbl>,
#> #   CF3 <dbl>, FATALS <dbl>, DRUNK_DR <dbl>

# helper function
make_filename(2013)
#> [1] "accident_2013.csv.bz2"
system.file("extdata", make_filename(2013), package = "trafficSafety")
#> [1] "D:/RStudio/M03-RPackages/trafficSafety/inst/extdata/accident_2013.csv.bz2"

Several years’ worth of data can also be imported using the function fars_read_years, which extracts only the month and year columns and returns a list in which the data from each file is stored as a separate element.

fars_read_years(c(2013, 2014))
#> [[1]]
#> # A tibble: 30,202 x 2
#>    MONTH  year
#>    <dbl> <dbl>
#>  1     1  2013
#>  2     1  2013
#>  3     1  2013
#>  4     1  2013
#>  5     1  2013
#>  6     1  2013
#>  7     1  2013
#>  8     1  2013
#>  9     1  2013
#> 10     1  2013
#> # ... with 30,192 more rows
#> 
#> [[2]]
#> # A tibble: 30,056 x 2
#>    MONTH  year
#>    <dbl> <dbl>
#>  1     1  2014
#>  2     1  2014
#>  3     1  2014
#>  4     1  2014
#>  5     1  2014
#>  6     1  2014
#>  7     1  2014
#>  8     1  2014
#>  9     1  2014
#> 10     1  2014
#> # ... with 30,046 more rows

Summarise Number of Accidents

One can summarise the number of accidents per month for each year using the fars_summarize_years function.

fars_summarize_years(c(2013, 2014, 2015))
#> # A tibble: 12 x 4
#>    MONTH `2013` `2014` `2015`
#>    <dbl>  <int>  <int>  <int>
#>  1     1   2230   2168   2368
#>  2     2   1952   1893   1968
#>  3     3   2356   2245   2385
#>  4     4   2300   2308   2430
#>  5     5   2532   2596   2847
#>  6     6   2692   2583   2765
#>  7     7   2660   2696   2998
#>  8     8   2899   2800   3016
#>  9     9   2741   2618   2865
#> 10    10   2768   2831   3019
#> 11    11   2615   2714   2724
#> 12    12   2457   2604   2781

Visualise Location of the Accidents

Finally, one can visualise where the accidents occurred in a specific state in a given year using the fars_map_state function.

fars_map_state(10, 2014)

sessionInfo()
#> R version 3.6.0 (2019-04-26)
#> Platform: x86_64-w64-mingw32/x64 (64-bit)
#> Running under: Windows >= 8 x64 (build 9200)
#> 
#> Matrix products: default
#> 
#> Random number generation:
#>  RNG:     Mersenne-Twister 
#>  Normal:  Inversion 
#>  Sample:  Rounding 
#>  
#> locale:
#> [1] LC_COLLATE=Croatian_Croatia.1250  LC_CTYPE=Croatian_Croatia.1250   
#> [3] LC_MONETARY=Croatian_Croatia.1250 LC_NUMERIC=C                     
#> [5] LC_TIME=Croatian_Croatia.1250    
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#> [1] trafficSafety_0.1.0 usethis_1.5.0       devtools_2.0.2     
#> [4] BiocStyle_2.12.0   
#> 
#> loaded via a namespace (and not attached):
#>  [1] Rcpp_1.0.1         pillar_1.4.0       compiler_3.6.0    
#>  [4] BiocManager_1.30.4 prettyunits_1.0.2  remotes_2.0.4     
#>  [7] tools_3.6.0        zeallot_0.1.0      testthat_2.1.1    
#> [10] digest_0.6.18      pkgbuild_1.0.3     pkgload_1.0.2     
#> [13] tibble_2.1.1       evaluate_0.13      memoise_1.1.0     
#> [16] pkgconfig_2.0.2    rlang_0.3.4        cli_1.1.0         
#> [19] rstudioapi_0.10    yaml_2.2.0         xfun_0.7          
#> [22] dplyr_0.8.1        withr_2.1.2        stringr_1.4.0     
#> [25] knitr_1.22         vctrs_0.1.0        hms_0.4.2         
#> [28] maps_3.3.0         desc_1.2.0         fs_1.3.1          
#> [31] tidyselect_0.2.5   rprojroot_1.3-2    glue_1.3.1        
#> [34] R6_2.4.0           processx_3.3.1     fansi_0.4.0       
#> [37] rmarkdown_1.12     bookdown_0.10      sessioninfo_1.1.1 
#> [40] tidyr_0.8.3        readr_1.3.1        purrr_0.3.2       
#> [43] callr_3.2.0        magrittr_1.5       backports_1.1.4   
#> [46] ps_1.3.0           htmltools_0.3.6    assertthat_0.2.1  
#> [49] utf8_1.1.4         stringi_1.4.3      crayon_1.3.4