Data cleaning step 2
Flagging and removing records with problematic occurrence information using functions from the CoordinateCleaner package.
# Data cleaning step 2: keep only georeferenced records from Madagascar and
# run them through the CoordinateCleaner tests.
#
# The coordinate columns are passed explicitly to every cc_*() call: the
# package's default column names differ between releases (lower-case in older
# versions, camelCase in newer ones), while this table uses the lower-case
# names seen in the filter() call below. Naming them keeps the pipeline
# working regardless of the installed version.
#
# NOTE(review): the cc_*() functions are used with their default output mode,
# which per the package documentation returns the data with flagged records
# dropped -- confirm if a flag column is wanted instead.
clean_step2 <- clean_step1 %>%
  filter(
    !is.na(decimallatitude),
    !is.na(decimallongitude),
    countrycode == "MG" # "MG" is the ISO code for Madagascar
  ) %>%
  # Duplicated records
  cc_dupl(lon = "decimallongitude", lat = "decimallatitude") %>%
  # Zero coordinates
  cc_zero(lon = "decimallongitude", lat = "decimallatitude") %>%
  # Identical latitude and longitude
  cc_equ(lon = "decimallongitude", lat = "decimallatitude") %>%
  # Invalid (non-numeric / out-of-range) coordinates
  cc_val(lon = "decimallongitude", lat = "decimallatitude") %>%
  # Non-terrestrial coordinates
  cc_sea(lon = "decimallongitude", lat = "decimallatitude") %>%
  # The remaining tests use a buffer of 2000 (metres, per the package
  # documentation) around the respective reference points.
  # Country capitals
  cc_cap(lon = "decimallongitude", lat = "decimallatitude", buffer = 2000) %>%
  # Country / province centroids
  cc_cen(lon = "decimallongitude", lat = "decimallatitude", buffer = 2000) %>%
  # GBIF headquarters
  cc_gbif(lon = "decimallongitude", lat = "decimallatitude", buffer = 2000) %>%
  # Biodiversity institutions
  cc_inst(lon = "decimallongitude", lat = "decimallatitude", buffer = 2000)

# Report the cumulative effect of the cleaning relative to the raw download.
print(paste0(
  nrow(gbif_download) - nrow(clean_step2), " records deleted; ",
  nrow(clean_step2), " records remaining."
))
Plotting raw records vs. cleaned records (step 2)
# World map with the raw occurrences drawn first (black "+") and the cleaned
# occurrences drawn on top (red "+").
ggplot() +
  geom_sf(data = world_map) +
  geom_point(
    mapping = aes(decimallongitude, decimallatitude),
    data = gbif_download,
    colour = "black",
    shape = "+"
  ) +
  geom_point(
    mapping = aes(decimallongitude, decimallatitude),
    data = clean_step2,
    colour = "red",
    shape = "+"
  ) +
  theme_bw()
The black "+" markers indicate the occurrences in the raw dataset, whereas the red "+" markers indicate the occurrences in the cleaned dataset.
Zooming in to Madagascar
# Same comparison as the world map, but cropped to the bounding box of
# country_map: raw occurrences in black, cleaned occurrences in red on top.
ggplot() +
  geom_sf(data = country_map) +
  geom_point(
    mapping = aes(decimallongitude, decimallatitude),
    data = gbif_download,
    colour = "black",
    shape = "+"
  ) +
  geom_point(
    mapping = aes(decimallongitude, decimallatitude),
    data = clean_step2,
    colour = "red",
    shape = "+"
  ) +
  # Limit the plotting window to the extent of the country polygon.
  coord_sf(
    xlim = st_bbox(country_map)[c("xmin", "xmax")],
    ylim = st_bbox(country_map)[c("ymin", "ymax")]
  ) +
  theme_bw()