knitr::opts_chunk$set(echo = TRUE)
The first step is to read in the sampled species names. These names are assigned to a vector.
library(ggplot2)
ggplot(family_composition, aes(x = Family)) +
geom_bar(aes(y = percent_present, fill = "1"), stat = "identity") +
geom_bar(aes(y = percent_absent, fill = "0"), stat = "identity") + # specify the two categories "1" and "0"
labs(x = "Family", y = "Percentage", fill = "Parietal eye value:") + # assign bars to columns
ggtitle("Composition of families by parietal eye value:") + # add title
scale_fill_manual(values = c("1" = "green", "0" = "red"),
name = "Parietal eye value:",
labels = c("absent", "present")) + # assign colors according to parietal eye state
theme_minimal() +
theme(axis.text.x = element_text(angle = 60, hjust = 1),
title = element_text(face = "bold")) # adjust x-axis text by angel and change legend title to bold
library(knitr)
library(dplyr)
library(tidyr)
# filter families and exclude families with less than 10 representatives
filtered_data <- data_families %>%
group_by(Family) %>% # group by the families of the species
filter(n() > 10) %>%
ungroup()
#saveRDS(filtered_data, file="C:/Users/Daniel/Desktop/results_R/filtered_data.RDS")
# summarise all data for the table
summary_table <- filtered_data %>%
group_by(Family) %>%
summarise(
Present = mean(parietal_eye == 1) * 100, # present prietal eye percentage
Absent = 100 - Present, # absent parietal eye percentage
`Total count` = n() # family count
)
# adjust the table structure by adding column names to it
summary_table <- summary_table %>%
pivot_longer(cols = c(Present, Absent), names_to = "Eye_Status", values_to = "Percentage") %>%
pivot_wider(names_from = Eye_Status, values_from = Percentage)
# name the columns
summary_table <- summary_table[, c("Family", "Present", "Absent", "Total count")]
summary_table
The first analyses and calculations are carried out with “uncleaned data”. Later, the data will be cleaned for more precise results. This comparison should also provide information about the quality of the cleaned data compared to the uncleaned data. Here are the results of the uncleaned data.
At the very beginning, based on the sampled species names and the created “species_vector”, data about “scientificName”, “decimalLongitude” and “decimalLatitude” are downloaded from GBIF and put into a list called “gbif_data_uncleaned”:
library(rgbif)
gbif_data_uncleaned <- vector(mode = "list", length = length(species_vector))
for (i in 1:length(species_vector)) { # loop through "species_vector"
results <- occ_data(scientificName = species_vector[i], hasCoordinate = TRUE, limit = 500) #limit is set to n= 500 for downloaded cases of each species
results <- as.data.frame(results$data, col.names= c("scientificName", "decimalLongitude", "decimalLatitude")) # assignment of downloaded data to columns
if (nrow(results) > 0) {
gbif_data_uncleaned[[i]] <- results[,c("scientificName", "decimalLongitude", "decimalLatitude")]
names(gbif_data_uncleaned)[i] <- results$scientificName[1]
} else {
gbif_data_uncleaned[[i]] <- NA # conditional statement differentiates between available and not available data
}
}
gbif_data_uncleaned
# saveRDS(gbif_data_uncleaned, file="C:/Users/Daniel/Desktop/results_R/gbif_data_uncleaned.RDS")
gbif_data_uncleaned <- readRDS("C:/Users/Daniel/Desktop/results_R/gbif_data_uncleaned.RDS")
The end of the list is shown as an example. This way you can see if the function that will be used later to clean up the downloaded coordinates is essential.
gbif_data_uncleaned <- readRDS("C:/Users/Daniel/Desktop/results_R/gbif_data_uncleaned.RDS")
tail_gbif_data_uncleaned<- tail(gbif_data_uncleaned$`Zonosaurus ornatus (Gray, 1831)`,22)
tail_gbif_data_uncleaned
Furthermore, the total number of downloaded individuals is re cored to get an overview about the sample size:
# counting total rows of gbif_data_uncleaned:
total_rows_gbif_data_uncleaned <- 0
for (i in 1:length(gbif_data_uncleaned)) { # iteration through the list
if (inherits(gbif_data_uncleaned[[i]], "data.frame") || inherits(gbif_data_uncleaned[[i]], "tbl_df")) {
total_rows_gbif_data_uncleaned <- total_rows_gbif_data_uncleaned + nrow(gbif_data_uncleaned[[i]]) # adding up the rows
}
}
print(total_rows_gbif_data_uncleaned)
## [1] 175521
The median of the coordinates for each species is calculated because of the large number of individuals and also because it is more accurate.
library(dplyr)
library(purrr)
coordsmedian_uncleaned <- gbif_data_uncleaned %>%
map_df(~ .x %>%
group_by(scientificName) %>% # group the data by their "scientificName"
summarise(n = n(),
medianLat = median(decimalLatitude, na.rm = TRUE), # calculate median of latitude
medianLon = median(decimalLongitude, na.rm = TRUE))) %>% # calculate median of longitude
group_by(scientificName) %>%
summarise(medianLat = sum(n * medianLat) / sum(n), # calculate the mean latitude for each species
medianLon = sum(n * medianLon) / sum(n)) # calculate the mean longitude for each species
coordsmedian_uncleaned
#saveRDS(coordsmedian_uncleaned, file="C:/Users/Daniel/Desktop/results_R/coordsmedian_uncleaned.RDS")
#coordsmedian_uncleaned <- readRDS("C:/Users/Daniel/Desktop/results_R/coordsmedian_uncleaned.RDS")
At this point the sampled state of the parietal eye for each species is added to a new column in the list “coordsmedian.parietal_uncleaned”. In addition, all species that do not match the names in the Excel table are excluded from the final list.
coordsmedian.parietal_uncleaned<- coordsmedian_uncleaned
coordsmedian.parietal_uncleaned$parietal_eye <- NA # add a new column
for (i in 1:nrow(squamata_Amph.Lac.Serp_xl)) { # Loop through rows of "squamata_Amph.Lac.Serp_xl"
match_row <- which(sapply(strsplit(coordsmedian.parietal_uncleaned$scientificName, " "), function(x) any(x %in% strsplit(squamata_Amph.Lac.Serp_xl$Taxon[i], " ")[[1]]))) # find matching rows
if (length(match_row) > 0) {
coordsmedian.parietal_uncleaned$parietal_eye[match_row] <- squamata_Amph.Lac.Serp_xl$parietal_eye[i] # add the information to new column
}
}
coordsmedian.parietal_uncleaned
#saveRDS(coordsmedian.parietal_uncleaned, file="C:/Users/Daniel/Desktop/results_R/coordsmedian.parietal_uncleaned.RDS")
#coordsmedian_uncleaned <- readRDS("C:/Users/Daniel/Desktop/results_R/coordsmedian.parietal_uncleaned.RDS")
coordsmedian.parietalfiltered_uncleaned <- coordsmedian.parietal_uncleaned %>% # filter "coordsmean_uncleaned"
filter(parietal_eye == 1 | parietal_eye == 0)
#saveRDS(coordsmedian.parietalfiltered_uncleaned, #file="C:/Users/Daniel/Desktop/results_R/coordsmedian.parietalfiltered_uncleaned.RDS")
#coordsmedian.parietalfiltered_uncleaned <- readRDS("C:/Users/Daniel/Desktop/results_R/coordsmedian.parietalfiltered_uncleaned.RDS")
The state of the parietal eye within the sampled squamates is then plotted on a world map to provide an insight into the median distribution of each species with information on the state of the parietal eye.
# plot median geographical distribution by state of parietal eye - non interactive world map (uncleaned)
library(maps)
library(ggplot2)
coordsmedian.parietalfiltered_uncleaned <- readRDS("C:/Users/Daniel/Desktop/results_R/coordsmedian.parietalfiltered_uncleaned.RDS")
world_map_uncleaned <- map_data("world") # load data for the world map
point_colors <- c("#FF0000", "#99CC00") # assign colors to the two categories of the parietal eye
ggplot() +
geom_polygon(data = world_map_uncleaned, aes(x = long, y = lat, group = group), fill = "#99CCFF", color = "#00FFCC") + # plot the world map
geom_point(data = coordsmedian.parietalfiltered_uncleaned, aes(x = medianLon, y = medianLat, color = factor(parietal_eye)), # plot the median geographical coordinates of the species
shape = 1,
size = 0.5) +
xlab("Longitude") +
ylab("Latitude") +
coord_fixed() +
theme_bw() +
labs(color = "State of parietal eye:") +
ggtitle(expression(bold("Median geographical distribution by parietal eye (uncleaned data):"))) +
scale_color_manual(values = c("#FF0000", "#99CC00"), breaks = c("0", "1"), # two categories of parietal eye
labels = c("absent", "present"),
guide = guide_legend(override.aes = list(shape = 1, size = 3))) + # Adjust legend
theme(legend.key.size = unit(0.3, "cm"),
plot.title = element_text(size = rel(1), face = "bold"))
In the following part, the dependence of the different median latitudinal distributions on the state of the parietal eye is compared. First, the globe is split into its two hemispheres to prevent the values from cancelling each other out. from cancelling each other out, and to see the differences in the two gradients towards the poles, since the Earth’s mass is not distributed centrally between the hemispheres, but more towards the north.
coordsmediannorth_uncleaned <- coordsmedian.parietalfiltered_uncleaned %>%
filter(medianLat >0 ) # Filter data for North Hemisphere
# Create boxplots with median, 25- and 75-quantiles
ggplot(coordsmediannorth_uncleaned %>% filter(medianLat > 0), aes(x = factor(parietal_eye), y = medianLat, fill = factor(parietal_eye))) +
geom_boxplot(width = 0.75) + # Adjust boxplot width
stat_summary(fun = median, geom = "text", aes(label = round(..y.., 2)), vjust = 1, size = 3, color = "#000000") + #calculate median
stat_summary(fun = function(x) quantile(x, 0.25), geom = "text", aes(label = round(..y.., 2)), vjust = 1.5, hjust = 1.1, size = 3, color = "#3399CC") + # calculate 25-quantile
stat_summary(fun = function(x) quantile(x, 0.75), geom = "text", aes(label = round(..y.., 2)), vjust = -0.5, hjust = 1.1, size = 3, color = "#3399CC") + # calculate 75-quantile
xlab("Parietal eye") +
ylab("Median latitude") +
ggtitle("Median latitudinal distribution (North Hemisphere uncleaned):") +
theme_bw() +
scale_fill_manual(values = c("#FF0000", "#99CC00"), labels = c("absent", "present")) + # Adjust legend with the two categories
labs(fill = "Parietal eye:") + # Adjust legend title
theme(plot.title = element_text(size = 11, face = "bold"),
legend.position = "right",
legend.title = element_text(size = 12),
legend.text = element_text(size = 12))
The same procedure is done for the northern hemisphere, but for species living between the coordinates -125° and -30° degrees. This includes North and South America.
library(ggplot2)
coordsmediannorth_america_uncleaned <- coordsmedian.parietalfiltered_uncleaned %>%
filter(medianLon >= -125 & medianLon <= -30 & medianLat > 0) # Filter data between -30° and -125° longitude for North American species
# Create boxplots with mean, 25- and 75-quantiles
ggplot(coordsmediannorth_america_uncleaned %>% filter(medianLat > 0), aes(x = factor(parietal_eye), y = medianLat, fill = factor(parietal_eye))) +
geom_boxplot(width = 0.75) + # Adjust boxplot width
stat_summary(fun = median, geom = "text", aes(label = round(..y.., 2)), vjust = 1, size = 3, color = "#000000") + # calculate median
stat_summary(fun = function(x) quantile(x, 0.25), geom = "text", aes(label = round(..y.., 2)), vjust = 1.5, hjust = 1.1, size = 3, color = "#3399CC") + # calculate 25 quantile
stat_summary(fun = function(x) quantile(x, 0.75), geom = "text", aes(label = round(..y.., 2)), vjust = -0.5, hjust = 1.1, size = 3, color = "#3399CC") + # calculate 75 quantile
xlab("Parietal eye") +
ylab("Median latitude") +
ggtitle("Median latitudinal distribution (American North Hemisphere uncleaned):") +
theme_bw() +
scale_fill_manual(values = c("#FF0000", "#99CC00"), labels = c("absent", "present")) + # Adjust legend
labs(fill = "Parietal eye:") + # Adjust legend title
theme(plot.title = element_text(size = 11, face = "bold"),
legend.position = "right",
legend.title = element_text(size = 12),
legend.text = element_text(size = 12))
Despite the New World squamates, the Old World squamates are also analysed for the state of the parietal eye as a function of the median latitudinal distribution. For this procedure, the coordinate range is the opposite of that used for the Americas.
library(ggplot2)
coordsmediannorth_not_america_uncleaned <- coordsmedian.parietalfiltered_uncleaned %>%
filter(!(medianLon >= -125 & medianLon <= -30) & medianLat > 0) # Filter data that are not in -30° and -125° longitude to exclude American species
# Create boxplots with mean, 25- and 75-quantiles
ggplot(coordsmediannorth_not_america_uncleaned %>% filter(medianLat > 0), aes(x = factor(parietal_eye), y = medianLat, fill = factor(parietal_eye))) +
geom_boxplot(width = 0.75) + # Adjust boxplot width
stat_summary(fun = median, geom = "text", aes(label = round(..y.., 2)), vjust = 1, size = 3, color = "#000000") + # calculate median
stat_summary(fun = function(x) quantile(x, 0.25), geom = "text", aes(label = round(..y.., 2)), vjust = 1.5, hjust = 1.1, size = 3, color = "#3399CC") + # calculate 25-quantile
stat_summary(fun = function(x) quantile(x, 0.75), geom = "text", aes(label = round(..y.., 2)), vjust = -0.5, hjust = 1.1, size = 3, color = "#3399CC") + # calculate 75-quantile
xlab("Parietal eye") +
ylab("Median latitude") +
ggtitle("Median latitudinal distribution (Not American North Hemisphere uncleaned):") +
theme_bw() +
scale_fill_manual(values = c("#FF0000", "#99CC00"), labels = c("absent", "present")) + # Adjust legend
labs(fill = "Parietal eye:") + # Adjust legend title
theme(plot.title = element_text(size = 11, face = "bold"),
legend.position = "right",
legend.title = element_text(size = 12),
legend.text = element_text(size = 12))
The same procedure is used for the southern hemisphere. The structure of the code remains the same, the only difference being the selection of squamates whose species median latitude is in the southern part of the equator.
library(ggplot2)
coordsmediansouth_uncleaned <- coordsmedian.parietalfiltered_uncleaned %>%
filter(medianLat < 0 ) # Filter data for South Hemisphere
# Create boxplots with mean, 25- and 75-quantiles
ggplot(coordsmediansouth_uncleaned %>% filter(medianLat < 0), aes(x = factor(parietal_eye), y = medianLat, fill = factor(parietal_eye))) +
geom_boxplot(width = 0.75) + # Adjust boxplot width
stat_summary(fun = median, geom = "text", aes(label = round(..y.., 2)), vjust = 1, size = 3, color = "#000000") + # calculate median
stat_summary(fun = function(x) quantile(x, 0.25), geom = "text", aes(label = round(..y.., 2)), vjust = 1.5, hjust = 1.1, size = 3, color = "#3399CC") + # calculate 25-quantile
stat_summary(fun = function(x) quantile(x, 0.75), geom = "text", aes(label = round(..y.., 2)), vjust = -0.5, hjust = 1.1, size = 3, color = "#3399CC") + # calculate 75-uantile
xlab("Parietal eye") +
ylab("Median latitude") +
ggtitle("Median latitudinal distribution (South Hemisphere uncleaned):") +
theme_bw() +
scale_fill_manual(values = c("#FF0000", "#99CC00"), labels = c("absent", "present")) + # Adjust legend
labs(fill = "Parietal eye:") + # Adjust legend title
theme(plot.title = element_text(size = 11, face = "bold"),
legend.position = "right",
legend.title = element_text(size = 12),
legend.text = element_text(size = 12))
As above for the species living in the northern hemisphere of the Americas, here is the counterpart for the southern hemisphere.
library(ggplot2)
coordsmediansouth_america_uncleaned <- coordsmedian.parietalfiltered_uncleaned %>%
filter(medianLon >= -125 & medianLon <= -30 & medianLat < 0) # Filter data that are in -30° and -125° longitude for South Hemisphere in America
# Create boxplots with mean, 25-and 75-quantiles
ggplot(coordsmediansouth_america_uncleaned, aes(x = factor(parietal_eye), y = medianLat, fill = factor(parietal_eye))) +
geom_boxplot(width = 0.75) +
stat_summary(fun = median, geom = "text", aes(label = round(..y.., 2)), vjust = 1, size = 3, color = "#000000") + # calculate median
stat_summary(fun = function(x) quantile(x, 0.25), geom = "text", aes(label = round(..y.., 2)), vjust = 1.5, hjust = 1.1, size = 3, color = "#3399CC") + # calculate 25-qunatile
stat_summary(fun = function(x) quantile(x, 0.75), geom = "text", aes(label = round(..y.., 2)), vjust = -0.5, hjust = 1.1, size = 3, color = "#3399CC") + # calculate 75-quantile
xlab("Parietal eye") +
ylab("Median latitude") +
ggtitle("Median latitudinal distribution (American South Hemisphere uncleaned):") + # Adjust title
theme_bw() +
scale_fill_manual(values = c("#FF0000", "#99CC00"), labels = c("absent", "present")) + # Adjust legend
labs(fill = "Parietal eye:") + # Adjust legend title
theme(plot.title = element_text(size = 11, face = "bold"),
legend.position = "right",
legend.title = element_text(size = 12),
legend.text = element_text(size = 12))
Here is the calculation of species that have their median latitudinal distribution in the southern hemisphere of the globe, excluding species from America.
library(ggplot2)
coordsmediansouth_not_america_uncleaned <- coordsmedian.parietalfiltered_uncleaned %>%
filter(!(medianLon >= -125 & medianLon <= -30) & medianLat < 0) # Filter data that are in -30° and -125° longitude for rest of the world despite America
# Create boxplots with mean, 25- and 75-quantiles
ggplot(coordsmediansouth_not_america_uncleaned, aes(x = factor(parietal_eye), y = medianLat, fill = factor(parietal_eye))) +
geom_boxplot(width = 0.75) +
stat_summary(fun = median, geom = "text", aes(label = round(..y.., 2)), vjust = 1, size = 3, color = "#000000") + # calculate #median
stat_summary(fun = function(x) quantile(x, 0.25), geom = "text", aes(label = round(..y.., 2)), vjust = 1.5, hjust = 1.1, size = 3, color = "#3399CC") + # calculate 25-qunatile
stat_summary(fun = function(x) quantile(x, 0.75), geom = "text", aes(label = round(..y.., 2)), vjust = -0.5, hjust = 1.1, size = 3, color = "#3399CC") + # calculate 75-qunatile
xlab("Parietal eye") +
ylab("Median latitude") +
ggtitle("Median latitudinal distribution (Not American South Hemisphere uncleaned):") + # Adjust title
theme_bw() +
scale_fill_manual(values = c("#FF0000", "#99CC00"), labels = c("absent", "present")) + # Adjust legend labels
labs(fill = "Parietal Eye:") + # Adjust legend title
theme(plot.title = element_text(size = 11, face = "bold"), # Increase title size
legend.position = "right",
legend.title = element_text(size = 12),
legend.text = element_text(size = 12))
In addition, a statistical evaluation of the dependence of the parietal eye state on the median latitudinal distribution of squamates in certain areas of the world is made.
t_test_northhemisphere_uncleaned <- t.test(medianLat ~ parietal_eye, data = coordsmediannorth_uncleaned)
print(t_test_northhemisphere_uncleaned)
t_test_northhemisphere_america_uncleaned <- t.test(medianLat ~ parietal_eye, data = coordsmediannorth_america_uncleaned)
print(t_test_northhemisphere_america_uncleaned)
t_test_northhemisphere_not_america_uncleaned <- t.test(medianLat ~ parietal_eye, data = coordsmediannorth_not_america_uncleaned)
print(t_test_northhemisphere_not_america_uncleaned)
t_test_southhemisphere_uncleaned <- t.test(medianLat ~ parietal_eye, data = coordsmediansouth_uncleaned)
print(t_test_southhemisphere_uncleaned)
t_test_southhemisphere_america_uncleaned <- t.test(medianLat ~ parietal_eye, data = coordsmediansouth_america_uncleaned)
print(t_test_southhemisphere_america_uncleaned)
t_test_southhemisphere_not_america_uncleaned <- t.test(medianLat ~ parietal_eye, data = coordsmediansouth_not_america_uncleaned)
print(t_test_southhemisphere_not_america_uncleaned)
At this point it is important to consider the accuracy of the data stored in “gbif_data_uncleaned”. There are several cases that indicate biased medians due to duplicate or incorrect coordinates, here is a little more insight into the data stored in “gbif_data_uncleand”.
tail_gbif_data_uncleaned<- tail(gbif_data_uncleaned$`Zonosaurus ornatus (Gray, 1831)`,22)
tail_gbif_data_uncleaned
It becomes clear that many of the downloaded data distort the median distribution of the species. To get rid of these coordinates, there is a useful package called “CoordinateCleaner”. Its functions remove duplicate or incorrect coordinates. In this way a more correct median distribution of the species can be calculated.
#extract with "CoordinateCleaner" from "GBIF" information about "scientificName", "decimalLongitude"and "decimalLatitude" and put them in a list called "gbif_data_cleaned"
library(sp)
library(CoordinateCleaner)
gbif_data_cleaned <- vector(mode = "list", length = length(squamata_Amph.Lac.Serp_xl$Taxon)) # create a vector
removed_data_count <- 0 # start the count of removed data
for (i in 1:length(species_vector)) { # Iteration through species names
results <- occ_data(scientificName = species_vector[i], hasCoordinate = TRUE, limit = 500) # set download limit for each speces to n=500
results <- as.data.frame(results$data, col.names = c("scientificName", "decimalLongitude", "decimalLatitude")) # store the results in the named columns
if (nrow(results) > 0) {
results_clean <- results
before_cleaning <- nrow(results_clean)
results_clean <- cc_cap(results_clean) # remove coordinates of capitals
removed_data_count <- removed_data_count + (before_cleaning - nrow(results_clean))
results_clean <- cc_cen(results_clean) # remove coordinates of country centroids
removed_data_count <- removed_data_count + (before_cleaning - nrow(results_clean))
results_clean <- cc_dupl(results_clean) # remove duplicated coordinates
removed_data_count <- removed_data_count + (before_cleaning - nrow(results_clean))
results_clean <- cc_equ(results_clean)
removed_data_count <- removed_data_count + (before_cleaning - nrow(results_clean))
results_clean <- cc_gbif(results_clean) # remove coordinates of gbif headquaters
removed_data_count <- removed_data_count + (before_cleaning - nrow(results_clean))
results_clean <- cc_inst(results_clean) # remove coordinates of biological institutions
removed_data_count <- removed_data_count + (before_cleaning - nrow(results_clean))
results_clean <- cc_val(results_clean) # remove invalid coordinates
removed_data_count <- removed_data_count + (before_cleaning - nrow(results_clean)) #count total number of removed coordinates
gbif_data_cleaned[[i]] <- results_clean[, c("scientificName", "decimalLongitude", "decimalLatitude")]
names(gbif_data_cleaned)[i] <- species_vector[i]
} else {
gbif_data_cleaned[[i]] <- NA
}
}
gbif_data_cleaned
# saveRDS(gbif_data_uncleaned, file="C:/Users/Daniel/Desktop/results_R/gbif_data_cleaned.RDS")
# gbif_data_cleaned<-readRDS("C:/Users/Daniel/Desktop/results_R/gbif_data_cleaned.RDS")
#gbif_data_cleaned<-readRDS("C:/Users/Daniel/Desktop/results_R/gbif_data_cleaned.RDS)
The total number of removed coordinates is also printed to give an idea of the huge potential of the “CoordinateCleaner” package:
removed_data_count <- readRDS("C:/Users/Daniel/Desktop/results_R/removed_data_count.RDS")
removed_data_count
## [1] 333108
All of these removed data may bias the result of the uncleaned analysis above.
Additionally here is a display of the sample size of individuals within “gbif_data_cleaned”.
total_rows_gbif_data_cleaned <- 0
for (i in 1:length(gbif_data_cleaned)) {
if (inherits(gbif_data_cleaned[[i]], "data.frame") || inherits(gbif_data_cleaned[[i]], "tbl_df")) {
total_rows_gbif_data_cleaned <- total_rows_gbif_data_cleaned + nrow(gbif_data_cleaned[[i]])
}
}
print(total_rows_gbif_data_cleaned)
With a sample size that is from such an enormous size and even is represented by cleaned coordinates, the statistical evaluation can be repeated.
# calculate the centroid distribution of each species from "gbif_data_cleaned":
coordsmedian_cleaned <- gbif_data_cleaned %>% #pipe in data of "gbif_data_uncleaned"
map_df(~ .x %>%
group_by(scientificName) %>% #group the data by "scientificName"
summarise(n = n(),
medianLat = median(decimalLatitude, na.rm = TRUE),
medianLon = median(decimalLongitude, na.rm = TRUE))) %>%
group_by(scientificName) %>%
summarise(medianLat = sum(n * medianLat) / sum(n), # calculate the median latitude for each species
medianLon = sum(n * medianLon) / sum(n)) # calculate the median longitude for each species
coordsmedian_cleaned
#saveRDS(coordsmedian_cleaned, file="C:/Users/Daniel/Desktop/results_R/coordsmedian_cleaned.RDS")
Next, the median coordinates of each species are again made dependent on the state of the parietal eye that has been sampled. A final step is to remove the “BOLD” species from the list. Furthermore, the list is cleared of species with no parietal eye state information available.
# add a new column to "coordsmean_uncleaned" called "parietal_eye" with information of "squamata_Amph.Lac.Serp_xl" and exclude GBIF Backbone Taxonomy (BOLD) in the list "coordsmedianfiltered.parietal_uncleaned"
coordsmedian.parietal_cleaned<- coordsmedian_cleaned
coordsmedian.parietal_cleaned$parietal_eye <- NA # create a new column
for (i in 1:nrow(squamata_Amph.Lac.Serp_xl)) { # Loop through rows of "squamata_Amph.Lac.Serp_xl"
match_row <- which(sapply(strsplit(coordsmedian.parietal_cleaned$scientificName, " "), function(x) any(x %in% strsplit(squamata_Amph.Lac.Serp_xl$Taxon[i], " ")[[1]])))
if (length(match_row) > 0) {
coordsmedian.parietal_cleaned$parietal_eye[match_row] <- squamata_Amph.Lac.Serp_xl$parietal_eye[i] # add information to new column if there are matching names
}
}
coordsmedian.parietal_cleaned
#saveRDS(coordsmedian.parietal_cleaned, file="C:/Users/Daniel/Desktop/results_R/coordsmedian.parietal_cleaned.RDS")
coordsmedian.parietalfiltered_cleaned <- coordsmedian.parietal_cleaned %>% # filter coordsmedian.parietal_cleaned for the both categories of the parietal eye to exclude all others
filter(parietal_eye == 1 | parietal_eye == 0)
#saveRDS(coordsmedian.parietalfiltered_cleaned, file="C:/Users/Daniel/Desktop/results_R/coordsmedian.parietalfiltered_cleaned")
At this point, the data is again plotted on a world map with information about the state of the parietal eye as a function of latitude.
library(ggplot2)
coordsmedian.parietalfiltered_cleaned <- readRDS("C:/Users/Daniel/Desktop/results_R/coordsmedian.parietalfiltered_cleaned")
# plot median geographical distribution by state of parietal eye - non interactive world map
coordsmedian.parietalfiltered_cleaned <- coordsmedian.parietalfiltered_cleaned %>%
filter(!is.na(parietal_eye) & parietal_eye %in% c("0", "1")) # excluding rows with NA
world_map_cleaned <- map_data("world") # load data for world map
point_colors <- c("#FF0000", "#99CC00") # assigning the colors to the two categories
ggplot() +
geom_polygon(data = world_map_cleaned, aes(x = long, y = lat, group = group), fill = "#99CCFF", color = "#00FFCC") + # plot world map
geom_point(data = coordsmedian.parietalfiltered_cleaned, aes(x = medianLon, y = medianLat, color = factor(parietal_eye)), # plot median geographical coordinates
shape = 1,
size = 0.5) +
xlab("Longitude") +
ylab("Latitude") +
coord_fixed() +
theme_bw() +
labs(color = "State of parietal eye:") +
ggtitle(expression(bold("Median geographical distribution by parietal eye (cleaned data):"))) + # Adjust title
scale_color_manual(values = c("#FF0000", "#99CC00"), breaks = c("0", "1"), # two categorical states assigned to colors
labels = c("absent", "present"),
guide = guide_legend(override.aes = list(shape = 1, size = 3))) + # Adjust legend
theme(legend.key.size = unit(0.3, "cm"),
plot.title = element_text(size = rel(1), face = "bold"))
This graph shows the status of the parietal eye in families with at least 10 analysed species in relation to the latitudinal distribution. Only families with a constant parietal eye state of 0 or 100% are included in the statistics.`
library(dplyr)
coordsmedian_cleaned <- readRDS("C:/Users/Daniel/Desktop/results_R/coordsmedian_cleaned.RDS")
filtered_data<- readRDS("C:/Users/Daniel/Desktop/results_R/filtered_data.RDS")
filtered_data$medianLat<- NA # add new column named "medianLat"
# filter data for matching rows and ad information about "medianLat" for the species
for (i in 1:nrow(filtered_data)) {
taxon <- filtered_data$Taxon[i]
matching_rows <- grep(paste(unlist(strsplit(taxon, "\\s+")), collapse=".*"), coordsmedian_cleaned$scientificName)
if (length(matching_rows) > 0) {
matching_row <- coordsmedian_cleaned[matching_rows[1], ]
filtered_data$medianLat[i] <- matching_row$medianLat
}
}
# only keep families with 10 or more sampled species and 0% or 100% parietal eye occurence
keep_families <- c("Amphisbaenidae", "Diplodactylidae", "Gekkonidae", "Gymnophthalmidae", "Phyllodactylidae", "Sphaerodactylidae", "Teiidae", "Anguidae", "Anolidae", "Leiosauridae", "Liolaemidae", "Phrynosomatidae")
# Filter families
filtered_data <- filtered_data %>%
filter(Family %in% keep_families)
# Calculate mean medianLat per family
mean_medianLat <- filtered_data %>%
group_by(Family) %>%
summarize(mean_medianLat = mean(medianLat, na.rm = TRUE))
# Merge Parietal Eye data
mean_medianLat <- merge(mean_medianLat, summary_table, by = "Family")
# Create new column parietal_eye
mean_medianLat$parietal_eye <- ifelse(mean_medianLat$Present == 100, 1, 0)
# Calculate positive and negative means for green (present) points
mean_green_positive <- mean(mean_medianLat$mean_medianLat[mean_medianLat$parietal_eye == 1 & mean_medianLat$mean_medianLat > 0], na.rm = TRUE)
mean_green_negative <- mean(mean_medianLat$mean_medianLat[mean_medianLat$parietal_eye == 1 & mean_medianLat$mean_medianLat < 0], na.rm = TRUE)
# Calculate positive and negative means for red (absent) points
mean_red_positive <- mean(mean_medianLat$mean_medianLat[mean_medianLat$parietal_eye == 0 & mean_medianLat$mean_medianLat > 0], na.rm = TRUE)
mean_red_negative <- mean(mean_medianLat$mean_medianLat[mean_medianLat$parietal_eye == 0 & mean_medianLat$mean_medianLat < 0], na.rm = TRUE)
ggplot(mean_medianLat, aes(x = Family, y = mean_medianLat, color = factor(parietal_eye))) +
geom_point(size = 3) +
geom_text(aes(label = round(mean_medianLat, 2)), hjust = -0.2, vjust = -0.4, size = 3, fontface = "plain") +
scale_color_manual(values = c("#FF0000", "#99CC00"),
labels = c("0"= "absent", "1" = "present")) + # adjust legend labels
labs(x = "Family", y = "Median Lat", color = "Parietal eye:") +
geom_hline(yintercept = c(mean_green_positive, mean_green_negative), color = "#99CC00", linetype = "solid") + # add mean line for green positives
geom_hline(yintercept = c(mean_red_positive, mean_red_negative), color = "#FF0000", linetype = "solid") + # add mean line for red negatives
annotate("text", y = mean_green_positive, x = 1, label = paste(round(mean_green_positive, 2)), hjust = 0.8, vjust = -0.6, size = 3, fontface = "plain") +
annotate("text", y = mean_green_negative, x = 1, label = paste(round(mean_green_negative, 2)), hjust = 0.8, vjust = -0.6, size = 3, fontface = "plain") +
annotate("text", y = mean_red_positive, x = 1, label = paste(round(mean_red_positive, 2)), hjust = 0.8, vjust = -0.6, size = 3, fontface = "plain") +
annotate("text", y = mean_red_negative, x = 1, label = paste(round(mean_red_negative, 2)), hjust = 0.8, vjust = -0.6, size = 3, fontface = "plain") +
theme(axis.text.x = element_text(angle = 90, hjust = 1), # adjust x-axis labels
plot.title = element_text(face = "bold", size = 11), # make the plot title bold
legend.title = element_text(face = "bold")) + # make the legend title bold
ggtitle("Parietal eye state in dependence to families' median latitude:") +
guides(color = guide_legend(override.aes = list(label = ""))) # correct the legend labels
Although the distribution of parietal eye states as a function of latitude gives a first inside, it is not possible to assign the points to species. For this reason, an interactive world map with a drop-down menu and tooltip has been created, which gives a closer inset for each plotted point.
library(usethis)
library(devtools)
library(shiny)
library(leaflet)
# plot median geographical distribution by state of parietal eye -interactive world map with drop down menu and tooltip ("scientificName", "medianLat", "meanLon" and "parietal_eye")
coordsmedian.parietalfiltered_cleaned$point_colors <- ifelse(coordsmedian.parietalfiltered_cleaned$parietal_eye == 1, "#99CC00", "#FF0000") # assign colors
# create Tooltip with information of the columns "scientificName", "medianLat", "medianLon" and "eye_status"
coordsmedian.parietalfiltered_cleaned$tooltip <- paste("Species:", "<i>", coordsmedian.parietalfiltered_cleaned$scientificName, "</i>",
"<br>Parietal Eye:", ifelse(coordsmedian.parietalfiltered_cleaned$parietal_eye == 1, "present", "absent"),
"<br>Median Latitude:", coordsmedian.parietalfiltered_cleaned$medianLat,
"<br>Median Longitude:", coordsmedian.parietalfiltered_cleaned$medianLon) # creating the three parameters of the Tooltip
# Define the UI
ui <- fluidPage(
titlePanel("State of parietal eye in dependency to median geographical distribution (cleaned):"), #add the title to the graphic
fluidRow(
column(3,
selectizeInput("selectScientificName", "Select Scientific Names", # creating the drop down menu with its scientific names
choices = c("All", unique(gsub("\\(.?\\)|\\[.?\\]", "", coordsmedian.parietalfiltered_cleaned$scientificName))), # optional choices of the drop down menu
multiple = TRUE, # allow to select several scientific names
selected = "All" # option of selecting all species
)
),
column(9,
leafletOutput("map")
)
)
)
# Define the server logic
server <- function(input, output, session) {
filtered_data_cleaned <- reactive({
if ("All" %in% input$selectScientificName) {
return(coordsmedian.parietalfiltered_cleaned)
} else {
return(coordsmedian.parietalfiltered_cleaned[coordsmedian.parietalfiltered_cleaned$scientificName %in% input$selectScientificName, ])
}
}) # returning the selected scientific names within the drop down menu in the leaflet map
# Render the leaflet map
output$map <- renderLeaflet({
leaflet() %>% # Initialization
addProviderTiles("OpenStreetMap.Mapnik") %>%
addCircleMarkers(data = filtered_data_cleaned(),
lng = ~medianLon,
lat = ~medianLat,
color = ~point_colors,
radius = 3,
fillOpacity = 1,
popup = ~tooltip) %>% #Adjust the plot
addLegend("bottomleft", # Adjust the legend
colors = c("#FF0000", "#99CC00"),
labels = c("absent", "present"),
title = "State of parietal eye")
})
}
shinyApp(ui = ui, server = server) # Run the Shiny app
# get access to interactive world map by: Listening on http://127.0.0.1:6064
By clicking on the following link the interactive world map can be opend.
Statistical analysis of the cleaned data is also essential for further work and to gain insight into the interplay between squamate parietal eye condition and latitudinal distribution.
library(ggplot2)
# statistics of parietal eye state in dependency of latitudinal distribution:
coordsmediannorth_cleaned <- coordsmedian.parietalfiltered_cleaned %>%
filter(medianLat >0 ) # Filter data for North Hemisphere
# Create boxplots with mean, 25- and 75-quantiles
ggplot(coordsmediannorth_cleaned %>% filter(medianLat > 0), aes(x = factor(parietal_eye), y = medianLat, fill = factor(parietal_eye))) +
geom_boxplot(width = 0.75) +
stat_summary(fun = median, geom = "text", aes(label = round(..y.., 2)), vjust = 1, size = 3, color = "#000000") + # Median value
stat_summary(fun = function(x) quantile(x, 0.25), geom = "text", aes(label = round(..y.., 2)), vjust = 1.5, hjust = 1.1, size = 3, color = "#3399CC") + # calculate the 25-quantile
stat_summary(fun = function(x) quantile(x, 0.75), geom = "text", aes(label = round(..y.., 2)), vjust = -0.5, hjust = 1.1, size = 3, color = "#3399CC") + # calculate the 75-quantile
xlab("Parietal eye") +
ylab("Median latitude") +
ggtitle("Median latitudinal distribution (North Hemisphere cleaned):") + # Adjust title
theme_bw() +
scale_fill_manual(values = c("#FF0000", "#99CC00"), labels = c("absent", "present")) + # Adjust legend
labs(fill = "Parietal eye:") +
theme(plot.title = element_text(size = 11, face = "bold"),
legend.position = "right",
legend.title = element_text(size = 12),
legend.text = element_text(size = 12))
Again, the interplay is also calculated for American north hemisphere.
library(ggplot2)
coordsmediannorth_america_cleaned <- coordsmedian.parietalfiltered_cleaned %>%
filter(medianLon >= -125 & medianLon <= -30 & medianLat > 0) # Filter data between -30° and -125° longitude for North hemisphere of America
# Create boxplots with mean, 25- and 75-quantiles
ggplot(coordsmediannorth_america_cleaned %>% filter(medianLat > 0), aes(x = factor(parietal_eye), y = medianLat, fill = factor(parietal_eye))) +
geom_boxplot(width = 0.75) +
stat_summary(fun = median, geom = "text", aes(label = round(..y.., 2)), vjust = 1, size = 3, color = "#000000") + # calculate the median
stat_summary(fun = function(x) quantile(x, 0.25), geom = "text", aes(label = round(..y.., 2)), vjust = 1.5, hjust = 1.1, size = 3, color = "#3399CC") + # calculate the 25-quantile
stat_summary(fun = function(x) quantile(x, 0.75), geom = "text", aes(label = round(..y.., 2)), vjust = -0.5, hjust = 1.1, size = 3, color = "#3399CC") + # calculate the 75-quantile
xlab("Parietal eye") +
ylab("Median latitude") +
ggtitle("Median latitudinal distribution (American North Hemisphere cleaned):") + # Adjust title
theme_bw() +
scale_fill_manual(values = c("#FF0000", "#99CC00"), labels = c("absent", "present")) + # Adjust legend labels
labs(fill = "Parietal Eye:") + # Adjust legend title
theme(plot.title = element_text(size = 11, face = "bold"), # Increase title size
legend.position = "right",
legend.title = element_text(size = 12), # Increase legend title size
legend.text = element_text(size = 12)) # Increase legend text size
Here the counterpart of the American North Hemisphere is calculated by looking at the rest of the North Hemisphere coordinates despite those of America.
library(ggplot2)
coordsmediannorth_not_america_cleaned <- coordsmedian.parietalfiltered_cleaned %>%
filter(!(medianLon >= -125 & medianLon <= -30) & medianLat > 0) # Filter data despite -30° and -125° longitude to exclude North Hemisphere of America
# Create boxplots with mean, 25- and 75-quantiles
ggplot(coordsmediannorth_not_america_cleaned, aes(x = factor(parietal_eye), y = medianLat, fill = factor(parietal_eye))) +
geom_boxplot(width = 0.75) +
stat_summary(fun = median, geom = "text", aes(label = round(..y.., 2)), vjust = 1, size = 3, color = "#000000") + # calculate the median
stat_summary(fun = function(x) quantile(x, 0.25), geom = "text", aes(label = round(..y.., 2)), vjust = 1.5, hjust = 1.1, size = 3, color = "#3399CC") + # calculate the 25-quantile
stat_summary(fun = function(x) quantile(x, 0.75), geom = "text", aes(label = round(..y.., 2)), vjust = -0.5, hjust = 1.1, size = 3, color = "#3399CC") + # calculate the 75-quantile
xlab("Parietal eye") +
ylab("Median latitude") +
ggtitle("Median latitudinal distribution (Not American North Hemisphere cleaned):") + # Adjust title
theme_bw() +
scale_fill_manual(values = c("#FF0000", "#99CC00"), labels = c("absent", "present")) + # Adjust legend
labs(fill = "Parietal eye:") +
theme(plot.title = element_text(size = 11, face = "bold"),
legend.position = "right",
legend.title = element_text(size = 12),
legend.text = element_text(size = 12))
Here is the graphic that represents the cleaned data of the South Hemisphere of the whole globe.
library(ggplot2)
coordsmediansouth_cleaned <- coordsmedian.parietalfiltered_cleaned %>%
filter(medianLat < 0 ) # Filter data for South Hemisphere
# Create boxplots with mean, 25- and 75-quantiles
ggplot(coordsmediansouth_cleaned %>% filter(medianLat < 0), aes(x = factor(parietal_eye), y = medianLat, fill = factor(parietal_eye))) +
geom_boxplot(width = 0.75) +
stat_summary(fun = median, geom = "text", aes(label = round(..y.., 2)), vjust = 1, size = 3, color = "#000000") + # calculate the median
stat_summary(fun = function(x) quantile(x, 0.25), geom = "text", aes(label = round(..y.., 2)), vjust = 1.5, hjust = 1.1, size = 3, color = "#3399CC") + # calculate the 25-quantile
stat_summary(fun = function(x) quantile(x, 0.75), geom = "text", aes(label = round(..y.., 2)), vjust = -0.5, hjust = 1.1, size = 3, color = "#3399CC") + # calculate the 75-quantile
xlab("Parietal eye") +
ylab("Median latitude") +
ggtitle("Median latitudinal distribution (South Hemisphere cleaned):") + # Adjust title
theme_bw() +
scale_fill_manual(values = c("#FF0000", "#99CC00"), labels = c("absent", "present")) + # Adjust legend
labs(fill = "Parietal eye:") +
theme(plot.title = element_text(size = 11, face = "bold"),
legend.position = "right",
legend.title = element_text(size = 12),
legend.text = element_text(size = 12))
In this part the statistics of American´s southern hemisphere are calculated
library(ggplot2)
coordsmediansouth_america_cleaned <- coordsmedian.parietalfiltered_cleaned %>%
filter(medianLon >= -125 & medianLon <= -30 & medianLat < 0) # filtere coordinates for south hemisphere of America
# Create boxplots with mean, 25- and 75-quantiles
ggplot(coordsmediansouth_america_uncleaned, aes(x = factor(parietal_eye), y = medianLat, fill = factor(parietal_eye))) +
geom_boxplot(width = 0.75) +
stat_summary(fun = median, geom = "text", aes(label = round(..y.., 2)), vjust = 1, size = 3, color = "#000000") + # calculate the median
stat_summary(fun = function(x) quantile(x, 0.25), geom = "text", aes(label = round(..y.., 2)), vjust = 1.5, hjust = 1.1, size = 3, color = "#3399CC") + # calculate the 25-quantile
stat_summary(fun = function(x) quantile(x, 0.75), geom = "text", aes(label = round(..y.., 2)), vjust = -0.5, hjust = 1.1, size = 3, color = "#3399CC") + # calculate the 75-quantile
xlab("Parietal eye") +
ylab("Median latitude") +
ggtitle("Median latitudinal distribution (American South Hemisphere cleaned):") + # Adjust title
theme_bw() +
scale_fill_manual(values = c("#FF0000", "#99CC00"), labels = c("absent", "present")) + # Adjust legend
labs(fill = "Parietal eye:") +
theme(plot.title = element_text(size = 11, face = "bold"),
legend.position = "right",
legend.title = element_text(size = 12),
legend.text = element_text(size = 12))
Furthermore, the counterpart of the American southern hemisphere coordinates is calculated by looking at the Old World squamates.
library(ggplot2)
coordsmediansouth_not_america_cleaned <- coordsmedian.parietalfiltered_cleaned %>%
filter(!(medianLon >= -125 & medianLon <= -30) & medianLat < 0) # filter coordinates despite the South Hemisphere of America
# Create boxplots with mean, 25- and 75-quantiles
ggplot(coordsmediansouth_not_america_cleaned, aes(x = factor(parietal_eye), y = medianLat, fill = factor(parietal_eye))) +
geom_boxplot(width = 0.75) +
stat_summary(fun = median, geom = "text", aes(label = round(..y.., 2)), vjust = 1, size = 3, color = "#000000") + # calculate the median
stat_summary(fun = function(x) quantile(x, 0.25), geom = "text", aes(label = round(..y.., 2)), vjust = 1.5, hjust = 1.1, size = 3, color = "#3399CC") + # calculate the 25-quantile
stat_summary(fun = function(x) quantile(x, 0.75), geom = "text", aes(label = round(..y.., 2)), vjust = -0.5, hjust = 1.1, size = 3, color = "#3399CC") + # calculate the 75-quantile
xlab("Parietal eye") +
ylab("Median latitude") +
ggtitle("Median latitudinal distribution (Not American South Hemisphere cleaned):") + # Adjust title
theme_bw() +
scale_fill_manual(values = c("#FF0000", "#99CC00"), labels = c("absent", "present")) + # Adjust legend
labs(fill = "Parietal eye:") +
theme(plot.title = element_text(size = 11, face = "bold"),
legend.position = "right",
legend.title = element_text(size = 12),
legend.text = element_text(size = 12))
This boxplot graph contains all the cleaned boxplots. This gives a better overview of the effect of different regions on the effect on parietal eye state.
all_data <- bind_rows(
coordsmediannorth_cleaned %>% mutate(Location = "North"),
coordsmediannorth_america_cleaned %>% mutate(Location = "North (America)"),
coordsmediannorth_not_america_cleaned %>% mutate(Location = "North (not America"),
coordsmediansouth_cleaned %>% mutate(Location = "South"),
coordsmediansouth_america_cleaned %>% mutate(Location = "South (America)"),
coordsmediansouth_not_america_cleaned %>% mutate(Location = "South (not America)") # create the combined list for the cleaned boxplots
)
# create the combined boxplot
plot_combined <- ggplot(all_data, aes(x = Location, y = medianLat, fill = factor(parietal_eye))) +
geom_boxplot(width = 1) +
stat_summary(data = subset(all_data, parietal_eye == 1), fun = median, geom = "text", aes(x = Location, label = round(..y.., 2)), vjust = -0.8, hjust= -0.1, size = 2.4, color = "#000000") + # calculate median for category 1
stat_summary(data = subset(all_data, parietal_eye == 0), fun = median, geom = "text", aes(x = Location, label = round(..y.., 2)), vjust = -0.8, hjust= 1.1, size = 2.4, color = "#000000") + # calculate median for category 0
ylab("Mean Latitude") +
xlab("Hemisphere") +
scale_fill_manual(values = c("#FF0000", "#99CC00"), labels = c("absent", "present")) + # Adjust legend
labs(fill = "Parietal eye:") +
ggtitle("Median latitudinal distribution by Hemisphers, New- and Old World Squamates:") +
theme_bw() +
theme(plot.title = element_text(size = 9.5, face = "bold"),
legend.position = "right",
legend.title = element_text(size = 9),
axis.text.x = element_text(angle = 45, hjust = 1))
print(plot_combined)
As with the unpurified data above, here is the purified data plotted above and tested with a t-test. For the statistical tests, the specific selected lists are used according to the region of the world analysed.
t_test_northhemisphere_cleaned <- t.test(medianLat ~ parietal_eye, data = coordsmediannorth_cleaned)
print(t_test_northhemisphere_cleaned)
t_test_northhemisphere_america_cleaned <- t.test(medianLat ~ parietal_eye, data = coordsmediannorth_america_cleaned)
print(t_test_northhemisphere_america_cleaned)
t_test_northhemisphere_not_america_cleaned <- t.test(medianLat ~ parietal_eye, data = coordsmediannorth_not_america_cleaned)
print(t_test_northhemisphere_not_america_cleaned)
t_test_southhemisphere_cleaned <- t.test(medianLat ~ parietal_eye, data = coordsmediansouth_cleaned)
print(t_test_southhemisphere_cleaned)
t_test_southhemisphere_america_cleaned <- t.test(medianLat ~ parietal_eye, data = coordsmediansouth_america_cleaned)
print(t_test_southhemisphere_america_cleaned)
t_test_southhemisphere_not_america_cleaned <- t.test(medianLat ~ parietal_eye, data = coordsmediansouth_not_america_cleaned)
print(t_test_southhemisphere_not_america_cleaned)
Beside the dependency of the latitude to the state of the parietal eye, also the circadian rhythm is analysed. Therefore the researched data is used. There are three categories of circadian rhythm which are abbreviated by their first letter (nocturnal, diurnal, cathemeral)
At first a list with information about the coordinates distribution of each species as well as their circadian rhythm is created. Additionally the rows that have no information in the excel data about the circadian rhythm are also excluded in the list “coordsmedian.parietal.cirica_cleaned”.
coordsmedian.parietal.cirica_cleaned <- coordsmedian.parietalfiltered_cleaned
coordsmedian.parietal.cirica_cleaned$circadian_rhythm <- NA # create new column
for (i in 1:nrow(squamata_Amph.Lac.Serp_xl)) { # loop through "squamata_Amph.Lac.Serp_xl" and add the information to "coordsmedian.parietal.cirica_cleaned"
match_row <- which(sapply(strsplit(coordsmedian.parietal.cirica_cleaned$scientificName, " "),
function(x) any(x %in% strsplit(squamata_Amph.Lac.Serp_xl$Taxon[i], " ")[[1]]))) # searching for matching rows in both data
if (length(match_row) > 0) {
coordsmedian.parietal.cirica_cleaned$circadian_rhythm[match_row] <- squamata_Amph.Lac.Serp_xl$circadian_rhythm[i] # add information to the column
}
}
coordsmedian.parietal.ciricafiltered_cleaned <- coordsmedian.parietal.cirica_cleaned[coordsmedian.parietal.cirica_cleaned$circadian_rhythm %in% c("c", "n", "d"), ] # filter data and exclude rows with "NA"
#saveRDS(coordsmedian.parietal.ciricafiltered_cleaned, file="C:/Users/Daniel/Desktop/results_R/coordsmedian.parietal.ciricafiltered_cleaned.RDS")
With the list plotted above a graphical display of the data, which show the dependence of the circadian rhythm to the parietal eye state, can be made.
ggplot(coordsmedian.parietal.ciricafiltered_cleaned, aes(x = factor(circadian_rhythm), fill = factor(parietal_eye))) + # assigning the color factor
geom_bar(position = "stack") +
labs(title = "Distribution of parietal eye by circadian rhythm:", # Making title bold
x = "Circadian rhythm",
y = "Count",
fill = "Parietal eye:") +
scale_fill_manual(values = c("#FF0000", "#99CC00"), labels = c("absent", "present")) + # Adjust labels by colors
theme(plot.title = element_text(face = "bold"))
Furthermore a graphic showing a percentage presentation is also important.
# Calculate the proportions of the three categories
proportions <- coordsmedian.parietal.ciricafiltered_cleaned %>%
group_by(parietal_eye, circadian_rhythm) %>%
summarise(count = n()) %>%
mutate(percent = count / sum(count))
# Plot with ggplot() the percentual circadian rhythm
ggplot(proportions, aes(x = circadian_rhythm, y = percent, fill = factor(parietal_eye))) +
geom_bar(stat = "identity", position = "fill") +
labs(title = "Distribution of parietal eye by circadian rhythm:",
x = "Circadian rhythm",
y = "Proportion",
fill = "Parietal eye:") +
scale_fill_manual(values = c("#FF0000", "#99CC00"), labels = c("absent", "present"))+ # Adjust labels by color
theme(plot.title = element_text(face = "bold"))
To avoid giving the impression that there is a clear tendency for cathemeral squamates to be diurnal, the absolute numbers have to be added to the graph.
library(ggplot2)
library(dplyr)
coordsmedian.parietal.ciricafiltered_cleaned <- readRDS("C:/Users/Daniel/Desktop/results_R/coordsmedian.parietal.ciricafiltered_cleaned.RDS")
# Calculate the proportions of the three categories
proportions <- coordsmedian.parietal.ciricafiltered_cleaned %>%
group_by(parietal_eye, circadian_rhythm) %>%
summarise(count = n()) %>%
mutate(percent = count / sum(count))
`summarise()` has grouped output by 'parietal_eye'. You can override using the
`.groups` argument.
# Plot percentual circadian rhythm with ggplot()
ggplot(proportions, aes(x = circadian_rhythm, y = percent, fill = factor(parietal_eye))) +
geom_bar(stat = "identity", position = "fill") +
geom_text(aes(label = count), position = position_fill(vjust = 0.5), size = 3) + # Add the absolute numbers
labs(title = "Distribution of parietal eye by circadian rhythm:",
x = "Circadian Rhythm",
y = "Proportion",
fill = "Parietal eye:") +
scale_fill_manual(values = c("#FF0000", "#99CC00"), labels = c("absent", "present")) + # Adjust labels by colors
theme(plot.title = element_text(face = "bold"))
As proceeded above with the median latitude also the statistics for the circadian rhythm in dependence to the parietal eye state has to be calculated.
chisq_circadianrhythm <-chisq.test(coordsmedian.parietal.ciricafiltered_cleaned$parietal_eye, coordsmedian.parietal.ciricafiltered_cleaned$circadian_rhythm)
chisq_circadianrhythm
fisher_test_circadianrhythm <- fisher.test(table(coordsmedian.parietal.ciricafiltered_cleaned$parietal_eye, coordsmedian.parietal.ciricafiltered_cleaned$circadian_rhythm))
print(fisher_test_circadianrhythm)
In both cases the chi square and the fishers exact test are highly significant results presented.
The final analysed factor towards the parietal eye state is the habitat of the squamates.
To analyse the habitat in dependence to the habitat, the researched habitats are assigned towards the species.
#create a new column in list "coordsmedian.parietal.habitat_cleaned" and fill it with data out of "squamata_Amph.Lac.Serp_xl$habitat_abb"
coordsmedian.parietal.habitat_cleaned<- coordsmedian.parietalfiltered_cleaned
coordsmedian.parietal.habitat_cleaned$habitat_abb<- NA # assign new column
for (i in 1:nrow(squamata_Amph.Lac.Serp_xl)) { # loop through "squamata_Amph.Lac.Serp_xl"
match_row <- which(sapply(strsplit(coordsmedian.parietal.habitat_cleaned$scientificName, " "), function(x) any(x %in% strsplit(squamata_Amph.Lac.Serp_xl$Taxon[i], " ")[[1]]))) # Find matching rows of"coordsmedian.parietal.ciricafiltered.habitat_cleaned" and "squamata_Amph.Lac.Serp_xl$Taxon"
if (length(match_row) > 0) {
coordsmedian.parietal.habitat_cleaned$habitat_abb[match_row] <- squamata_Amph.Lac.Serp_xl$habitat_abb[i] # add information of "squamata_Amph.Lac.Serp_xl$habitat_abb" to "coordsmedian.parietal.cirica_cleaned$habitat_abb" if it matches
}
}
coordsmedian.parietal.habitat_cleaned
#saveRDS(coordsmedian.parietal.habitat_cleaned, file="C:/Users/Daniel/Desktop/results_R/coordsmedian.parietal.habitat_cleaned.RDS")
Due to the large number of different habitats, it is necessary to categorise them. There are several ways to categorise them. One of them is the IUCN Red List categorisation shown here. The allocation of the different habitat categories is shown below. The longevity of the habitats studied can be seen in the thesis.
#create a new column called "coordsmean.parietal.habitat_cleaned$habitat_category" that puts "squamata_Amph.Lac.Serp_xl$habitat_abb" in diffrent categories of IUCN red list
coordsmedian.parietal.habitat_cleaned$habitat_category <- NA # create a new column
habitat_mapping <- c(
"ba" = "Marine Coastal/Supratidal",
"be" = "Marine Coastal/Supratidal",
"bj" = "Rocky Areas",
"cl" = "Forest",
"co" = "Marine Coastal/Supratidal",
"de" = "Wetland",
"dt" = "Desert",
"du" = "Desert",
"dv" = "Desert",
"fo" = "Forest",
"fw" = "Wetland",
"gl" = "Grassland",
"he" = "Forest",
"jw" = "Forest",
"la" = "Wetland",
"lg" = "Wetland",
"ls" = "Rocky Areas",
"ma" = "Wetland",
"mf" = "Rocky Areas",
"mn" = "Rocky Areas",
"mo" = "Rocky Areas",
"mp" = "Rocky Areas",
"mr" = "Rocky Areas",
"pa" = "Grassland",
"pr" = "Grassland",
"ri" = "Wetland",
"rm" = "Wetland",
"sd" = "Desert",
"sf" = "Forest",
"sp" = "Grassland",
"ty" = "Wetland",
"un" = "Forest",
"vr" = "Rocky Areas",
"wa" = "Wetland",
"wc" = "Forest"
) # assign new categories
coordsmedian.parietal.habitat_cleaned$habitat_category <- habitat_mapping[as.character(coordsmedian.parietal.habitat_cleaned$habitat_abb)]
coordsmedian.parietal.habitat_cleaned
#saveRDS(coordsmedian.parietal.habitat_cleaned, file="C:/Users/Daniel/Desktop/results_R/coordsmedian.parietal.habitat_cleaned.RDS")
As proceeded before, rows without information of habitats have to be excluded from the list.
# create a new list in which only these rows are included, saving information of the "habitat_category":
coordsmedian.parietal.habitatfiltered_cleaned<- coordsmedian.parietal.habitat_cleaned
coordsmedian.parietal.habitatfiltered_cleaned<- coordsmedian.parietal.habitatfiltered_cleaned[!is.na(coordsmedian.parietal.habitatfiltered_cleaned$habitat_category), ] # filter data and exclude these with NA
coordsmedian.parietal.habitatfiltered_cleaned$parietal_eye_filtered <- ifelse(is.na(coordsmedian.parietal.habitatfiltered_cleaned$habitat_category), NA, coordsmedian.parietal.habitatfiltered_cleaned$parietal_eye)
coordsmedian.parietal.habitatfiltered_cleaned
#saveRDS(coordsmedian.parietal.habitatfiltered_cleaned, file="C:/Users/Daniel/Desktop/results_R/coordsmedian.parietal.habitatfiltered_cleaned.RDS")
#coordsmedian.parietal.habitatfiltered_cleaned<- readRDS("C:/Users/Daniel/Desktop/results_R/coordsmedian.parietal.habitatfiltered_cleaned.RDS")
Moreover the data for the habitat categories in dependence to the parietal eye state are displayed graphically.
# absolute plot of dependency of parietal eye in relation to habitat categorie of IUCN red list:
ggplot(coordsmedian.parietal.habitatfiltered_cleaned, aes(x = habitat_category, fill = factor(parietal_eye_filtered, labels = c("absent", "present")))) +
geom_bar(position = "stack", stat = "count") +
geom_text(stat = 'count', aes(label = ..count..), position = position_stack(vjust = +0.5), size = 2.5, color = "#000000") +
labs(title = "Absolute distribution of parietal eye by habitat according to IUCN Red list:",
x = "Habitat category",
y = "Count",
fill = "Parietal eye:") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 0.85),
plot.title = element_text(size = 10, face = "bold"),
axis.title = element_text(size = 10),
axis.text = element_text(size = 8)) +
scale_fill_manual(values = c("#FF0000", "#99CC00"), labels = c("absent", "present"))
For the same data there is also a percentage presentation of the habitat distribution.
coordsmedian.parietal.habitatfiltered_cleaned<- readRDS("C:/Users/Daniel/Desktop/results_R/coordsmedian.parietal.habitatfiltered_cleaned.RDS")
library(ggplot2)
# percentage presentation of parietal eye in dependence to habitat categories of IUCN Red list:
ggplot(coordsmedian.parietal.habitatfiltered_cleaned, aes(x = habitat_category, fill = factor(parietal_eye, labels = c("Absent", "Present")))) +
geom_bar(position = "fill") +
geom_text(stat='count', aes(label=..count..), position = position_fill(vjust = 0.5), size = 2.5) + # adjust geom-text
labs(title = "Procentual distribution of parietal eye by habitat according to IUCN Red list:",
x = "Habitat category",
y = "Proportion",
fill = "Parietal eye:") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 0.85), # adjust axis
plot.title = element_text(size = 10, face = "bold"),
axis.title = element_text(size = 10),
axis.text = element_text(size = 8))+
scale_fill_manual(values = c("#FF0000", "#99CC00"), labels = c("absent", "present")) # assign bicategorical filling colors
A statistical display of the interplay between the habitat and the parietal eye state is also given down below
# chi-square for "parietal_eye_filtered" in dependence of "habitat_category":
chisq_test_habitat <- chisq.test(coordsmedian.parietal.habitatfiltered_cleaned$parietal_eye_filtered, coordsmedian.parietal.habitatfiltered_cleaned$habitat_category)
chisq_test_habitat
The Chi-square test gives a highly significant result between the habitat of the species and the condition of the parietal eye.
Due to the fact that closely related species share a lot of phylogenetic history, character traits, or external factors such as parietal eye status or latitudinal distribution, species cannot be viewed independently. Therefore, phylogenetic structures must be taken into account.
Here is a display of the phylogenetic tree of all sampled squamates. Information on branch lengths, nodes and the overall topology of the tree is stored in Zheng’s phylogenetic tree
library(ape)
##
## Attache Paket: 'ape'
## Das folgende Objekt ist maskiert 'package:dplyr':
##
## where
library(phytools)
# creating a new species_vector with names for the data of the phylogenetic tree of Zheng
species_vector_phyl <- gsub(" ", "_", species_vector)
tree_Zheng <- read.tree(file = "C:/Users/Daniel/Desktop/read in data/Zheng_Wiens.txt") # read in Zheng´s tree
matching_species <- intersect(species_vector_phyl, tree_Zheng$tip.label) #searching for matching species
tree_squamata <- drop.tip(tree_Zheng, setdiff(tree_Zheng$tip.label, matching_species)) # excluding all species that do not mach both data structures
tip_colors <- rep("#FF0000", length(tree_squamata$tip.label)) # creating a default vector (red) for the parietal eye state
for (i in seq_along(tree_squamata$tip.label)) {
tree_name <- gsub("_", " ", tree_squamata$tip.label[i]) # substitute the "_" by a space
matching_rows <- grep(paste(unlist(strsplit(tree_name, " ")), collapse = "|"), coordsmedian.parietalfiltered_cleaned$scientificName, ignore.case = TRUE) # searching for matching species in "coordsmedian.parietalfiltered_cleaned$scientificName"
if (length(matching_rows) > 0) {
eye_status <- coordsmedian.parietalfiltered_cleaned$parietal_eye[matching_rows]
tip_colors[i] <- ifelse(any(eye_status == 1), "#99CC00", "#FF0000") # color the tip in dependence to the state of the "parietal_eye"
}
}
#write.tree(tree_squamata, file = "C:/Users/Daniel/Desktop/results_R/tree_squamata.txt")
pdf("C:/Users/Daniel/Desktop/results_R/tree_squamata.pdf", width = 20, height = 20) # save the phylogenetic tree as a pdf with state of parietal eye as colored tips
plot(tree_squamata, show.tip.label = TRUE, cex = 0.25, main = "Phylogenetic Tree of sampled Squamates with state of parietal eye:", cex.main = 1) # add title
legend("topleft", title = "State of Parietal Eye:", legend = c("absent", "present"), fill = c("#FF0000", "#99CC00"), cex = 0.8, inset = c(0.01, 0.01)) # add legend
tiplabels(pch = 21, bg = tip_colors, cex = 0.6, adj = c(-0.5, 0.5)) # adjust tiplabels
dev.off()
## png
## 2
Because of the enormous amount of tip species, the tree can not be presented here in its details. The pedigree therefore is delivered in an additional pdf data.
There is also a display of the phylogenetic structure for the American squamates because, as can be seen above, their dependence on latitude is different.
matching_species_america <- subset(coordsmedian.parietalfiltered_uncleaned, medianLon >= -125 & medianLon <= -30 & medianLat > 0)$scientificName # filter by excluding american distributed species
matching_species_america <- unique(sapply(matching_species_america, function(x) paste(strsplit(x, " ")[[1]][1:2], collapse = "_"))) # extracting the species name while excluding authors name and year
species_vector_phyl_america <- intersect(species_vector_phyl, matching_species_america) # selectin American species
matching_species_america <- intersect(species_vector_phyl_america, tree_Zheng$tip.label) # searching for matching species
tree_squamata_america <- drop.tip(tree_Zheng, setdiff(tree_Zheng$tip.label, matching_species_america)) # excluding not matching species
tip_colors <- rep("#FF0000", length(tree_squamata_america$tip.label)) # creating a default vector (red) for the parietal eye state
for (i in seq_along(tree_squamata_america$tip.label)) {
tree_name <- gsub("_", " ", tree_squamata_america$tip.label[i]) # substitute the "_" by a space
matching_rows <- grep(paste(unlist(strsplit(tree_name, " ")), collapse = "|"), coordsmedian.parietalfiltered_cleaned$scientificName, ignore.case = TRUE) # searching for maching species in "coordsmedian.parietalfiltered_cleaned$scientificName"
if (length(matching_rows) > 0) {
eye_status <- coordsmedian.parietalfiltered_cleaned$parietal_eye[matching_rows]
tip_colors[i] <- ifelse(any(eye_status == 1), "#99CC00", "#FF0000") # # color the tip labels based on the two categories of "eye_status"
}
}
#write.tree(tree_squamata_america, file = "C:/Users/Daniel/Desktop/results_R/tree_squamata_america.txt")
#pdf("C:/Users/Daniel/Desktop/results_R/tree_squamata_america.pdf", width = 20, height = 20) # save the phylogenetic tree as a pdf with state of parietal eye as coloured tips
plot(tree_squamata_america, show.tip.label = TRUE, cex = 0.25, main = "Phylogenetic Tree of sampled Squamates within America (longitude from -125° to -30°) with state of parietal eye:", cex.main = 0.7) # add title
legend("topleft", title = "State of Parietal Eye:", legend = c("Absent", "Present"), fill = c("#FF0000", "#99CC00"), cex = 0.8, inset = c(0.01, 0.01)) # add legend
tiplabels(pch = 21, bg = tip_colors, cex = 0.6, adj = c(-0.5, 0.5)) # adjust tiplabels
dev.off()
## null device
## 1
Same accords to this tree, which is also added separately as a pdf data to the thesis.
matching_species_circadian <- coordsmedian.parietal.ciricafiltered_cleaned$scientificName[coordsmedian.parietal.ciricafiltered_cleaned$circadian_rhythm %in% c("n", "d")] # extract only species names with "d" or "n" in the following column, therefore exclude those with "c"
matching_species_circadian <- sapply(strsplit(matching_species_circadian, " "), function(x) paste(x[1:2], collapse = "_")) #create vector "matching_species_circadian" by "sapply"
matching_species_circadian <- intersect(matching_species_circadian, tree_Zheng$tip.label) # search for matching species in Zheng´s tree
tree_squamata_circadian <- drop.tip(tree_Zheng, setdiff(tree_Zheng$tip.label, matching_species_circadian))
matching_species_circadian <- gsub("_", " ", matching_species_circadian)
#write.tree(tree_squamata_circadian, file = "C:/Users/Daniel/Desktop/results_R/tree_circadian.txt")
matching_species_habitat <- sapply(strsplit(coordsmedian.parietal.habitatfiltered_cleaned$scientificName, " "), function(x) paste(x[1:2], collapse = "_")) # create a vector with species having information saved about their habitat
matching_species_habitat <- intersect(matching_species_habitat, tree_Zheng$tip.label) #matching species with Zheng´s tree
tree_squamata_habitat <- drop.tip(tree_Zheng, setdiff(tree_Zheng$tip.label, matching_species_habitat)) # create tree for species with habitat information
matching_species_circadian <- gsub("_", " ", matching_species_habitat) # substitute space
#write.tree(tree_squamata_habitat, file = "C:/Users/Daniel/Desktop/results_R/tree_habitat.txt")
In later work, the threshold will be calculated using the threshBayes function. The prerequisite for this model is Brownian motion. Therefore, the phylogenetic tree must be tested for Brownian motion. Lambda can have values between 0 and 1, where 1 means that the analysed tree follows Brownian motion. To get an even more precise result with “threshBayes”, the tree can be modified in its branch lengths with the calculated value.
library(geiger)
lambda_data <- data.frame(row.names = matching_species) # create a data frame out of the matching species that are the rownames
rownames(lambda_data)<-gsub("_", " ", rownames(lambda_data)) #substitution
lambda_data$medianLat <- NA # create a new column
for (i in seq_len(nrow(lambda_data))) {
name <- rownames(lambda_data)[i]
matching_rows <- grep(name, coordsmedian.parietalfiltered_cleaned$scientificName) # searching for matching rows
if (length(matching_rows) > 0) {
lambda_data$medianLat[i] <- coordsmedian.parietalfiltered_cleaned$medianLat[matching_rows[1]] # add information about median latitude to the new assigend column
}
}
# Because of unequal scientific names in two cases the data of them have to be added manually
lambda_data["Malayotyphlops_ruber", "medianLat"] <- 12.08537
lambda_data["Hemidactylus_imbricatus", "medianLat"] <- -29.1
rownames(lambda_data)<-gsub(" ", "_", rownames(lambda_data))# substitution
#saveRDS(lambda_data, file= "C:/Users/Daniel/Desktop/results_R/lambda_data.RDS")
#lambda_data<-readRDS("C:/Users/Daniel/Desktop/results_R/lambda_data.RDS")
With these results lambda can be calculated.
library(geiger)
## Warning: Paket 'geiger' wurde unter R Version 4.3.3 erstellt
lambda_data <- readRDS("C:/Users/Daniel/Desktop/results_R/lambda_data.RDS")
results_geiger_lambda_data <- fitContinuous(phy = tree_squamata, dat = lambda_data)
results_geiger_lambda_data
## GEIGER-fitted comparative model of continuous data
## fitted 'BM' model parameters:
## sigsq = 3.683858
## z0 = 5.083600
##
## model summary:
## log-likelihood = -1690.090886
## AIC = 3384.181772
## AICc = 3384.210898
## free parameters = 2
##
## Convergence diagnostics:
## optimization iterations = 100
## failed iterations = 0
## number of iterations with same best fit = 100
## frequency of best fit = 1.000
##
## object summary:
## 'lik' -- likelihood function
## 'bnd' -- bounds for likelihood search
## 'res' -- optimization iteration summary
## 'opt' -- maximum likelihood parameter estimates
#saveRDS(results_geiger_lambda_data, file= "C:/Users/Daniel/Desktop/results_R/results_geiger_lambda_data.RDS")
library(geiger)
fitContinuous(phy= tree_squamata, dat= lambda_data, model= "lambda")
## GEIGER-fitted comparative model of continuous data
## fitted 'lambda' model parameters:
## lambda = 0.981244
## sigsq = 3.290608
## z0 = 5.065844
##
## model summary:
## log-likelihood = -1684.983941
## AIC = 3375.967881
## AICc = 3376.026275
## free parameters = 3
##
## Convergence diagnostics:
## optimization iterations = 100
## failed iterations = 0
## number of iterations with same best fit = 15
## frequency of best fit = 0.150
##
## object summary:
## 'lik' -- likelihood function
## 'bnd' -- bounds for likelihood search
## 'res' -- optimization iteration summary
## 'opt' -- maximum likelihood parameter estimates
With the value of lambda that best describes the phylogenetic structure, the branch lengths can be adjusted: best.
tree_rescaled<-rescale(x=tree_squamata, model = "lambda", 0.981244) # create the rescaled tree
write.tree(tree_rescaled, file = "C:/Users/Daniel/Desktop/results_R/tree_rescaled.txt")
pdf("C:/Users/Daniel/Desktop/results_R/tree_rescaled_america.pdf", width = 20, height = 20) # save the phylogenetic tree as a pdf with state of parietal eye as colored tips
The same is true for squamates in America, as this result indicates a different situation.
lambda_data_america <- data.frame(row.names = matching_species_america) # create a data frame out of matching american species vector
rownames(lambda_data_america)<-gsub("_", " ", rownames(lambda_data_america)) # substitution
lambda_data_america$medianLat <- NA # create a new column
for (i in seq_len(nrow(lambda_data_america))) {
name <- rownames(lambda_data_america)[i]
matching_rows <- grep(name, coordsmedian.parietalfiltered_cleaned$scientificName) # searching for matching rows
if (length(matching_rows) > 0) {
lambda_data_america$medianLat[i] <- coordsmedian.parietalfiltered_cleaned$medianLat[matching_rows[1]] # add mean latitude to the new created column
}
}
lambda_data_america
rownames(lambda_data_america)<-gsub(" ", "_", rownames(lambda_data_america)) # substitution
#saveRDS(lambda_data_america, file= "C:/Users/Daniel/Desktop/results_R/lambda_data_america.RDS")
lambda_data_america<-readRDS("C:/Users/Daniel/Desktop/results_R/lambda_data_america.RDS")
results_geiger_lambda_data_america<- fitContinuous(phy = tree_squamata_america, dat = lambda_data_america)
results_geiger_lambda_data_america
## GEIGER-fitted comparative model of continuous data
## fitted 'BM' model parameters:
## sigsq = 1.551216
## z0 = 16.952798
##
## model summary:
## log-likelihood = -414.564381
## AIC = 833.128762
## AICc = 833.236870
## free parameters = 2
##
## Convergence diagnostics:
## optimization iterations = 100
## failed iterations = 0
## number of iterations with same best fit = 100
## frequency of best fit = 1.000
##
## object summary:
## 'lik' -- likelihood function
## 'bnd' -- bounds for likelihood search
## 'res' -- optimization iteration summary
## 'opt' -- maximum likelihood parameter estimates
#saveRDS(results_geiger_lambda_data_america, file= "C:/Users/Daniel/Desktop/results_R/results_geiger_lambda_data_america.RDS")
fitContinuous(phy= tree_squamata_america, dat= lambda_data_america, model= "lambda")
## GEIGER-fitted comparative model of continuous data
## fitted 'lambda' model parameters:
## lambda = 0.876633
## sigsq = 0.950292
## z0 = 17.183140
##
## model summary:
## log-likelihood = -412.436030
## AIC = 830.872061
## AICc = 831.090242
## free parameters = 3
##
## Convergence diagnostics:
## optimization iterations = 100
## failed iterations = 0
## number of iterations with same best fit = 14
## frequency of best fit = 0.140
##
## object summary:
## 'lik' -- likelihood function
## 'bnd' -- bounds for likelihood search
## 'res' -- optimization iteration summary
## 'opt' -- maximum likelihood parameter estimates
tree_rescaled_america<-rescale(x=tree_squamata_america, model = "lambda", 0.876626) # create the rescaeld tree with the calculated value for lambda
tree_rescaled_america
##
## Phylogenetic tree with 114 tips and 113 internal nodes.
##
## Tip labels:
## Anelytropsis_papillosus, Coleonyx_brevis, Coleonyx_variegatus, Coleonyx_mitratus, Coleonyx_elegans, Aristelliger_georgeensis, ...
##
## Rooted; includes branch lengths.
write.tree(tree_rescaled_america, file = "C:/Users/Daniel/Desktop/results_R/tree_rescaled_america.txt")
pdf("C:/Users/Daniel/Desktop/results_R/tree_rescaled_america.pdf", width = 20, height = 20) # save the phylogenetic tree as a pdf with state of parietal eye as colored tips
Analysis of the data with the phylogenetic trees using Bayesian analysis.
tree_squamata <- read.tree(file = "C:/Users/Daniel/Desktop/results_R/tree_squamata.txt") # read in tree of squamates
tree_squamata_america <- read.tree(file = "C:/Users/Daniel/Desktop/results_R/tree_squamata_america.txt") # read in tree of squamates in ameica
bayes_data <- data.frame(row.names = matching_species) # create a data frame with the matching species with Zheng´s tree
First, the data frames must be created with the species as the row name and the parietal eye as the column.
# add a new column with information about the parietal eye state
bayes_parietal<- bayes_data
rownames(bayes_parietal) <- gsub("_", " ", rownames(bayes_parietal))
bayes_parietal$parietal_eye<- NA # create a new column
for (i in seq_len(nrow(bayes_parietal))) {
name <- rownames(bayes_parietal)[i]
matching_row <- grep(name, coordsmedian.parietalfiltered_cleaned$scientificName, value = TRUE) # searching for matching rows
if (length(matching_row) > 0) {
bayes_parietal$parietal_eye[i] <- coordsmedian.parietalfiltered_cleaned$parietal_eye[grep(name, coordsmedian.parietalfiltered_cleaned$scientificName)][1] # add information about parietal eye to the new created column
}
}
#saveRDS(bayes_parietal, file="C:/Users/Daniel/Desktop/results_R/bayes_parietal.RDS")
bayes_parietal <- readRDS("C:/Users/Daniel/Desktop/results_R/bayes_parietal.RDS")
# add a new column with information about the mean latitudinal distribution
bayes_medianLat<- bayes_parietal
rownames(bayes_medianLat) <- gsub("_", " ", rownames(bayes_medianLat)) # substitution
bayes_medianLat$medianLat <- NA # create a new column
for (i in seq_len(nrow(bayes_medianLat))) {
name <- rownames(bayes_medianLat)[i]
matching_rows <- grep(name, coordsmedian.parietalfiltered_cleaned$scientificName) # searching for matching rows
if (length(matching_rows) > 0) {
bayes_medianLat$medianLat[i] <- coordsmedian.parietalfiltered_cleaned$medianLat[matching_rows[1]] # add the information of matching rows to the new created column
}
}
# Because of unequal scientific names in two cases the data of them have to be added manually
bayes_medianLat["Malayotyphlops ruber", "parietal_eye"] <- 0
bayes_medianLat["Malayotyphlops ruber", "medianLat"] <- 12.08537
bayes_medianLat["Hemidactylus imbricatus", "parietal_eye"] <- 0
bayes_medianLat["Hemidactylus imbricatus", "medianLat"] <- -29.1
bayes_medianLat$parietal_eye <- factor(bayes_medianLat$parietal_eye, levels = c("0", "1"))
#saveRDS(bayes_medianLat, file= "C:/Users/Daniel/Desktop/results_R/bayes_medianLat.RDS")
bayes_medianLat <- readRDS("C:/Users/Daniel/Desktop/results_R/bayes_meanLat.RDS")
Same as proceeded above is done for the species living in America
bayes_medianLat <- readRDS("C:/Users/Daniel/Desktop/results_R/bayes_meanLat.RDS")
matching_species_america <- gsub("_", " ", matching_species_america) # substitution
bayes_species_america <- matching_species_america # create a new vector with only american species
bayes_medianLat_america <- subset(bayes_medianLat, rownames(bayes_medianLat) %in% bayes_species_america) # create a new data frame with information for bayesian analysation
bayes_medianLat_america$parietal_eye <- factor(bayes_medianLat_america$parietal_eye, levels = c("0", "1")) # assign the levels of the column as factor
#saveRDS(bayes_medianLat_america, file= "C:/Users/Daniel/Desktop/results_R/bayes_medianLat_america.RDS")
bayes_medianLat_america <- readRDS("C:/Users/Daniel/Desktop/results_R/bayes_medianLat_america.RDS")
And, as with the circadian rhythm, the data frame is created for later analysis by “threshBayes”.
bayes_circadian <- data.frame(row.names = matching_species_circadian) # create a data frame by using matching species
# add a new column with information about the mean latitudinal distribution
rownames(bayes_circadian) <- gsub("_", " ", rownames(bayes_circadian))
bayes_circadian$circadian_rhythm <- NA # create a new column
for (i in seq_len(nrow(bayes_circadian))) {
name <- rownames(bayes_circadian)[i]
matching_rows <- grep(name, coordsmedian.parietal.ciricafiltered_cleaned$scientificName) # searching for matching rows
if (length(matching_rows) > 0) {
bayes_circadian$circadian_rhythm[i] <- coordsmedian.parietal.ciricafiltered_cleaned$circadian_rhythm[matching_rows[1]] # add information of the matching rows to the new created column "circadian_rhythm"
}
}
bayes_circadian$parietal_eye<- NA # create a new column
for (i in seq_len(nrow(bayes_circadian))) {
name <- rownames(bayes_circadian)[i]
matching_row <- grep(name, coordsmedian.parietalfiltered_cleaned$scientificName, value = TRUE) # searching for matching rows
if (length(matching_row) > 0) {
bayes_circadian$parietal_eye[i] <- coordsmedian.parietalfiltered_cleaned$parietal_eye[grep(name, coordsmedian.parietalfiltered_cleaned$scientificName)][1] # add information about the parietal eye to the matching rows in the new created column
}
}
rownames(bayes_circadian)<- gsub(" ", "_", rownames(bayes_circadian)) # substitution
#saveRDS(bayes_circadian, file= "C:/Users/Daniel/Desktop/results_R/bayes_circadian.RDS")
bayes_circadian <- readRDS("C:/Users/Daniel/Desktop/results_R/circadian.RDS")
Create a species data frame with habitat information to calculate Threshold Bayes later.
bayes_habitat <- data.frame(row.names = matching_species_habitat) # create a data frame with matching species
# add a new column with information about the
rownames(bayes_habitat) <- gsub("_", " ", rownames(bayes_habitat))
bayes_habitat$habitat_category <- NA # create a new column
for (i in seq_len(nrow(bayes_habitat))) {
name <- rownames(bayes_habitat)[i]
matching_rows <- grep(name, coordsmedian.parietal.habitatfiltered_cleaned$scientificName,) # searching for matching rows
if (length(matching_rows) > 0) {
bayes_habitat$habitat_category[i] <- coordsmedian.parietal.habitatfiltered_cleaned$habitat_category[matching_rows[1]] # add informatiom about the habitat category to the euqually named column, if the species names match with the rownames
}
}
#rename all habitats that are not of the category "Forest" to sunny while the category "Forest" is renamed to "shady", because there is only a binary structure possible for "threshBayes".
bayes_habitat$habitat_category <- ifelse(bayes_habitat$habitat_category == "Forest", "shady", "sunny")
bayes_habitat$parietal_eye<- NA # create a new column
for (i in seq_len(nrow(bayes_habitat))) {
name <- rownames(bayes_habitat)[i]
matching_row <- grep(name, coordsmedian.parietal.habitatfiltered_cleaned$scientificName, value = TRUE) # searching for matching rows
if (length(matching_row) > 0) {
bayes_habitat$parietal_eye[i] <- coordsmedian.parietal.habitatfiltered_cleaned$parietal_eye[grep(name, coordsmedian.parietal.habitatfiltered_cleaned$scientificName)][1] # add information about the parietal eye state to the new created column
}
}
rownames(bayes_habitat)<- gsub(" ", "_", rownames(bayes_habitat)) # substitution
#saveRDS(bayes_habitat, file= "C:/Users/Daniel/results_R/Desktop/bayes_habitat.RDS")
#bayes_habitat <- readRDS("C:/Users/Daniel/Desktop/results_R/bayes_habitat.RDS")
Evaluate the results and plot them.
bayes_result_medianLat <- threshBayes(tree_rescaled, bayes_medianLat, types = c("discrete", "continuous"), trait = 2, burnin= 0.5, ngen=100000) # calculate with "threshBayes" the dependence to "meanLat" and the parietal eye state using the phylogenetic tree
plot(bayes_result_medianLat)
plot(density(bayes_result_medianLat))
#saveRDS(bayes_result_medianLat, file="C:/Users/Daniel/Desktop/results_R/bayes_result_medianLat.RDS")
bayes_result_medianLat <- readRDS("C:/Users/Daniel/Desktop/results_R/bayes_result_medianLat.RDS")
plot(bayes_result_medianLat)
Evaluate the results and plot them.
rownames(bayes_medianLat_america) <- gsub(" ", "_", rownames(bayes_medianLat_america)) # substitution
bayes_result_medianLat_america <- threshBayes(tree_rescaled_america, bayes_medianLat_america, types = c("discrete", "continuous"), trait = 2, burnin= 0.5, ngen=100000) # calculate with threshBayes the dependence to "meanLat" and the parietal eye state of American species using the phylogenetic tree
plot(bayes_result_medianLat_america)
plot(density(bayes_result_medianLat_america))
#saveRDS(bayes_result_medianLat_america, file="C:/Users/Daniel/Desktop/results_R/bayes_result_medianLat_america.RDS")
bayes_result_medianLat_america <- readRDS("C:/Users/Daniel/Desktop/results_R/bayes_result_medianLat_america.RDS")
bayes_result_medianLat_america <- readRDS("C:/Users/Daniel/Desktop/results_R/bayes_result_medianLat_america.RDS")
plot(bayes_result_medianLat_america)
Evaluate the results and plot them.
bayes_circadian <- readRDS("C:/Users/Daniel/Desktop/results_R/circadian.RDS")
bayes_result_circadian <- threshBayes(tree_squamata_circadian, bayes_circadian, types = c("discrete", "discrete"), trait = 2, burnin= 0.5, ngen=100000) # calculate with "threshBayes" the dependence to "meanLat" and the parietal eye state using the phylogenetic tree
plot(bayes_result_circadian)
plot(density(bayes_result_circadian))
#saveRDS(bayes_result_circadian, file="C:/Users/Daniel/Desktop/results_R/bayes_result_circadian.RDS")
bayes_result_circadian <- readRDS("C:/Users/Daniel/Desktop/results_R/bayes_result_circadian.RDS")
plot(bayes_result_circadian)
Evaluate the results and plot them.
bayes_habitat <- readRDS("C:/Users/Daniel/Desktop/results_R/bayes_habitat.RDS")
bayes_result_habitat <- threshBayes(tree_squamata_habitat, bayes_habitat, types = c("discrete", "discrete"), trait = 2, burnin= 0.5, ngen=100000) # calculate with "threshBayes" the dependence to "meanLat" and the parietal eye state using the phylogenetic tree
plot(bayes_result_habitat)
plot(density(bayes_result_habitat))
saveRDS(bayes_result_habitat, file="C:/Users/Daniel/Desktop/read in data/bayes_result_habitat.RDS")
bayes_result_habitat <- readRDS("C:/Users/Daniel/Desktop/results_R/bayes_result_habitat.RDS")
plot(bayes_result_habitat)
The correlation coefficient can be calculated with a t-test for its significance It can be calculated with the following formula:
t_value_bayes_median <- r * sqrt((n - 2) / (1 - r^2))
# for |r| = correlation value of "threshBayes" and n= sample size
The correlation coefficient can be calculated with a t-test for its significance
#n= 415, |r|= 0.00038
t_value_bayes_medianLat <- 0.00038 * sqrt((415 - 2) / (1 - (0.00038)^2))
t_value_bayes_medianLat
## [1] 0.007722513
The correlation coefficient can be calculated with a t-test for its significance
#n= 113, |r|= 0.27716
t_value_bayes_medianLat_america <- 0.27716 * sqrt((113 - 2) / (1 - (0.27716)^2))
t_value_bayes_medianLat_america
## [1] 3.039123
The correlation coefficient can be calculated with a t-test for its significance
#n= 120, |r|= 0.34057
t_value_bayes_circadian <-0.34057 * sqrt((120 - 2) / (1 - (0.34057)^2))
t_value_bayes_circadian
## [1] 3.93476
The correlation coefficient can be calculated with a t-test for its significance
#n= 100, |r|= -0.28822
t_value_bayes_habitat1 <-0.28822 * sqrt((100 - 2) / (1 - (0.28822)^2))
t_value_bayes_habitat1
## [1] 2.979677