library(zipcodeR) # This package is needed for population density comparison
## Warning: package 'zipcodeR' was built under R version 4.1.2
First we read in the data, check column names, and create new matrices for the monthly data concerning live bat observations and bat drownings.
batdata <- read.csv("master.csv.1.7.2016_dup.csv", header=TRUE, row.names = 1, stringsAsFactors = TRUE)
batdata <- batdata[-which(duplicated(batdata) == TRUE),] # duplicated entry
colnames(batdata)
## [1] "Response.Year" "Country"
## [3] "Zip.Code" "Home.Area"
## [5] "Home.Area.Other" "Pool.Type"
## [7] "X..Months.in.Use" "January..In.Use."
## [9] "February..In.Use." "March..In.Use."
## [11] "April..In.Use." "May..In.Use."
## [13] "June..In.Use." "July..In.Use."
## [15] "August..In.Use." "September..In.Use."
## [17] "October..In.Use." "November..In.Use."
## [19] "December..In.Use." "Pool.Shape"
## [21] "Pool.Length..ft." "Pool.Width..ft."
## [23] "Circular.Pool.Diameter..ft." "Surface.Area..ft.2."
## [25] "Above.Below.Ground" "Liner.Type"
## [27] "Obstruction.Type" "Obstruction.Type.Other"
## [29] "Obstruction.Height..ft." "Obstruction.Distance..ft."
## [31] "Pool.Lit...Night" "How.Often.Around.Pool"
## [33] "Level.of.Wildlife.Observance" "Observe.Wildlife"
## [35] "Dawn..Obs.Wildlife." "Daytime..Obs.Wildlife."
## [37] "Dusk..Obs.Wildlife." "Nighttime..Obs.Wildlife."
## [39] "January..Obs.Wildlife." "February..Obs.Wildlife."
## [41] "March..Obs.Wildlife." "April..Obs.Wildlife."
## [43] "May..Obs.Wildlife." "June..Obs.Wildlife."
## [45] "July..Obs.Wildlife." "August..Obs.Wildlife."
## [47] "September..Obs.Wildlife." "October..Obs.Wildlife."
## [49] "November..Obs.Wildlife." "December..Obs.Wildlife."
## [51] "Wildlife.Observed" "Observe.Bats."
## [53] "Observe.Bats...Y.N." "Dawn..Obs.Bats."
## [55] "Daytime..Obs.Bats." "Dusk..Obs.Bats."
## [57] "Nighttime..Obs.Bats." "January..Obs.Bats."
## [59] "February..Obs.Bats." "March..Obs.Bats."
## [61] "April..Obs.Bats." "May..Obs.Bats."
## [63] "June..Obs.Bats." "July..Obs.Bats."
## [65] "August..Obs.Bats." "September..Obs.Bats."
## [67] "October..Obs.Bats." "November..Obs.Bats."
## [69] "December..Obs.Bats." "Observe.Bats.Drink."
## [71] "Bat.Mortality" "Bat.Mortality..Y.N."
## [73] "January..Bat.Mortality." "February..Bat.Mortality."
## [75] "March..Bat.Mortality." "April..Bat.Mortality."
## [77] "May..Bat.Mortality." "June..Bat.Mortality."
## [79] "July..Bat.Mortality." "August..Bat.Mortality."
## [81] "September..Bat.Mortality." "October..Bat.Mortality."
## [83] "November..Bat.Mortality." "December..Bat.Mortality."
## [85] "Distance.to.Natural.Water" "Bat.Type"
## [87] "Heard.of.Management.Action" "Implemented.Management.Action"
## [89] "Management.Action" "ZIP"
## [91] "Area" "State"
## [93] "Temp.Group" "Average.Summer.Temp..deg.C."
use <- batdata[,8:19]
observations <- batdata[,58:69]
drownings <- batdata[,73:84]
use[is.na(use)] <- 0 # fill out data with 0's where NAs (no responses) are present
observations[is.na(observations)] <- 0 # fill out data with 0's where NAs (no responses) are present
drownings[is.na(drownings)] <- 0 # fill out data with 0's where NAs (no responses) are present
table(rowSums(observations)) # effective year length
##
## 0 1 2 3 4 5 6 7 8 9 12
## 117 37 77 115 87 45 27 12 5 3 4
Preliminary data analysis. Are the cases in which the respondents seldom visited the pool important? Do people use the pool more frequently than they observe bats (last line gives proportion of cases in which this obtains)?
rmx <- which(batdata$How.Often.Around.Pool == "Few times")
rowSums(observations[rmx,])
## 3 4 10 11 33 90 101 163 171 204 240 241 262 271 281
## 4 4 7 3 3 5 0 0 7 7 0 5 3 1 3
mean(rowSums(observations[rmx,]))
## [1] 3.466667
mean(rowSums(observations))
## [1] 2.778828
length(which(rowSums(use) - rowSums(observations) > 0)) / length(rowSums(use))
## [1] 0.879017
How large is the average pool in the data set?
mean(batdata$Surface.Area..ft.2., na.rm = TRUE)
## [1] 678.4202
Here we look at how many cases there were in which no monthly observations of live bats were made at all, and in how many of these had drowned bats were reported.
noObs <- which(rowSums(observations) == 0)
noObs <- unname(noObs)
length(noObs) # cases in which no monthly observations of live bats were made at all
## [1] 117
o <- rowSums(observations)[-noObs]
d <- rowSums(drownings)[-noObs]
names(which(o - d == 0)) # cases in which drowned bats were observed in every month live bats were
## [1] "86" "167" "171" "234" "273" "315" "421" "498" "528"
which(rowSums(drownings)[noObs] > 0) # cases in which drowned bats were reported but no live ones
## 163 206 468 492 493 513
## 44 56 107 110 111 113
Now we initiate the vectors that will contain mortality data responses. “lambdas” is the vector of λ parameters from Poisson probability mass function. “yearlength” is the effective year length (as defined in text), namely the number of months in the year during which bats were active (+1, for statistical reasons: see text). “mortality” is the estimated annual mortality by drowning for each pool individually, calculated as nλ.
lambdas <- rep(NA, length = nrow(observations))
yearlength <- rep(NA, length = nrow(observations))
mortality <- rep(NA, length = nrow(observations))
for (i in 1:nrow(observations)) {
q <- which(observations[i,] != 0)
if (length(q) > 0) { # this covers cases in which bats were observed (p > 0)
qmin <- min(q)
qmax <- max(q)
yearlength[i] <- qmax - qmin + 1 + 1 # the first +1 is arithmetic, the second +1 is our workaround for p0 = 0
d <- drownings[i,qmin:qmax] # restrict contemplation of drownings to those same months
if (sum(d) > 0) {
p0 <- (yearlength[i] - sum(d))/yearlength[i]
lambdas[i] <- -log(p0)
mortality[i] <- (yearlength[i]-1) * lambdas[i]
} else { # if no drowned bats were observed
mortality[i] <- 0
}
} else { # for cases in which no live bats were ever observed
q <- which(drownings[i,] != 0)
if (length(q) > 0) { # for cases in which drowned bats were reported (without live ones)
qmin <- min(q)
qmax <- max(q)
yearlength[i] <- qmax - qmin + 1 + 1
d <- drownings[i,qmin:qmax]
p0 <- (yearlength[i] - sum(d))/yearlength[i]
lambdas[i] <- -log(p0)
mortality[i] <- (yearlength[i]-1) * lambdas[i]
} else { # for cases in which neither live bats nor drowned bats were observed
mortality[i] <- 0
}
}
}
Here we can examine effective year length (-1, so it corresponds with calendar months),
table(yearlength - 1)
##
## 1 2 3 4 5 6 7 8 9 10 12
## 43 73 116 84 46 29 11 6 4 2 4
summary(mortality)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.0000 0.2497 0.0000 14.5561
summary(mortality[mortality > 0])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.6931 0.8630 0.8926 2.0320 2.0188 14.5561
summary(lambdas)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.1542 0.2231 0.2877 0.5089 0.6931 2.0794 464
As the vast majority of respondents reported no drowned bats (mortality = 0), we divide the cases into two outcomes (mortality = 0, mortality > 0) and conduct logistic regression on parameters explained in text.
mortalityLogit <- ceiling(mortality)
length(mortality == 0) == length(mortalityLogit == 0) # make sure there are no rounding errors
## [1] TRUE
mortalityLogit[mortalityLogit > 0] <- 1
mortalityLogit # check result
## [1] 1 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
## [38] 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [75] 1 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [112] 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
## [149] 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 1 1 0 0 0 0 0 0 0 0 0 1 1 0 0 0
## [186] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0
## [223] 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
## [260] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## [297] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## [334] 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
## [371] 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 1 0 1 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 1
## [408] 1 0 0 1 0 0 0 0 0 0 1 0 0 0 0 1 1 0 1 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0
## [445] 0 1 0 0 0 0 1 0 0 1 0 0 0 0 0 0 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1
## [482] 1 0 0 0 1 0 0 1 1 0 0 0 0 1 0 0 0 1 0 0 0 0 1 0 0 0 0 0 1 1 0 0 0 1 0 0 0
## [519] 0 0 0 0 0 0 1 0 0 0 0
Now test mortality against pool surface area. We use the natural logarithm of surface area (the default base of the function log()
is e) because the distribution is skewed (see text).
pred <- glm(mortalityLogit ~ log(batdata$Surface.Area..ft.2.), family = binomial(link = "logit"))
summary(pred)
##
## Call:
## glm(formula = mortalityLogit ~ log(batdata$Surface.Area..ft.2.),
## family = binomial(link = "logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.6024 -0.5140 -0.5079 -0.5013 2.1515
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.53705 1.19083 -1.291 0.197
## log(batdata$Surface.Area..ft.2.) -0.07084 0.19348 -0.366 0.714
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 279.41 on 375 degrees of freedom
## Residual deviance: 279.28 on 374 degrees of freedom
## (153 observations deleted due to missingness)
## AIC: 283.28
##
## Number of Fisher Scoring iterations: 4
Now test mortality against distance to obstruction:
pred <- glm(mortalityLogit ~ batdata$Obstruction.Distance..ft., family = binomial(link = "logit"))
summary(pred)
##
## Call:
## glm(formula = mortalityLogit ~ batdata$Obstruction.Distance..ft.,
## family = binomial(link = "logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.6746 -0.5368 -0.5192 -0.4987 2.0900
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.07887 0.26322 -7.898 2.84e-15 ***
## batdata$Obstruction.Distance..ft. 0.01429 0.01799 0.794 0.427
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 315.97 on 410 degrees of freedom
## Residual deviance: 315.37 on 409 degrees of freedom
## (118 observations deleted due to missingness)
## AIC: 319.37
##
## Number of Fisher Scoring iterations: 4
Now test mortality against height of obstruction (creating first an appropriate numerical vector):
pred <- glm(mortalityLogit ~ batdata$Obstruction.Height..ft., family = binomial(link = "logit"))
summary(pred)
##
## Call:
## glm(formula = mortalityLogit ~ batdata$Obstruction.Height..ft.,
## family = binomial(link = "logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.6868 -0.5369 -0.5294 -0.5255 2.0266
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.924214 0.198756 -9.681 <2e-16 ***
## batdata$Obstruction.Height..ft. 0.002000 0.005233 0.382 0.702
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 324.10 on 412 degrees of freedom
## Residual deviance: 323.96 on 411 degrees of freedom
## (116 observations deleted due to missingness)
## AIC: 327.96
##
## Number of Fisher Scoring iterations: 4
Here we test the combination of distance to and height of obstruction:
pred <- glm(mortalityLogit ~ batdata$Obstruction.Distance..ft. + batdata$Obstruction.Height..ft., family = binomial(link = "logit"))
summary(pred)
##
## Call:
## glm(formula = mortalityLogit ~ batdata$Obstruction.Distance..ft. +
## batdata$Obstruction.Height..ft., family = binomial(link = "logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.7614 -0.5447 -0.5193 -0.4914 2.1124
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.1399800 0.2865090 -7.469 8.07e-14 ***
## batdata$Obstruction.Distance..ft. 0.0196417 0.0196184 1.001 0.317
## batdata$Obstruction.Height..ft. 0.0006797 0.0057797 0.118 0.906
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 314.30 on 404 degrees of freedom
## Residual deviance: 313.19 on 402 degrees of freedom
## (124 observations deleted due to missingness)
## AIC: 319.19
##
## Number of Fisher Scoring iterations: 4
To examine the influence of pool area lighting, we use Fisher’s exact test (as we have a contingency table). The last two columns carry data, not NA’s.
fisher.test(table(as.factor(mortalityLogit), batdata$Pool.Lit...Night)[,2:3])
##
## Fisher's Exact Test for Count Data
##
## data: table(as.factor(mortalityLogit), batdata$Pool.Lit...Night)[, 2:3]
## p-value = 0.1231
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.8454741 2.8475919
## sample estimates:
## odds ratio
## 1.570599
To examine the influence of liner type, we use Fisher’s exact test (as we have a contingency table). The last two columns carry data, not NA’s.
fisher.test(table(as.factor(mortalityLogit), batdata$Liner.Type)[,2:3])
##
## Fisher's Exact Test for Count Data
##
## data: table(as.factor(mortalityLogit), batdata$Liner.Type)[, 2:3]
## p-value = 0.04313
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 1.001235 3.163464
## sample estimates:
## odds ratio
## 1.76647
Whether respondents considered themselves to be highly observant of nature or not is more closely associated with bat mortality in pools:
pred <- glm(mortalityLogit ~ batdata$Level.of.Wildlife.Observance, family = binomial(link = "logit"))
summary(pred)
##
## Call:
## glm(formula = mortalityLogit ~ batdata$Level.of.Wildlife.Observance,
## family = binomial(link = "logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.5630 -0.5630 -0.5226 -0.4494 2.2636
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.56162 0.42929 -5.967 2.42e-09 ***
## batdata$Level.of.Wildlife.Observance 0.07997 0.05232 1.528 0.126
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 392.90 on 523 degrees of freedom
## Residual deviance: 390.42 on 522 degrees of freedom
## (5 observations deleted due to missingness)
## AIC: 394.42
##
## Number of Fisher Scoring iterations: 4
To examine whether the “naturalness” of the surrounding environment, as inferred from the population density of the zip code of the respondent, is related to bat mortality, we return to logistic regression. Note that in one zip code, 92384, a P.O. Box in Inyo County, California, has a population so low, compared to its area, that it rounds to 0 in the data set. (There are surprisingly two responses for different pools: one commercial, the other private.) Hence, we record it as NA. No drowned bats were observed in this zip code. Note also that 34 respondents gave an incorrect zip code, or their zip code had no associated information in the database. Population density for the area surrounding these 36 respondents is unknown (NA). The mean mortality for pools from 61 duplicated zip codes was 0.23, nearly identical to the overall average.
densities1 <- rep(NA, length = nrow(batdata))
zip_code_all <- zip_code_db
for (i in 1:length(densities1)) {
x <- which(zip_code_all$zipcode == batdata$Zip.Code[i]) # gives integer(0) if no match, hence:
if (length(x) > 0) {
densities1[i] <- zip_code_all$population_density[x]
}
}
densities1[3:4] <- NA
summary(densities1)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 3 176 674 1599 2318 30841 36
mean(mortality[duplicated(batdata$Zip.Code)])
## [1] 0.2337046
pred <- glm(mortalityLogit ~ log(densities1), family = binomial(link = "logit"))
summary(pred)
##
## Call:
## glm(formula = mortalityLogit ~ log(densities1), family = binomial(link = "logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.7335 -0.5483 -0.4911 -0.4526 2.1801
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.01137 0.49739 -2.033 0.0420 *
## log(densities1) -0.14946 0.07908 -1.890 0.0588 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 372.95 on 492 degrees of freedom
## Residual deviance: 369.43 on 491 degrees of freedom
## (36 observations deleted due to missingness)
## AIC: 373.43
##
## Number of Fisher Scoring iterations: 4
Here we examine distance to water sources using
water <- batdata$Distance.to.Natural.Water
water[water == "Unsure"] <- NA
water <- factor(water, c("<1 mile", "1-5 miles", "5-10 miles", "10-15 miles", ">15 miles"), ordered = TRUE)
a <- which(water == "<1 mile")
b <- which(water == "1-5 miles")
c <- which(water == "5-10 miles")
d <- which(water == "10-15 miles")
e <- which(water == ">15 miles")
ma <- mean(mortality[a])
mna <- sum(mortalityLogit[a])
mb <- mean(mortality[b])
mnb <- sum(mortalityLogit[b])
mc <- mean(mortality[c])
mnc <- sum(mortalityLogit[c])
md <- mean(mortality[d])
mnd <- sum(mortalityLogit[d])
me <- mean(mortality[e])
mne <- sum(mortalityLogit[e])
mna/length(a) # proportion of pools with drowned bats for water category "<1 mile"
## [1] 0.1082474
mnb/length(b) # proportion of pools with drowned bats for water category "1-5 miles"
## [1] 0.1288344
mnc/length(c) # proportion of pools with drowned bats for water category "5-10 miles"
## [1] 0.1333333
mnd/length(d) # proportion of pools with drowned bats for water category "10-15 miles"
## [1] 0.05
mne/length(e) # proportion of pools with drowned bats for water category ">15 miles"
## [1] 0.173913
The above numbers allow us to apply Spearman rank correlation:
waterDistanceRank <- c(1,2,3,4,5)
mortalityRank <- c(2,3,4,1,5)
cor.test(waterDistanceRank, mortalityRank, method = "spearman")
##
## Spearman's rank correlation rho
##
## data: waterDistanceRank and mortalityRank
## S = 12, p-value = 0.5167
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.4
For the 42 sections whose vertebrate fossil content was studied by Schaal & Möller (1991), we enter the proportion of bat and bird fossils, and the total fossil counts, and estimate the counts of bats and birds. We then use this information to test whether any one section has a composition that diverges significantly from the average (by the binomial test).
batProc <- c(0,0,0,0,0,0.9,2.4,1.2,2.2,0.7,1,0.9,0.3,1.2,0,0,0,0,0.8,3.7,0,0,0.7,0.9,0,2.3,1.5,1.2,1.3,2.3,0,2.5,3.2,1.7,0,1.7,0,1.2,3,0,0,0.9)
birdProc <- c(0,0,4.4,0.6,1.1,3.7,0,3.1,1.6,5,5.1,1.9,1.1,5,0,0,2.4,0,3.2,0,0,0,1.6,1.5,0,3.1,0,1.2,0,1.7,0,0,3.2,1.7,1.3,0,3.8,3.8,0,0.7,0,0.9)
No <- c(73,85,293,163,425,106,41,160,306,258,98,101,261,80,14,44,81,183,123,27,52,60,530,328,30,128,65,81,75,169,10,40,31,57,144,112,26,77,33,136,29,111)
batCount <- round(No*batProc/100)
birdCount <- round(No*birdProc/100)
pbat <- sum(batCount)/sum(No)
pbat # this is the overall proportion of bat fossils in all sections
## [1] 0.008387343
pbird <- sum(birdCount)/sum(No)
pbird # this is the overall proportion of bird fossils in all sections
## [1] 0.01849028
Now we estimate the probabilities of the actual bat fossil counts
batProbs <- rep(NA, length = length(No))
for (i in 1:length(batProbs)) {
q <- binom.test(x = batCount[i], n = No[i], p = pbat) # we take p to be the mean total proportion of bats
batProbs[i] <- q$p.value
}
batProbs
## [1] 1.00000000 1.00000000 0.18713490 0.65067797 0.05660435 0.59049564
## [7] 0.29201423 0.38847828 0.01568827 1.00000000 0.56195166 0.57288166
## [13] 0.73058201 0.49024097 1.00000000 1.00000000 1.00000000 0.41354956
## [19] 1.00000000 0.20340851 1.00000000 1.00000000 1.00000000 0.75781209
## [25] 1.00000000 0.09354975 0.42159238 0.49451649 0.46831474 0.05494669
## [31] 1.00000000 0.28602589 0.22979930 0.38127522 0.63783899 0.24185898
## [37] 1.00000000 0.47719619 0.24266499 0.63411752 1.00000000 0.60738324
which(batProbs < 0.05/20)
## integer(0)
Now we estimate the probabilities of the actual bird fossil counts
birdProbs <- rep(NA, length = length(No))
for (i in 1:length(birdProbs)) {
q <- binom.test(x = birdCount[i], n = No[i], p = pbird) # we take p to be the mean total proportion of bats
birdProbs[i] <- q$p.value
}
birdProbs
## [1] 0.647890365 0.412972349 0.003585067 0.379822132 0.370375447 0.133998020
## [7] 1.000000000 0.226662972 1.000000000 0.001188608 0.035790286 0.711114743
## [13] 0.640993924 0.061312412 1.000000000 1.000000000 0.663490823 0.054234119
## [19] 0.294771984 1.000000000 1.000000000 0.631127374 0.745531322 0.837945433
## [25] 1.000000000 0.304850171 0.635991470 1.000000000 0.651497800 1.000000000
## [31] 1.000000000 1.000000000 0.439297138 1.000000000 1.000000000 0.277912833
## [37] 0.384455089 0.170942964 1.000000000 0.526010724 1.000000000 0.727049074
which(birdProbs < 0.05/20)
## [1] 10
We calculate the number of individuals based on the percentages and the total counts given in Schaal & Möller (1991). We estimate that fishes are 115x as common as bats.
# Fish counts
Bowfin <- c(76.7,55.2,53.5,46.0,28.9,26.4,58.5,73.1,55.8,52.3,50.0,71.2,52.4,43.7,21.4,13.6,14.8,28.4,63.4,44.4,7.6,20.0,33.7,40.2,36.6,49.2,38.4,35.8,58.6,71,30,47.5,51.6,57.8,59,69.6,38.4,37.6,33.3,60.2,44.8,64.8)
Thauma <- c(1.3,2.3,1.3,0,0.2,0,21.9,12.5,28.7,29.8,23.4,13.8,32.1,41.2,57.1,65.9,67.9,61.7,27.6,51.8,90.3,71.6,56.4,48.4,46.6,30.4,50.7,51.8,25.3,2.9,70,45.8,19.3,22.8,22.2,17.8,42.3,40.2,54.5,0,0,0)
Gar <- c(15,7,15.3,16.5,8.9,5.6,17,8.7,10.4,10,15.3,9.9,11.8,8.7,14.2,9,9.8,5.4,3.2,0,1.9,6.6,5.2,5.7,13.3,11.7,9.2,7.4,10.6,18.3,0,2.5,22.5,12.2,11.8,9.8,11.5,14.2,3,10.2,13.7,13.5)
Other <- c(5.4,35.2,24.5,36.1,59.2,60.3,0,0.6,0,0.7,1,1.9,8.3,0,7.1,9,4.9,3.8,0.8,0,0,0,1.1,1.5,0,0.7,0,1.2,0,0.5,0,2.5,0,1.7,4.8,0,0,1.2,0,27.9,37.0,19.8)
fishProc <- Bowfin + Thauma + Gar + Other
fishCount <- round(No*fishProc/100)
sum(fishCount)/sum(batCount) # Proportion of all fish compared to bats
## [1] 114.8636