library(zipcodeR) # This package is needed for population density comparison

## Warning: package 'zipcodeR' was built under R version 4.1.2

Bat pool drownings

First we read in the data, check column names, and create new matrices for the monthly data concerning live bat observations and bat drownings.

batdata <- read.csv("master.csv.1.7.2016_dup.csv", header=TRUE, row.names = 1, stringsAsFactors = TRUE)
batdata <- batdata[-which(duplicated(batdata) == TRUE),] # duplicated entry
colnames(batdata)

##  [1] "Response.Year"                 "Country"                      
##  [3] "Zip.Code"                      "Home.Area"                    
##  [5] "Home.Area.Other"               "Pool.Type"                    
##  [7] "X..Months.in.Use"              "January..In.Use."             
##  [9] "February..In.Use."             "March..In.Use."               
## [11] "April..In.Use."                "May..In.Use."                 
## [13] "June..In.Use."                 "July..In.Use."                
## [15] "August..In.Use."               "September..In.Use."           
## [17] "October..In.Use."              "November..In.Use."            
## [19] "December..In.Use."             "Pool.Shape"                   
## [21] "Pool.Length..ft."              "Pool.Width..ft."              
## [23] "Circular.Pool.Diameter..ft."   "Surface.Area..ft.2."          
## [25] "Above.Below.Ground"            "Liner.Type"                   
## [27] "Obstruction.Type"              "Obstruction.Type.Other"       
## [29] "Obstruction.Height..ft."       "Obstruction.Distance..ft."    
## [31] "Pool.Lit...Night"              "How.Often.Around.Pool"        
## [33] "Level.of.Wildlife.Observance"  "Observe.Wildlife"             
## [35] "Dawn..Obs.Wildlife."           "Daytime..Obs.Wildlife."       
## [37] "Dusk..Obs.Wildlife."           "Nighttime..Obs.Wildlife."     
## [39] "January..Obs.Wildlife."        "February..Obs.Wildlife."      
## [41] "March..Obs.Wildlife."          "April..Obs.Wildlife."         
## [43] "May..Obs.Wildlife."            "June..Obs.Wildlife."          
## [45] "July..Obs.Wildlife."           "August..Obs.Wildlife."        
## [47] "September..Obs.Wildlife."      "October..Obs.Wildlife."       
## [49] "November..Obs.Wildlife."       "December..Obs.Wildlife."      
## [51] "Wildlife.Observed"             "Observe.Bats."                
## [53] "Observe.Bats...Y.N."           "Dawn..Obs.Bats."              
## [55] "Daytime..Obs.Bats."            "Dusk..Obs.Bats."              
## [57] "Nighttime..Obs.Bats."          "January..Obs.Bats."           
## [59] "February..Obs.Bats."           "March..Obs.Bats."             
## [61] "April..Obs.Bats."              "May..Obs.Bats."               
## [63] "June..Obs.Bats."               "July..Obs.Bats."              
## [65] "August..Obs.Bats."             "September..Obs.Bats."         
## [67] "October..Obs.Bats."            "November..Obs.Bats."          
## [69] "December..Obs.Bats."           "Observe.Bats.Drink."          
## [71] "Bat.Mortality"                 "Bat.Mortality..Y.N."          
## [73] "January..Bat.Mortality."       "February..Bat.Mortality."     
## [75] "March..Bat.Mortality."         "April..Bat.Mortality."        
## [77] "May..Bat.Mortality."           "June..Bat.Mortality."         
## [79] "July..Bat.Mortality."          "August..Bat.Mortality."       
## [81] "September..Bat.Mortality."     "October..Bat.Mortality."      
## [83] "November..Bat.Mortality."      "December..Bat.Mortality."     
## [85] "Distance.to.Natural.Water"     "Bat.Type"                     
## [87] "Heard.of.Management.Action"    "Implemented.Management.Action"
## [89] "Management.Action"             "ZIP"                          
## [91] "Area"                          "State"                        
## [93] "Temp.Group"                    "Average.Summer.Temp..deg.C."

use <- batdata[,8:19]
observations <- batdata[,58:69]
drownings <- batdata[,73:84]
use[is.na(use)] <- 0 # fill out data with 0's where NAs (no responses) are present
observations[is.na(observations)] <- 0 # fill out data with 0's where NAs (no responses) are present
drownings[is.na(drownings)] <- 0 # fill out data with 0's where NAs (no responses) are present
table(rowSums(observations)) # effective year length

## 
##   0   1   2   3   4   5   6   7   8   9  12 
## 117  37  77 115  87  45  27  12   5   3   4

Preliminary data analysis. Are the cases in which the respondents seldom visited the pool important? Do people use the pool more frequently than they observe bats (last line gives proportion of cases in which this obtains)?

rmx <- which(batdata$How.Often.Around.Pool == "Few times")
rowSums(observations[rmx,])

##   3   4  10  11  33  90 101 163 171 204 240 241 262 271 281 
##   4   4   7   3   3   5   0   0   7   7   0   5   3   1   3

mean(rowSums(observations[rmx,]))

## [1] 3.466667

mean(rowSums(observations))

## [1] 2.778828

length(which(rowSums(use) - rowSums(observations) > 0)) / length(rowSums(use))

## [1] 0.879017

How large is the average pool in the data set?

mean(batdata$Surface.Area..ft.2., na.rm = TRUE)

## [1] 678.4202

Here we look at how many cases there were in which no monthly observations of live bats were made at all, and in how many of these had drowned bats were reported.

noObs <- which(rowSums(observations) == 0)
noObs <- unname(noObs)
length(noObs) # cases in which no monthly observations of live bats were made at all

## [1] 117

o <- rowSums(observations)[-noObs]
d <- rowSums(drownings)[-noObs]
names(which(o - d == 0)) # cases in which drowned bats were observed in every month live bats were

## [1] "86"  "167" "171" "234" "273" "315" "421" "498" "528"

which(rowSums(drownings)[noObs] > 0) # cases in which drowned bats were reported but no live ones

## 163 206 468 492 493 513 
##  44  56 107 110 111 113

Now we initiate the vectors that will contain mortality data responses. “lambdas” is the vector of \(\lambda\) parameters from Poisson probability mass function. “yearlength” is the effective year length (as defined in text), namely the number of months in the year during which bats were active (+1, for statistical reasons: see text). “mortality” is the estimated annual mortality by drowning for each pool individually, calculated as \(n \lambda\).

lambdas <- rep(NA, length = nrow(observations))
yearlength <- rep(NA, length = nrow(observations))
mortality <- rep(NA, length = nrow(observations))
for (i in 1:nrow(observations)) {
  q <- which(observations[i,] != 0)
  if (length(q) > 0) { # this covers cases in which bats were observed (p > 0)
    qmin <- min(q)
    qmax <- max(q)
    yearlength[i] <- qmax - qmin + 1 + 1 # the first +1 is arithmetic, the second +1 is our workaround for p0 = 0
    d <- drownings[i,qmin:qmax] # restrict contemplation of drownings to those same months
    if (sum(d) > 0) {
      p0 <- (yearlength[i] - sum(d))/yearlength[i]
      lambdas[i] <- -log(p0)
      mortality[i] <- (yearlength[i]-1) * lambdas[i]
    } else { # if no drowned bats were observed
      mortality[i] <- 0
    }
  } else { # for cases in which no live bats were ever observed
    q <- which(drownings[i,] != 0)
    if (length(q) > 0) { # for cases in which drowned bats were reported (without live ones)
      qmin <- min(q)
      qmax <- max(q)
      yearlength[i] <- qmax - qmin + 1 + 1
      d <- drownings[i,qmin:qmax]
      p0 <- (yearlength[i] - sum(d))/yearlength[i]
      lambdas[i] <- -log(p0)
      mortality[i] <- (yearlength[i]-1) * lambdas[i]
    } else { # for cases in which neither live bats nor drowned bats were observed
      mortality[i] <- 0
    }
  }
}

Here we can examine effective year length (-1, so it corresponds with calendar months),

table(yearlength - 1)

## 
##   1   2   3   4   5   6   7   8   9  10  12 
##  43  73 116  84  46  29  11   6   4   2   4

summary(mortality)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.0000  0.0000  0.2497  0.0000 14.5561

summary(mortality[mortality > 0])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.6931  0.8630  0.8926  2.0320  2.0188 14.5561

summary(lambdas)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.1542  0.2231  0.2877  0.5089  0.6931  2.0794     464

As the vast majority of respondents reported no drowned bats (mortality = 0), we divide the cases into two outcomes (mortality = 0, mortality > 0) and conduct logistic regression on parameters explained in text.

mortalityLogit <- ceiling(mortality)
length(mortality == 0) == length(mortalityLogit == 0) # make sure there are no rounding errors

## [1] TRUE

mortalityLogit[mortalityLogit > 0] <- 1
mortalityLogit # check result

##   [1] 1 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
##  [38] 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
##  [75] 1 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [112] 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
## [149] 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 1 1 0 0 0 0 0 0 0 0 0 1 1 0 0 0
## [186] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0
## [223] 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
## [260] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## [297] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## [334] 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
## [371] 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 1 0 1 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 1
## [408] 1 0 0 1 0 0 0 0 0 0 1 0 0 0 0 1 1 0 1 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0
## [445] 0 1 0 0 0 0 1 0 0 1 0 0 0 0 0 0 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1
## [482] 1 0 0 0 1 0 0 1 1 0 0 0 0 1 0 0 0 1 0 0 0 0 1 0 0 0 0 0 1 1 0 0 0 1 0 0 0
## [519] 0 0 0 0 0 0 1 0 0 0 0

Now test mortality against pool surface area. We use the natural logarithm of surface area (the default base of the function log() is \(e\)) because the distribution is skewed (see text).

pred <- glm(mortalityLogit ~ log(batdata$Surface.Area..ft.2.), family = binomial(link = "logit"))
summary(pred)

## 
## Call:
## glm(formula = mortalityLogit ~ log(batdata$Surface.Area..ft.2.), 
##     family = binomial(link = "logit"))
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.6024  -0.5140  -0.5079  -0.5013   2.1515  
## 
## Coefficients:
##                                  Estimate Std. Error z value Pr(>|z|)
## (Intercept)                      -1.53705    1.19083  -1.291    0.197
## log(batdata$Surface.Area..ft.2.) -0.07084    0.19348  -0.366    0.714
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 279.41  on 375  degrees of freedom
## Residual deviance: 279.28  on 374  degrees of freedom
##   (153 observations deleted due to missingness)
## AIC: 283.28
## 
## Number of Fisher Scoring iterations: 4

Now test mortality against distance to obstruction:

pred <- glm(mortalityLogit ~ batdata$Obstruction.Distance..ft., family = binomial(link = "logit"))
summary(pred)

## 
## Call:
## glm(formula = mortalityLogit ~ batdata$Obstruction.Distance..ft., 
##     family = binomial(link = "logit"))
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.6746  -0.5368  -0.5192  -0.4987   2.0900  
## 
## Coefficients:
##                                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                       -2.07887    0.26322  -7.898 2.84e-15 ***
## batdata$Obstruction.Distance..ft.  0.01429    0.01799   0.794    0.427    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 315.97  on 410  degrees of freedom
## Residual deviance: 315.37  on 409  degrees of freedom
##   (118 observations deleted due to missingness)
## AIC: 319.37
## 
## Number of Fisher Scoring iterations: 4

Now test mortality against height of obstruction (creating first an appropriate numerical vector):

pred <- glm(mortalityLogit ~ batdata$Obstruction.Height..ft., family = binomial(link = "logit"))
summary(pred)

## 
## Call:
## glm(formula = mortalityLogit ~ batdata$Obstruction.Height..ft., 
##     family = binomial(link = "logit"))
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.6868  -0.5369  -0.5294  -0.5255   2.0266  
## 
## Coefficients:
##                                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                     -1.924214   0.198756  -9.681   <2e-16 ***
## batdata$Obstruction.Height..ft.  0.002000   0.005233   0.382    0.702    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 324.10  on 412  degrees of freedom
## Residual deviance: 323.96  on 411  degrees of freedom
##   (116 observations deleted due to missingness)
## AIC: 327.96
## 
## Number of Fisher Scoring iterations: 4

Here we test the combination of distance to and height of obstruction:

pred <- glm(mortalityLogit ~ batdata$Obstruction.Distance..ft. + batdata$Obstruction.Height..ft., family = binomial(link = "logit"))
summary(pred)

## 
## Call:
## glm(formula = mortalityLogit ~ batdata$Obstruction.Distance..ft. + 
##     batdata$Obstruction.Height..ft., family = binomial(link = "logit"))
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.7614  -0.5447  -0.5193  -0.4914   2.1124  
## 
## Coefficients:
##                                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                       -2.1399800  0.2865090  -7.469 8.07e-14 ***
## batdata$Obstruction.Distance..ft.  0.0196417  0.0196184   1.001    0.317    
## batdata$Obstruction.Height..ft.    0.0006797  0.0057797   0.118    0.906    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 314.30  on 404  degrees of freedom
## Residual deviance: 313.19  on 402  degrees of freedom
##   (124 observations deleted due to missingness)
## AIC: 319.19
## 
## Number of Fisher Scoring iterations: 4

To examine the influence of pool area lighting, we use Fisher’s exact test (as we have a contingency table). The last two columns carry data, not NA’s.

fisher.test(table(as.factor(mortalityLogit), batdata$Pool.Lit...Night)[,2:3])

## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(as.factor(mortalityLogit), batdata$Pool.Lit...Night)[, 2:3]
## p-value = 0.1231
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  0.8454741 2.8475919
## sample estimates:
## odds ratio 
##   1.570599

To examine the influence of liner type, we use Fisher’s exact test (as we have a contingency table). The last two columns carry data, not NA’s.

fisher.test(table(as.factor(mortalityLogit), batdata$Liner.Type)[,2:3])

## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(as.factor(mortalityLogit), batdata$Liner.Type)[, 2:3]
## p-value = 0.04313
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  1.001235 3.163464
## sample estimates:
## odds ratio 
##    1.76647

Whether respondents considered themselves to be highly observant of nature or not is more closely associated with bat mortality in pools:

pred <- glm(mortalityLogit ~ batdata$Level.of.Wildlife.Observance, family = binomial(link = "logit"))
summary(pred)

## 
## Call:
## glm(formula = mortalityLogit ~ batdata$Level.of.Wildlife.Observance, 
##     family = binomial(link = "logit"))
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.5630  -0.5630  -0.5226  -0.4494   2.2636  
## 
## Coefficients:
##                                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                          -2.56162    0.42929  -5.967 2.42e-09 ***
## batdata$Level.of.Wildlife.Observance  0.07997    0.05232   1.528    0.126    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 392.90  on 523  degrees of freedom
## Residual deviance: 390.42  on 522  degrees of freedom
##   (5 observations deleted due to missingness)
## AIC: 394.42
## 
## Number of Fisher Scoring iterations: 4

To examine whether the “naturalness” of the surrounding environment, as inferred from the population density of the zip code of the respondent, is related to bat mortality, we return to logistic regression. Note that in one zip code, 92384, a P.O. Box in Inyo County, California, has a population so low, compared to its area, that it rounds to 0 in the data set. (There are surprisingly two responses for different pools: one commercial, the other private.) Hence, we record it as NA. No drowned bats were observed in this zip code. Note also that 34 respondents gave an incorrect zip code, or their zip code had no associated information in the database. Population density for the area surrounding these 36 respondents is unknown (NA). The mean mortality for pools from 61 duplicated zip codes was 0.23, nearly identical to the overall average.

densities1 <- rep(NA, length = nrow(batdata))
zip_code_all <- zip_code_db
for (i in 1:length(densities1)) {
  x <- which(zip_code_all$zipcode == batdata$Zip.Code[i]) # gives integer(0) if no match, hence:
  if (length(x) > 0) {
    densities1[i] <- zip_code_all$population_density[x]
  }
}
densities1[3:4] <- NA
summary(densities1)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##       3     176     674    1599    2318   30841      36

mean(mortality[duplicated(batdata$Zip.Code)])

## [1] 0.2337046

pred <- glm(mortalityLogit ~ log(densities1), family = binomial(link = "logit"))
summary(pred)

## 
## Call:
## glm(formula = mortalityLogit ~ log(densities1), family = binomial(link = "logit"))
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.7335  -0.5483  -0.4911  -0.4526   2.1801  
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)  
## (Intercept)     -1.01137    0.49739  -2.033   0.0420 *
## log(densities1) -0.14946    0.07908  -1.890   0.0588 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 372.95  on 492  degrees of freedom
## Residual deviance: 369.43  on 491  degrees of freedom
##   (36 observations deleted due to missingness)
## AIC: 373.43
## 
## Number of Fisher Scoring iterations: 4

Here we examine distance to water sources using

water <- batdata$Distance.to.Natural.Water
water[water == "Unsure"] <- NA
water <- factor(water, c("<1 mile", "1-5 miles", "5-10 miles", "10-15 miles", ">15 miles"), ordered = TRUE)
a <- which(water == "<1 mile")
b <- which(water == "1-5 miles")
c <- which(water == "5-10 miles")
d <- which(water == "10-15 miles")
e <- which(water == ">15 miles")
ma <- mean(mortality[a])
mna <- sum(mortalityLogit[a])
mb <- mean(mortality[b])
mnb <- sum(mortalityLogit[b])
mc <- mean(mortality[c])
mnc <- sum(mortalityLogit[c])
md <- mean(mortality[d])
mnd <- sum(mortalityLogit[d])
me <- mean(mortality[e])
mne <- sum(mortalityLogit[e])
mna/length(a) # proportion of pools with drowned bats for water category "<1 mile"

## [1] 0.1082474

mnb/length(b) # proportion of pools with drowned bats for water category "1-5 miles"

## [1] 0.1288344

mnc/length(c) # proportion of pools with drowned bats for water category "5-10 miles"

## [1] 0.1333333

mnd/length(d) # proportion of pools with drowned bats for water category "10-15 miles"

## [1] 0.05

mne/length(e) # proportion of pools with drowned bats for water category ">15 miles"

## [1] 0.173913

The above numbers allow us to apply Spearman rank correlation:

waterDistanceRank <- c(1,2,3,4,5)
mortalityRank <- c(2,3,4,1,5)
cor.test(waterDistanceRank, mortalityRank, method = "spearman")

## 
##  Spearman's rank correlation rho
## 
## data:  waterDistanceRank and mortalityRank
## S = 12, p-value = 0.5167
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho 
## 0.4

Messel flying vertebrate counts

For the 42 sections whose vertebrate fossil content was studied by Schaal & Möller (1991), we enter the proportion of bat and bird fossils, and the total fossil counts, and estimate the counts of bats and birds. We then use this information to test whether any one section has a composition that diverges significantly from the average (by the binomial test).

batProc <- c(0,0,0,0,0,0.9,2.4,1.2,2.2,0.7,1,0.9,0.3,1.2,0,0,0,0,0.8,3.7,0,0,0.7,0.9,0,2.3,1.5,1.2,1.3,2.3,0,2.5,3.2,1.7,0,1.7,0,1.2,3,0,0,0.9)
birdProc <- c(0,0,4.4,0.6,1.1,3.7,0,3.1,1.6,5,5.1,1.9,1.1,5,0,0,2.4,0,3.2,0,0,0,1.6,1.5,0,3.1,0,1.2,0,1.7,0,0,3.2,1.7,1.3,0,3.8,3.8,0,0.7,0,0.9)
No <- c(73,85,293,163,425,106,41,160,306,258,98,101,261,80,14,44,81,183,123,27,52,60,530,328,30,128,65,81,75,169,10,40,31,57,144,112,26,77,33,136,29,111)
batCount <- round(No*batProc/100)
birdCount <- round(No*birdProc/100)
pbat <- sum(batCount)/sum(No)
pbat # this is the overall proportion of bat fossils in all sections

## [1] 0.008387343

pbird <- sum(birdCount)/sum(No)
pbird # this is the overall proportion of bird fossils in all sections

## [1] 0.01849028

Now we estimate the probabilities of the actual bat fossil counts

batProbs <- rep(NA, length = length(No))
for (i in 1:length(batProbs)) {
  q <- binom.test(x = batCount[i], n = No[i], p = pbat) # we take p to be the mean total proportion of bats
  batProbs[i] <- q$p.value
}
batProbs

##  [1] 1.00000000 1.00000000 0.18713490 0.65067797 0.05660435 0.59049564
##  [7] 0.29201423 0.38847828 0.01568827 1.00000000 0.56195166 0.57288166
## [13] 0.73058201 0.49024097 1.00000000 1.00000000 1.00000000 0.41354956
## [19] 1.00000000 0.20340851 1.00000000 1.00000000 1.00000000 0.75781209
## [25] 1.00000000 0.09354975 0.42159238 0.49451649 0.46831474 0.05494669
## [31] 1.00000000 0.28602589 0.22979930 0.38127522 0.63783899 0.24185898
## [37] 1.00000000 0.47719619 0.24266499 0.63411752 1.00000000 0.60738324

which(batProbs < 0.05/20)

## integer(0)

Now we estimate the probabilities of the actual bird fossil counts

birdProbs <- rep(NA, length = length(No))
for (i in 1:length(birdProbs)) {
  q <- binom.test(x = birdCount[i], n = No[i], p = pbird) # we take p to be the mean total proportion of bats
  birdProbs[i] <- q$p.value
}
birdProbs

##  [1] 0.647890365 0.412972349 0.003585067 0.379822132 0.370375447 0.133998020
##  [7] 1.000000000 0.226662972 1.000000000 0.001188608 0.035790286 0.711114743
## [13] 0.640993924 0.061312412 1.000000000 1.000000000 0.663490823 0.054234119
## [19] 0.294771984 1.000000000 1.000000000 0.631127374 0.745531322 0.837945433
## [25] 1.000000000 0.304850171 0.635991470 1.000000000 0.651497800 1.000000000
## [31] 1.000000000 1.000000000 0.439297138 1.000000000 1.000000000 0.277912833
## [37] 0.384455089 0.170942964 1.000000000 0.526010724 1.000000000 0.727049074

which(birdProbs < 0.05/20)

## [1] 10

Messel fish counts

We calculate the number of individuals based on the percentages and the total counts given in Schaal & Möller (1991). We estimate that fishes are 115x as common as bats.

# Fish counts
Bowfin <- c(76.7,55.2,53.5,46.0,28.9,26.4,58.5,73.1,55.8,52.3,50.0,71.2,52.4,43.7,21.4,13.6,14.8,28.4,63.4,44.4,7.6,20.0,33.7,40.2,36.6,49.2,38.4,35.8,58.6,71,30,47.5,51.6,57.8,59,69.6,38.4,37.6,33.3,60.2,44.8,64.8)
Thauma <- c(1.3,2.3,1.3,0,0.2,0,21.9,12.5,28.7,29.8,23.4,13.8,32.1,41.2,57.1,65.9,67.9,61.7,27.6,51.8,90.3,71.6,56.4,48.4,46.6,30.4,50.7,51.8,25.3,2.9,70,45.8,19.3,22.8,22.2,17.8,42.3,40.2,54.5,0,0,0)
Gar <- c(15,7,15.3,16.5,8.9,5.6,17,8.7,10.4,10,15.3,9.9,11.8,8.7,14.2,9,9.8,5.4,3.2,0,1.9,6.6,5.2,5.7,13.3,11.7,9.2,7.4,10.6,18.3,0,2.5,22.5,12.2,11.8,9.8,11.5,14.2,3,10.2,13.7,13.5)
Other <- c(5.4,35.2,24.5,36.1,59.2,60.3,0,0.6,0,0.7,1,1.9,8.3,0,7.1,9,4.9,3.8,0.8,0,0,0,1.1,1.5,0,0.7,0,1.2,0,0.5,0,2.5,0,1.7,4.8,0,0,1.2,0,27.9,37.0,19.8)
fishProc <- Bowfin + Thauma + Gar + Other
fishCount <- round(No*fishProc/100)
sum(fishCount)/sum(batCount) # Proportion of all fish compared to bats

## [1] 114.8636