Set up and read Data and shapefiles

  1. Set things up
library(RandomFields)
library(INLA)
library(inlabru)
library(sp)
library(spatstat)
library(sf)
library(scico)
library(tidyverse)
library(patchwork)
library(maptools)

theme_map =  
  theme_light() + 
    theme(axis.ticks.x = element_blank(),
          axis.text.x = element_blank(),
          axis.ticks.y = element_blank(),
          axis.text.y = element_blank(),
          axis.title.x=element_blank(),
          axis.title.y=element_blank()) 

# function that computes the value of the covariate in any point of the area of interest.
# this is required by inlabru

f.fill = function(x,y, covar) 
{
  spp <- SpatialPoints(data.frame(x=x,y=y),
                       proj4string = CRS(my_crs))
  v <- over(spp, covar) 
  v[is.na(v)] <- 0 # NAs are a problem! Remove them
  return(v[,1])
}
  1. Read the shapefile (area of interest, ferry tracks)
# read shapefiles -------------------------------------------

poly = read_sf("shape_interest/shape_interest.shp")
poly_sp = as(poly,"Spatial")

ferry_sp = read_sf("ferry_lines/ferry_lines.shp") %>%
  st_set_crs(proj4string(poly_sp))
ferry_sp = as(ferry_sp, "Spatial")

my_crs = proj4string(poly_sp)

depth =  read.table("depth.csv") 
depth_SPDF = SpatialPixelsDataFrame(points = cbind(depth$x,depth$y), 
                                    data = data.frame(depth = depth$z),
                                    proj4string = CRS(my_crs)) 


int_social = read.table("intensity.dat", header = T)
int_social_sp = SpatialPixelsDataFrame(cbind(x = int_social$x, y = int_social$y), 
                                  data = data.frame(cov = int_social$z),
                                  proj4string = CRS(my_crs))


win <- as.owin(poly)

Create the mesh for the SPDE model

# CREATE THE MESH
boundary = as(st_simplify(poly, dTolerance = 1.3),"Spatial") 
max.edge = 10
bound.outer = 50
mesh = inla.mesh.2d(boundary = boundary,
                          max.edge = c(1,4)*max.edge,
                          # - use 5 times max.edge in the outer extension/offset/boundary
                          crs = CRS(my_crs),
                          offset = c(max.edge, bound.outer))

A = inla.spde.make.A(mesh = mesh, loc = as.matrix(depth[,c(1,2)]))

Data Simulation

We simulate a point process. This is the “real” process that we then thin according to two different observation process

# simulate point process -------------------------------------------------
RFoptions(seed=67356453)


# create the intensity function
# linear part
beta0 = -4
beta_depth = -1
lambda = beta0 + beta_depth * depth$z
im = as.im(data.frame(x = depth$x, y = depth$y, z = lambda), W = win)


# parameters for the Gaussian field
sigma2x <- .5^2
range <- 100

# Simulate the sightings process
data <- rLGCP(model="matern", 
              mu=im, 
              var=sigma2x, 
              scale=range/sqrt(8), 
              nu=1, 
              win = win,
              saveLambda = TRUE)
data.sp <- SpatialPoints(cbind(data$x,data$y),
                         proj4string = CRS(my_crs)) 
# Save the intensity surface for later reference
Lambda = raster::raster(attributes(data)$Lambda)
Left: observed point process, in the background is the density of the LGCP. Right: density for the SM observation process

Left: observed point process, in the background is the density of the LGCP. Right: density for the SM observation process

Thin the true process and create two observed datasets

Here we imagine two observation process, one on a transect (as the ferry data) and the other similar to the SM data.

  • For the SM data we imagine that the probability of keeping an observation in the dataset is higher where the density in Figure @ref(int:plot) (right) is higher. The detection function is:

\[\begin{equation} g_{1}(s) = \Phi(\frac{1}{\xi_{1}} d_{1}(s)-\mu_1) (\#eq:g1) \end{equation}\] where \(d_{1}(s)\) is the density in Figure @ref(int:plot) (right) , and \(\Phi\) is the normal cumulative distribution function (cdf) with \(\mu_1=\) 0 and $_1=$0.5 as location and scale parameters, respectively.

  • For the ferry data, the detection function is

\[\begin{equation} g_{2}(s)=\exp\left(-\frac{d_2(s)^2}{2\ \xi_2^2}\right) (\#eq:g1) \end{equation}\]

where \(d_2(\cdot)\) is the is the perpendicular distance to the ferry track and \(\xi=\) 2 is a scale parameter.

# function to compute the distance between a point in space and the nearest ferry track
surf2 = function(x,y)
{
  
  d = SpatialPoints(cbind(x,y),
                    proj4string = CRS(my_crs))
  d1 = rgeos::gDistance(d, ferry_sp, byid=TRUE)
  z = apply(d1,2,min)
  return(z)
}

# thinning the data points

# complete dataset
all_data = data.frame(x = data$x,
                      y = data$y)
  

# dataset from SM observation process
data1 = all_data %>% 
  mutate(surf = f.fill(x,y,int_social_sp)) %>%
  mutate(pdetect = pnorm(surf, mean = location_detect1, sd = scale_detect1)) %>%
  mutate(u = runif(data$n)) %>%
  mutate(detect = u<pdetect) %>%
  dplyr::filter(detect)

# dataset from ferry observation process
data2 = all_data %>% 
  mutate(surf = surf2(x,y)) %>%
  mutate(pdetect = exp(-0.5*(surf/ sig_detect2)^2),
         u = runif(data$n)) %>%
  mutate(detect = u<pdetect) %>%
  dplyr::filter(detect)

inlabru requires the dataset to be SpatialPointsDataFrame objects

# create the datasets as SP and all maps as SPDF --------------------------
data1_sp = SpatialPointsDataFrame(cbind(x = data1$x, y = data1$y), 
                                           data = data.frame(d = rep(1,length(data1$x))),
                                           proj4string = CRS(my_crs)) 

data2_sp = SpatialPointsDataFrame(cbind(x = data2$x, y = data2$y), 
                                  data = data.frame(d = rep(1,length(data2$x))),
                                  proj4string =CRS(my_crs))  

# we need to add the distance from the ferry track for the ferry data
d1 = rgeos::gDistance(data2_sp, ferry_sp, byid=TRUE)
d1 = apply(d1,2,min)
data2_sp$dist_obs = d1

Model Setup

Our model assumes that the “true” process is LGCP with log-intensity:

\[ \lambda(s) = \alpha + \beta x(s) + \omega(s) \]

where \(\alpha\) is a common itercept, \(\beta\) is the regression coefficient for the covariate \(x(s)\) (the depth) and \(\omega(s)\) is a 0 mean Gaussian process with Matern covariance of order 1 with range \(\rho\) and standard deviation \(\sigma\).

We assume the folowing priors - \(\alpha,\beta\sim\mathbf{N}(0,0.1^2)\)

We assume further that the “true” process is observed in two different ways (conditionally independent given \(\lambda(s)\)) so that the two observed log-itesity are \[ \lambda_1(s) = \lambda(s) + g_1(s) \lambda_2(s) = \lambda(s) + g_2(s) \] where \(g_1(s)\) and \(g_2(s)\) are defined in Equations @ref(eq:g1) and @ref(eq:g2) respectively.

We use the following prior for the parameters \(\xi_1, \mu_1, \xi_2\) in \(g_1(s)\) and \(g_2(s)\)

# Create the SPDE model 
matern <-  inla.spde2.pcmatern(mesh,
                                   prior.sigma = c(.7, 0.01),
                                   prior.range = c(100, 0.5))


# Transformation function the the \xi paramters
qexppnorm <- function(x, rate) {
  qexp(pnorm(x, lower.tail = FALSE, log.p = TRUE),
       rate = rate, 
       log.p = TRUE, 
       lower.tail = FALSE)
}

# Define the two detection functions --------------------------------------
log_detect_social = function(x,y, t1, t2)
{
  dens = f.fill(x,y,int_social_sp)
  pnorm(dens/qexppnorm(t1, rate = rate_detect1) +
          t2,
        log.p = TRUE)
}

log_detect_ferry = function(dist, sig)
{ 
  -0.5*(dist/ sig)^2
}
# Define the model components


cmp = ~ 
  SPDE(main = coordinates, model = matern) +
  depth(depth_SPDF) +
  Intercept(1)+
  sig_detect2(main = 1, model = "linear",
             mean.linear = 0,
             prec.linear = 1) +
  scale_detect1(1, 
                mean.linear = 0,
                prec.linear = 1)+
  location_detect1(1,  
                   mean.linear = 0,
                   prec.linear = 10) 

# Define the formula for the two observed processes
formula_data1 = coordinates   ~ 
  Intercept +
  depth  + 
  SPDE +
  log_detect_social(x,y,location_detect1, scale_detect1)
  
formula_data2 = coordinates + dist_obs   ~ 
  Intercept +
  depth  + 
  SPDE +
  log_detect_ferry(dist_obs,qexppnorm(sig_detect2,rate =rate_detect2))

# Define the likelihood for the two observed processes
lik_data1 <- like("cp",
                        formula = formula_data1,
                        samplers = poly_sp, 
                        domain = list(coordinates = mesh),
                        data = data1_sp)

lik_data2 <- like("cp",
                  formula = formula_data2,
                  samplers = ferry_sp,
                  domain = list(coordinates = mesh,
                                dist_obs = inla.mesh.1d(seq(0, 7, length.out = 5),
                                                        degree = 1)),
                  data = data2_sp)

# Finally set things together and run the model
bru_options_set(bru_verbose = 1)



fit <- bru(components = cmp,  
         lik_data1,
         lik_data2,
         options = list(verbose = F,
                        num.threads = "1:1"))

Results

Lets now look at some results.

Linear effects and parameters of the Gaussian field

Estimated posterior marginals for \(\beta_0\) and \(\beta_1\) and for the parameters \(\rho\) and \(\sigma\) of the Gaussian field \(\omega(s)\)

Detection function for the SM data

Estimated detection function for the SM data with 95% credible interval. The red line indicates the true detection function.

# detection function for ferry data
scale_social = function(...)
{
  qexppnorm(sig_detect2, rate = 1)
}

# we sample from the model
samples = inla.posterior.sample(500, fit)

scale1 = inla.posterior.sample.eval(fun = scale_social,samples = samples)

df = data.frame(scale = as.numeric(scale1),
           loc = as.numeric(inla.posterior.sample.eval("location_detect1", samples)))

 mapply(function(x,y){pnorm(q = ((seq(-4,3,0.1)/qexppnorm(x, rate = 1) + y)))}, x = df$scale, y = df$loc) %>%
  as.data.frame() %>%
  mutate(x = seq(-4,3,0.1)) %>%
  pivot_longer(-x) %>%
  group_by(x) %>% summarise(m = median(value),
                            q1 = quantile(value, 0.025),
                            q2 = quantile(value, 0.975)) %>%
  ggplot() + geom_line(aes(x,m)) +
  geom_ribbon(aes(x,ymin =  q1, ymax = q2), alpha = 0.2) +
  geom_line(data = data.frame(x = seq(-4,3,0.1)) %>%
              mutate(y = pnorm(seq(-4,3,0.1)/qexppnorm(scale_detect1, rate = 1)) +
          location_detect1),
            aes(x,y),color = "red") +
  coord_cartesian(ylim=c(0,1))

Detection function for the Ferry data

Estimated detection function for the ferry data with 95% credible interval. THe red line indicates the true detection function.

# detection function for ferry data
detection_ferry = function(...)
{
  sig = qexppnorm(sig_detect2, rate = rate_detect2)
  return(sig)
}


scale2 = inla.posterior.sample.eval(fun = detection_ferry,samples = samples)
data.frame(apply(scale2, 2, function(x) exp(-0.5*(seq(0,7,0.1)/ x)^2))) %>%
  mutate(x = seq(0,7,0.1)) %>%
  pivot_longer(-x) %>%
  group_by(x) %>% summarise(m = median(value),
                            q1 = quantile(value, 0.025),
                            q2 = quantile(value, 0.975)) %>%
  ggplot() + geom_line(aes(x,m)) +
  geom_ribbon(aes(x,ymin =  q1, ymax = q2), alpha = 0.2) +
  geom_line(data = data.frame(x = seq(0,7,0.1)) %>%
              mutate(y = exp(-0.5*(x/sig_detect2 )^2)),
            aes(x,y),color = "red")

Estimated intensity surface

The function generate in inlabru can generate from the fitted model

Posterior median and RWPCI

pxl = pixels(mesh ,nx = 200, ny = 200, mask =poly_sp)

samples_fit = generate(fit,  data = pxl, ~ exp(Intercept + depth +  SPDE))

## compute the posterior median of the intensity
p1 = data.frame(x = coordinates(pxl)[,1],
           y = coordinates(pxl)[,2],
           z = apply(samples_fit,1,median)) %>%
  ggplot() + geom_tile(aes(x,y,fill = z)) + scale_fill_scico() +
  theme_map + theme(legend.position = "none") +
  geom_sf(data = poly, alpha = 0) +
  ggtitle("post median")


## compute the  RWPCI of the intensity
p2 = data.frame(x = coordinates(pxl)[,1],
           y = coordinates(pxl)[,2],
           m = apply(samples_fit,1,median),
           q1 = apply(samples_fit,1,quantile, 0.25),
           q2 = apply(samples_fit,1,quantile, 0.75)) %>%
  mutate(z = (q2-q1)/m) %>%
  ggplot() + geom_tile(aes(x,y,fill = z)) + scale_fill_scico() +
  theme_map + theme(legend.position = "none") +
  geom_sf(data = poly, alpha = 0)+
  ggtitle("RWPCI")


p1+p2

Posterior samples

We can also have a look at some simulated posterior samples surfaces, these are all coherent with the estimated model

ss = sample(1:100,4)
p1 = data.frame(x = coordinates(pxl)[,1],
           y = coordinates(pxl)[,2],
           z = samples_fit[,ss[1]]) %>%
  ggplot() + geom_tile(aes(x,y,fill = z)) + scale_fill_scico() +
  theme_map + theme(legend.position = "none") +
  geom_sf(data = poly, alpha = 0) 
p2 = data.frame(x = coordinates(pxl)[,1],
           y = coordinates(pxl)[,2],
           z = samples_fit[,ss[2]]) %>%
  ggplot() + geom_tile(aes(x,y,fill = z)) + scale_fill_scico() +
  theme_map + theme(legend.position = "none") +
  geom_sf(data = poly, alpha = 0) 

p3 = data.frame(x = coordinates(pxl)[,1],
           y = coordinates(pxl)[,2],
           z = samples_fit[,ss[3]]) %>%
  ggplot() + geom_tile(aes(x,y,fill = z)) + scale_fill_scico() +
  theme_map + theme(legend.position = "none") +
  geom_sf(data = poly, alpha = 0)

p4 = data.frame(x = coordinates(pxl)[,1],
           y = coordinates(pxl)[,2],
           z = samples_fit[,ss[4]]) %>%
  ggplot() + geom_tile(aes(x,y,fill = z)) + scale_fill_scico() +
  theme_map + theme(legend.position = "none") +
  geom_sf(data = poly, alpha = 0) 


p5 = ggplot() + gg(r3, aes(color = layer)) + scale_color_scico() +
 theme_map + geom_sf(data = poly, alpha = 0) + theme(legend.position = "none") +
  ggtitle("True intensity")


p1+p2+p4+p4 + p5  + plot_layout(ncol = 2)