How well do the simulation methods work with larger rasters?

Here are the demographic parameters:

germination_fun <- vital( 
        function (N,carrying.capacity,...) {
            # note this is 'sum' applied to a RasterBrick object, which acts like rowSums
            r0 / ( 1 + migrate_raster(sum(N),competition)/carrying.capacity )
        },
        r0 = 0.01,
        competition = migration(
                                kern="gaussian",
                                sigma=100,
                                radius=300,
                                normalize=1
                            )
     )

this.demography <- demography(
        prob.seed = 0.2,
        fecundity = 200,
        prob.germination = germination_fun,
        prob.survival = 0.9,
        pollen.migration = migration(
                            kern = function (x) { exp(-sqrt(x)) },
                            sigma = 100,
                            radius = 1000,
                            normalize = NULL
                     ),
        seed.migration = migration(
                            kern = "gaussian",
                            sigma = 20,
                            radius = 400,
                            normalize = 1
                     ),
        genotypes = c("aa","aA","AA"),
        mating = mating_tensor( c("aa","aA","AA") )
    )

Raster extent

We will time a generation on rasters of different sizes. Here is a function to create a \(k\)-kilometer square random raster. (But note that the base units of the raster are in meters.)

habitat_size <- function (k) {
    habitat <- raster(xmn=-k*1000/2, xmx=k*1000/2, ymn=-k*1000/2, ymx=k*1000/2, 
          resolution=100, #fixed resolution
          crs="+proj=utm +zone=11 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs")
    values(habitat) <- sample( 100*c(1,2,5,NA), length(habitat), replace=TRUE )
    migrate_raster(habitat,kern="gaussian",sigma=200,radius=1000)
}

Here’s timing for one generation on rasters of various sizes:

sizes <- 2^(0:6)
size.results <- lapply( sizes, function (k) {
                  habitat <- habitat_size(k)
                  N <- rpois_raster( do.call( brick, list(habitat)[rep(1,length(this.demography$genotypes))] ) )
                  names(N) <- this.demography$genotypes
                  gen.time <- system.time( NN <- generation_raster(N,this.demography,carrying.capacity=habitat) )
                  return( list( time=gen.time, N=N, NN=NN ) )
          } )

Here’s the results:

size.tab <- data.frame( area=sizes^2, ncell=sapply(lapply(size.results,"[[","N"),ncell), t(sapply( size.results, "[[", "time" )) )
size.tab
##   area  ncell user.self sys.self elapsed user.child sys.child
## 1    1    100     0.812    0.000   0.813          0         0
## 2    4    400     0.796    0.000   0.797          0         0
## 3   16   1600     0.836    0.000   0.836          0         0
## 4   64   6400     0.980    0.000   0.978          0         0
## 5  256  25600     1.588    0.004   1.592          0         0
## 6 1024 102400     3.960    0.028   3.991          0         0
## 7 4096 409600    13.520    0.012  13.538          0         0
layout(t(1:2))
with(size.tab, {
     plot(area, user.self, log='xy', xlab='total area', ylab='computation time (sec)') 
     plot(ncell, user.self, log='xy', xlab='number of raster cells', ylab='computation time (sec)') 
     } )

The computation time will depend critically on the number of cells within the radii of the migration operations; but at these settings, adding 3.216310^{4} cells increases the run time by a second.

Raster resolution

Changing the resolution at a fixed size also changes the number of cells, but affects the running time quite differently. Here is a function to create a 10-kilometer square random raster, with different resolutions. (But note that the base units of the raster are in meters.)

habitat_res <- function (resolution) {
    habitat <- raster(xmn=-10*1000/2, xmx=10*1000/2, ymn=-10*1000/2, ymx=10*1000/2, 
          resolution=resolution,
          crs="+proj=utm +zone=11 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs")
    values(habitat) <- sample( 100*c(1,2,5,NA), length(habitat), replace=TRUE )
    migrate_raster(habitat,kern="gaussian",sigma=200,radius=1000)
}

Here’s timing for one generation on rasters of various sizes:

resolutions <- 100*2^(-2:2)
res.results <- lapply( resolutions, function (resolution) {
                  habitat <- habitat_res(resolution)
                  N <- rpois_raster( do.call( brick, list(habitat)[rep(1,length(this.demography$genotypes))] ) )
                  names(N) <- this.demography$genotypes
                  gen.time <- system.time( NN <- generation_raster(N,this.demography,carrying.capacity=habitat) )
                  return( list( time=gen.time, N=N, NN=NN ) )
          } )

Here’s the results:

res.tab <- data.frame( resolution=resolutions, ncell=sapply(lapply(res.results,"[[","N"),ncell), t(sapply( res.results, "[[", "time" )) )
res.tab
##   resolution  ncell user.self sys.self elapsed user.child sys.child
## 1         25 160000    48.888        0  48.908          0         0
## 2         50  40000     4.220        0   4.221          0         0
## 3        100  10000     1.096        0   1.097          0         0
## 4        200   2500     0.812        0   0.814          0         0
## 5        400    625     0.784        0   0.783          0         0
layout(t(1:2))
with(res.tab, {
     plot(resolution, user.self, log='xy', xlab='total area', ylab='computation time (sec)') 
     plot(ncell, user.self, log='xy', xlab='number of raster cells', ylab='computation time (sec)') 
     } )

The computation time will depend critically on the number of cells within the radii of the migration operations; but at these settings, adding 2995 cells increases the run time by a second.

Two implementations

Based on the above, we have two implementations of the method, one that works with Raster objects (so can take advantage of methods for those that work on layers too big to fit in memory); and one that precomputes a migration matrix and works directly with numeric matrices.

Let’s compare the speeds.

habitat <- habitat_size(6)
N <- rpois_raster( do.call( brick, list(habitat)[rep(1,length(this.demography$genotypes))] ) )
names(N) <- this.demography$genotypes
raster.time <- microbenchmark( generation_raster(N,this.demography,carrying.capacity=habitat), times=10 )

pop <- population( 
                  habitat = habitat,
                  genotypes = this.demography$genotypes,
                  N = matrix(values(N)[!is.na(values(habitat))],ncol=3)
             )
matrix.demography <- this.demography
matrix.demography$seed.migration <- migration( matrix.demography$seed.migration, do.M=TRUE, population=habitat )
matrix.demography$pollen.migration <- migration( matrix.demography$pollen.migration, do.M=TRUE, population=habitat )
matrix.demography$prob.germination <- vital( 
        function (N,carrying.capacity,...) {
            # and this is rowSums, for the matrix
            r0 / ( 1 + migrate(competition,x=rowSums(N))/carrying.capacity )
        },
        r0 = 0.01,
        competition = migration(
                                kern="gaussian",
                                sigma=100,
                                radius=300,
                                normalize=1,
                                do.M=TRUE,
                                population=habitat
                            )
     )
matrix.time <- microbenchmark( generation(pop,matrix.demography,carrying.capacity=values(habitat)[!is.na(values(habitat))]), times=10 )

rbind( raster.time, matrix.time )
## Unit: milliseconds
##                                                                                              expr
##                                generation_raster(N, this.demography, carrying.capacity = habitat)
##  generation(pop, matrix.demography, carrying.capacity = values(habitat)[!is.na(values(habitat))])
##        min        lq      mean    median        uq       max neval
##  883.85203 887.59848 891.80543 888.30197 892.70221 909.04268    10
##   18.03591  18.07289  18.10637  18.10385  18.11742  18.23959    10

That’s a speedup of 49 times, pretty good.