Animal
Spatial
Ecology
Members
Mathieu Basille
Clément Calenge
Jodie Martin
Bram van Moorter
Vincent Tolon
Resources
EcoTeX
Training courses
Functions
Webmail

Additional material

by Mathieu Basille.

A set of functions to split year-round space-use measurements into biological seasons, completed with additional functions to explore and simplify these seasons. Reference: Basille M., Fortin D., Dussault C., Ouellet J.-P., Courtois R. Ecologically based definition of seasons clarifies predator-prey interactions. Ecography, in press.

The 'Gap' function correctly compute the gap statistic (weighted or not, weighted by default). The function works on a matrix of data and returns a data frame with different components:

Gap <- function (data, from = 1, to = 10, nsim = 50, ref.dist = "pc", 
    clust.method = "k-means", dist.method = "euclidean", weighted = TRUE, 
    tol = 1) 
{
    clust <- function(mat, nclust, clust.method) {
        set.seed(1)
        if (nclust == 1) 
            return(rep(1, nrow(mat)))
        else if (clust.method == "pam") 
            return(pam(mat, nclust)$cluster)
        else if (clust.method == "k-means") 
            return(kmeans(mat, nclust, 100)$cluster)
        else if (clust.method == "diana") 
            return(cutree(as.hclust(diana(dist(mat))), k = nclust))
        else if (clust.method %in% c("single", "complete", "average", 
            "ward", "mcquitty", "median", "centroid")) 
            return(cutree(hclust(dist(mat), method = method), 
                nclust))
        else stop("Wrong clustering method")
    }
    computeWk <- function(mat, cl, dist.method = "euclidean", 
        weighted = TRUE) {
        Wk <- 0
        for (cli in unique(cl)) {
            if (!weighted) {
                if (dist.method == "euclidean") 
                  Wk <- Wk + sum(diag(var(mat[cl == cli, ]))) * 
                    (length(cl[cl == cli]) - 1)
                else {
                  D <- sum(dist(mat[cl == cli, ], method = dist.method)^2)
                  Wk <- Wk + D/(nrow(mat[cl == cli, , drop = FALSE]))
                }
            }
            else {
                if (dist.method == "euclidean") 
                  Wk <- Wk + sum(diag(var(mat[cl == cli, ])))
                else {
                  D <- sum(dist(mat[cl == cli, ], method = dist.method)^2)
                  Wk <- Wk + D/(nrow(mat[cl == cli, , drop = FALSE]) * 
                    (nrow(mat[cl == cli, , drop = FALSE]) - 1))
                }
            }
        }
        return(Wk)
    }
    simX <- function(mat, seed = seed, ref.dist = ref.dist) {
        set.seed(seed)
        simunif <- function(x) return(apply(x, 2, function(w) runif(length(w), 
            min = min(w), max = max(w))))
        simpc <- function(x) {
            x <- as.matrix(x)
            xmm <- apply(x, 2, mean)
            for (k in (1:dim(x)[2])) x[, k] <- x[, k] - xmm[k]
            ss <- svd(x)
            xsim <- simunif(x %*% ss$v) %*% t(ss$v)
            for (k in (1:dim(x)[2])) xsim[, k] <- xsim[, k] + 
                xmm[k]
            return(xsim)
        }
        if (ref.dist == "unif") 
            return(simunif(mat))
        if (ref.dist == "pc") 
            return(simpc(mat))
    }
    Gap <- data.frame(matrix(NA, nrow = to - from + 1, ncol = 9))
    names(Gap) <- c("nCluster", "logWk0", "logWk", "Gap", "sdGap", 
        "k", "D", "DD", "DDk")
    for (i in from:to) {
        Gap[i, "nCluster"] <- i
        Gap[i, "logWk0"] <- log(computeWk(data, clust(data, i, 
            clust.method = clust.method), dist.method = dist.method, 
            weighted = weighted))
        Sim <- lapply(1:nsim, function(seed) simX(data, seed = seed, 
            ref.dist = ref.dist))
        logWksim <- log(unlist(lapply(Sim, function(xl) computeWk(xl, 
            clust(xl, i, clust.method = clust.method), dist.method = dist.method, 
            weighted = weighted))))
        Gap[i, "logWk"] <- mean(logWksim)
        Gap[i, "Gap"] <- Gap[i, "logWk"] - Gap[i, "logWk0"]
        Gap[i, "sdGap"] <- sqrt(1 + 1/nsim) * sqrt(var(logWksim) * 
            (nsim - 1)/nsim)
    }
    Gap$k <- ""
    Gap$k[min(which(Gap$Gap[-nrow(Gap)] >= c(Gap$Gap - (tol * 
        Gap$sdGap))[-1]))] <- "*"
    Gap$D[2:nrow(Gap)] <- diff(Gap$Gap)
    Gap$DD[2:(nrow(Gap) - 1)] <- Gap$D[2:(nrow(Gap) - 1)] - Gap$D[3:nrow(Gap)]
    Gap$DDk <- ""
    Gap$DDk[which(Gap$DD == max(Gap$DD, na.rm = TRUE))] <- "*"
    return(Gap)
}

The function 'GapPlot' plots W_k, the gap statistic, DGap and DDGap of a given clustering, with the estimated number of clusters for each method.

GapPlot <- function (x) 
{
    old.par <- par(no.readonly = TRUE)
    on.exit(par(old.par))
    par(mfrow = c(2, 2))
    k <- seq(1:nrow(x))
    plot(k, exp(x$logWk0), xlab = "Number of clusters k", ylab = "Wk", 
        type = "b")
    plot(k, x$Gap, xlab = "Number of clusters k", ylab = "GAP", 
        type = "b")
    segments(k, c(x$Gap - x$sdGap), k, c(x$Gap + x$sdGap))
    points(x$nCluster[x$k == "*"], x$Gap[x$k == "*"], col = "black", 
        pch = 19, cex = 1.5)
    plot(k, x$D, xlab = "Number of clusters k", ylab = "D", type = "b")
    plot(k, x$DD, xlab = "Number of clusters k", ylab = "DD", 
        type = "b")
    points(x$nCluster[x$DDk == "*"], x$DD[x$DDk == "*"], col = "black", 
        pch = 19, cex = 1.5)
}

In addition, given a clustering, the function 'Seasons' simplify it to remove the smallest seasons, by checking for each day the values of the next 'win' days: if they have the same value (with a tolerance of 'tol' days), the day is kept as is; otherwise, the given day takes the value of the day before. 'SPlot' and 'Sformat' produces a plot and a human-readable output of the clustering, while 'SBoxplot' displays boxplots for every season (in their order by default).

Seasons <- function (clust, win = 3, tol = 1) 
{
    clust <- c(clust[length(clust)], clust, clust[1:win])
    for (i in 2:(length(clust) - win)) if (sum(clust[(i + 1):(i + 
        win)] == clust[i]) < (win - tol)) 
        clust[i] <- clust[i - 1]
    for (i in 2:(length(clust) - win)) if (sum(clust[(i + 1):(i + 
        win)] == clust[i]) < (win - tol)) 
        clust[i] <- clust[i - 1]
    return(clust[-c(1, (length(clust) - win + 1):length(clust))])
}

SPlot <- function (seasons) 
{
    old.par <- par(no.readonly = TRUE)
    on.exit(par(old.par))
    par(mfrow = c(1, 1), mar = c(4, 3, 4, 2) + 0.1)
    plot(seasons, pch = 15, cex = 0.5, axes = FALSE, main = "Seasons", 
        xlab = "")
    abline(v = as.numeric(names(which(diff(seasons) != 0))), 
        lty = 3, lwd = 0.5)
    axis(1, at = as.numeric(names(which(diff(seasons) != 0))), 
        label = format(strptime(names(which(diff(seasons) != 
            0)), format = "%j"), "%d/%m"), cex.axis = 0.8, las = 2)
    axis(2)
    box()
}

SFormat <- function (seasons, ndays = FALSE, nseasons = FALSE, trunk = FALSE) 
{
    br <- names(which(diff(seasons) != 0))
    if (trunk) {
        br <- c(names(seasons)[1], br, names(seasons)[length(seasons)])
        brn <- c("start", seasons[br[-c(1, length(br))]], "end")
    }
    else brn <- seasons[br]
    Sbr <- format(strptime(br, format = "%j"), "%d/%m")
    names(Sbr) <- brn
    if (ndays) 
        return(as.numeric(br))
    if (nseasons) 
        return(seasons[br])
    else return(Sbr)
}

SBoxplot <- function (data, seasons, order = TRUE, months = c("rectangles", 
    "lines"), cluster = TRUE, multi = FALSE, samescale = TRUE) 
{
    old.par <- par(no.readonly = TRUE)
    if (multi) 
        par(mfcol = c(ncol(data[[1]]), length(data)), mar = c(1, 
            2, 3, 0) + 0.1)
    else {
        par(mfrow = n2mfrow(ncol(data)), mar = c(1, 2, 3, 0) + 
            0.1)
        data <- list(data)
        seasons <- list(seasons)
    }
    on.exit(par(old.par))
    months <- match.arg(months)
    if (order) {
        for (j in 1:length(data)) {
            datarb <- do.call(rbind, data)
            datatmp <- data[[j]]
            seasonstmp <- seasons[[j]]
            changes <- as.numeric(c(names(seasonstmp)[1], names(which(diff(seasonstmp) != 
                0)), names(seasonstmp)[length(seasonstmp)]))
            seas <- seasonstmp[as.character(changes[-length(changes)])]
            at <- changes[-length(changes)] + diff(changes)/2
            for (i in 1:ncol(datatmp)) {
                summ <- do.call(rbind, lapply(1:max(seasonstmp), 
                  function(j) summary(datatmp[seasonstmp == j, 
                    i])))
                if (samescale) {
                  plot(1:365, datatmp[, i], type = "n", xlim = c(1, 
                    365), ylim = range(datarb[, i]), axes = FALSE, 
                    main = names(datatmp)[i])
                  par(usr = c(1, 365, min(datarb[, i]), max(datarb[, 
                    i])))
                }
                else {
                  plot(1:365, datatmp[, i], type = "n", xlim = c(1, 
                    365), ylim = range(datatmp[, i]), axes = FALSE, 
                    main = names(datatmp)[i])
                  par(usr = c(1, 365, min(datatmp[, i]), max(datatmp[, 
                    i])))
                }
                if (months == "lines") 
                  abline(v = c(32, 60, 91, 121, 152, 182, 213, 
                    244, 274, 305, 335), lty = 3, col = grey(0.6))
                if (months == "rectangles") 
                  rect(c(32, 91, 152, 213, 274, 335), min(datarb[, 
                    i]), c(60, 121, 182, 244, 305, 365), max(datarb[, 
                    i]), col = grey(0.9), border = NA)
                segments(at, summ[seas, "Min."], y1 = summ[seas, 
                  "Max."], lty = 2)
                for (j in 1:length(seas)) polygon(x = changes[c(j, 
                  j + 1, j + 1, j)] + c(1, -1, -1, 1), y = rep(summ[seas[j], 
                  c("1st Qu.", "3rd Qu.")], each = 2), col = "white")
                segments(changes[-length(changes)] + 1, summ[seas, 
                  "Median"], changes[-1] - 1, lwd = 3)
                axis(2)
                axis(1, at = c(16, 46, 75.5, 106, 136.5, 167, 
                  197.5, 228.5, 259, 289.5, 320, 350), labels = c("J", 
                  "F", "M", "A", "M", "J", "J", "A", "S", "O", 
                  "N", "D"), tick = FALSE, line = -1, cex.axis = 0.8)
                if (cluster) 
                  axis(3, at = at, labels = seas, tick = FALSE, 
                    line = -1, cex.axis = 0.8)
                box()
            }
        }
    }
    else {
        for (j in 1:length(data)) {
            datatmp <- data[[j]]
            seasonstmp <- seasons[[j]]
            for (i in 1:ncol(data)) {
                boxplot(data[, i] ~ seasons, axes = FALSE, main = names(data)[i], 
                  ylim = c(0, 1))
                axis(1)
                box()
            }
        }
    }
}
[last update: 2011/12/22]