# Stochastic surplus Production model in Continuous-Time (SPiCT)
#    Copyright (C) 2015  Martin Waever Pedersen, mawp@dtu.dk or wpsgodd@gmail.com
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.


#' An S4 class to represent output from a SPiCT fit.
#' @name spictcls
#' @aliases spictcls-class
#' @exportClass spictcls
setClass("spictcls")


#' @name fd
#' @title Format date
#' @param d Point in time in years as decimal number.
#' @param dec Number of decimals.
#' @return Correctly formatted date.
fd <- function(d, dec=2) sprintf('%4.2f', round(d, dec)) # Format date function


#' @name pol
#' @title Fisheries data included in Polacheck et al. (1993).
#' @details Fisheries data for south Atlantic albacore, northern Namibian hake, and New Zealand rock lobster.
#' @docType data
#' @keywords datasets
#' @usage data(pol)
#' @source Polacheck et al. (1993), Canadian Journal of Fisheries and Aquatic Science, vol 50, pp. 2597-2607.
#' @examples
#' data(pol)
#' rep <- fit.spict(inp=pol$albacore)
#' rep <- fit.spict(inp=pol$hake)
#' rep <- fit.spict(inp=pol$lobster)
#' @format Data are lists containing data and initial values for estimation formatted to be used as an input to fit.spict().
NULL


#' @name test.spict
#' @title Example of a spict analysis.
#' @details Loads a data set, fits the model, calculates one-step-ahead residuals, plots the results.
#' @param dataset Specify one of the three test data sets: 'albacore', 'hake', 'lobster'. These can be accessed with the command data(pol).
#' @return A result report as given by fit.spict().
#' @examples
#' rep <- test.spict()
#' @export
test.spict <- function(dataset='albacore'){
    # Load data
    data(pol)
    inp <- pol[[dataset]]
    if(dataset=='albacore'){
        inp$ffac <- 0.8
        nopredcyears <- 3 # Number of years to predict catch
        inp$dtpredc <- 1 # Time step when predicting catch
        inp$timepredi <- tail(inp$timeC, 1) + nopredcyears
        inp$timepredc <- tail(inp$timeC, 1) + nopredcyears
        inp$phases$logn <- -1
    }
    # Fit model
    rep <- fit.spict(inp)
    # Calculate one-step-ahead residuals
    rep <- calc.osa.resid(rep)
    # Plot results
    #graphics.off()
    dev.new(width=10, height=10)
    plot(rep)
    summary(rep)
    return(rep)
}


#' @name calc.gamma
#' @title Calculate gamma from n
#' @param n Exponent of the Pella-Tomlinson surplus production equation.
#' @export
calc.gamma <- function(n) n^(n/(n-1)) / (n-1)


#' @name get.par
#' @title Extract parameters from a result report as generated by fit.spict.
#' @details Helper function for extracting the value and uncertainty of a specific model parameter, random effect or derived quantity.
#' @param parname Character string containing the name of the variable of interest.
#' @param rep A result report as generated by running fit.spict.
#' @param exp Take exp of the variable? TRUE/FALSE.
#' @param random DUMMY not used anymore. (Is the variable a random effect? TRUE/FALSE.)
#' @param fixed DUMMY not used anymore. (Is the variable a fixed effect? TRUE/FALSE.)
#' @return A matrix with four columns containing respectively: 1) the lower 95% confidence limit; 2) the parameter estimate; 3) the upper 95% confidence limit; 4) the parameter standard deviation in the domain it was estimated (log or non-log).
#' @export
#' @examples
#' data(pol)
#' rep <- fit.spict(pol$albacore)
#' Bmsy <- get.par('logBmsy', rep, exp=TRUE)
#' Best <- get.par('logB', rep, exp=TRUE)
#' K <- get.par('logK', rep, exp=TRUE)
get.par <- function(parname, rep=rep, exp=FALSE, random=FALSE, fixed=FALSE){
    if(!'sderr' %in% names(rep)){
        indran <- which(names(rep$par.random)==parname)
        indfix <- which(names(rep$par.fixed)==parname)
        indsdr <- which(names(rep$value)==parname)
        indopt <- which(names(rep$opt$par)==parname)
        est <- NULL
        if(length(indran)>0){
            est <- rep$par.random[indran]
            sd <- sqrt(rep$diag.cov.random[indran])
            ll <- est - 1.96*sd
            ul <- est + 1.96*sd
        }
        if(length(indfix)>0){
            est <- rep$par.fixed[indfix]
            sd <- sqrt(diag(rep$cov.fixed))[indfix]
            ll <- est - 1.96*sd
            ul <- est + 1.96*sd
        }
        if(length(indsdr)>0){
            est <- rep$value[indsdr]
            sd <- rep$sd[indsdr]
            ll <- est - 1.96*sd
            ul <- est + 1.96*sd
        }
        if(length(est)==0){
            ll <- NA
            ul <- NA
            sd <- NA
            est <- NA
            if(length(indopt)>0){
                est <- rep$opt$par[indopt]
            } else {
                if('phases' %in% names(rep$inp)){
                    if(parname %in% names(rep$inp$phases)){
                        if(rep$inp$phases[[parname]] == -1){
                            est <- rep$inp$parlist[[parname]]
                            ll <- est
                            ul <- est
                        }
                    }else {
                        if(parname == 'P'){
                            B <- get.par('logB', rep, exp=TRUE)
                            C <- get.par('logCpred', rep, exp=TRUE)
                            ic <- rep$inp$ic
                            nc <- rep$inp$nc
                            B0 <- B[ic, 2]
                            B1 <- B[ic+nc, 2]
                            T0 <- rep$inp$time[ic]
                            T1 <- rep$inp$time[ic+nc]
                            # Get annual average
                            #est <- (B1 - B0 + C[, 2]) / (rep$inp$dteuler*rep$inp$nc) 
                            est <- (B1 - B0 + C[, 2]) / (T1-T0)
                        }
                    }
                } else {
                    warning('get.par WARNING: could not extract ', parname)
                }
            }
        }
        if(exp==TRUE){
            cv <- sqrt(exp(sd^2) - 1)
        } else {
            cv <- sd/est
        }
        if(exp){
            out <- cbind(ll=exp(ll), est=exp(est), ul=exp(ul), sd, cv)
        } else {
            out <- cbind(ll, est, ul, sd, cv)
        }
        if(parname %in% c('logB', 'logF', 'logBBmsy', 'logFFmsy')) rownames(out) <- rep$inp$time
        return(out)
    }
}


#' @name get.msyvec
#' @title If multiple growth rates (r) are used (e.g. for a seasonal model), return specified reference point for all instances of r.
#' @param inp An input list as validated by check.inp().
#' @param msy Matrix containing reference point values as given by get.par().
#' @return A list containing reference point estimates with upper and lower CI bounds.
get.msyvec <- function(inp, msy){
    vec <- rep(0, inp$ns)
    ul <- rep(0, inp$ns)
    ll <- rep(0, inp$ns)
    nr <- length(inp$ini$logr)
    for(i in 1:nr){
        vec[inp$ir==i] <- msy[i, 2]
        ul[inp$ir==i] <- msy[i, 3]
        ll[inp$ir==i] <- msy[i, 1]
    }
    return(list(msy=vec, ll=ll, ul=ul))
}


#' @name make.splinemat
#' @title Make a spline design matrix
#' @param nseasons Number of seasons
#' @param order Order of the spline
#' @param dtfine Time between points where spline is evaluated
#' @return Spline design matrix.
#' @export
make.splinemat <- function(nseasons, order, dtfine=1/100){
    if(dtfine==1){
        d <- matrix(1, 1, 1)
    } else {
        dtspl <- 1/nseasons
        knots <- seq(0, 1, by=dtspl)
        x <- seq(0, 1-dtfine, by=dtfine)
        if(order > 1){
            #require(mgcv)
            d <- mgcv::cSplineDes(x, knots, ord=order)
        } else {
            if(order < 1){
                warning('Specified spline order (', order, ') not valid!')
                order <- 1
            }
            nx <- length(x)
            nknots <- length(knots)
            d <- matrix(0, nx, nknots-1)
            for(i in 1:(nknots-1)){
                inds <- which(x >= knots[i] & x < knots[i+1])
                d[inds, i] <- 1
            }
        }
    }
    return(d)
}


#' @name get.spline
#' @title Get the values of the seasonal spline for F.
#' @param logphi Values of the phi vector.
#' @param order Order of the spline.
#' @param dtfine Time between points where spline is evaluated.
#' @return Spline values at the points between 0 and 1 with dtfine as time step.
#' @export
get.spline <- function(logphi, order, dtfine=1/100){
    logphipar <- c(0, logphi)
    nseasons <- length(logphipar)
    d <- make.splinemat(nseasons, order, dtfine)
    spline <- as.vector(d %*% logphipar)
    spline <- c(spline, spline[1])
    return(spline)
}


#' @name get.AIC
#' @title Calculate AIC from a rep list.
#' @param rep A result report as generated by running fit.spict.
#' @return AIC
#' @export
get.AIC <- function(rep){
    negloglik <- rep$opt$objective
    numpars <- length(rep$opt$par)
    AIC <- 2*numpars + 2*negloglik
    return(AIC)
}


#' @name invlogit
#' @title Inverse logit transform.
#' @param a Value to take inverse logit of.
#' @return Inverse logit.
invlogit <- function(a) 1/(1+exp(-a))


#' @name invlogp1
#' @title Inverse log "plus one" transform
#' @details If a = log(b-1), then the inverse transform is b = 1 + exp(a). Useful for values with lower bound at 1.
#' @param a Value to take inverse logp1 of.
#' @return Inverse logp1.
invlogp1 <- function(a) 1 + exp(a)


#' @name guess.m
#' @title Use a simple linear regression to guess m (MSY).
#' @details Equations 9.1.7 and 9.1.8 on page 284 of FAO's tropical assessment book are used to guess MSY.
#' @param inp An input list containing data.
#' @param all.return If true also return a guess on Emsy (effort at MSY) and components of the linear regression.
#' @return The guess on MSY.
#' @export
guess.m <- function(inp, all.return=FALSE){
    y <- inp$obsC
    if(class(inp$obsI)=='list'){
        z <- inp$obsI[[1]]
    } else {
        z <- inp$obsI
    }
    if(length(y) == length(z)){
        x <- y/z
        mod0 <- lm(z ~ x)
        a <- mod0$coefficients[1]
        b <- mod0$coefficients[2]
        MSY <- -0.25*a^2/b # p. 284 in FAO's book on tropical stock assessment
        if(MSY <= 0) MSY <- mean(y) # Mean catch
        if(all.return){
            Emsy <- -0.5*a/b # p. 284 in FAO's book on tropical stock assessment
            return(list(MSY=MSY, Emsy=Emsy, a=a, b=b, x=x, y=y, z=z, mod0=mod0))
        } else {
            return(MSY)
        }
    } else {
        return(mean(y))
    }
}


#' @name calc.EBinf
#' @title Calculate E(Binfinity), i.e. the fished equilibrium.
#' @details If a seasonal pattern in F is imposed the annual average F is used for calculating the expectation. Max() is used to avoid negative values.
#' @param K The carrying capacity.
#' @param n Pella-Tomlinson exponent.
#' @param Fl Average fishing mortality of the last year.
#' @param Fmsy Fishing mortality at MSY.
#' @param sdb2 Standard deviation squared (variance) of B process.
#' @return E(Binf).
calc.EBinf <- function(K, n, Fl, Fmsy, sdb2) max(c(0, K*(1 - (n-1)/n * Fl/Fmsy)^(1/(n-1)) * (1 - n/2/(1 - (1-n*Fmsy + (n-1)*Fl))*sdb2)))


#' @name get.EBinf
#' @title Calculate E(Binfinity) the fished equilibrium.
#' @details If a seasonal pattern in F is imposed the annual average F is used for calculating the expectation.
#' @param rep A result of fit.spict.
#' @return E(Binf).
get.EBinf <- function(rep){
    K <- get.par('logK', rep, exp=TRUE)[2]
    n <- get.par('logn', rep, exp=TRUE)[2]
    sdb2 <- get.par('logsdb', rep, exp=TRUE)[2]^2
    Fmsyall <- get.par('logFmsy', rep, exp=TRUE)
    Fmsy <- tail(Fmsyall, 1)
    logFest <- get.par('logFs', rep)
    if(min(rep$inp$dtc) < 1){
        alf <- annual(rep$inp$time, logFest[, 2])
        fff <- exp(alf$annvec)
    } else {
        #fff <- exp(logFest[rep$inp$indest,2])
        fff <- exp(logFest[, 2])
    }
    Fl <- tail(unname(fff), 1)
    EBinf <- calc.EBinf(K, n, Fl, Fmsy[2], sdb2)
}


#' @name acf.signf
#' @title Check whether ACF of residuals is significant in any lags.
#' @details This corresponds to plotting the ACF using acf() and checking whether any lags has an acf value above the CI limit.
#' @param resid Vector of residuals.
#' @param lag.max Only check from lag 1 until lag.max.
#' @param return.p Return p-values of the calculated lags.
#' @return Vector of TRUE and FALSE indicating whether significant lags were present. If return.p is TRUE then p-values are returned instead.
acf.signf <- function(resid, lag.max=4, return.p=FALSE){
    calc.pval <- function(corval, acf) 2-2*pnorm(abs(corval)*sqrt(acf$n.used))
    calc.limval <- function(p, acf) qnorm((2-p)/2)/sqrt(acf$n.used)
    inds <- which(is.na(resid))
    if(length(inds)>0) resid <- resid[-inds]
    if(length(inds)>1){
        warning(length(inds), 'NAs in residuals! this could create problems with the calculated ACF.')
    }
    acfC <- acf(resid, plot=FALSE, lag.max=lag.max, na.action=na.pass)
    if(return.p){
        corvals <- acfC$acf[-1]
        #out <- paste(round(calc.pval(corvals, acfC), 3), collapse=', ')
        out <- calc.pval(corvals, acfC)
    } else {
        p <- 0.05
        acflim <- calc.limval(p, acfC)
        out <- abs(acfC$acf[-1]) > acflim
    }
    return(out)
}


#' @name get.osar.pvals
#' @title Check whether ACF of catch and index residuals is significant in any lags.
#' @param rep Result of fit.spict(), but requires that also residuals have been calculated using calc.osa.resic().
#' @return Vector of p-values of length equal to the number of data series.
get.osar.pvals <- function(rep){
    pvals <- numeric(rep$inp$nindex+1)
    if('osarC' %in% names(rep)) pvals[1] <- round(min(acf.signf(rep$osarC$residual, return.p=TRUE)), 3)
    if('osarI' %in% names(rep)){
        ni <- length(rep$osarI)
        for(i in 1:ni) pvals[i+1] <- round(min(acf.signf(rep$osarI[[i]]$residual, return.p=TRUE)), 3)
    }
    return(pvals)
}


#' @name get.cov
#' @title Get covariance matrix of two reported quantities not of fixed model parameters. Covariance of fixed model parameters can be found in rep$cov.fixed.
#' @param rep Result of fit.spict().
#' @param parname1 Name first parameter.
#' @param parname2 Name second parameter.
#' @param cor If TRUE correlation matrix is reported instead of covariance matrix
#' @return Covariance matrix of specified parameters.
#' @export
get.cov <- function(rep, parname1, parname2, cor=FALSE){
    inds <- match(c(parname1, parname2), names(rep$value))
    if(cor){
        return(cov2cor(rep$cov[inds, inds]))
    } else {
        return(rep$cov[inds, inds])
    }
}
