maxent工作流1

### model red-bellied lemurs
# Example datasets — presumably shipped with the enmSdm package (TODO confirm)
data(mad0)    # Madagascar outline polygon
data(lemurs)  # lemur occurrence records

# climate data: WorldClim bioclim layers at 10 arc-min, cropped to Madagascar
bios <- c(1, 5, 12, 15)  # BIO1, BIO5, BIO12, BIO15
clim <- raster::getData('worldclim', var='bio', res=10)
clim <- raster::subset(clim, bios)
clim <- raster::crop(clim, mad0)

# occurrence data for the focal species
occs <- lemurs[lemurs$species == 'Eulemur rubriventer', ]
occsEnv <- raster::extract(clim, occs[ , c('longitude', 'latitude')])

# background sites
bg <- 2000 # too few cells to locate 10000 background points
bgSites <- dismo::randomPoints(clim, bg)  # FIX: use the constant defined above instead of a repeated literal
bgEnv <- raster::extract(clim, bgSites)   # FIX: namespace-qualified, consistent with the extract() call above

# collate presences (1) and background (0) into one modelling data frame
presBg <- rep(c(1, 0), c(nrow(occs), nrow(bgSites)))
env <- rbind(occsEnv, bgEnv)
env <- cbind(presBg, env)
env <- as.data.frame(env)

# predictor column names as they appear in `env`
preds <- paste0('bio', bios)

regMult <- 1:3 # default values are probably better, but these will be faster

# calibrate MaxEnt model
ent <- trainMaxEnt(
  data=env,
  resp='presBg',
  preds=preds,
  regMult=regMult,
  classes='lpq',
  verbose=TRUE
)

# calibrate MaxNet model
# FIX: trained BEFORE the predictions below — the original referenced `net`
# before it was defined.
net <- trainMaxNet(
  data=env,
  resp='presBg',
  preds=preds,
  regMult=regMult,
  classes='lpq',
  verbose=TRUE
)

# Predictions at the training sites.
# FIX: the original passed `presBg` (the 0/1 response vector) as newdata;
# the models expect the predictor data frame `env` — TODO confirm against
# the enmSdm help examples.
# default: logistic output
predEnt <- raster::predict(ent, env)
# default: cloglog output; see ?maxnet for more
predNet <- predict(net, newdata=env)
# default: cloglog output
predEnt <- predictMaxEnt(ent, env)
# default: cloglog output
predEnt <- predictEnmSdm(ent, env)
# default: cloglog output
predNet <- predictEnmSdm(net, env)

# prediction rasters
# NOTE(review): argument order differs on purpose — dismo's predict method
# takes (model, raster), while raster::predict for a maxnet model takes
# (raster, model). Confirm both dispatch as intended.
mapEnt <- predict(ent, clim, type='logistic')
mapNet <- predict(clim, net, type='logistic')

# side-by-side maps with the occurrence points overlaid
par(mfrow=c(1, 2))
plot(mapEnt, main='MaxEnt')
points(occs[ , c('longitude', 'latitude')])
plot(mapNet, main='MaxNet')
points(occs[ , c('longitude', 'latitude')])

# note the differences between the tuning of the two models...
# this is because maxnet() (used by trainMaxNet())
# uses an approximation:
# (note maxnet() calculates hinges and thresholds differently
# so we will turn them off)

# Fit the same simple model (linear + product + quadratic features only,
# hinge/threshold disabled) with maxnet() and with the Java MaxEnt via
# dismo, then compare their predictions point-for-point.
data(bradypus, package='maxnet')
presFlag <- bradypus$presence
envDf <- bradypus[ , 2:3] # easier to inspect betas
mnFit <- maxnet::maxnet(presFlag, envDf,
                        maxnet::maxnet.formula(presFlag, envDf, classes='lpq'))
mxFit <- dismo::maxent(envDf, presFlag,
                       args=c('linear=true', 'product=true', 'quadratic=true', 'hinge=false',
                              'threshold=false'))

predMx <- dismo::predict(mxFit, envDf)
predMn <- predict(mnFit, envDf, type='logistic')

# One-to-one scatter: points off the identity line show where the two
# implementations disagree.
par(mfrow=c(1, 1))
plot(predMx, predMn)
abline(0, 1)


## Compute: jackknife variable-importance run
# NOTE(review): `envs` is defined later in these notes (raster stack of
# predictors) — this chunk assumes it is already loaded. Likewise `occs`
# and `bg_xy` must hold coordinate tables here; confirm before running.
pred <- envs
options(java.parameters = "-Xmx2g" )  # give rJava 2 GB before MaxEnt starts
args <- c('linear=true', 'product=true', 'quadratic=true', 'hinge=false',
          'threshold=false',
          "Jackknife=true","applythresholdrule=Maximum training sensitivity plus specificity","threads=10")
mx2 <- dismo::maxent(pred, p=occs,a=bg_xy,
                     args= args,path="D:/XH/MAX")
## Inspect the binary-classification threshold via mx2's results table.
# FIX: the original had the bare symbol on its own line, which errors at
# run time ("object not found"); read it from the results matrix instead.
mx2@results["Maximum.training.sensitivity.plus.specificity.Cloglog.threshold", ]

pred_me <- predict(mx2, envs) # generate the prediction raster
pred_me

工作流2

# Study-region polygon (buffered box around the occurrences, see pp() below)
po_sa <- pp(xh_sa)

# Crop/mask one cheap layer first, then crop/mask the full stack to it
envs_sa <- mask(crop(envs$BIO4,po_sa),po_sa) %>% mask(crop(envs,.),.)

## Random background points excluding the occurrence cells:
bg.xy <- dismo::randomPoints(envs_sa$BIO4, 5000,p= xh_sa[,2:3])
# FIX: as.data.frame() has no `colnames` argument — the original's second
# positional argument was silently taken as `row.names` (and was NULL
# anyway). Name the columns explicitly instead.
bg_sa <- setNames(as.data.frame(bg.xy), c("long", "lat"))
## Assemble the SWD (samples-with-data) modelling object:
data_sa <- prepareSWD(species = "xh_sa species", 
                      p =xh_sa[,2:3] , a = bg_sa, 
                      env = envs_sa)


## Hyperparameter tuning with SDMtune:
# NOTE(review): get.checkerboard1() comes from ENMeval — a spatial
# checkerboard partition of presences/background for cross-validation.
sa_folds <- get.checkerboard1(occ = data_sa@coords[data_sa@pa == 1, ],
                              env = envs_sa,
                              bg.coords = data_sa@coords[data_sa@pa == 0, ],
                              aggregation.factor = 4)

# Baseline Maxnet model using the checkerboard folds
model <- train(method = "Maxnet", data = data_sa,folds = sa_folds)

# Genetic-algorithm search (40 generations) over regularization
# multipliers and feature-class combinations, optimizing AUC.
h <- list(reg = seq(0.5, 4, 0.5), fc = c("lq", "lh", "lqp", "lqph", "lqpht"))
om <- optimizeModel(model, hypers = h, metric = "auc", seed = 4,gen=40)
head(om@results)[1,]  # Best combinations

## Run the final model with the tuned parameters:
options(java.parameters = "-Xmx10g" )  # give rJava 10 GB before MaxEnt starts

## Extract the response (1/0) and predictor values from the SWD object:
pa_sa <- data_sa@pa
da_sa <- data_sa@data
## NOTE: the maximum-iterations setting must be passed to maxent() itself
## (`maximumiterations=`), not inside the `args` vector.
args <-  c('linear=true', 'product=false', 'quadratic=false', 'hinge=true',
           'threshold=false','betamultiplier= 1.5','randomseed=true','writebackgroundpredictions=true',
           "applythresholdrule=Maximum training sensitivity plus specificity","threads=10",
           'pictures=false','outputformat=cloglog','replicates=10','replicatetype=Bootstrap')

# FIX: the original created the Desktop folder but wrote MaxEnt output to
# "./sa_AP"; the evaluation code later reads the Desktop folder, so write
# there directly.
out_dir <- "C:/Users/admin/Desktop/sa_AP"
dir.create(out_dir, showWarnings = FALSE)

mx_sa <- dismo::maxent(x=da_sa,p=pa_sa,args= args,maximumiterations=1000,path=out_dir)
mx_sa@models[[1]]  # first of the 10 bootstrap replicates

工作流计划

## 包括以下几个部分:
## 导入数据
## 构建环境图层

## 数据预实验;即数据进行参数优化,使用sdmtune包;
# 采用随机抽样的方式进行运算,比enmeval效率更很多倍!

## 数据预实验之后,使用sdmtune自带的函数或者使用dismo包的函数设置参数解耦

## 跑结果十次;注意这里环境数据中背景点的设置和评估集的选择都很重要;

## 然后将测试评估结果,这次要学习,使用Proc、boyoc等等指数;

## 然后进行预测分析;

## 计算sdm投影的增加、缺失等等;

## 甚至计算质心的转移等等;

导入环境数据:

## Load occurrence data (one CSV per continent):
# NOTE(review): na/eu/au keep only the first three columns while as/sa keep
# all columns — confirm this asymmetry is intentional.
xh_na <- read.csv("D:/XH/xh/na.csv")[,1:3]
xh_as <- read.csv("D:/XH/xh/as.csv")
xh_eu <- read.csv("D:/XH/xh/eu.csv")[,1:3]
xh_au <- read.csv("D:/XH/xh/au.csv")[,1:3]
xh_sa <- read.csv("D:/XH/xh/sa.csv")

## Select environmental variables:
## Load all environmental layers into one raster stack:
tiffs <- list.files(path="D:/XH/third_env/envsV5tif/",pattern = "tif",full.names = T)
envs <- raster::stack(tiffs)

加载R包:

## Load R packages:
library(SDMtune)
library(ENMeval)
library(raster)
library(rgdal)
library(maps)
library(mapdata)
library(dismo)
library(rJava)
library(maptools)
library(jsonlite)
library(glmnet)
library(maxnet)
library(rasterVis)
library(ggplot2)
## Check that the MaxEnt Java jar is available to dismo
## (prints the MaxEnt version if it is):
dismo::maxent()
library(tidyverse )

为减少运算时间,构建范围:

## To reduce computation time, build a study extent:
## Rectangular buffer around the occurrence records. ####
#
# x      data frame with `longitude` and `latitude` columns
# buffer buffer width in degrees added on every side (default 5,
#        matching the original hard-coded value)
# Returns a SpatialPolygons rectangle enclosing the buffered points.
pp <- function(x, buffer = 5){
  occs <- x
  xmin <- min(occs$longitude)
  xmax <- max(occs$longitude)
  ymin <- min(occs$latitude)
  ymax <- max(occs$latitude)
  # FIX: the original's last two ring vertices used y = ymin (unbuffered),
  # so the polygon was not a rectangle and the southern edge lost the
  # buffer on the xmax side. All corners now use buffered coordinates.
  bb <- matrix(c(
    xmin - buffer, ymin - buffer,
    xmin - buffer, ymax + buffer,
    xmax + buffer, ymax + buffer,
    xmax + buffer, ymin - buffer,
    xmin - buffer, ymin - buffer
  ), ncol = 2, byrow = TRUE)
  bgExt <- sp::SpatialPolygons(list(sp::Polygons(list(sp::Polygon(bb)), 1)))
  return(bgExt)
}

参数优化,及模拟预实验;

## Parameter tuning and pilot modelling runs;
# Study-region polygon (buffered box around the occurrences, see pp() above)
po_sa <- pp(xh_sa)

# Crop/mask one cheap layer first, then crop/mask the full stack to it
envs_sa <- mask(crop(envs$BIO4,po_sa),po_sa) %>% mask(crop(envs,.),.)

## Random background points excluding the occurrence cells:
bg.xy <- dismo::randomPoints(envs_sa$BIO4, 5000,p= xh_sa[,2:3])
# FIX: as.data.frame() has no `colnames` argument — the original's second
# positional argument was silently taken as `row.names` (and was NULL
# anyway). Name the columns explicitly instead.
bg_sa <- setNames(as.data.frame(bg.xy), c("long", "lat"))
## Assemble the SWD (samples-with-data) modelling object:
data_sa <- prepareSWD(species = "xh_sa species", 
                      p =xh_sa[,2:3] , a = bg_sa, 
                      env = envs_sa)


## Hyperparameter tuning with SDMtune:
# NOTE(review): get.checkerboard1() comes from ENMeval — a spatial
# checkerboard partition of presences/background for cross-validation.
sa_folds <- get.checkerboard1(occ = data_sa@coords[data_sa@pa == 1, ],
                              env = envs_sa,
                              bg.coords = data_sa@coords[data_sa@pa == 0, ],
                              aggregation.factor = 4)

# Baseline Maxnet model using the checkerboard folds
model <- train(method = "Maxnet", data = data_sa,folds = sa_folds)

# Genetic-algorithm search (40 generations) over regularization
# multipliers and feature-class combinations, optimizing AUC.
h <- list(reg = seq(0.5, 4, 0.5), fc = c("lq", "lh", "lqp", "lqph", "lqpht"))
om <- optimizeModel(model, hypers = h, metric = "auc", seed = 4,gen=40)
head(om@results)[1,]  # Best combinations

## Run the final model with the tuned parameters:
options(java.parameters = "-Xmx10g" )  # give rJava 10 GB before MaxEnt starts

## Extract the response (1/0) and predictor values from the SWD object:
pa_sa <- data_sa@pa
da_sa <- data_sa@data
## NOTE: the maximum-iterations setting must be passed to maxent() itself
## (`maximumiterations=`), not inside the `args` vector.
args <-  c('linear=true', 'product=false', 'quadratic=false', 'hinge=true',
           'threshold=false','betamultiplier= 1.5','randomseed=true','writebackgroundpredictions=true',
           "applythresholdrule=Maximum training sensitivity plus specificity","threads=10",
           'pictures=false','outputformat=cloglog','replicates=10','replicatetype=Bootstrap')

# FIX: the original created the Desktop folder but wrote MaxEnt output to
# "./sa_AP"; the evaluation code later reads the Desktop folder, so write
# there directly.
out_dir <- "C:/Users/admin/Desktop/sa_AP"
dir.create(out_dir, showWarnings = FALSE)

mx_sa <- dismo::maxent(x=da_sa,p=pa_sa,args= args,maximumiterations=1000,path=out_dir)
mx_sa@models[[1]]  # first of the 10 bootstrap replicates

评估模型:

## Evaluate the models:
## Compute AUC / TSS
## Threshold: maximum of sensitivity + specificity

## AUC for each of the 10 bootstrap replicates:
# FIX: the original grew a list with rbind() inside the loop, then
# unwrapped it through unlist()/data.frame with an auto-generated column
# name. Preallocate a numeric vector instead.
aucc <- numeric(10)
for (i in seq_len(10)) {
  ev <- evaluate(p = data_sa@coords[data_sa@pa == 1, ],
                 a = data_sa@coords[data_sa@pa == 0, ],
                 model = mx_sa@models[[i]], x = envs_sa)
  aucc[i] <- ev@auc
}
sd(aucc)
mean(aucc)

## Re-evaluate for TSS and kappa:
## Threshold: maximum of sensitivity + specificity.
# NOTE(review): `t` masks base::t() for the rest of the session.
t <- evaluate(p=data_sa@coords[data_sa@pa==1,], a=data_sa@coords[data_sa@pa==0,],
              model = mx_sa@models[[1]],x=envs_sa)
# Printing the evaluation reports the threshold at max(sensitivity +
# specificity). (FIX: the original had this note fused onto a bare symbol
# line, which errors at run time.)
t

## max TPR+TNR at : 0.278496 
# File names of the per-replicate background/sample prediction CSVs written
# by the Java MaxEnt (replicates 0-9).
# FIX: the original spelled replicate 0 out by hand and pasted 1:9 —
# a single paste0 over 0:9 produces the identical vector.
nems1 <- paste0("species_", 0:9, "_backgroundPredictions")
nems_1 <- paste0("C:/Users/admin/Desktop/sa_AP/", nems1, ".csv")
nems2 <- paste0("species_", 0:9, "_samplePredictions")
nems_2 <- paste0("C:/Users/admin/Desktop/sa_AP/", nems2, ".csv")


# FIX: the original had a stray token `ge` (run-time error: object 'ge'
# not found) and setwd("") (error: cannot change working directory).
# Point the working directory at the MaxEnt output folder so that
# sink("tss_sa.txt") below writes somewhere predictable.
setwd("C:/Users/admin/Desktop/sa_AP")
## Compute TSS and kappa for one MaxEnt replicate from its prediction CSVs.
##
## y  replicate index (1-10) into the global nems_1/nems_2 file-name vectors
## n  number of pseudo-absences used for the background predictions
##    (informational only — kept for backward compatibility; all counts are
##    derived from the CSV contents)
## th cloglog threshold separating presence from absence (was hard-coded
##    0.27 inside the function; now a defaulted parameter)
calc_tss <- function(y, n = 5000, th = 0.27){
  backgroundpredictions <- read.csv(nems_1[y])

  #we need the last column so will set the number as x
  x <- length(backgroundpredictions)

  #extract the cloglog/logistic results
  backgroundclog <- backgroundpredictions[,x]

  #now read in the sample predictions for testing
  samplepredictions <- read.csv(nems_2[y])

  #we need the last column again of logistic or cloglog predictions so set a second x
  x2 <- length(samplepredictions)

  #extract the cloglog/logistic results for sample
  sampleclog <- samplepredictions[,x2]

  # Confusion-matrix statistics at threshold th.
  # NOTE(review): values exactly equal to th fall in neither bin, exactly
  # as in the original.
  TSS_calculations <- function (sample_clog, prediction_clog, n, th) {

    xx <- sum(sample_clog > th)       # true positives (samples above th)
    yy <- sum(prediction_clog > th)   # false positives (background above th)
    xxx <- sum(sample_clog < th)      # false negatives
    yyy <- sum(prediction_clog < th)  # true negatives

    ncount <- sum(xx,yy,xxx,yyy)

    overallaccuracy <- (xx + yyy)/ncount 
    sensitivity <- xx / (xx + xxx)
    specificity <- yyy / (yy + yyy)
    tss <- sensitivity + specificity - 1

    #kappa calculations (marginal totals of the confusion matrix)
    a <- xx + xxx
    b <- xx + yy
    c <- yy + yyy
    d <- xxx + yyy
    e <- a * b
    f <- c * d
    g <- e + f
    h <- g / (ncount * ncount)  # expected agreement by chance
    hup <- overallaccuracy - h
    hdown <- 1 - h

    kappa <- hup/hdown
    # NOTE(review): Po/Pe/Px1..Px3 recompute kappa a second way but are
    # never printed — kept unchanged from the original.
    Po <- (xx + yyy) / ncount
    Pe <- ((b/ncount) * (a/ncount)) + ((d/ncount) * (c/ncount))
    Px1 <- Po - Pe
    Px2 <- 1 - Pe
    Px3 <- Px1/Px2

    tx1 <- xx + yyy
    tx2 <- 2 * a * c
    tx3 <- a - c
    tx4 <- xx - yyy
    tx5 <- ncount * (( 2 * a ) - tx4)
    tx6 <- ncount * tx1

    kappamax <- (tx6 - tx2 - (tx3 * tx4)) / ((tx5 - tx3) - (tx3 * tx4))

    # FIX: the original wrote "/n" instead of "\n", so the report printed
    # literal "/n" tokens instead of line breaks (and misspelled
    # "predictions").
    cat(" Maxent results for model with\n", a, "test sample predictions\n",
        c, "background predictions\n\n TSS value:        ", tss,
        "\n Overall accuracy: ", overallaccuracy,
        "\n Sensitivity:      ", sensitivity,
        "\n Specificity:      ", specificity,
        "\n Kappa:            ", kappa,
        "\n Kappa max:        ", kappamax)


  }
  #run the function, the input values are the sampleclog values, then the background clog values, the sample number for the pseudo absences and then threshold value
  TSS_calculations(sampleclog,backgroundclog,n,th)
}  

# Write the TSS/kappa reports for all 10 replicates into a text file.
# (lapply's returned list of NULLs is auto-printed into the sink as well,
# exactly as in an interactive run.)
sink("tss_sa.txt")
lapply(1:10,calc_tss)
sink()

模型预测

## Visualize the modelled suitability:
# NOTE(review): `maxnet_model_sa` is not defined anywhere in these notes —
# presumably the tuned SDMtune model (see `model`/`om` above); confirm
# which object is intended before running.
map_sa <- predict(maxnet_model_sa, data = envs_sa, type = "cloglog")
plotPred(map_sa)
## Write the prediction raster to disk as GeoTIFF:
predict(maxnet_model_sa, data = envs_sa, 
        type = "cloglog", file = "C:/Users/admin/Desktop/sa_ap", format = "GTiff")

## For prettier maps, see the methods in the visualization folder.

变量二分类

## Binary classification of the prediction
## Build the background for 2-D niche analysis from the classified layer:
# NOTE(review): `aucs` is not created anywhere in these notes — presumably
# read from MaxEnt's maxentResults.csv; confirm before running.
order(as.data.frame(aucs$Training.AUC),decreasing=TRUE)  # FIX: TRUE, not T
ped_sa <- predict(mx_sa@models[[1]], envs_sa)

## Niche breadth via ENMTools:

raster.breadth(ped_sa)

plot(ped_sa)
## Inspect the evaluation results:
mean(aucs$Maximum.training.sensitivity.plus.specificity.Cloglog.threshold)


## Reclassify: below the 0.27 threshold -> NA, at/above -> 1.
## (The rcl vector is read as rows of (from, to, becomes) triplets.)
ef_sa <- reclassify(ped_sa,rcl= c(0,0.27,NA,0.27,Inf,1))


setwd("C:/Users/Administrator/Desktop/")
# Background points drawn only from cells classified as suitable (non-NA),
# excluding the occurrence locations themselves.
bg_enm_sa <- dismo::randomPoints(ef_sa,3000,p=xh_sa[,2:3])
bg_enm_sa <- as.data.frame(bg_enm_sa)

write.csv(bg_enm_sa,"./sa_enm_bg.csv")

writeRaster(ped_sa,"./sa_sdm.tif",format="GTiff")

工作流的一些补充

##  MAXENT
# Set up the data:
# NOTE(review): `rasters.selected`, `location.data`, `spoints` and `backgr`
# are not defined in these notes — confirm they are loaded beforehand.
rasters.final<-subset(rasters.selected,c("calcite", "chlorange","chlomean","cloudmean","cloudmin","damean", 
                                         "temperature", "nitrate", "parmax", "parmean", "phosphate", 
                                         "salinity", "silicate"))
rasters.final
# 5-fold split of the occurrence data.
# NOTE(review): `test`/`train` are created but never used below — the model
# call relies on the 'randomtestpoints=30' argument instead.
fold <- kfold(location.data, k=5)
test <- location.data[fold == 1, ]
train <- location.data[fold != 1, ]
# Run with explicit feature and output settings:
model.maxent <- maxent(
  x=rasters.final,
  p=spoints,
  a=backgr,
  args=c(
    'randomtestpoints=30',
    'betamultiplier=1',
    'linear=true',
    'quadratic=true',
    'product=true',
    'threshold=true',
    'hinge=true',
    'threads=2',
    'responsecurves=true',
    'jackknife=true',
    'askoverwrite=false'
  )
)

# Printing the model opens/points to MaxEnt's HTML report:
model.maxent
# variable contribution
plot(model.maxent)

采样偏差文件制作

## Load R packages and set the working environment
setwd("C:/Users/admin/Desktop/enmevaluate/enmevaluate")
pakegs <- c("rJava","ENMeval","raster","MASS")
# FIX: the original called slapply(), which does not exist; use lapply().
lapply(pakegs, library, character.only = TRUE)
## Load the data:
# NOTE(review): bio1..biocateg2 must already exist as raster layers —
# confirm they are loaded before this chunk runs.
env <- stack(bio1, bio5, bio6, bio7, bio8, bio12, bio16, bio17, biocateg1, biocateg2)
occ <- read.csv("occ.csv")[,-1]
## Rasterize the occurrences onto the env grid (cells with a record get 1):
occur.ras <- rasterize(occ, env, 1)
## plot(occur.ras)
presences <- which(values(occur.ras) == 1)   # cell indices holding occurrences
pres.locs <- coordinates(occur.ras)[presences, ] # their lon/lat coordinates
## 2-D kernel density of the occurrence locations = sampling-effort surface;
dens <- kde2d(pres.locs[,1], pres.locs[,2], n = c(nrow(occur.ras), ncol(occur.ras)), lims = c(extent(env)[1], extent(env)[2], extent(env)[3], extent(env)[4]))
# NOTE(review): raster() is normally called with just the kde2d list;
# confirm the extra `env` argument does what is intended here.
dens.ras <- raster(dens, env)
dens.ras2 <- resample(dens.ras, env)  # resample to the env grid resolution
#plot(dens.ras2)
## The bias file can be used in the MaxEnt GUI, or to draw background
## points directly as below;
writeRaster(dens.ras2, "biasfile.asc", overwrite = TRUE)
## Draw background points outside the occurrence cells, weighted by bias:
## `prob`: a vector of probability weights for obtaining the elements of the
## vector being sampled — i.e. each candidate cell gets a sampling weight,
## so the draw is non-uniform.
bg <- xyFromCell(dens.ras2, sample(which(!is.na(values(subset(env, 1)))), 5000, prob=values(dens.ras2)[!is.na(values(subset(env, 1)))]))

## (end of notes — GitBook search-widget export residue removed)