## MaxEnt workflow 1
### Model red-bellied lemurs (Eulemur rubriventer).
# Example data shipped with the package: Madagascar outline + lemur records.
data(mad0)
data(lemurs)
# Climate data: WorldClim bioclim layers, subset and cropped to Madagascar.
bios <- c(1, 5, 12, 15)
clim <- raster::getData('worldclim', var='bio', res=10)
clim <- raster::subset(clim, bios)
clim <- raster::crop(clim, mad0)
# Occurrence data for the focal species.
occs <- lemurs[lemurs$species == 'Eulemur rubriventer', ]
occsEnv <- raster::extract(clim, occs[ , c('longitude', 'latitude')])
# Background sites: too few cells in the cropped raster to place the usual
# 10000 background points, so use 2000.
bg <- 2000
# Fix: use the bg variable instead of repeating the literal 2000, and
# namespace extract() (tidyr::extract would mask it once tidyverse loads).
bgSites <- dismo::randomPoints(clim, bg)
bgEnv <- raster::extract(clim, bgSites)
# Collate presences (1) and background (0) with their predictor values.
presBg <- rep(c(1, 0), c(nrow(occs), nrow(bgSites)))
env <- rbind(occsEnv, bgEnv)
env <- cbind(presBg, env)
env <- as.data.frame(env)
preds <- paste0('bio', bios)
regMult <- 1:3 # default values are probably better, but these will be faster

# Calibrate MaxEnt model.
ent <- trainMaxEnt(
  data=env,
  resp='presBg',
  preds=preds,
  regMult=regMult,
  classes='lpq',
  verbose=TRUE
)

# Calibrate MaxNet model.
# Fix: this must run BEFORE the predictions below -- the original script
# referenced `net` before trainMaxNet() had been called.
net <- trainMaxNet(
  data=env,
  resp='presBg',
  preds=preds,
  regMult=regMult,
  classes='lpq',
  verbose=TRUE
)

# Predictions at the training/background sites.
# Fix: predict from the predictor table `env`, not the 0/1 response
# vector `presBg`, which carries no predictor columns.
predEnt <- raster::predict(ent, env)   # default: logistic output
predNet <- predict(net, newdata=env)   # default: cloglog output; see ?maxnet
predEnt <- predictMaxEnt(ent, env)     # default: cloglog output
predEnt <- predictEnmSdm(ent, env)     # default: cloglog output
predNet <- predictEnmSdm(net, env)     # default: cloglog output
# Prediction rasters.
# NOTE(review): the argument order differs between the two calls --
# predict(ent, clim) dispatches to dismo's predict(model, raster), while
# predict(clim, net) dispatches to raster::predict(raster, model); confirm
# both resolve as intended with the packages loaded here.
mapEnt <- predict(ent, clim, type='logistic')
mapNet <- predict(clim, net, type='logistic')
# Side-by-side maps with occurrences overlaid.
par(mfrow=c(1, 2))
plot(mapEnt, main='MaxEnt')
points(occs[ , c('longitude', 'latitude')])
plot(mapNet, main='MaxNet')
points(occs[ , c('longitude', 'latitude')])
# note the differences between the tuning of the two models...
# this is because maxnet() (used by trainMaxNet())
# uses an approximation:
# (note maxnet() calculates hinges and thresholds differently
# so we will turn them off)
data(bradypus, package='maxnet')
p <- bradypus$presence
data <- bradypus[ , 2:3] # easier to inspect betas
# Same feature classes (linear/product/quadratic) in both implementations.
mn <- maxnet::maxnet(p, data,
maxnet::maxnet.formula(p, data, classes='lpq'))
mx <- dismo::maxent(data, p,
args=c('linear=true', 'product=true', 'quadratic=true', 'hinge=false',
'threshold=false'))
# Compare the point predictions: a 1:1 line means the two agree.
predMx <- dismo::predict(mx, data)
predMn <- predict(mn, data, type='logistic')
par(mfrow=c(1, 1))
plot(predMx, predMn)
abline(0, 1)
## Compute: jackknife run.
# NOTE(review): `envs` is only built later in this script, and `occs`/`bg_xy`
# presumably hold the focal species' presences and background points for this
# run -- confirm they are in the session before executing this chunk.
pred <- envs
options(java.parameters = "-Xmx2g")  # give the JVM 2 GB before MaxEnt starts
args <- c('linear=true', 'product=true', 'quadratic=true', 'hinge=false',
  'threshold=false',
  "Jackknife=true", "applythresholdrule=Maximum training sensitivity plus specificity", "threads=10")
mx2 <- dismo::maxent(pred, p=occs, a=bg_xy,
  args=args, path="D:/XH/MAX")
## Inspect the binarization threshold chosen by MaxEnt.
# Fix: the original line was a bare symbol (a runtime error); the value lives
# in the model's results matrix under this row name.
mx2@results["Maximum.training.sensitivity.plus.specificity.Cloglog.threshold", ]
pred_me <- predict(mx2, envs)  # generate the prediction raster
pred_me
## Workflow 2
# Buffered study extent for the SA occurrences, then crop/mask all layers
# to it (BIO4 first to get the template, then the full stack).
po_sa <- pp(xh_sa)
envs_sa <- mask(crop(envs$BIO4, po_sa), po_sa) %>% mask(crop(envs, .), .)
## Random background points excluding the presence locations:
bg.xy <- dismo::randomPoints(envs_sa$BIO4, 5000, p=xh_sa[, 2:3])
# Fix: the original passed colnames() as as.data.frame()'s row.names argument,
# which silently did nothing; name the columns explicitly.
bg_sa <- as.data.frame(bg.xy)
colnames(bg_sa) <- c("long", "lat")
## Assemble the SWD modeling data set:
data_sa <- prepareSWD(species = "xh_sa species",
  p = xh_sa[, 2:3], a = bg_sa,
  env = envs_sa)
## Hyperparameter tuning with SDMtune:
sa_folds <- get.checkerboard1(occ = data_sa@coords[data_sa@pa == 1, ],
  env = envs_sa,
  bg.coords = data_sa@coords[data_sa@pa == 0, ],
  aggregation.factor = 4)
model <- train(method = "Maxnet", data = data_sa, folds = sa_folds)
h <- list(reg = seq(0.5, 4, 0.5), fc = c("lq", "lh", "lqp", "lqph", "lqpht"))
om <- optimizeModel(model, hypers = h, metric = "auc", seed = 4, gen = 40)
head(om@results)[1, ] # best hyperparameter combination
## Run the final model with the tuned parameters:
options(java.parameters = "-Xmx10g")
## Presence/background labels and the predictor table:
pa_sa <- data_sa@pa
da_sa <- data_sa@data
## NOTE: the maximum iteration count must be passed to maxent() itself
## (maximumiterations below), not inside the `args` vector.
args <- c('linear=true', 'product=false', 'quadratic=false', 'hinge=true',
  'threshold=false', 'betamultiplier= 1.5', 'randomseed=true', 'writebackgroundpredictions=true',
  "applythresholdrule=Maximum training sensitivity plus specificity", "threads=10",
  'pictures=false', 'outputformat=cloglog', 'replicates=10', 'replicatetype=Bootstrap')
# Fix: create and use the SAME output directory -- the original created a
# Desktop folder but wrote results to ./sa_AP in the working directory,
# while the evaluation code later reads from the Desktop path.
out_dir <- "C:/Users/admin/Desktop/sa_AP"
dir.create(out_dir, showWarnings = FALSE)
mx_sa <- dismo::maxent(x=da_sa, p=pa_sa, args=args, maximumiterations=1000, path=out_dir)
mx_sa@models[[1]]
## Workflow plan
## This script covers the following parts:
## - import the data
## - build the environmental layers
## - pilot run: hyperparameter tuning with the SDMtune package
##   (random sampling makes this many times faster than ENMeval)
## - after tuning, set the parameters either with SDMtune's own functions
##   or with the dismo package
## - run the model ten times; the choice of background points and of the
##   evaluation set matters a lot here
## - evaluate the results (learn to use pROC, the Boyce index, etc.)
## - make the projections
## - compute gains/losses of the projected distribution
## - optionally compute centroid shifts, etc.
## Import data:
## Import data:
# Occurrence CSVs per continent; most keep only the first three columns
# (presumably species / longitude / latitude -- confirm the file layout).
xh_na <- read.csv("D:/XH/xh/na.csv")[,1:3]
# NOTE(review): as.csv and sa.csv are read WITHOUT [,1:3], unlike the other
# continents -- confirm whether that asymmetry is intentional.
xh_as <- read.csv("D:/XH/xh/as.csv")
xh_eu <- read.csv("D:/XH/xh/eu.csv")[,1:3]
xh_au <- read.csv("D:/XH/xh/au.csv")[,1:3]
xh_sa <- read.csv("D:/XH/xh/sa.csv")
## Select environmental variables:
## load every environmental layer found in the folder into one stack:
tiffs <- list.files(path="D:/XH/third_env/envsV5tif/",pattern = "tif",full.names = T)
envs <- raster::stack(tiffs)
## Load R packages:
## Load R packages:
# NOTE(review): load order matters for function masking -- tidyverse is
# loaded last (below), so dplyr::select()/tidyr::extract() mask the raster
# versions; use raster:: explicitly where needed.
library(SDMtune)
library(ENMeval)
library(raster)
library(rgdal)
library(maps)
library(mapdata)
library(dismo)
library(rJava)
library(maptools)
library(jsonlite)
library(glmnet)
library(maxnet)
library(rasterVis)
library(ggplot2)
## Check that the MaxEnt Java jar is available to dismo:
dismo::maxent()
library(tidyverse )
## To reduce computation time, restrict the modeling extent:
## build a 5-degree buffer rectangle around the occurrences ####
## Build a rectangular background extent with a 5-degree buffer around the
## occurrence records.
##
## x : data frame with `longitude` and `latitude` columns.
## Returns a SpatialPolygons rectangle spanning the occurrences +/- 5 degrees.
pp <- function(x){
  occs <- x
  xmin <- min(occs$longitude)
  xmax <- max(occs$longitude)
  ymin <- min(occs$latitude)
  ymax <- max(occs$latitude)
  # Corner coordinates of the buffered box (column 1 = x, column 2 = y).
  # Fix: the original mixed `ymin` and `ymin - 5` in the southern corners, so
  # the bottom edge of the "5-degree" buffer was not actually buffered and the
  # ring was inconsistent; use ymin - 5 throughout.
  bb <- matrix(c(xmin - 5, xmin - 5, xmax + 5, xmax + 5, xmin - 5,
                 ymin - 5, ymax + 5, ymax + 5, ymin - 5, ymin - 5), ncol = 2)
  bgExt <- sp::SpatialPolygons(list(sp::Polygons(list(sp::Polygon(bb)), 1)))
  return(bgExt)
}
## Hyperparameter tuning and pilot modeling:
# Buffered study extent for the SA occurrences, then crop/mask all layers
# to it (BIO4 first to get the template, then the full stack).
# NOTE(review): this chunk duplicates the earlier "Workflow 2" section.
po_sa <- pp(xh_sa)
envs_sa <- mask(crop(envs$BIO4, po_sa), po_sa) %>% mask(crop(envs, .), .)
## Random background points excluding the presence locations:
bg.xy <- dismo::randomPoints(envs_sa$BIO4, 5000, p=xh_sa[, 2:3])
# Fix: the original passed colnames() as as.data.frame()'s row.names argument,
# which silently did nothing; name the columns explicitly.
bg_sa <- as.data.frame(bg.xy)
colnames(bg_sa) <- c("long", "lat")
## Assemble the SWD modeling data set:
data_sa <- prepareSWD(species = "xh_sa species",
  p = xh_sa[, 2:3], a = bg_sa,
  env = envs_sa)
## Hyperparameter tuning with SDMtune:
sa_folds <- get.checkerboard1(occ = data_sa@coords[data_sa@pa == 1, ],
  env = envs_sa,
  bg.coords = data_sa@coords[data_sa@pa == 0, ],
  aggregation.factor = 4)
model <- train(method = "Maxnet", data = data_sa, folds = sa_folds)
h <- list(reg = seq(0.5, 4, 0.5), fc = c("lq", "lh", "lqp", "lqph", "lqpht"))
om <- optimizeModel(model, hypers = h, metric = "auc", seed = 4, gen = 40)
head(om@results)[1, ] # best hyperparameter combination
## Run the final model with the tuned parameters:
options(java.parameters = "-Xmx10g")
## Presence/background labels and the predictor table:
pa_sa <- data_sa@pa
da_sa <- data_sa@data
## NOTE: the maximum iteration count must be passed to maxent() itself
## (maximumiterations below), not inside the `args` vector.
args <- c('linear=true', 'product=false', 'quadratic=false', 'hinge=true',
  'threshold=false', 'betamultiplier= 1.5', 'randomseed=true', 'writebackgroundpredictions=true',
  "applythresholdrule=Maximum training sensitivity plus specificity", "threads=10",
  'pictures=false', 'outputformat=cloglog', 'replicates=10', 'replicatetype=Bootstrap')
# Fix: create and use the SAME output directory -- the original created a
# Desktop folder but wrote results to ./sa_AP in the working directory,
# while the evaluation code later reads from the Desktop path.
out_dir <- "C:/Users/admin/Desktop/sa_AP"
dir.create(out_dir, showWarnings = FALSE)
mx_sa <- dismo::maxent(x=da_sa, p=pa_sa, args=args, maximumiterations=1000, path=out_dir)
mx_sa@models[[1]]
## Evaluate the model:
## Evaluate the model:
## compute AUC / TSS
## and the binarization threshold (maximum sensitivity + specificity).
## AUC across the 10 bootstrap replicates:
# Fix: preallocate a numeric vector instead of growing a list with rbind()
# inside the loop (and avoid the mangled unlist() column name).
auc_vals <- numeric(10)
for (i in seq_len(10)) {
  ev <- evaluate(p = data_sa@coords[data_sa@pa == 1, ],
                 a = data_sa@coords[data_sa@pa == 0, ],
                 model = mx_sa@models[[i]], x = envs_sa)
  auc_vals[i] <- ev@auc
}
aucc <- data.frame(auc = auc_vals)
sd(aucc$auc)
mean(aucc$auc)
## Rebuild the evaluation for TSS and kappa:
## get the threshold maximizing sensitivity + specificity;
# NOTE(review): this binds `t`, shadowing base::t() for the rest of the
# session -- consider a different name if matrix transposes are needed later.
t <- evaluate(p=data_sa@coords[data_sa@pa==1,], a=data_sa@coords[data_sa@pa==0,],
model = mx_sa@models[[1]],x=envs_sa)
## Printing `t` above reports the maximum sensitivity + specificity threshold;
## max TPR+TNR at : 0.278496
# Paths to the per-replicate prediction CSVs written by MaxEnt
# (species_0 ... species_9). paste0() vectorizes over 0:9, so there is
# no need to special-case the first file as the original did.
nems1 <- paste0("species_", 0:9, "_backgroundPredictions")
nems_1 <- paste0("C:/Users/admin/Desktop/sa_AP/", nems1, ".csv")
nems2 <- paste0("species_", 0:9, "_samplePredictions")
nems_2 <- paste0("C:/Users/admin/Desktop/sa_AP/", nems2, ".csv")
# (review) A stray token `ge` stood here in the original; it would error at
# runtime and has been removed.
# Fix: setwd("") is an error in R. Point the working directory at the MaxEnt
# output folder so sink("tss_sa.txt") below writes next to the CSVs.
# TODO(review): confirm this is the intended working directory.
setwd("C:/Users/admin/Desktop/sa_AP")
## The function below only needs the threshold value adjusted per model run.
## Compute TSS / kappa statistics for one MaxEnt replicate from the
## prediction CSVs written by MaxEnt, and print a summary report.
##
## y  : replicate index (1..10) into the file-name vectors nems_1 / nems_2.
## n  : number of background (pseudo-absence) points used by MaxEnt
##      (kept for compatibility; the counts are derived from the files).
## th : binarization threshold (maximum sensitivity + specificity).
calc_tss <- function(y, n = 5000, th = 0.27){
  # Background predictions for replicate y; the cloglog/logistic values are
  # in the last column.
  backgroundpredictions <- read.csv(nems_1[y])
  backgroundclog <- backgroundpredictions[, ncol(backgroundpredictions)]
  # Test-sample predictions for replicate y; again the last column.
  samplepredictions <- read.csv(nems_2[y])
  sampleclog <- samplepredictions[, ncol(samplepredictions)]

  TSS_calculations <- function (sample_clog, prediction_clog, n, th) {
    # Confusion counts at threshold th.
    # NOTE(review): values exactly equal to th fall in neither class
    # (original behavior kept).
    xx <- sum(sample_clog > th)       # presences predicted present
    yy <- sum(prediction_clog > th)   # background predicted present
    xxx <- sum(sample_clog < th)      # presences predicted absent
    yyy <- sum(prediction_clog < th)  # background predicted absent
    ncount <- sum(xx, yy, xxx, yyy)
    overallaccuracy <- (xx + yyy)/ncount
    sensitivity <- xx / (xx + xxx)
    specificity <- yyy / (yy + yyy)
    tss <- sensitivity + specificity - 1
    # Kappa calculations.
    a <- xx + xxx
    b <- xx + yy
    c <- yy + yyy
    d <- xxx + yyy
    e <- a * b
    f <- c * d
    g <- e + f
    h <- g / (ncount * ncount)
    hup <- overallaccuracy - h
    hdown <- 1 - h
    kappa <- hup/hdown
    Po <- (xx + yyy) / ncount
    Pe <- ((b/ncount) * (a/ncount)) + ((d/ncount) * (c/ncount))
    Px1 <- Po - Pe
    Px2 <- 1 - Pe
    Px3 <- Px1/Px2
    tx1 <- xx + yyy
    tx2 <- 2 * a * c
    tx3 <- a - c
    tx4 <- xx - yyy
    tx5 <- ncount * (( 2 * a ) - tx4)
    tx6 <- ncount * tx1
    kappamax <- (tx6 - tx2 - (tx3 * tx4)) / ((tx5 - tx3) - (tx3 * tx4))
    # Fix: the original wrote "/n" (a literal slash-n) instead of the "\n"
    # newline escape throughout this report; also fixed the "predicitons"
    # typo in the output text.
    cat(" Maxent results for model with\n", a, "test sample predictions\n",
        c, "background predictions\n\n TSS value: ", tss,
        "\n Overall accuracy: ", overallaccuracy,
        "\n Sensitivity: ", sensitivity,
        "\n Specificity: ", specificity,
        "\n Kappa: ", kappa,
        "\n Kappa max: ", kappamax)
  }
  # Run the calculation: sample values, background values, pseudo-absence
  # count, and the threshold.
  TSS_calculations(sampleclog, backgroundclog, n, th)
}
# Redirect console output so each replicate's TSS/kappa report printed by
# calc_tss() is captured into a text file in the working directory.
sink("tss_sa.txt")
lapply(1:10,calc_tss)
sink()
## Model prediction
## Visualize the model prediction:
# NOTE(review): `maxnet_model_sa` is not defined anywhere in this script --
# presumably the tuned SDMtune model (`model`, or the best model selected by
# `om`); confirm before running.
map_sa <- predict(maxnet_model_sa, data = envs_sa, type = "cloglog")
plotPred(map_sa)
## Write the prediction out as a raster layer:
predict(maxnet_model_sa, data = envs_sa,
type = "cloglog", file = "C:/Users/admin/Desktop/sa_ap", format = "GTiff")
## For prettier maps, see the methods in the visualization folder.
## Binarize the prediction
## Binarize the prediction.
## Build the background for 2-D niche analysis from the classified layer:
# NOTE(review): `aucs` is not created in this script -- presumably the
# maxentResults.csv table read from the MaxEnt output folder; confirm.
order(as.data.frame(aucs$Training.AUC),decreasing=T)
# Prediction raster from the first bootstrap replicate.
ped_sa <- predict(mx_sa@models[[1]], envs_sa)
## Niche breadth via ENMTools:
raster.breadth(ped_sa)
plot(ped_sa)
## Inspect the evaluation results:
mean(aucs$Maximum.training.sensitivity.plus.specificity.Cloglog.threshold)
## Reclassify: cells below the 0.27 threshold become NA, the rest become 1.
ef_sa <- reclassify(ped_sa,rcl= c(0,0.27,NA,0.27,Inf,1))
# NOTE(review): this path uses "Administrator" while earlier paths use
# "admin" -- confirm which account is intended.
setwd("C:/Users/Administrator/Desktop/")
# 3000 random background points inside the binarized suitable area,
# excluding the presence locations.
bg_enm_sa <- dismo::randomPoints(ef_sa,3000,p=xh_sa[,2:3])
bg_enm_sa <- as.data.frame(bg_enm_sa)
write.csv(bg_enm_sa,"./sa_enm_bg.csv")
writeRaster(ped_sa,"./sa_sdm.tif",format="GTiff")
## Some additions to the workflow
## MAXENT
# Set up the data:
# NOTE(review): `rasters.selected`, `location.data`, `spoints`, and `backgr`
# are not defined in this script; this chunk assumes they exist upstream.
rasters.final<-subset(rasters.selected,c("calcite", "chlorange","chlomean","cloudmean","cloudmin","damean",
"temperature", "nitrate", "parmax", "parmean", "phosphate",
"salinity", "silicate"))
rasters.final
# 5-fold split of the occurrence data.
# NOTE(review): `fold`, `test`, and `train` are never used afterwards --
# the maxent() call below uses 'randomtestpoints' instead; confirm intent.
fold <- kfold(location.data, k=5)
test <- location.data[fold == 1, ]
train <- location.data[fold != 1, ]
# Run with explicit MaxEnt options:
model.maxent <- maxent(
x=rasters.final,
p=spoints,
a=backgr,
args=c(
'randomtestpoints=30',
'betamultiplier=1',
'linear=true',
'quadratic=true',
'product=true',
'threshold=true',
'hinge=true',
'threads=2',
'responsecurves=true',
'jackknife=true',
'askoverwrite=false'
)
)
# Printing the model object points to the HTML report:
model.maxent
# Variable contribution plot:
plot(model.maxent)
## Building the sampling-bias file
# Sampling-bias surface: kernel density of presence locations, written out
# as an ASC bias file and used to draw bias-weighted background points.
setwd("C:/Users/admin/Desktop/enmevaluate/enmevaluate")
pakegs <- c("rJava","ENMeval","raster","MASS")
# Fix: the original called slapply(), which does not exist; lapply() is meant
# (also TRUE instead of the reassignable T).
lapply(pakegs, library, character.only = TRUE)
# NOTE(review): bio1 ... biocateg2 must already exist as rasters in the
# session -- they are not created in this script; confirm upstream.
env <- stack(bio1, bio5, bio6, bio7, bio8, bio12, bio16, bio17, biocateg1, biocateg2)
occ <- read.csv("occ.csv")[,-1]
# Rasterize presences onto the predictor grid (value 1 in occupied cells).
occur.ras <- rasterize(occ, env, 1)
presences <- which(values(occur.ras) == 1)
pres.locs <- coordinates(occur.ras)[presences, ]
# 2-D kernel density of the presence locations over the study extent.
dens <- kde2d(pres.locs[,1], pres.locs[,2],
              n = c(nrow(occur.ras), ncol(occur.ras)),
              lims = c(extent(env)[1], extent(env)[2], extent(env)[3], extent(env)[4]))
# NOTE(review): raster() builds a layer from the kde2d list; confirm the
# second argument `env` is intended here (resample() below re-aligns anyway).
dens.ras <- raster(dens, env)
dens.ras2 <- resample(dens.ras, env)  # align the bias raster to the grid
writeRaster(dens.ras2, "biasfile.asc", overwrite = TRUE)
# Draw 5000 background points weighted by the bias surface, restricted to
# non-NA cells of the first predictor layer.
bg <- xyFromCell(dens.ras2,
                 sample(which(!is.na(values(subset(env, 1)))), 5000,
                        prob = values(dens.ras2)[!is.na(values(subset(env, 1)))]))