SDMTune 工作流


## test:
occs <- xh_as[,2:3]

## 定制研究范围和bg_points(bg_xy):############
## 使用box功能构建范围和背景点:
# Background selection technique chosen as Bounding Box.
occs <- xh_as[,2:3]
xmin <- min(occs$longitude)
xmax <- max(occs$longitude)
ymin <- min(occs$latitude)
ymax <- max(occs$latitude)
bb <- matrix(c(xmin, xmin, xmax, xmax, xmin, ymin, ymax, ymax, ymin, ymin), ncol=2)
bgExt <- sp::SpatialPolygons(list(sp::Polygons(list(sp::Polygon(bb)), 1)))
# 增加buffer:0.5度~50km etc
bgExt <- rgeos::gBuffer(bgExt, width = 0.5)
## 利用环境数据掩膜:去除水面分布背景范围:
## 仅加载其中一张环境图层,用于裁剪-掩膜:
worldmap_env <- boundaries(envs$GDM10,type="outer")
env_bgExt <- raster::mask(crop(worldmap_env, bgExt),bgExt)

## 生成不包含研究区域的随机背景点:
bg.xy <- dismo::randomPoints(env_bgExt, 5000,p= occs)
bg_xy <- as.data.frame(bg.xy,colnames(c("long","lat")))

## 另外需要构建一各mcp备用:
## install.packages("wallace")
source(system.file('shiny/funcs', 'functions.R', package = 'wallace'))
head(occs)
sp::coordinates(occs) <- ~ longitude + latitude
bg_mcp <- mcp(occs)
bg_mcp <- rgeos::gBuffer(bg_mcp, width = 0.5)






## 构建wsd ##########
## 如果存在分类变量,使用categorical
## prepareSWD会自动构建点值提取数据集;
data <- prepareSWD(species = "xh_as species", 
                   p = occs, a = bg_xy, 
                   env = envs)
### 关于swd数据的说明
## 数据构成:
# 使用@来查看结构;另外将存在点和北京店设置为1和0

## 保存SWD_DATA
# swd2csv(data, file_name = "data.csv")
# swd2csv(data, file_name = c("presence.csv", "background.csv"))


## 设置默认模型及投影: ##########
default_model <- train(method = "Maxent", 
                       data = data, fc = "lqph", reg = 1, iter = 500)


## 物种分布点——模型投影:
pred <- predict(default_model, data = data, type = "cloglog")

## 得到每个建模点的预测值:
p <- data@data[data@pa == 1, ]
pred <- predict(default_model, data = p, type = "cloglog")
head(pred)

## 分布背景环境投影:
## 注意这里的data = envs
map <- predict(default_model, data = envs, type = "cloglog")
## 导出为tif形式:
map <- predict(default_model, data = predictors, 
               type = "cloglog", file = "my_map", format = "GTiff")
## 投影可视化——连续型输出:
plotPred(map)
plotPred(map, lt = "Habitat\nsuitability", 
         colorramp = c("#2c7bb6", "#abd9e9", "#ffffbf", "#fdae61", "#d7191c"))

## 投影可视化--二值输出:
## 此部分暂时无法使用:
ths <- thresholds(default_model, type = "cloglog")
thresholds(default_model)
plotPA(map, th = ths[3, 2],
       filename = "my_pa_map", format = "GTiff")
envs
## 想办法:把二值化输出转化为1元输出,然后在enm上构建背景点
## 


## 评估模型auc/tss:#########
## 查看auc:
auc(default_model)
plotROC(default_model)
tss(default_model)
aicc(default_model, env = predictors)


## 分别使用数据层面、空间层面和环境层面的数据分割方法:
## 使用不同的策略优化模型: #### 
# install.packages("zeallot")
## 构建训练集和测试集:
library(zeallot)  # For unpacking assignment
c(train, test) %<-% trainValTest(data, test = 0.2, 
                                 only_presence = TRUE, seed = 25)

maxnet_model <- train("Maxnet", data = train)

## 输出AUC值:
cat("Training auc: ", auc(maxnet_model))
#> Training auc:  0.8752144
cat("Testing auc: ", auc(maxnet_model, test = test))
#> Testing auc:  0.8505888

## 同时可视化训练集和测试集的auc:
plotROC(maxnet_model, test = test)

## 另外分割位置决定了auc值,使用随机数来重新构建kfold测试:
output <- data.frame(matrix(NA, nrow = 10, ncol = 3)) # Create an empty data.frame
colnames(output) <- c("seed", "trainAUC", "testAUC")
set.seed(25)
seeds <- sample.int(1000, 10) # Create 10 different random seeds

for (i in 1:length(seeds)) { # Loop through the seeds
  c(train, test) %<-% trainValTest(data, test = 0.2, seed = seeds[i], only_presence = TRUE) # Make the train/test split
  m <- train("Maxnet", data = train) # train the model
  # Populate the output data.frame
  output[i, 1] <- seeds[i]
  output[i, 2] <- auc(m)
  output[i, 3] <- auc(m, test = test)
}
# Print the output
output
range(output[, 3])

## 数据交叉验证:
folds <- randomFolds(data, k = 4, only_presence = TRUE, seed = 25)
cv_model <- train("Maxnet", data = data, folds = folds)
cv_model
cat("Training AUC: ", auc(cv_model))
#> Training AUC:  0.8734151
cat("Testing AUC: ", auc(cv_model, test = TRUE))
#> Testing AUC:  0.8553538

## 空间交叉验证:
# install.packages("blockCV")
library(ENMeval)
## 除了block_folds,还提供get.checkerboard1
block_folds <- get.block(occ = data@coords[data@pa == 1, ], bg.coords = data@coords[data@pa == 0, ])
model <- train(method = "Maxnet", data = data, fc = "l", reg = 0.8, folds = block_folds)

## 环境交叉验证:
library(blockCV)
library(raster)
sp_df <- SpatialPointsDataFrame(data@coords, data = as.data.frame(data@pa), proj4string = crs(predictors))
e_folds <- envBlock(rasterLayer = predictors, speciesData = sp_df, species = "data@pa", k = 4, standardization = "standard", rasterBlock = FALSE, numLimit = 100)
model <- train(method = "Maxnet", data = data, fc = "l", reg = 0.8, folds = e_folds)




## 查看变量的重要性评估 ##########
## 
default_model @ model @ results 

## 查看maxent建模中贡献和置换重要性:
vi <- maxentVarImp(default_model)
vi
plotVarImp(vi[, 1:2])

## Permutation importance ——————这就是我需要的!
##sdmtune开发的方法是进行迭代置换重要性评估,
## 用于评估便利店 重要性对于物种建模分布而言:
library(zeallot)  # For unpacking assignment
c(train, test) %<-% trainValTest(data, test = 0.2, only_presence = TRUE, seed = 25)
maxnet_model <- train("Maxnet", data = train)
vi_maxnet <- varImp(maxnet_model, permut = 5)
vi_maxnet
plotVarImp(vi_maxnet)


## Jackknife test for variable importance
jk <- doJk(maxnet_model, metric = "auc", test = test)
jk
plotJk(jk, type = "train", ref = auc(maxnet_model))
plotJk(jk, type = "test", ref = auc(maxnet_model, test = test))

##绘制变量的响应曲线:
plotResponse(maxnet_model, var = "bio1", type = "cloglog",
             only_presence = TRUE, marginal = FALSE, rug = TRUE)

## 响应曲线显示预测的平均值以及一个标准偏差误差间隔
plotResponse(cv_model, var = "bio1", type = "cloglog", 
             only_presence = TRUE, marginal = TRUE, fun = mean, rug = TRUE)

## 输出建模结果:“
modelReport(maxnet_model, type = "cloglog", folder = "virtual-sp", test = test, 
            response_curves = TRUE, only_presence = TRUE, jk = TRUE, 
            env = predictors)




### 变量选择:#####
# 根据排列重要性或百分比贡献对变量进行排名(第二种方法仅适用于Maxent模型)
## 注意contribution具有非相同性,仅适用于maxent模型;
# 根据给定的方法和相关阈值,检查排名最重要的变量是否与其他变量高度相关。如果算法找到相关变量,则移至下一步,否则检查排名中的其他变量;
# 在相关变量之间进行留一刀切刀测试;
# 根据训练数据集上的给定指标,删除在删除后降低的模型性能越少的变量。
selected_variables_model <- varSel(maxnet_model,
                                   metric = "auc", test = test, 
                                   bg4cor = bg, method = "spearman",
                                   cor_th = 0.7, permut = 1)
#> Removed variables: bio16, bio6, bio7, bio8

##置换变量重要性:
### 注意这里是采用置换变量的方法设定阈值,并排除掉在反复迭代中阈值低于6的变量;
reduced_variables_model <- reduceVar(maxnet_model, th = 6, 
                                     metric = "auc", test = test, permut = 1)


## 此外,sdmtune还提供了其他方法用于训练模型,
model <- train("ANN", data = train, size = 10, folds = folds)

results matching ""

    No results matching ""