论文需要复现代码,并且在该代码上进行改动,请大家帮我把这两段代码转换成python谢谢大家!
package skeel.classification.decision_trees
/**
 * Rank Entropy-Based Decision Trees for Monotonic Classification (REMT).
 *
 * Hu, Q., Che, X., Zhang, L., Zhang, D., Guo, M., & Yu, D. (2011). Rank entropy-based decision trees for monotonic classification.
 * IEEE Transactions on Knowledge and Data Engineering, 24(11), 2052-2064.
 *
 * @param epsilon        minimum rank mutual information a cut must reach to keep splitting
 * @param minSamplesLeaf minimum number of samples per leaf (forwarded to MonDT)
 * @param prune          whether to prune the tree after construction (forwarded to MonDT)
 * @author sergiogvz
 */
class REMT(epsilon: Double = 0.01, minSamplesLeaf: Int = 2, prune: Boolean = false) extends MonDT(minSamplesLeaf, prune) {

  // Rank mutual information of the best cut found by the most recent splittingRule() call.
  protected var cutRMI: Double = -1

  /** Stopping criterion: stop splitting once the best cut's RMI falls below epsilon. */
  def stopCriterionCut(): Boolean = cutRMI < epsilon

  /**
   * Selects the split maximizing rank mutual information over all attributes.
   *
   * @return (attribute index, cut value) of the best split; also caches its RMI in cutRMI
   */
  def splittingRule(): (Int, Double) = {
    val attRMI = for (att <- 0 until nAttributes) yield {
      // Candidate cuts: distinct attribute values in ascending order, excluding the
      // largest (a cut at the maximum would leave one branch empty).
      val possibleCuts = orderedX(att).map(Xtrain(_)(att)).distinct.dropRight(1)
      if (possibleCuts.nonEmpty)
        possibleCuts.zip(possibleCuts.map(rmi(att, _))).maxBy(_._2)
      else (-1.0, -1.0) // constant attribute: no valid cut
    }
    val cutRmiAtt = attRMI.zipWithIndex.maxBy(_._1._2)
    cutRMI = cutRmiAtt._1._2
    (cutRmiAtt._2, cutRmiAtt._1._1)
  }

  /** Factory for subtrees: a fresh REMT sharing this tree's hyper-parameters. */
  def generateTree(): MonDT = new REMT(epsilon, minSamplesLeaf, prune)

  /**
   * Rank mutual information between the binary split at cutValue on attribute att
   * and the class labels (Hu et al., 2011).
   */
  private def rmi(att: Int, cutValue: Double): Double = {
    // 1 = left branch (value <= cutValue), 2 = right branch.
    val splitVector = for (i <- Xtrain.indices) yield
      if (Xtrain(i)(att) <= cutValue) 1
      else 2
    var sum = 0.0
    for (i <- Xtrain.indices) {
      // Sizes of the dominating sets of sample i w.r.t. the split, the class, and both.
      var countAtt, countClass, countAttClass = 0
      for (j <- Xtrain.indices) {
        val ilsj = splitVector(i) <= splitVector(j)
        val ilsjClass = Ytrain(i) <= Ytrain(j)
        if (ilsj) countAtt += 1
        if (ilsjClass) countClass += 1
        if (ilsj && ilsjClass) countAttClass += 1
      }
      // countAttClass >= 1 always (j == i satisfies both relations), so the log is defined.
      // Promote the numerator to Double as well: countAtt * countClass as Int overflows
      // once the training set exceeds ~46341 samples.
      sum += Math.log((countAtt.toDouble * countClass) / (Xtrain.length.toDouble * countAttClass))
    }
    -sum / Xtrain.length
  }
}
package skeel.classification
/**
 * Baseline classifier. Every classifier must extend this trait.
 *
 * @author sergiogvz
 */
trait Classifier {

  // Number of distinct class labels; set by fit().
  protected var nClasses = 0
  // Number of input attributes; set by fit().
  protected var nAttributes = 0

  /**
   * Records the dataset dimensions. Subclasses extend this with actual training.
   *
   * @param X        training samples, one row per sample (assumed non-empty and rectangular)
   * @param y        class label of each sample, encoded as 0-based ints
   * @param nClassesP number of classes; -1 (default) infers it as max(y) + 1
   */
  def fit(X: Array[Array[Double]], y: Array[Int], nClassesP: Int = -1): Unit = {
    nAttributes = X(0).length
    nClasses = if (nClassesP == -1) y.max + 1 else nClassesP
  }

  /** Predicts the class of a single sample as the argmax of predictProba. */
  def predict(x: Array[Double]): Int = {
    val probs = predictProba(x)
    var maxIndex = 0
    for (i <- 1 until probs.length)
      if (probs(i) > probs(maxIndex)) maxIndex = i
    maxIndex
  }

  /** Predicts the class of every sample in X. */
  def predict(X: Array[Array[Double]]): Array[Int] = X.map(predict(_))

  /** Class-membership probabilities for a single sample; implemented by subclasses. */
  def predictProba(x: Array[Double]): Array[Double]

  /** Class-membership probabilities for every sample in X. */
  def predictProba(X: Array[Array[Double]]): Array[Array[Double]] = X.map(predictProba(_))
}
import math
from typing import List, Tuple
class REMT(Classifier):
    """Rank Entropy-Based Decision Tree for Monotonic Classification.

    Hu, Q., Che, X., Zhang, L., Zhang, D., Guo, M., & Yu, D. (2011).
    Rank entropy-based decision trees for monotonic classification.
    IEEE Transactions on Knowledge and Data Engineering, 24(11), 2052-2064.
    """

    def __init__(self, epsilon: float = 0.01, minSamplesLeaf: int = 2, prune: bool = False):
        self.epsilon = epsilon              # minimum RMI a cut must reach to keep splitting
        self.minSamplesLeaf = minSamplesLeaf
        self.prune = prune
        self.cutRMI = -1                    # RMI of the best cut found by splittingRule()

    def stopCriterionCut(self) -> bool:
        """Stop splitting once the best cut's RMI falls below epsilon."""
        return self.cutRMI < self.epsilon

    def splittingRule(self) -> Tuple[int, float]:
        """Select the split maximizing rank mutual information over all attributes.

        Returns (attribute index, cut value) of the best split; its RMI is
        cached in self.cutRMI for stopCriterionCut().
        """
        attRMI = []
        for att in range(self.nAttributes):
            # Candidate cuts: distinct attribute values in ascending order, excluding
            # the largest (a cut at the maximum would leave one branch empty).
            # NOTE(review): mirrors the Scala original's
            # orderedX(att).map(Xtrain(_)(att)).distinct.dropRight(1); the previous
            # translation subtracted a single row's value from an unsorted set.
            possibleCuts = sorted({row[att] for row in self.Xtrain})[:-1]
            if possibleCuts:
                attRMI.append(max(((c, self.rmi(att, c)) for c in possibleCuts),
                                  key=lambda pair: pair[1]))
            else:
                attRMI.append((-1.0, -1.0))  # constant attribute: no valid cut
        cutRmiAtt = max(enumerate(attRMI), key=lambda pair: pair[1][1])
        self.cutRMI = cutRmiAtt[1][1]
        return cutRmiAtt[0], cutRmiAtt[1][0]

    def generateTree(self) -> "REMT":
        """Factory for subtrees: a fresh REMT sharing this tree's hyper-parameters."""
        return REMT(self.epsilon, self.minSamplesLeaf, self.prune)

    def rmi(self, att: int, cutValue: float) -> float:
        """Rank mutual information between the binary split at cutValue on
        attribute att and the class labels (Hu et al., 2011)."""
        # 1 = left branch (value <= cutValue), 2 = right branch.
        splitVector = [1 if row[att] <= cutValue else 2 for row in self.Xtrain]
        n = len(self.Xtrain)
        total = 0.0
        for i in range(n):
            # Sizes of the dominating sets of sample i w.r.t. split, class, and both.
            countAtt = countClass = countAttClass = 0
            for j in range(n):
                ilsj = splitVector[i] <= splitVector[j]
                ilsjClass = self.Ytrain[i] <= self.Ytrain[j]
                if ilsj:
                    countAtt += 1
                if ilsjClass:
                    countClass += 1
                if ilsj and ilsjClass:
                    countAttClass += 1
            # countAttClass >= 1 always (j == i satisfies both relations),
            # so the log argument is strictly positive.
            total += math.log((countAtt * countClass) / (n * countAttClass))
        return -total / n
class Classifier:
    """Baseline classifier. Every classifier must extend this class."""

    def __init__(self):
        self.nClasses = 0     # number of distinct class labels; set by fit()
        self.nAttributes = 0  # number of input attributes; set by fit()

    def fit(self, X: List[List[float]], y: List[int], nClassesP: int = -1):
        """Record the dataset dimensions; subclasses extend this with actual training.

        X: training samples, one row per sample (assumed non-empty and rectangular).
        y: 0-based integer class label of each sample.
        nClassesP: number of classes; -1 (default) infers it as max(y) + 1.
        """
        self.nAttributes = len(X[0])
        self.nClasses = max(y) + 1 if nClassesP == -1 else nClassesP

    def predict(self, x: List[float]) -> int:
        """Predict the class of a single sample as the argmax of predictProba."""
        probs = self.predictProba(x)
        maxIndex = 0
        for i in range(1, len(probs)):
            if probs[i] > probs[maxIndex]:
                maxIndex = i
        return maxIndex

    def predictMultiple(self, X: List[List[float]]) -> List[int]:
        """Predict the class of every sample in X."""
        return [self.predict(x) for x in X]

    def predictProba(self, x: List[float]) -> List[float]:
        """Class-membership probabilities for a single sample.

        Abstract: raise instead of silently returning None (which previously
        made predict() fail later with a confusing len(None) TypeError).
        """
        raise NotImplementedError("subclasses must implement predictProba")

    def predictProbaMultiple(self, X: List[List[float]]) -> List[List[float]]:
        """Class-membership probabilities for every sample in X."""
        return [self.predictProba(x) for x in X]