将Scala转换为Python

论文需要复现代码,并且要在该代码基础上进行改动,请大家帮我把这两段代码转换成Python,谢谢大家!

package skeel.classification.decision_trees

/**
  * Rank Entropy-Based Decision Trees for Monotonic Classification
  *
  * Hu, Q., Che, X., Zhang, L., Zhang, D., Guo, M., & Yu, D. (2011). Rank entropy-based decision trees for monotonic classification. 
  * IEEE Transactions on Knowledge and Data Engineering, 24(11), 2052-2064.
  *
  * @author sergiogvz
  */
class REMT(epsilon:Double=0.01, minSamplesLeaf:Int=2, prune:Boolean=false) extends MonDT(minSamplesLeaf, prune) {

  // Rank mutual information (RMI) of the best cut found by the last splittingRule call.
  protected var cutRMI:Double = -1

  /** Stop splitting this node when the best cut's rank mutual information drops below epsilon. */
  def stopCriterionCut():Boolean = {
    cutRMI < epsilon
  }

  /**
    * Selects the (attribute, cut value) pair that maximizes rank mutual information.
    *
    * Candidate cuts for an attribute are its distinct values in ascending order,
    * excluding the largest (no sample can fall strictly above the maximum).
    *
    * @return (attribute index, cut value); the RMI of the chosen cut is stored
    *         in cutRMI for the stopping criterion.
    */
  def splittingRule():(Int,Double) = {

    // Best (cut, RMI) per attribute.
    val attRMI = for(att <- 0 until nAttributes) yield{
      val possibleCuts = orderedX(att).map(Xtrain(_)(att)).distinct.dropRight(1)

      if(possibleCuts.nonEmpty)
        possibleCuts.zip(possibleCuts.map(rmi(att,_))).maxBy(_._2)
      else (-1.0,-1.0)   // single distinct value: nothing to split on
    }

    // Attribute whose best cut has the highest RMI overall.
    val cutRmiAtt = attRMI.zipWithIndex.maxBy(_._1._2)

    cutRMI = cutRmiAtt._1._2

    (cutRmiAtt._2,cutRmiAtt._1._1)
  }

  /** Child-node factory: a fresh tree with the same hyper-parameters. */
  def generateTree():MonDT = {
    new REMT(epsilon, minSamplesLeaf,prune)
  }

  /**
    * Rank mutual information between the binary split induced by (att, cutValue)
    * and the class labels (Hu et al., 2011).
    */
  private def rmi(att:Int, cutValue:Double):Double = {
    // Assign each sample to the left (1) or right (2) side of the cut.
    val splitVector = for (i <- Xtrain.indices) yield
      if (Xtrain(i)(att) <= cutValue) 1
      else 2

    var countAtt, countClass, countAttClass:Int = 0

    var rmi = 0.0

    for(i <- Xtrain.indices) {
      countAtt=0
      countClass=0
      countAttClass=0
      // Monotonic-consistency counts: samples dominating i on the split side,
      // on the class label, and on both.
      for (j <- Xtrain.indices) {
        val ilsj = splitVector(i) <= splitVector(j)

        val ilsjClass = Ytrain(i) <= Ytrain(j)

        if (ilsj) countAtt += 1
        if (ilsjClass) countClass += 1
        if (ilsj && ilsjClass) countAttClass+=1

      }

      // Convert to Double BEFORE multiplying: the counts are up to n, so the Int
      // product countAtt*countClass can overflow for n > ~46k samples.
      // countAttClass >= 1 always holds because j == i satisfies both relations.
      rmi += Math.log((countAtt.toDouble*countClass) / (Xtrain.length.toDouble*countAttClass))
    }

    -rmi/Xtrain.length
  }

}



package skeel.classification

/**
 * Baseline classifier. Every classifier must extend this class.
 * 
 * @author sergiogvz
 */
trait Classifier {
  // Number of distinct class labels; set by fit.
  protected var nClasses = 0
  // Number of input attributes; set by fit.
  protected var nAttributes = 0

  /**
    * Records the dataset dimensions used by concrete classifiers.
    *
    * @param X         training samples, one row per sample
    * @param y         class label of each sample (0-based)
    * @param nClassesP number of classes; -1 (default) infers it as y.max + 1
    */
  def fit(X: Array[Array[Double]], y:Array[Int], nClassesP:Int = -1): Unit = {
    nAttributes = X(0).length
    nClasses = if(nClassesP == -1) y.max+1 else nClassesP
  }

  /** Predicts the class of one sample: the index of the largest probability. */
  def predict(x:Array[Double]):Int = {
    val probs = predictProba(x)
    var maxIndex = 0

    // Linear scan; ties resolve to the lowest class index.
    for (i <- 1 until probs.length)
      if (probs(i) > probs(maxIndex)) maxIndex=i

    maxIndex
  }

  /** Predicts the class of every sample in X. */
  def predict(X:Array[Array[Double]]):Array[Int] = {
     X.map(predict(_))
  }

  /** Class-probability vector for one sample; implemented by concrete classifiers. */
  def predictProba(x:Array[Double]):Array[Double]

  /** Class-probability vectors for every sample in X. */
  def predictProba(X:Array[Array[Double]]):Array[Array[Double]] = {
     X.map(predictProba(_))
  }

}

基于Monster组和GPT的改写:

import math
from typing import List, Tuple

class REMT(Classifier):
    """Rank Entropy-Based Monotonic Decision Tree (Hu et al., 2011).

    Python port of the Scala implementation above.  It relies on attributes
    presumably set up by the MonDT-style fitting machinery (not shown here):
    ``self.Xtrain``, ``self.Ytrain``, ``self.nAttributes`` and
    ``self.orderedX`` — per-attribute lists of sample indices sorted by that
    attribute's value (TODO confirm against the MonDT base class).
    """

    def __init__(self, epsilon: float = 0.01, minSamplesLeaf: int = 2, prune: bool = False):
        self.epsilon = epsilon
        self.minSamplesLeaf = minSamplesLeaf
        self.prune = prune
        self.cutRMI = -1  # RMI of the best cut found by the last splittingRule call

    def stopCriterionCut(self) -> bool:
        """Stop splitting when the best cut's rank mutual information is below epsilon."""
        return self.cutRMI < self.epsilon

    def splittingRule(self) -> Tuple[int, float]:
        """Return the (attribute index, cut value) pair maximizing rank mutual information.

        Faithful to the Scala original: candidate cuts for an attribute are its
        distinct values in ascending order, excluding the largest one.  (The
        previous translation used an unordered set and subtracted the wrong
        element, which broke the candidate-cut enumeration.)
        """
        attRMI = []
        for att in range(self.nAttributes):
            # Attribute values in ascending order via the precomputed index order,
            # deduplicated while preserving order; the maximum cannot be a cut.
            values = [self.Xtrain[i][att] for i in self.orderedX[att]]
            possibleCuts = list(dict.fromkeys(values))[:-1]
            if possibleCuts:
                attRMI.append(max(((c, self.rmi(att, c)) for c in possibleCuts),
                                  key=lambda p: p[1]))
            else:
                attRMI.append((-1.0, -1.0))  # single distinct value: nothing to split on
        # Attribute whose best cut has the highest RMI overall.
        bestAtt, (bestCut, bestRMI) = max(enumerate(attRMI), key=lambda p: p[1][1])
        self.cutRMI = bestRMI
        return bestAtt, bestCut

    def generateTree(self) -> "REMT":
        """Child-node factory: a fresh tree with the same hyper-parameters."""
        return REMT(self.epsilon, self.minSamplesLeaf, self.prune)

    def rmi(self, att: int, cutValue: float) -> float:
        """Rank mutual information between the binary split at cutValue and the labels."""
        # Assign each sample to the left (1) or right (2) side of the cut.
        splitVector = [1 if x[att] <= cutValue else 2 for x in self.Xtrain]
        n = len(self.Xtrain)
        rmi = 0.0
        for i in range(n):
            countAtt = countClass = countAttClass = 0
            # Monotonic-consistency counts: samples dominating i on the split
            # side, on the class label, and on both.
            for j in range(n):
                ilsj = splitVector[i] <= splitVector[j]
                ilsjClass = self.Ytrain[i] <= self.Ytrain[j]
                if ilsj:
                    countAtt += 1
                if ilsjClass:
                    countClass += 1
                if ilsj and ilsjClass:
                    countAttClass += 1
            # countAttClass >= 1 always (j == i satisfies both relations),
            # so the division below cannot raise ZeroDivisionError.
            rmi += math.log((countAtt * countClass) / (n * countAttClass))
        return -rmi / n

class Classifier:
    """Baseline classifier; every concrete classifier extends this class."""

    def __init__(self):
        self.nClasses = 0      # number of distinct class labels, set by fit
        self.nAttributes = 0   # number of input attributes, set by fit

    def fit(self, X: List[List[float]], y: List[int], nClassesP: int = -1):
        """Record the dataset dimensions.

        nClassesP == -1 (the default) means "infer the class count as max(y) + 1";
        otherwise the given value is used verbatim.
        """
        self.nAttributes = len(X[0])
        self.nClasses = max(y) + 1 if nClassesP == -1 else nClassesP

    def predict(self, x: List[float]) -> int:
        """Return the most probable class for one sample (ties go to the lowest index)."""
        probs = self.predictProba(x)
        maxIndex = 0
        for i in range(1, len(probs)):
            if probs[i] > probs[maxIndex]:
                maxIndex = i
        return maxIndex

    def predictMultiple(self, X: List[List[float]]) -> List[int]:
        """Predict the class of every sample in X."""
        return [self.predict(x) for x in X]

    def predictProba(self, x: List[float]) -> List[float]:
        """Class-probability vector for one sample; subclasses must override.

        Raises instead of silently returning None (the previous `pass` made
        predict() fail later with an opaque TypeError).
        """
        raise NotImplementedError("predictProba must be implemented by a subclass")

    def predictProbaMultiple(self, X: List[List[float]]) -> List[List[float]]:
        """Class-probability vectors for every sample in X."""
        return [self.predictProba(x) for x in X]