/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    CHAIDStarModelSelection.java
 *    Copyright (C) 2021 ALDAPA Team (http://www.aldapa.eus)
 *    Faculty of Informatics, Donostia, 20018
 *    University of the Basque Country (UPV/EHU), Basque Country
 *
 */

package weka.classifiers.trees.jchaidstar;

import java.util.Enumeration;

import weka.classifiers.trees.j48.ClassifierSplitModel;
import weka.classifiers.trees.j48.Distribution;
import weka.classifiers.trees.j48.NoSplit;
import weka.classifiers.trees.jchaid.CHAIDModelSelection;
import weka.core.Attribute;
import weka.core.Instances;
import weka.core.Range;
import weka.core.RevisionUtils;
import weka.core.Utils;

/**
 * Class for selecting a CHAID*-type split for a given dataset.
 * 
 * @author Jes&uacute;s M. P&eacute;rez (txus.perez@ehu.eus)
 * @author Oscar Teixeira (oteixeira001@ikasle.ehu.es)
 * @version $Revision: 1.2 $
 */
public class CHAIDStarModelSelection extends CHAIDModelSelection {

  /** Version identifier used by Java serialization. */
  private static final long serialVersionUID = -4401438489738631349L;

  /**
   * Creates a CHAID* split selector. Every argument is handed unchanged to
   * the {@link CHAIDModelSelection} constructor.
   *
   * @param minNoObj minimum number of instances that have to occur in at least
   *          two subsets induced by a split
   * @param allData the FULL training dataset (needed to select split points)
   * @param doNotMakeSplitPointActualValue if true, the split point is not
   *          relocated by scanning the entire dataset for the closest data
   *          value
   * @param sigLevelAtt significance level for the selection of attributes
   * @param sigLevelMergeSplit significance level for the best combination of
   *          categories
   * @param searchBestSplit true if the best binary split is to be searched for
   * @param minNumObjSplit minimum number of instances required to split a node
   * @param ordinalAtts the attributes that are to be treated as ordinal
   */
  public CHAIDStarModelSelection(int minNoObj, Instances allData,
    boolean doNotMakeSplitPointActualValue, double sigLevelAtt,
    double sigLevelMergeSplit, boolean searchBestSplit, int minNumObjSplit,
    Range ordinalAtts) {
    super(minNoObj, allData, doNotMakeSplitPointActualValue, sigLevelAtt,
        sigLevelMergeSplit, searchBestSplit, minNumObjSplit, ordinalAtts);
  }

  /**
   * Selects a CHAID*-type split for the given dataset.
   * <p>
   * One {@link CHAIDStarSplit} is built per non-class attribute. Among the
   * splits whose {@code checkModel()} succeeds, the one with the smallest
   * {@code chiSquaredProb()} is chosen, provided that value does not exceed
   * the attribute significance level.
   *
   * @param data the instances reaching the node to be split
   * @return the chosen split; a {@link NoSplit} model when the node holds too
   *         few instances, is pure, or offers no acceptable split; or
   *         {@code null} when an exception was raised (its stack trace is
   *         printed)
   */
  @Override
  public ClassifierSplitModel selectModel(Instances data) {

    try {
      // A node that is too small, or whose instances all share the majority
      // class, is turned into a leaf straight away.
      Distribution nodeDist = new Distribution(data);
      NoSplit leaf = new NoSplit(nodeDist);
      if (Utils.sm(nodeDist.total(), m_minNumObjSplit)) {
        return leaf;
      }
      if (Utils.eq(nodeDist.total(),
        nodeDist.perClass(nodeDist.maxClass()))) {
        return leaf;
      }

      // Stays true only if no enumerated attribute has fewer values than 30%
      // of the size of the full dataset.
      boolean everyAttManyValued = true;
      if (m_allData != null) {
        double valueLimit = 0.3 * m_allData.numInstances();
        for (Enumeration<Attribute> atts = data.enumerateAttributes();
          everyAttManyValued && atts.hasMoreElements();) {
          if (Utils.sm(atts.nextElement().numValues(), valueLimit)) {
            everyAttManyValued = false;
          }
        }
      }

      int attCount = data.numAttributes();
      int classIdx = data.classIndex();
      CHAIDStarSplit[] candidates = new CHAIDStarSplit[attCount];
      int usableSplits = 0;

      // Build one candidate split per attribute; the slot belonging to the
      // class attribute is left as null.
      for (int att = 0; att < attCount; att++) {
        if (att == classIdx) {
          continue;
        }

        CHAIDStarSplit split = new CHAIDStarSplit(att, m_minNoObj,
          m_sigLevelAtt, m_sigLevelMergeSplit, m_searchBestSplit,
          isOrdinalAtt(att));
        candidates[att] = split;
        split.buildClassifier(data);

        if (!split.checkModel()) {
          continue;
        }

        // Without the full dataset every valid split counts. Otherwise an
        // enumerated attribute with a lot of values is only counted when
        // all attributes have a lot of values.
        if (m_allData == null
          || data.attribute(att).isNumeric()
          || everyAttManyValued
          || Utils.sm(data.attribute(att).numValues(),
            0.3 * m_allData.numInstances())) {
          usableSplits++;
        }
      }

      // Nothing worth splitting on.
      if (usableSplits == 0) {
        return leaf;
      }

      // Keep the valid candidate with the lowest chi-squared probability;
      // because the comparison is "smaller or equal", a later attribute
      // replaces an earlier one on a tie.
      double lowestProb = Double.MAX_VALUE;
      CHAIDStarSplit chosen = null;
      for (int att = 0; att < attCount; att++) {
        if (att == classIdx) {
          continue;
        }
        CHAIDStarSplit split = candidates[att];
        if (split.checkModel()
          && Utils.smOrEq(split.chiSquaredProb(), lowestProb)) {
          chosen = split;
          lowestProb = split.chiSquaredProb();
        }
      }

      // The best candidate must still reach the attribute significance level.
      if (Utils.gr(lowestProb, m_sigLevelAtt)) {
        return leaf;
      }

      // Set the split point analogously to C45 when the attribute is numeric.
      if (m_allData != null && !m_doNotMakeSplitPointActualValue) {
        chosen.setSplitPoint(m_allData);
      }
      return chosen;
    } catch (Exception e) {
      e.printStackTrace();
      return null;
    }
  }

  /**
   * Returns the revision string.
   *
   * @return the revision, as extracted by {@link RevisionUtils#extract}
   */
  @Override
  public String getRevision() {
    return RevisionUtils.extract("$Revision: 1.1 $");
  }
}
