/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    CHAIDDistribution.java
 *    Copyright (C) 2021 ALDAPA Team (http://www.aldapa.eus)
 *    Faculty of Informatics, Donostia, 20018
 *    University of the Basque Country (UPV/EHU), Basque Country
 *
 */

package weka.classifiers.trees.jchaid;

import java.util.Enumeration;

import weka.classifiers.trees.j48.Distribution;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.RevisionUtils;
import weka.core.Utils;

/**
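 * Class for handling a distribution of class values over the categories of an attribute,
 * extended with the category merging and splitting steps of the CHAID algorithm
 * (see {@link #mergeValues()}).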
 * *************************************************************************************<br/>
 * 
 * @author Jes&uacute;s M. P&eacute;rez (txus.perez@ehu.eus)
 * @author Oscar Teixeira (oteixeira001@ikasle.ehu.es)
 * @version $Revision: 1.2 $
 */
public class CHAIDDistribution extends Distribution {

  /** Serialization version identifier. */
  private static final long serialVersionUID = 7053184704038829399L;

  /**
   * Index, in the original (unmerged) distribution, of the bag reserved for missing values.
   * The main constructor sets it to {@code numBags}, i.e. the extra bag appended after the
   * bags of the known categories.
   */
  protected int m_missingOriginalIdx;

  /**
   * Indicates whether there are missing values. It is recomputed from the weight of the
   * missing-values bag when merging starts and can be forced to true with setHasMissingValues().
   */
  protected boolean m_hasMissingValues = false;

  /** Number of non-empty bags in the initial distribution, i.e. before any merging. */
  protected int m_numBagsNotEmpty;

  /**
   * Membership indicators: for each original category index, the index of the bag the
   * category currently belongs to, or -1 when the category has no instances.
   * (Based on the filter MergeNominalValues (package weka.filters.supervised.attribute))
   */
  private int[] m_indicators;
  
  /**
   * Copy of the distribution taken at the beginning of mergeValues(), before empty bags
   * are removed and categories are merged. It is indexed by original category index.
   */
  private Distribution m_distri_orig;

  /** Minimum number of objects in a split; bags below it are merged by mergeSmallGroups(). */
  protected final int m_minNoObj;

  /** Significance level for the selection of the attribute used to split a node. */
  protected double m_sigLevelAtt = 0.05;
  
  /** Significance level used when searching for the best combination of the categories of an attribute. */
  protected double m_sigLevelMergeSplit = 0.05;
  
  /**
   * Indicates whether the search for the best binary split is performed after a merge that
   * produces a compound of 3 or more categories.
   * This search can add considerable latency, which is why it is optional.
   */
  protected boolean m_searchBestSplit = true;

  /** Chi-squared probability of the split, as computed at the end of mergeValues(). */
  protected double m_chiSquaredProb;

  /**
   * Indicates whether the categories are ordered, that is to say, whether values may only
   * be merged with contiguous categories (ordinal attributes) or any grouping of
   * categories is permissible (nominal attributes).
   */
  private boolean m_ordered;

  /** Static reference to the splitting criterion shared by all instances. */
  protected static ChiSquareSplitCrit chiSquareCrit = new ChiSquareSplitCrit();

  /**
   * Creates and initializes a new, empty distribution with one bag per category plus
   * one extra bag (the last one) reserved for missing values.
   *
   * @param numBags number of categories of the attribute, not counting missing values
   * @param numClasses number of class values
   * @param minNoObj minimum number of objects in a split
   * @param sigLevelAtt significance level for the selection of the split attribute
   * @param sigLevelMergeSplit significance level for merging/splitting categories
   * @param searchBestSplit whether compounds of 3 or more categories are checked for a binary split
   * @param ordered true for ordinal attributes (only contiguous categories may be merged)
   */
  public CHAIDDistribution(int numBags, int numClasses, int minNoObj, double sigLevelAtt,
      double sigLevelMergeSplit, boolean searchBestSplit, boolean ordered) {
    // One bag more than requested: it is reserved for missing values
    super(numBags + 1, numClasses);

    // Configuration of the merging process
    this.m_minNoObj = minNoObj;
    this.m_ordered = ordered;
    this.m_searchBestSplit = searchBestSplit;
    this.m_sigLevelAtt = sigLevelAtt;
    this.m_sigLevelMergeSplit = sigLevelMergeSplit;

    // Bookkeeping of categories; the missing-values bag sits right after the known ones
    this.m_missingOriginalIdx = numBags;
    this.m_indicators = new int[numBags + 1];
  }

  /**
   * Creates distribution with only one bag by merging all bags of given
   * distribution.
   */
  public CHAIDDistribution(CHAIDDistribution toMerge) {
    super((Distribution)toMerge);

    m_indicators = new int[toMerge.m_indicators.length];
    for (int i = 0; i < m_indicators.length; i++)
      if (toMerge.m_indicators[i] > -1)
        m_indicators[i] = 0;
      else
        m_indicators[i] = -1;
    m_missingOriginalIdx = toMerge.m_missingOriginalIdx;
    m_hasMissingValues = toMerge.m_hasMissingValues;
    m_numBagsNotEmpty = toMerge.m_numBagsNotEmpty;
    m_minNoObj = toMerge.m_minNoObj;
    m_distri_orig = toMerge.m_distri_orig;
    m_sigLevelAtt = toMerge.m_sigLevelAtt;
    m_sigLevelMergeSplit = toMerge.m_sigLevelMergeSplit;
    m_searchBestSplit = toMerge.m_searchBestSplit;
    m_chiSquaredProb = toMerge.m_chiSquaredProb;
    m_ordered = toMerge.m_ordered;
  }

  /**
   * Creates a distribution according to the given instances and split model.
   * The settings (and the indicators array, by reference) are taken from the
   * distribution of the model; each instance is then added to the bag the model
   * assigns it to.
   *
   * @param source instances to distribute
   * @param modelToUse split model that decides the bag of each instance
   * @exception Exception if something goes wrong
   */
  public CHAIDDistribution(Instances source, CHAIDSplit modelToUse) throws Exception {
    super(modelToUse.numSubsets(), source.numClasses());

    final CHAIDDistribution template = (CHAIDDistribution) (modelToUse.distribution());
    m_indicators = template.m_indicators;
    m_distri_orig = template.m_distri_orig;
    m_missingOriginalIdx = template.m_missingOriginalIdx;
    m_hasMissingValues = template.m_hasMissingValues;
    m_numBagsNotEmpty = template.m_numBagsNotEmpty;
    m_minNoObj = template.m_minNoObj;
    m_ordered = template.m_ordered;
    m_searchBestSplit = template.m_searchBestSplit;
    m_sigLevelAtt = template.m_sigLevelAtt;
    m_sigLevelMergeSplit = template.m_sigLevelMergeSplit;
    m_chiSquaredProb = template.m_chiSquaredProb;

    for (Enumeration<Instance> all = source.enumerateInstances(); all.hasMoreElements();) {
      final Instance current = all.nextElement();
      final int bag = modelToUse.whichSubset(current);
      if (bag < 0 || bag >= modelToUse.numSubsets()) {
        // The model could not place the instance in a valid subset: report it and go on
        System.out.println("CHAIDDistribution(source,modelToUse): Unable to manage this instance!");
        continue;
      }
      add(bag, current);
    }
  }

  /**
   * Creates a distribution from a matrix of counts, copying every CHAID-specific
   * setting from the given distribution (the indicators array is shared by reference).
   *
   * @param matrix counts handed to the superclass constructor
   * @param dist distribution the settings are taken from
   */
  public CHAIDDistribution(double[][] matrix, CHAIDDistribution dist) {
    super(matrix);

    this.m_indicators = dist.m_indicators;
    this.m_distri_orig = dist.m_distri_orig;
    this.m_missingOriginalIdx = dist.m_missingOriginalIdx;
    this.m_hasMissingValues = dist.m_hasMissingValues;
    this.m_numBagsNotEmpty = dist.m_numBagsNotEmpty;
    this.m_minNoObj = dist.m_minNoObj;
    this.m_ordered = dist.m_ordered;
    this.m_searchBestSplit = dist.m_searchBestSplit;
    this.m_sigLevelAtt = dist.m_sigLevelAtt;
    this.m_sigLevelMergeSplit = dist.m_sigLevelMergeSplit;
    this.m_chiSquaredProb = dist.m_chiSquaredProb;
  }

/**
   * Resets the membership indicators so that every category is mapped to the bag
   * with its own index (identity mapping).
   */
  public void initializeIndicators() {
    int position = m_indicators.length;
    while (--position >= 0) {
      m_indicators[position] = position;
    }
  }

  /**
   * Removes the empty rows from the structures of the distribution and sets up the
   * membership indicators: a non-empty category is mapped to its row in the compacted
   * tables, an empty one is marked with -1. It also records whether there are missing
   * values and how many bags were not empty.
   */
  private void initializeAndEliminateEmptyBags() {
    // Missing values exist when their reserved bag holds some weight
    m_hasMissingValues = !Utils.eq(m_perBag[m_missingOriginalIdx], 0);
    // Number of bags that are not empty before any merge takes place
    m_numBagsNotEmpty = actualNumBags();

    final double[][] compactCounts = new double[m_numBagsNotEmpty][];
    final double[] compactTotals = new double[m_numBagsNotEmpty];

    int nextRow = 0;
    for (int cat = 0; cat < m_indicators.length; cat++) {
      if (Utils.eq(m_perBag[cat], 0)) {
        // No examples for this category: it gets no row at all
        m_indicators[cat] = -1;
        continue;
      }
      // The category keeps a row of its own (the row reference is reused, not copied)
      m_indicators[cat] = nextRow;
      compactCounts[nextRow] = m_perClassPerBag[cat];
      compactTotals[nextRow] = m_perBag[cat];
      nextRow++;
    }

    m_perClassPerBag = compactCounts;
    m_perBag = compactTotals;
  }

  /**
   * Merges the categories of the attribute following the CHAID algorithm. The outcome is
   * stored in this object: the reduced tables, the membership indicators (see
   * getIndicators()) and the probability of the resulting split (see getChiSquaredProb()).
   * (Initially based on the function mergeValues() of the filter MergeNominalValues (package weka.filters.supervised.attribute))
   */
  public void mergeValues() {

    // Keep a snapshot of the distribution as it is before anything is merged
    m_distri_orig = (Distribution) clone();

    // Drop empty bags and give every remaining category its own bag
    initializeAndEliminateEmptyBags();

    // Known (non-missing) categories first; nothing to merge once a single bag is left
    while (numKnownBags() > 1) {
      final int lastKnown = getIndexLastKnownBag();

      // (Step 2) Look for the pair of bags whose 2xd sub-table has the smallest chi-squared value
      final double[][] pairCounts = new double[2][];
      double smallestChi = Double.MAX_VALUE;
      int first = -1;
      int second = -1;
      for (int i = 0; i <= lastKnown; i++) {
        pairCounts[0] = getMatrixRow(i);
        // Ordinal attributes only pair a bag with its right-hand neighbour
        // (the last bag has none); nominal attributes pair it with every later bag
        final int upper;
        if (!m_ordered) {
          upper = lastKnown;
        } else if (i == lastKnown) {
          upper = -1;
        } else {
          upper = i + 1;
        }
        for (int j = i + 1; j <= upper; j++) {
          pairCounts[1] = getMatrixRow(j);
          final double chi = chiSquareCrit.chiVal(pairCounts);
          if (Utils.sm(chi, smallestChi)) {
            smallestChi = chi;
            first = i;
            second = j;
          }
        }
      }

      // No candidate pair was found
      if (first == -1) {
        break;
      }

      // Stop as soon as even the most similar pair is significantly different
      final double pairProb = chiSquareCrit.splitCritValue(smallestChi, pairCounts[0].length - 1);
      if (!Utils.gr(pairProb, m_sigLevelMergeSplit)) {
        break;
      }

      // Reduce the tables by merging the second bag into the first one
      mergeCategories(first, second);
      mergeIndicators(first, second);

      // (Step 3) Optionally look for a significant binary split of the new compound
      if (m_searchBestSplit) {
        splitLargeGroups(first);
      }
    }

    // Groups of categories that are too small are merged with their most similar group
    mergeSmallGroups();

    // The bag of missing values is treated last
    if (m_hasMissingValues) {
      handleMissingValues();
    }

    // (Step 4) Significance of the optimally merged attribute
    if (numBags() == 1) {
      // Every category ended up in the same bag
      m_chiSquaredProb = 1.0;
      return;
    }
    final double unadjustedProb = chiSquareCrit.splitCritValue(this);
    m_chiSquaredProb = unadjustedProb * chiSquareCrit.bonferroniFactor(m_numBagsNotEmpty, numBags(), m_hasMissingValues, m_ordered);
    if (Utils.gr(m_chiSquaredProb, m_sigLevelAtt)) {
      // Not significant: all values go into a single bag
      mergeAll();
    }
  }

  /**
   * Merges the rows of two bags; the second one is always added onto the first one and
   * then removed, so the tables end up with one row less. The callers in this class
   * pass toMergeOne &lt; toMergeTwo.
   * @param toMergeOne Index of the first category to be merged (receives the counts)
   * @param toMergeTwo Index of the second category to be merged (disappears)
   */
  private void mergeCategories(int toMergeOne, int toMergeTwo) {

    // The overall total and m_perClass are not affected by a merge
    final int oldSize = m_perClassPerBag.length;
    final double[][] shrunkCounts = new double[oldSize - 1][];
    final double[] shrunkTotals = new double[oldSize - 1];

    // Rows placed before the absorbed one keep their position (references are reused)
    for (int row = 0; row < toMergeTwo; row++) {
      shrunkCounts[row] = m_perClassPerBag[row];
      shrunkTotals[row] = m_perBag[row];
    }

    // The absorbed row is added, class by class, onto the receiving row
    for (int cls = 0; cls < m_perClassPerBag[toMergeTwo].length; cls++) {
      shrunkCounts[toMergeOne][cls] += m_perClassPerBag[toMergeTwo][cls];
    }
    shrunkTotals[toMergeOne] += m_perBag[toMergeTwo];

    // Rows placed after the absorbed one move up by one position
    for (int row = toMergeTwo + 1; row < oldSize; row++) {
      shrunkCounts[row - 1] = m_perClassPerBag[row];
      shrunkTotals[row - 1] = m_perBag[row];
    }

    m_perClassPerBag = shrunkCounts;
    m_perBag = shrunkTotals;
  }

  /**
   * Updates the membership indicators after two bags have been merged; the second bag
   * is always the one absorbed by the first.
   * @param toMergeOne Index of the first category to be merged (receives the categories)
   * @param toMergeTwo Index of the second category to be merged (disappears)
   */
  private void mergeIndicators(int toMergeOne, int toMergeTwo) {

    for (int cat = 0; cat < m_indicators.length; cat++) {
      final int bag = m_indicators[cat];
      // Empty categories and bags placed before the absorbed one are left alone
      if (bag == -1 || bag < toMergeTwo) {
        continue;
      }
      // Every category of the absorbed bag now belongs to the receiving bag;
      // later bags move up by one because a row has gone
      m_indicators[cat] = (bag == toMergeTwo) ? toMergeOne : bag - 1;
    }
  }

  /**
   * Looks for the most significant binary split of the bag that has just been merged
   * (Step 3), provided the compound is made of three or more categories. When the best
   * split found is significant at m_sigLevelMergeSplit, the bag is split in two.
   * // In some implementations this step is optional:
   * // http://www-01.ibm.com/support/knowledgecenter/SSLVMB_21.0.0/com.ibm.spss.statistics.help/alg_tree-chaid_algorithm_merging.htm
   * @param mergedBagIndex index of the merged bag
   */
  private void splitLargeGroups(int mergedBagIndex) {

    final int numClasses = m_perClass.length;

    // Original indexes of the categories currently mapped to the merged bag
    final int[] categoriesMerged = new int[m_indicators.length];
    int categoriesCount = 0;
    for (int cat = 0; cat < m_indicators.length; cat++) {
      if (m_indicators[cat] == mergedBagIndex) {
        categoriesMerged[categoriesCount++] = cat;
      }
    }
    // Only compounds of 3 categories or more are candidates for a split
    if (categoriesCount < 3) {
      return;
    }

    // Weight per class of all the instances of the merged bag, from the original distribution
    final double[] compoundTotals = new double[numClasses];
    for (int pos = 0; pos < categoriesCount; pos++) {
      for (int cls = 0; cls < numClasses; cls++) {
        compoundTotals[cls] += m_distri_orig.perClassPerBag(categoriesMerged[pos], cls);
      }
    }

    // 2xd contingency sub-table: row 0 is sub-bag 1, row 1 is sub-bag 2 (both start at 0)
    final double[][] reducedCounts = new double[2][numClasses];

    // Double.MIN_VALUE is the smallest positive double, so it acts as a lower bound of ~0 here
    double bestChi = Double.MIN_VALUE;
    int bestSplit = -1;

    if (m_ordered) { // Ordinal attribute
      // Sub-bag 2 starts with the whole compound; sub-bag 1 starts empty
      System.arraycopy(compoundTotals, 0, reducedCounts[1], 0, numClasses);
      // The order of the categories has to be kept, so there are as many
      // possible splits as categories minus 1
      // e.g.: abcde => (0) a|bcde, (1) ab|cde, (2) abc|de, (3) abcd|e
      final int numCuts = categoriesCount - 1;
      for (int cut = 0; cut < numCuts; cut++) {
        // Each step moves one more category from sub-bag 2 to sub-bag 1
        for (int cls = 0; cls < numClasses; cls++) {
          final double moved = m_distri_orig.perClassPerBag(categoriesMerged[cut], cls);
          reducedCounts[0][cls] += moved;
          reducedCounts[1][cls] -= moved;
        }
        final double chi = chiSquareCrit.chiVal(reducedCounts);
        if (Utils.gr(chi, bestChi)) {
          bestChi = chi;
          bestSplit = cut;
        }
      }
    } else { // Nominal attribute
      // Every combination (subset) of categories is enumerated by the 1 digits of the
      // base 2 numbers from 1 to (2^(n-1))-1, each digit position being one of the n categories.
      // See "Number of k combinations for all k" on Wikipedia (https://en.wikipedia.org/wiki/Combination#Number_of_k-combinations_for_all_k)
      // The 1 digits select the categories of sub-bag 1; the rest fall into sub-bag 2.
      // e.g.: abcde => (1)=[00001] a|bcde, (2)=[00010] b|acde, (3)=[00011] ab|cde, (4)=[00100] c|abde, ...
      // ... (the last one)=(15)=[01111] abcd|e
      // The enumeration stops at (2^(n-1))-1 instead of (2^n)-1 because the remaining numbers
      // would repeat splits: e.g. (16)=[10000] e|abcd is the same split as abcd|e.
      final int numCombinations = (int) Math.pow(2, categoriesCount - 1);
      for (int combi = 1; combi < numCombinations; combi++) {
        // Sub-bag 1 restarts empty and sub-bag 2 restarts with the whole compound
        java.util.Arrays.fill(reducedCounts[0], 0.0);
        System.arraycopy(compoundTotals, 0, reducedCounts[1], 0, numClasses);
        // sub-bag 1: categories whose binary digit is 1
        int digits = combi;
        for (int pos = 0; pos < categoriesCount; pos++, digits /= 2) {
          if (digits % 2 != 1) {
            continue;
          }
          for (int cls = 0; cls < numClasses; cls++) {
            reducedCounts[0][cls] += m_distri_orig.perClassPerBag(categoriesMerged[pos], cls);
          }
        }
        // sub-bag 2: whatever is left of the compound
        for (int cls = 0; cls < numClasses; cls++) {
          reducedCounts[1][cls] -= reducedCounts[0][cls];
        }
        final double chi = chiSquareCrit.chiVal(reducedCounts);
        if (Utils.gr(chi, bestChi)) {
          bestChi = chi;
          bestSplit = combi;
        }
      }
    }

    // No candidate improved on the initial bound
    if (bestSplit == -1) {
      return;
    }
    // The split is only applied when it is significant
    final double chiSquaredProb = chiSquareCrit.splitCritValue(bestChi, reducedCounts[0].length - 1);
    if (!Utils.sm(chiSquaredProb, m_sigLevelMergeSplit)) {
      return;
    }

    final int splitMask;
    final String report;
    if (m_ordered) {
      // Convert the cut position to the corresponding combination as a base 2 number
      // e.g.: abcde => (0) 00001, (1) 00011, (2) 00111, (3) 01111
      splitMask = (int) Math.pow(2, bestSplit + 1) - 1;
      report = "Found a significant binary split for a ordinal attribute!";
    } else {
      // For nominal attributes the combination number already is the mask
      splitMask = bestSplit;
      report = "Found a significant binary split for a nominal attribute!";
    }
    splitCategories(mergedBagIndex, categoriesMerged, categoriesCount, splitMask);
    splitIndicators(mergedBagIndex, categoriesMerged, categoriesCount, splitMask);
    System.out.println(report);

  }

  /**
   * Splits the bag given by mergedBagIndex in two, according to the best split computed.
   * The first sub-bag stays at mergedBagIndex, the second one is inserted right after it
   * and the following rows are shifted down by one. The counts of both sub-bags are
   * rebuilt from the original distribution (m_distri_orig).
   * @param mergedBagIndex index of the bag to be split
   * @param categoriesMerged original indexes of the categories merged in the bag
   * @param categoriesCount number of categories merged
   * @param i_bestSplit indicates the best combination found to split the bag, read as a
   *        base 2 number: the i-th digit (least significant first) is 1 when the i-th
   *        entry of categoriesMerged goes to the first sub-bag
   */
  private void splitCategories(int mergedBagIndex, int[] categoriesMerged,
      int categoriesCount, int i_bestSplit) {
    
    // The overall total and m_perClass remain unchanged
    // Extend tables by splitting the given bag
    final int numClasses = m_perClass.length;
    final int oldNumBags = m_perClassPerBag.length;
    double[][] new_perClassPerBag = new double[oldNumBags + 1][];
    double[] new_perBag = new double[oldNumBags + 1];

    for (int i_bag = 0; i_bag < oldNumBags; i_bag++) {
      if (i_bag < mergedBagIndex) {
        // Can simply copy reference
        new_perClassPerBag[i_bag] = m_perClassPerBag[i_bag];
        new_perBag[i_bag] = m_perBag[i_bag];
      } else if (i_bag == mergedBagIndex) {
        new_perClassPerBag[i_bag] = new double[numClasses];
        new_perClassPerBag[i_bag + 1] = new double[numClasses];
        // Split bag: distribute every merged category to one of the two sub-bags
        int res = i_bestSplit;
        for (int i_cat = 0; i_cat < categoriesCount; i_cat++) {
          // Index of the category in the original distribution. Both the per-class
          // counts and the per-bag total must be read with this index (the per-bag
          // total used to be read with the position i_cat, which is a different bag
          // unless the merged categories happen to be 0..categoriesCount-1).
          final int originalIdx = categoriesMerged[i_cat];
          // Digit 1 => sub-bag 1 (stays in place); digit 0 => sub-bag 2 (next row)
          final int target = (res % 2 == 1) ? i_bag : i_bag + 1;
          for (int i_class = 0; i_class < numClasses; i_class++)
            new_perClassPerBag[target][i_class] += m_distri_orig.perClassPerBag(originalIdx, i_class);
          new_perBag[target] += m_distri_orig.perBag(originalIdx);
          res /= 2;
        }
      } else {
        // Need to shift row
        new_perClassPerBag[i_bag + 1] = m_perClassPerBag[i_bag];
        new_perBag[i_bag + 1] = m_perBag[i_bag];
      }
    }
    m_perClassPerBag = new_perClassPerBag;
    m_perBag = new_perBag;
  }

  /**
   * Updates the membership indicators after the bag given by mergedBagIndex has been
   * split according to the best split computed: the categories of the second sub-bag
   * are mapped to the row right after mergedBagIndex and all later bags move down by one.
   * @param mergedBagIndex index of the bag to be split
   * @param categoriesMerged original indexes of the categories merged in the bag
   * @param categoriesCount number of categories merged
   * @param i_bestSplit indicates the best combination found to split the bag
   */
  private void splitIndicators(int mergedBagIndex, int[] categoriesMerged,
      int categoriesCount, int i_bestSplit) {

    // Make room for the new row: every bag after the split one is shifted
    for (int cat = 0; cat < m_indicators.length; cat++) {
      if (m_indicators[cat] > mergedBagIndex) {
        m_indicators[cat] += 1;
      }
    }

    // Read the combination digit by digit, least significant first.
    // Digit 1 (sub-bag 1) keeps its indicator; digit 0 goes to sub-bag 2
    final int secondSubBag = mergedBagIndex + 1;
    int digits = i_bestSplit;
    for (int pos = 0; pos < categoriesCount; pos++, digits /= 2) {
      if (digits % 2 == 0) {
        m_indicators[categoriesMerged[pos]] = secondSubBag;
      }
    }
  }

  /**
   * Merges any known bag having fewer observations than the minimum subgroup size
   * (m_minNoObj) with the most similar other known bag, as measured by the smallest
   * pairwise chi-square. For ordinal attributes only the adjacent bags are candidates.
   */
  private void mergeSmallGroups() {
    // 2xd contingency sub-table: row 0 is the small bag, row 1 the candidate partner
    final double[][] pairCounts = new double[2][];
    int idx = 0;

    while ((numKnownBags() > 1) && (idx < numKnownBags())) {
      // Bags that are large enough are skipped
      if (Utils.grOrEq(m_perBag[idx], m_minNoObj)) {
        idx++;
        continue;
      }

      // Range of candidate partners for the idx-th bag
      final int lastKnown = getIndexLastKnownBag();
      final int from;
      final int to;
      if (m_ordered) {
        from = (idx == 0) ? 1 : idx - 1;
        to = (idx == lastKnown) ? idx - 1 : idx + 1;
      } else {
        from = (idx == 0) ? 1 : 0;
        to = lastKnown;
      }

      // Find the bag that differs least from the idx-th according to chi-squared statistic
      pairCounts[0] = getMatrixRow(idx);
      double smallestChi = Double.MAX_VALUE;
      int partner = -1;
      for (int j = from; j <= to; j++) {
        if (j == idx) {
          continue;
        }
        pairCounts[1] = getMatrixRow(j);
        final double chi = chiSquareCrit.chiVal(pairCounts);
        if (Utils.sm(chi, smallestChi)) {
          smallestChi = chi;
          partner = j;
        }
      }

      // No partner found: leave the bag as it is and go on with the next one
      if (partner == -1) {
        idx++;
        continue;
      }

      // The bag with the higher index is always merged onto the one with the lower index;
      // idx is not advanced, so the bag now placed at idx is checked in the next round
      final int keep = Math.min(idx, partner);
      final int absorb = Math.max(idx, partner);
      mergeCategories(keep, absorb);
      mergeIndicators(keep, absorb);
    }
  }

  /**
   * Merges the bag of missing values with the most similar of the bags placed before
   * it, but only if the difference between both is not significant at m_sigLevelMergeSplit.
   */
  public void handleMissingValues() {
    // Current row of the bag that holds the missing values
    final int missingBag = m_indicators[m_missingOriginalIdx];

    final double[][] pairCounts = new double[2][];
    pairCounts[0] = getMatrixRow(missingBag);

    // Bag with the smallest chi-squared value against the missing-values bag
    int closest = -1;
    double smallestChi = Double.MAX_VALUE;
    for (int bag = 0; bag < missingBag; bag++) {
      pairCounts[1] = getMatrixRow(bag);
      final double chi = chiSquareCrit.chiVal(pairCounts);
      if (Utils.sm(chi, smallestChi)) {
        smallestChi = chi;
        closest = bag;
      }
    }

    // There was no bag to compare with
    if (closest < 0) {
      return;
    }

    final double prob = chiSquareCrit.splitCritValue(smallestChi, pairCounts[0].length - 1);
    if (Utils.gr(prob, m_sigLevelMergeSplit)) {
      // Not significantly different: the missing values join the closest bag
      mergeCategories(closest, missingBag);
      mergeIndicators(closest, missingBag);
    }
  }

  /**
   * Merges all bags into only one bag. The tables and the indicators of this object
   * are replaced by those of a single-bag distribution built from it; the overall
   * total is not reassigned.
   */
  public void mergeAll() {
    // The copy constructor already builds the one-bag version of a distribution
    final CHAIDDistribution singleBag = new CHAIDDistribution(this);
    m_indicators = singleBag.m_indicators;
    m_perClassPerBag = singleBag.m_perClassPerBag;
    m_perClass = singleBag.m_perClass;
    m_perBag = singleBag.m_perBag;
  }

  /**
   * Returns the membership indicators. The internal array itself is returned, not a copy.
   *
   * @return for each original category, the index of its current bag (-1 if empty)
   */
  public int[] getIndicators() {
    return this.m_indicators;
  }

  /**
   * Returns the number of known bags, i.e. the current number of bags without counting
   * one for missing values when the distribution is flagged as having them.
   */
  private int numKnownBags() {
    return m_hasMissingValues ? m_perBag.length - 1 : m_perBag.length;
  }

  /** Returns the index of the last known bag (-1 when there is no known bag) */
  private int getIndexLastKnownBag() {
    final int known = numKnownBags();
    return known - 1;
  }

  /** Returns the given row of the m_perClassPerBag matrix (the row itself, not a copy) */
  private double[] getMatrixRow(int row) {
    final double[] classCounts = m_perClassPerBag[row];
    return classCounts;
  }

  /**
   * Gets the current index of the bag the missing values belong to.
   *
   * @return the indicator stored for the original missing-values category
   */
  public int getMissingCurrentIndex() {
    final int[] indicators = this.m_indicators;
    return indicators[this.m_missingOriginalIdx];
  }

  /**
   * Gets the chi-squared probability of the split.
   *
   * @return the probability value currently stored in this distribution
   */
  public double getChiSquaredProb() {
    return this.m_chiSquaredProb;
  }

  /**
   * Tells whether the split has missing values.
   * @return true if the distribution is flagged as having missing values
   */
  public boolean getHasMissingValues() {
    return this.m_hasMissingValues;
  }

  /**
   * Flags the distribution as having missing values (the flag can only be raised here).
   */
  public void setHasMissingValues() {
    this.m_hasMissingValues = true;
  }
  
  /**
   * Returns the revision string, as extracted by RevisionUtils from the revision keyword.
   *
   * @return the revision
   */
  @Override
  public String getRevision() {
    final String revision = RevisionUtils.extract("$Revision: 1.1 $");
    return revision;
  }
}
