package weka.classifiers.trees.j48Consolidated;

import weka.classifiers.trees.j48.C45Split;
import weka.core.Instances;

/**
 * Class implementing a C4.5-type split on a consolidated attribute based on a set of samples.
 * *************************************************************************************
 * <p>*** Attention! The visibility of the following members of the class 'C45Split'
 *     was changed from 'private' to 'protected' so that they can be used here:
 * <ul>
 *    <li>protected int m_complexityIndex;</li>
 *    <li>protected int m_attIndex;</li>
 *    <li>protected int m_minNoObj;</li>
 *    <li>protected double m_splitPoint;</li>
 *    <li>protected double m_infoGain;</li>
 *    <li>protected double m_gainRatio;</li>
 *    <li>protected double m_sumOfWeights;</li>
 *    <li>protected int m_index;</li>
 *    <li>protected static InfoGainSplitCrit infoGainCrit = new InfoGainSplitCrit();</li>
 *    <li>protected static GainRatioSplitCrit gainRatioCrit = new GainRatioSplitCrit();</li>
 * </ul>
 * 
 * @author Jesús M. Pérez (txus.perez@ehu.es) 
 * @version $Revision: 1.0 $
 */
public class C45ConsolidatedSplit extends C45Split {

	/** Version identifier used for serialization. */
	private static final long serialVersionUID = 1174832141695586851L;

	/**
	 * Builds a split model on the given attribute that carries no distribution:
	 * the distribution is left {@code null} and both the information gain and
	 * the gain ratio are set to zero. Such a model is what the other constructor
	 * of this class hands to {@code DistributionConsolidated}.
	 *
	 * @param attIndex attribute to split on
	 * @param minNoObj minimum number of objects
	 * @param sumOfWeights sum of the weights
	 * @param data the training sample; only consulted for the description of the
	 *        split attribute (whether it is nominal and how many values it has)
	 * @param splitPointConsolidated the split point stored in the model
	 *        (meaningful for numeric attributes)
	 */
	public C45ConsolidatedSplit(int attIndex, int minNoObj, double sumOfWeights, 
			Instances data, double splitPointConsolidated) {
		super(attIndex, minNoObj, sumOfWeights);

		// A nominal attribute yields one subset per value; any other attribute
		// is split in two.
		final boolean nominal = data.attribute(m_attIndex).isNominal();
		final int branches = nominal ? data.attribute(m_attIndex).numValues() : 2;
		m_complexityIndex = branches;
		m_index = branches;
		m_numSubsets = branches;

		// Remaining state: the consolidated split point, and "empty" statistics.
		m_splitPoint = splitPointConsolidated;
		m_distribution = null;
		m_gainRatio = 0;
		m_infoGain = 0;
	}

	/**
	 * Builds a split model for the consolidated decision and computes its
	 * distribution, information gain and gain ratio from the set of samples.
	 *
	 * @param attIndex attribute to split on
	 * @param minNoObj minimum number of objects
	 * @param sumOfWeights sum of the weights
	 * @param data the training sample; only consulted for the description of the
	 *        split attribute
	 * @param samplesVector the samples used for consolidation, passed on to
	 *        {@code DistributionConsolidated}
	 * @param splitPointConsolidated the split point stored in the model
	 *        (meaningful for numeric attributes)
	 * @exception Exception if something goes wrong
	 */
	public C45ConsolidatedSplit(int attIndex, int minNoObj, double sumOfWeights, 
			Instances data, Instances[] samplesVector, double splitPointConsolidated) throws Exception {
		this(attIndex, minNoObj, sumOfWeights, data, splitPointConsolidated);

		// The consolidated distribution is built from a separate model that has
		// the same split decision but no distribution of its own.
		final C45ConsolidatedSplit blankModel = new C45ConsolidatedSplit(
				m_attIndex, m_minNoObj, m_sumOfWeights, data, splitPointConsolidated);
		m_distribution = new DistributionConsolidated(samplesVector, blankModel);

		// The gain ratio criterion takes the information gain as an input, so
		// the gain is computed first.
		final double gain = infoGainCrit.splitCritValue(m_distribution, m_sumOfWeights);
		m_infoGain = gain;
		m_gainRatio = gainRatioCrit.splitCritValue(m_distribution, m_sumOfWeights, gain);
	}
}

