/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    J48Consolidated.java
 *    Copyright (C) 2013 ALDAPA Team (http://www.sc.ehu.es/aldapa)
 *    Computing Engineering Faculty, Donostia, 20018
 *    University of the Basque Country (UPV/EHU), Basque Country
 *    
 */

package weka.classifiers.trees;

import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;

import weka.classifiers.Sourcable;
import weka.classifiers.trees.j48.C45ModelSelection;
import weka.classifiers.trees.j48.C45PruneableClassifierTree;
import weka.classifiers.trees.j48.ModelSelection;
import weka.classifiers.trees.j48Consolidated.C45ConsolidatedModelSelection;
import weka.classifiers.trees.j48Consolidated.C45ConsolidatedPruneableClassifierTree;
import weka.classifiers.trees.j48Consolidated.InstancesConsolidated;
import weka.core.AdditionalMeasureProducer;
import weka.core.Capabilities;
import weka.core.Drawable;
import weka.core.Instances;
import weka.core.Matchable;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.Summarizable;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.WeightedInstancesHandler;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;

/**
<!-- globalinfo-start -->
 * Class for generating a pruned or unpruned C4.5 consolidated tree. Uses the Consolidated Tree Construction (CTC) algorithm: a single tree is built based on a set of subsamples. New options are added to the class J48 to set the Resampling Method (RM) for the generation of samples to use in the consolidation process. For more information, see:<br/>
 * <br/>
 * Jes&uacute;s M. P&eacute;rez and Javier Muguerza and Olatz Arbelaitz and Ibai Gurrutxaga and Jos&eacute; I. Mart&iacute;n.  
 * "Combining multiple class distribution modified subsamples in a single tree". Pattern Recognition Letters (2007), 28(4), pp 414-422.
 * <a href="http://dx.doi.org/10.1016/j.patrec.2006.08.013" target="_blank">doi:10.1016/j.patrec.2006.08.013</a>
 * <p/>
<!-- globalinfo-end -->
 *
<!-- technical-bibtex-start -->
 * BibTeX:
 * <pre>
 * &#64;article{Perez2007,
 *    title = "Combining multiple class distribution modified subsamples in a single tree",
 *    journal = "Pattern Recognition Letters",
 *    volume = "28",
 *    number = "4",
 *    pages = "414 - 422",
 *    year = "2007",
 *    doi = "10.1016/j.patrec.2006.08.013",
 *    author = "Jes\'us M. P\'erez and Javier Muguerza and Olatz Arbelaitz and Ibai Gurrutxaga and Jos\'e I. Mart\'in"
 * }
 * </pre>
 * <p/>
<!-- technical-bibtex-end -->
 * *************************************************************************************<br/>
 * Attention! The visibility of the following members of the class 'J48' changed 
 *     to 'protected' instead of 'private' in order to use them here:
 * <ul>
 * 		<li>protected ClassifierTree m_root;</li>
 * 		<li>protected boolean m_unpruned = false;</li>
 * 		<li>protected float m_CF = 0.25f;</li>
 * 		<li>protected int m_minNumObj = 2;</li>
 * 		<li>protected boolean m_useLaplace = false;</li>
 * 		<li>protected boolean m_reducedErrorPruning = false;</li>
 * 		<li>protected int m_numFolds = 3;</li>
 * 		<li>protected boolean m_binarySplits = false;</li>
 * 		<li>protected boolean m_subtreeRaising = true;</li>
 * 		<li>protected boolean m_noCleanup = false;</li>
 * 		<li>protected int m_Seed = 1;</li>
 * </ul>
 * Attention! Exceptions added to the following methods of the class 'J48' 
 *     in order to use them here:
 * <ul>
 * 		<li>public void setReducedErrorPruning(boolean v) throws Exception {}</li>
 * 		<li>public void setNumFolds(int v) throws Exception {}</li>
 * 		<li>public void setBinarySplits(boolean v) throws Exception {}</li>
 * </ul>
<!-- options-start -->
 * Valid options are: <p/>
 * 
 * J48 options <br/>
 * ==========
 *
 * <pre> -U
 *  Use unpruned tree.</pre>
 * 
 * <pre> -C &lt;pruning confidence&gt;
 *  Set confidence threshold for pruning.
 *  (default 0.25)</pre>
 * 
 * <pre> -M &lt;minimum number of instances&gt;
 *  Set minimum number of instances per leaf.
 *  (default 2)</pre>
 *  
 * <pre> -S
 *  Don't perform subtree raising.</pre>
 * 
 * <pre> -L
 *  Do not clean up after the tree has been built.</pre>
 * 
 * <pre> -A
 *  Laplace smoothing for predicted probabilities.</pre>
 * 
 * <pre> -Q &lt;seed&gt;
 *  Seed for random data shuffling (default 1).</pre>
 * 
 * Options to set the Resampling Method (RM) for the generation of samples
 *  to use in the consolidation process <br/>
 * =============================================================================================== 
 * <pre> -RM-N &lt;number of samples&gt;
 *  Number of samples to be generated for use in the construction of the consolidated tree.
 *  (default 5)</pre>
 * 
 * <pre> -RM-R
 *  Determines whether or not replacement is used when generating the samples.
 *  (default false)</pre>
 * 
 * <pre> -RM-B &lt;Size of each sample(&#37;)&gt;
 *  Size of each sample(bag), as a percentage of the training set size.
 *  Combined with the option &lt;distribution minority class&gt; accepts:
 *  * -1 (sizeOfMinClass): The size of the minority class  
 *  * -2 (Max): Maximum size taking into account &lt;distribution minority class&gt;
 *  *           and using no replacement
 *  (default -1)</pre>
 *  
 * <pre> -RM-D &lt;distribution minority class&gt;
 *  Determines the new value of the distribution of the minority class, if we want to change it.
 *  It can be one of the following values:
 *  * A value between 0 and 100 to change the portion of minority class instances in the new samples
 *    (this option can only be used with binary problems (two classes datasets))
 *  * -1 (free): Works with the instances without taking into account their class  
 *  * -2 (stratified): Maintains the original class distribution in the new samples
 *  (default 50.0)</pre>
 * 
<!-- options-end -->
 *
 * @author Jesús M. Pérez (txus.perez@ehu.es) 
 * @author Fernando Lozano (flozano002@ikasle.ehu.es)
 *  (based on J48.java written by Eibe Frank)
 * @version $Revision: 2.0 $
 */
public class J48Consolidated
	extends J48
	implements OptionHandler, Drawable, Matchable, Sourcable, 
				WeightedInstancesHandler, Summarizable, AdditionalMeasureProducer, 
				TechnicalInformationHandler {

	/** Version identifier used by Java serialization. */
	private static final long serialVersionUID = -2647522302468491144L;

	/*
	 * Options to set the Resampling Method (RM) for the generation of samples
	 * to use in the consolidation process.
	 * (The prefix RM is added to the option names so that they appear together
	 * in the graphical interface.)
	 */

	/** Number of samples to be generated for use in the construction of the consolidated tree (default 5). */
	private int m_RMnumberSamples = 5; 

	/** Determines whether or not replacement is used when generating the samples (default false). */
	private boolean m_RMreplacement = false;

	/** Size of each sample (bag), as a percentage of the training set size.
	 *  Combined with the option &lt;distribution minority class&gt; it also accepts:
	 *  <ul>
	 *  <li>-1 (sizeOfMinClass): the size of the minority class (this is the default)</li>
	 *  <li>-2 (maxSize): maximum size taking into account &lt;distribution minority class&gt;
	 *      and using no replacement</li>
	 *  </ul>
	 *  NOTE(review): the sample generation code assumes the set-methods keep this
	 *  value within [-2, 100] -- confirm against the setters. */
	private int m_RMbagSizePercent = -1; // default: sizeOfMinClass

	/** New value (as a percentage) of the distribution of the minority class in the
	 *  generated samples (default 50.0).
	 *  It also accepts:
	 *  <ul>
	 *  <li>-1 (free): works with the instances without taking into account their class</li>
	 *  <li>-2 (stratified): maintains the original class distribution in the new samples</li>
	 *  </ul> */
	private float m_RMnewDistrMinClass = (float)50.0;

	/**
	 * Returns a string describing the classifier
	 * @return a description suitable for
	 * displaying in the explorer/experimenter gui
	 */
	public String globalInfo() {
		// Fixed description of the algorithm, followed by the reference of the
		// paper it is based on (rendered by the technical information object).
		StringBuilder description = new StringBuilder();
		description.append("Class for generating a pruned or unpruned C45 consolidated tree. Uses the Consolidated ");
		description.append("Tree Construction (CTC) algorithm: a single tree is built based on a set of subsamples. ");
		description.append("New options are added to the class J48 to set the Resampling Method (RM) for ");
		description.append("the generation of samples to use in the consolidation process. ");
		description.append("For more information, see:\n\n");
		description.append(getTechnicalInformation().toString());
		return description.toString();
	}

	/**
	 * Returns an instance of a TechnicalInformation object, containing 
	 * detailed information about the technical background of this class,
	 * e.g., paper reference or book this class is based on.
	 * 
	 * @return the technical information about this class
	 */
	public TechnicalInformation getTechnicalInformation() {
		// Bibliographic entry of the article describing the CTC algorithm.
		TechnicalInformation result = new TechnicalInformation(Type.ARTICLE);

		// The accented letters are written as Unicode escapes so that the author
		// names do not depend on the encoding used to compile this file; this also
		// drops a stray invisible character that had slipped into the last surname.
		result.setValue(Field.AUTHOR,
				"Jes\u00fas M. P\u00e9rez and Javier Muguerza and Olatz Arbelaitz and "
				+ "Ibai Gurrutxaga and Jos\u00e9 I. Mart\u00edn");
		result.setValue(Field.YEAR, "2007");
		result.setValue(Field.TITLE, "Combining multiple class distribution modified subsamples in a single tree");
		result.setValue(Field.JOURNAL, "Pattern Recognition Letters");
		result.setValue(Field.VOLUME, "28");
		result.setValue(Field.NUMBER, "4");
		result.setValue(Field.PAGES, "414-422");
		result.setValue(Field.URL, "http://dx.doi.org/10.1016/j.patrec.2006.08.013");

		return result;
	}

	/**
	 * Returns default capabilities of the classifier.
	 *
	 * @return      the capabilities of this classifier
	 */
	public Capabilities getCapabilities() {
		Capabilities caps;

		try {
			// The capabilities are those of a C4.5 tree built with the current settings;
			// no model selection is needed just to query them.
			C45PruneableClassifierTree probe = new C45PruneableClassifierTree(
					null, !m_unpruned, m_CF, m_subtreeRaising, !m_noCleanup);
			caps = probe.getCapabilities();
		} catch (Exception e) {
			// The probe tree could not be created: fall back to a fresh capabilities object
			caps = new Capabilities(this);
		}

		// Whatever the origin, this classifier is reported as the owner
		caps.setOwner(this);
		return caps;
	}
	
	/**
	 * Generates the classifier.
	 * (Implements the original CTC algorithm, then
	 *  does not implement the options binarySplits and reducedErrorPruning of J48,
	 *  so only what is based on C4.5 algorithm)
	 *
	 * @param instances the data to train the classifier with
	 * @throws Exception if classifier can't be built successfully
	 */
	public void buildClassifier(Instances instances) 
			throws Exception {

		// TODO Implement the option binarySplits of J48
		ModelSelection selector = new C45ConsolidatedModelSelection(m_minNumObj, instances);
		// TODO Implement the option reducedErrorPruning of J48
		C45ConsolidatedPruneableClassifierTree consolidatedTree =
				new C45ConsolidatedPruneableClassifierTree(selector, !m_unpruned,
						m_CF, m_subtreeRaising, !m_noCleanup);
		// The root is published before the samples are generated
		m_root = consolidatedTree;

		// Work on a copy of the data from which the instances with missing class
		// have been removed, before generating the samples
		instances = new Instances(instances);
		instances.deleteWithMissingClass();

		// Generate the set of samples from which the single consolidated tree is built
		Instances[] samplesVector = generateSamples(instances);
		if (m_Debug) {
			printSamplesVector(samplesVector);
		}

		consolidatedTree.buildClassifier(instances, samplesVector);

		((C45ModelSelection) selector).cleanup();
	}
	
	/**
	 * Generate as many samples as the number of samples based on Resampling Method parameters
	 * 
	 * @param instances the training data which will be used to generate the samples set
	 * @return Instances[] the vector of generated samples
	 * @throws Exception if something goes wrong
	 */
	private Instances[] generateSamples(Instances instances) throws Exception {
		// can classifier tree handle the data?
		getCapabilities().testWithFail(instances);

		// Work on a copy from which the instances with missing class are removed
		InstancesConsolidated instancesWMC = new InstancesConsolidated(instances);
		instancesWMC.deleteWithMissingClass();
		if (m_Debug) {
			System.out.println("=== Generation of the set of samples ===");
			System.out.println(toStringResamplingMethod());
		}
		// Original sample size
		int dataSize = instancesWMC.numInstances();
		if (dataSize == 0)
			System.err.println("Original data size is 0! Handle zero training instances!");
		else if (m_Debug)
			System.out.println("Original data size: " + dataSize);

		// Size of samples(bags) to be generated. It stays 0 for the special values
		// -1 (sizeOfMinClass) and -2 (maxSize), which are resolved later on when
		// the distribution of the minority class is changed.
		int bagSize = 0;

		// Some checks done in set-methods
		//@ requires -2 <= m_RMbagSizePercent && m_RMbagSizePercent <= 100 
		//@ requires -2 <= m_RMnewDistrMinClass && m_RMnewDistrMinClass < 100
		if (m_RMbagSizePercent >= 0) {
			// The product is computed as a long: with an int it would overflow for
			// data sets of more than about 21 million instances.
			bagSize = (int) ((long) dataSize * m_RMbagSizePercent / 100);
			if (bagSize == 0)
				System.err.println("Size of samples is 0 (" + m_RMbagSizePercent + "% of " + dataSize
						+ ")! Handle zero training instances!");
		} else if (m_RMnewDistrMinClass < 0) { // stratified OR free
			// The special sizes only make sense when the class distribution is changed
			throw new Exception("Size of samples (" + m_RMbagSizePercent + "% of " + dataSize
					+ ") has to be greater than 0!");
		}

		Random random;
		if (dataSize == 0) // To be OK the test to Handle zero training instances!
			random = new Random(m_Seed);
		else
			random = instancesWMC.getRandomNumberGenerator(m_Seed);

		// Generate the vector of samples with the given parameters
		// TODO Set the different options to generate the samples like a filter and then use it here.  
		if (m_RMnewDistrMinClass == (float)-2)
			// stratified: Maintains the original class distribution in the new samples
			return generateStratifiedSamples(instancesWMC, dataSize, bagSize, random);
		if (m_RMnewDistrMinClass == (float)-1)
			// free: Doesn't take into account the original class distribution
			return generateFreeDistrSamples(instancesWMC, dataSize, bagSize, random);
		// RMnewDistrMinClass is between 0 and 100: Changes the class distribution to the indicated value
		return generateSamplesChangingMinClassDistr(instancesWMC, dataSize, bagSize, random);
	}
	
	/**
	 * Generate a set of stratified samples
	 * 
	 * @param instances the training data which will be used to generate the samples set
	 * @param dataSize Size of original sample (instances)
	 * @param bagSize Size of samples(bags) to be generated
	 * @param random a random number generator
	 * @return Instances[] the vector of generated samples
	 * @throws Exception if something goes wrong
	 */
	private Instances[] generateStratifiedSamples(
			InstancesConsolidated instances, int dataSize, int bagSize, Random random) throws Exception{
		Instances[] samplesVector = new Instances[m_RMnumberSamples];
		int numClasses = instances.numClasses();
		// Get the classes: one set of instances per class value
		InstancesConsolidated[] classesVector =  instances.getClasses();
		// Vector containing the size of each class
		int classSizeVector[] = new int [numClasses];
		for (int iClass = 0; iClass < numClasses; iClass++)
			classSizeVector[iClass] = classesVector[iClass].numInstances();
		// What is the minority class? (index in the original sample)
		int iMinClass = Utils.minIndex(classSizeVector);
		if (m_Debug){
			System.out.println("Minority class value (" + iMinClass +
					"): " + instances.classAttribute().value(iMinClass));
			System.out.println("Classes sizes:");
			for (int iClass = 0; iClass < numClasses; iClass++){
				// Distribution of the 'iClass'-th class in the original sample
				float distrClass;
				if (dataSize == 0)
					distrClass = (float)0;
				else
					distrClass = (float)100 * classSizeVector[iClass] / dataSize;
				System.out.print(classSizeVector[iClass] + " (" + distrClass + "%)");
				if(iClass < numClasses - 1)
					System.out.print(", ");
			}
			System.out.println("");
		}
		// Determine the sizes of each class in the new samples
		int newClassSizeVector[] = new int [numClasses];
		// Accumulated size of all the classes but the minority one
		int localBagSize = 0;
		for(int iClass = 0; iClass < numClasses; iClass++)
			if(iClass != iMinClass){
				// Size of the 'iClass'-th class in the samples to be generated.
				// The product is computed as a long so that it cannot overflow
				// for very large classes.
				int newClassSize = (int) ((long) classSizeVector[iClass] * m_RMbagSizePercent / 100);
				newClassSizeVector[iClass] = newClassSize;
				localBagSize += newClassSize;
			}
		// The minority class takes the rest of the bag
		// (Done in this way to know the exact size to be generated of the minority class)
		newClassSizeVector[iMinClass] = bagSize - localBagSize;
		if (m_Debug) {
			System.out.println("New bag size: " + bagSize);
			System.out.println("Classes sizes of the new bag:");
			for (int iClass = 0; iClass < numClasses; iClass++){
				System.out.print(newClassSizeVector[iClass]);
				if(iClass < numClasses - 1)
					System.out.print(", ");
			}
			System.out.println("");
		}

		// Generate the vector of samples 
		for(int iSample = 0; iSample < m_RMnumberSamples; iSample++){
			InstancesConsolidated bagData = null;
			for(int iClass = 0; iClass < numClasses; iClass++){
				// Extract instances of the iClass-th class
				InstancesConsolidated bagClass;
				if(m_RMreplacement)
					bagClass = new InstancesConsolidated(classesVector[iClass].resampleWithWeights(random));
				else
					bagClass = new InstancesConsolidated(classesVector[iClass]);
				// Shuffle the instances
				bagClass.randomize(random);
				// Keep only the first ones when fewer than the whole class are required
				if (newClassSizeVector[iClass] < classSizeVector[iClass])
					bagClass = new InstancesConsolidated(bagClass, 0, newClassSizeVector[iClass]);
				// The first class starts the bag; the following ones are appended to it
				if(bagData == null)
					bagData = bagClass;
				else
					bagData.add(bagClass);
			}
			// Shuffle the instances so that the classes end up mixed
			bagData.randomize(random);
			samplesVector[iSample] = bagData;
		}
		return samplesVector;
	}
	
	/**
	 * Generate a set of samples without taking into account the class distribution
	 * (like in the meta-classifier Bagging)
	 * 
	 * @param instances the training data which will be used to generate the samples set
	 * @param dataSize Size of original sample (instances)
	 * @param bagSize Size of samples(bags) to be generated
	 * @param random a random number generator
	 * @return Instances[] the vector of generated samples
	 * @throws Exception if something goes wrong
	 */
	private Instances[] generateFreeDistrSamples(
			InstancesConsolidated instances, int dataSize, int bagSize, Random random) throws Exception{
		Instances[] bags = new Instances[m_RMnumberSamples];
		if (m_Debug) {
			System.out.println("New bag size: " + bagSize);
		}
		// A bag is only cut down when it has to be smaller than the original sample
		final boolean truncate = bagSize < dataSize;
		for (int iSample = 0; iSample < bags.length; iSample++) {
			// Start from a resampled copy (with replacement) or from a plain copy
			Instances shuffled = m_RMreplacement
					? new Instances(instances.resampleWithWeights(random))
					: new Instances(instances);
			// Shuffle the instances and keep the first 'bagSize' ones if required
			shuffled.randomize(random);
			bags[iSample] = truncate ? new Instances(shuffled, 0, bagSize) : shuffled;
		}
		return bags;
	}
	
	/**
	 * Generate a set of samples changing the distribution of the minority class
	 * 
	 * @param instances the training data which will be used to generate the samples set
	 * @param dataSize Size of original sample (instances)
	 * @param bagSize Size of samples(bags) to be generated
	 * @param random a random number generator
	 * @return Instances[] the vector of generated samples
	 * @throws Exception if something goes wrong
	 */
	private Instances[] generateSamplesChangingMinClassDistr(
			InstancesConsolidated instances, int dataSize, int bagSize, Random random) throws Exception{
		Instances[] bags = new Instances[m_RMnumberSamples];
		// Changing the class distribution is only defined for two-class problems
		if (instances.classAttribute().numValues() != 2) {
			throw new Exception("Only binary problems (two classes datasets) can be used to change the distribution of classes!!!\n" +
							"Use 'free' or 'stratified' values in <distribution minority class> for multi-class datasets!!!");
		}
		// TODO Generalize the process to multi-class datasets
		// Some checks done in set-methods
		//@ requires m_RMreplacement = false 
		// TODO Accept replacement

		// Split the data into its two classes and find out which one is the minority
		// (on a tie the first class is taken as the minority one)
		InstancesConsolidated[] perClass = instances.getClasses();
		final int iMinClass = (perClass[0].numInstances() <= perClass[1].numInstances()) ? 0 : 1;
		final int iMajClass = 1 - iMinClass;
		final int minClassSize = perClass[iMinClass].numInstances();
		final int majClassSize = perClass[iMajClass].numInstances();

		// Distribution (%) of the minority class in the original sample
		final float distrMinClass = (dataSize == 0)
				? (float)0
				: (float)100 * minClassSize / dataSize;
		if (m_Debug) {
			System.out.println("Minority class value (" + iMinClass +
					"): " + instances.classAttribute().value(iMinClass));
			System.out.println("Minority class size: " + minClassSize + " (" + distrMinClass + "%)");
			System.out.println("Majority class size: " + majClassSize);
		}

		// Maximum sizes of both classes for the requested distribution when no
		// replacement is used: one class is kept whole and the other is derived from it
		final int maxMinClassSize;
		final int maxMajClassSize;
		if (m_RMnewDistrMinClass > distrMinClass) {
			// Maintains the whole minority class
			maxMajClassSize = (int) (minClassSize * (100 - m_RMnewDistrMinClass) / m_RMnewDistrMinClass);
			maxMinClassSize = minClassSize;
		} else {
			// Maintains the whole majority class
			maxMinClassSize = (int) (majClassSize * m_RMnewDistrMinClass / (100 - m_RMnewDistrMinClass));
			maxMajClassSize = majClassSize;
		}

		// Sizes of both classes in the samples to be generated
		int newMinClassSize;
		int newMajClassSize;
		if (m_RMbagSizePercent == -2) {
			// maxSize : Generate the biggest samples according the indicated distribution (RMnewDistrMinClass)
			if (m_RMnewDistrMinClass == distrMinClass) {
				System.err.println("Doesn't make sense that the original distribution and " +
						"the distribution to be changed (RMnewDistrMinClass) are the same and " +
						"the size of samples to be generated is maximum (RMbagSizePercent=-2)!!!");
			}
			newMinClassSize = maxMinClassSize;
			newMajClassSize = maxMajClassSize;
			bagSize = maxMinClassSize + maxMajClassSize;
		} else {
			// sizeOfMinClass (-1): the samples have the size of the minority class;
			// otherwise the percentage was already turned into bagSize by the caller
			if (m_RMbagSizePercent == -1) {
				bagSize = minClassSize;
			}
			newMinClassSize = (int) (m_RMnewDistrMinClass * bagSize / 100);
			newMajClassSize = bagSize - newMinClassSize;
		}
		if (m_Debug) {
			System.out.println("New bag size: " + bagSize);
			System.out.println("New minority class size: " + newMinClassSize + " (" + (int)(newMinClassSize / (double)bagSize * 100) + "%)");
			System.out.println("New majority class size: " + newMajClassSize);
		}

		// Without replacement no class can contribute more instances than it has
		if (newMinClassSize > minClassSize) {
			throw new Exception("There isn't enough instances of the minority class (" +
						minClassSize + ") to extract " + newMinClassSize + " for the new samples " +
						"whithout replacement!!!");
		}
		if (newMajClassSize > majClassSize) {
			throw new Exception("There isn't enough instances of the majority class (" +
						majClassSize + ") to extract " + newMajClassSize + " for the new samples " +
						"whithout replacement!!!");
		}

		// Generate the vector of samples. The random generator is consumed in a fixed
		// order: minority class, majority class and, finally, the whole bag.
		for (int iSample = 0; iSample < bags.length; iSample++) {
			// Shuffled copy of the minority class, cut down to its new size if needed
			InstancesConsolidated minorityPart = new InstancesConsolidated(perClass[iMinClass]);
			minorityPart.randomize(random);
			if (newMinClassSize < minClassSize) {
				minorityPart = new InstancesConsolidated(minorityPart, 0, newMinClassSize);
			}
			// Shuffled copy of the majority class, cut down to its new size if needed
			InstancesConsolidated majorityPart = new InstancesConsolidated(perClass[iMajClass]);
			majorityPart.randomize(random);
			if (newMajClassSize < majClassSize) {
				majorityPart = new InstancesConsolidated(majorityPart, 0, newMajClassSize);
			}
			// Join both parts (the minority part becomes the bag) and mix them
			minorityPart.add(majorityPart);
			minorityPart.randomize(random);
			bags[iSample] = minorityPart;
		}
		return bags;
	}

	/**
	 * Print the generated samples. Only for test purposes. 
	 *
	 * @param samplesVector the vector of samples
	 */
	private void printSamplesVector(Instances[] samplesVector){
		// Each sample is printed under a header carrying its position in the vector
		int position = 0;
		for (Instances sample : samplesVector) {
			System.out.println("==== SAMPLE " + position + " ====");
			System.out.println(sample);
			System.out.println(" ");
			position++;
		}
	}
	
	/**
	 * Returns an enumeration describing the available options.
	 *
	 * Valid options are: <p>
	 * 
	 * J48 options
	 * ============= 
	 *
	 * -U <br>
	 * Use unpruned tree.<p>
	 *
	 * -C confidence <br>
	 * Set confidence threshold for pruning. (Default: 0.25) <p>
	 *
	 * -M number <br>
	 * Set minimum number of instances per leaf. (Default: 2) <p>
	 *
	 * -S <br>
	 * Don't perform subtree raising. <p>
	 *
	 * -L <br>
	 * Do not clean up after the tree has been built. <p>
	 *
	 * -A <br>
	 * If set, Laplace smoothing is used for predicted probabilites. <p>
	 *
	 * -Q seed <br>
	 * Seed for random data shuffling (Default: 1) <p>
	 * 
	 * Options to set the Resampling Method (RM) for the generation of samples
	 *  to use in the consolidation process
	 * ============================================================================ 
	 * -RM-N number <br>
	 * Number of samples to be generated for use in the construction of the consolidated tree. <br>
	 * (Default: 5) <p>
	 * 
	 * -RM-R <br>
	 * Determines whether or not replacement is used when generating the samples. <br>
	 * (Default: false)<p>
	 * 
	 * -RM-B percent <br>
	 * Size of each sample(bag), as a percentage of the training set size. <br>
	 * Combined with the option &lt;distribution minority class&gt; accepts: <br>
	 *  * -1 (sizeOfMinClass): The size of the minority class <br>
	 *  * -2 (maxSize): Maximum size taking into account &lt;distribution minority class&gt;
	 *              and using no replacement <br>
	 * (Default: -1(sizeOfMinClass)) <p>
	 * 
	 * -RM-D distribution minority class <br>
	 * Determines the new value of the distribution of the minority class, if we want to change it. <br>
	 * It can be one of the following values: <br>
	 *  * A value between 0 and 100 to change the portion of minority class instances in the new samples <br>
	 *    (this option can only be used with binary problems (two classes datasets)) <br>
	 *  * -1 (free): Works with the instances without taking into account their class <br>
	 *  * -2 (stratified): Maintains the original class distribution in the new samples <br>
	 * (Default: 50.0) <p>
	 * 
	 * @return an enumeration of all the available options.
	 */
	public Enumeration listOptions() {

		Vector<Option> allOptions = new Vector<Option>();

		// J48 options
		// ===========
		// The inherited options go first, in the order given by the superclass
		for (Enumeration inherited = super.listOptions(); inherited.hasMoreElements();) {
			allOptions.add((Option) inherited.nextElement());
		}

		// Options to set the Resampling Method (RM) for the generation of samples
		//  to use in the consolidation process
		// =========================================================================
		String numberSamplesHelp =
				"\tNumber of samples to be generated for use in the construction of the\n" +
				"\tconsolidated tree.\n" +
				"\t(default 5)";
		allOptions.add(new Option(numberSamplesHelp, "RM-N", 1, "-RM-N <Number of samples>"));

		String replacementHelp =
				"\tUse replacement to generate the set of samples\n" +
				"\t(default false)";
		allOptions.add(new Option(replacementHelp, "RM-R", 0, "-RM-R"));

		String bagSizeHelp =
				"\tSize of each sample(bag), as a percentage of the training set size.\n" +
				"\tCombined with the option <distribution minority class> accepts:\n" +
				"\t * -1 (sizeOfMinClass): The size of the minority class\n" +
				"\t * -2 (maxSize): Maximum size taking into account <distribution minority\n" +
				"\t              class> and using no replacement\n" +
				"\t(default -1)";
		allOptions.add(new Option(bagSizeHelp, "RM-B", 1, "-RM-B <Size of each sample(%)>"));

		String newDistributionHelp =
				"\tDetermines the new value of the distribution of the minority class.\n" +
				"\tIt can be one of the following values:\n" +
				"\t * A value between 0 and 100 to change the portion of minority class\n" +
				"\t              instances in the new samples (Only for two-classes datasets)\n" +
				"\t * -1 (free): Works with the instances without taking into account\n" +
				"\t              their class\n" +
				"\t * -2 (stratified): Maintains the original class distribution in the\n" +
				"\t              new samples\n" +
				"\t(default 50.0)";
		allOptions.add(new Option(newDistributionHelp, "RM-D", 1, "-RM-D <distribution minority class>"));

		return allOptions.elements();
	}
	
	/**
	 * Parses a given list of options.
	 * 
   <!-- options-start -->
	 * Valid options are: <p/>
	 * 
	 * Options to set the Resampling Method (RM) for the generation of samples
	 *  to use in the consolidation process
	 * ============================================================================ 
	 * <pre> -RM-N &lt;Number of samples&gt;
	 *  Number of samples to be generated for use in the construction of the consolidated tree.
	 *  (default 5)</pre>
	 * 
	 * <pre> -RM-R
	 *  Determines whether or not replacement is used when generating the samples.
	 *  (default false)</pre>
	 * 
	 * <pre> -RM-B &lt;Size of each sample(&#37;)&gt;
	 *  Size of each sample(bag), as a percentage of the training set size.
	 *  Combined with the option &lt;distribution minority class&gt; accepts:
	 *  * -1 (sizeOfMinClass): The size of the minority class  
	 *  * -2 (maxSize): Maximum size taking into account &lt;distribution minority class&gt;
	 *  *           and using no replacement
	 *  (default -1(sizeOfMinClass))</pre>
	 * 
	 * <pre> -RM-D &lt;distribution minority class&gt;
	 *  Determines the new value of the distribution of the minority class, if we want to change it.
	 *  It can be one of the following values:
	 *  * A value between 0 and 100 to change the portion of minority class instances in the new samples
	 *    (this option can only be used with binary problems (two classes datasets))
	 *  * -1 (free): Works with the instances without taking into account their class  
	 *  * -2 (stratified): Maintains the original class distribution in the new samples
	 *  (default 50.0)</pre>
	 * 
   <!-- options-end -->
	 *
	 * @param options the list of options as an array of strings
	 * @throws Exception if an option is not supported
	 */
	public void setOptions(String[] options) throws Exception {

		// Options to set the Resampling Method (RM) for the generation of samples
		//  to use in the consolidation process
		// =========================================================================
		// Each option falls back to its default value when it is not given.
		String RMnumberSamplesString = Utils.getOption("RM-N", options);
		if (RMnumberSamplesString.length() != 0) {
			setRMnumberSamples(Integer.parseInt(RMnumberSamplesString));
		} else {
			setRMnumberSamples(5);
		}
		String RMbagSizePercentString = Utils.getOption("RM-B", options);
		if (RMbagSizePercentString.length() != 0) {
			setRMbagSizePercent(Integer.parseInt(RMbagSizePercentString), false);
		} else {
			setRMbagSizePercent(-1, false); // default: sizeOfMinClass
		}
		String RMnewDistrMinClassString = Utils.getOption("RM-D", options);
		if (RMnewDistrMinClassString.length() != 0) {
			// Float.parseFloat replaces the deprecated 'new Float(String)' constructor;
			// it accepts the same input and throws the same NumberFormatException.
			setRMnewDistrMinClass(Float.parseFloat(RMnewDistrMinClassString), false);
		} else {
			setRMnewDistrMinClass((float)50, false);
		}
		// Only checking the combinations of the three options RMreplacement, RMbagSizePercent and
		//  RMnewDistrMinClass when they all are set.
		setRMreplacement(Utils.getFlag("RM-R", options), true);
		// J48 options
		// ===========
		super.setOptions(options);
	}

	/**
	 * Gets the current settings of the Classifier.
	 *
	 * @return an array of strings suitable for passing to setOptions
	 */
	public String [] getOptions() {

		Vector<String> result = new Vector<String>();

		// J48 options
		// ===========
		// Copy the inherited options, taking note of whether the seed is among them
		boolean seedAlreadyListed = false;
		for (String option : super.getOptions()) {
			result.add(option);
			if ("-Q".equals(option))
				seedAlreadyListed = true;
		}
		// In J48 m_Seed is added only if m_reducedErrorPruning is true,
		// but in J48Consolidated is necessary to the generation of the samples.
		// It is added only when the superclass has not done it, so that the
		// option never appears twice.
		if (!seedAlreadyListed) {
			result.add("-Q");
			result.add("" + m_Seed);
		}

		// Options to set the Resampling Method (RM) for the generation of samples
		//  to use in the consolidation process
		// =========================================================================
		result.add("-RM-N");
		result.add("" + m_RMnumberSamples);
		if (m_RMreplacement)
			result.add("-RM-R");
		result.add("-RM-B");
		result.add("" + m_RMbagSizePercent);
		result.add("-RM-D");
		result.add("" + m_RMnewDistrMinClass);

		return result.toArray(new String[result.size()]);
	}

	/**
	 * Returns a description of the classifier.
	 * 
	 * @return a description of the classifier
	 */
	public String toString() {

		// Nothing to describe until a tree has been built
		if (m_root == null)
			return "No classifier built";

		// Title (depends only on the pruning), resampling method summary and the tree itself
		String title = m_unpruned
				? "J48Consolidated unpruned tree\n"
				: "J48Consolidated pruned tree\n";
		return title + toStringResamplingMethod() + m_root.toString();
	}

	/**
	 * Returns a description of the Resampling Method used in the consolidation process.
	 * 
	 * @return a description of the used Resampling Method (RM)
	 */
	public String toStringResamplingMethod() {
		StringBuilder text = new StringBuilder("[RM] N_S=");
		text.append(m_RMnumberSamples);

		// Class distribution of the samples: -2 and -1 are special codes,
		// anything else is printed as the percentage of the minority class
		if (m_RMnewDistrMinClass == -2) {
			text.append(" stratified");
		} else if (m_RMnewDistrMinClass == -1) {
			text.append(" free distribution");
		} else {
			text.append(" %Min=").append(m_RMnewDistrMinClass);
		}

		// Size of the samples: -2 and -1 are special codes as well
		text.append(" Size=");
		if (m_RMbagSizePercent == -2) {
			text.append("maxSize");
		} else if (m_RMbagSizePercent == -1) {
			text.append("sizeOfMinClass");
		} else {
			text.append(m_RMbagSizePercent).append("%");
		}

		text.append(m_RMreplacement ? " (with replacement)" : " (without replacement)");
		text.append("\n");

		// Underline made of one dash per character written so far
		// (the trailing newline of the first line is counted too)
		int dashes = text.length();
		for (int k = 0; k < dashes; k++) {
			text.append('-');
		}
		text.append("\n");
		return text.toString();
	}

	/**
	 * Returns the tip text for this property
	 * @return tip text for this property suitable for
	 * displaying in the explorer/experimenter gui
	 */
	public String RMnumberSamplesTipText() {
		// Text shown by the GUI for the RMnumberSamples property
		final String tip = "Number of samples to be generated for use in the consolidation process";
		return tip;
	}

	/**
	 * Get the value of RMnumberSamples.
	 *
	 * @return Value of RMnumberSamples.
	 */
	public int getRMnumberSamples() {
		// Plain accessor: hands back the stored number of samples
		return this.m_RMnumberSamples;
	}

	/**
	 * Set the value of RMnumberSamples.
	 *
	 * @param v  Value to assign to RMnumberSamples.
	 */
	public void setRMnumberSamples(int v) {
		// No validation on purpose: a consolidated tree built from only 1 or 2
		// samples makes little sense, but it is still allowed
		this.m_RMnumberSamples = v;
	}

	/**
	 * Returns the tip text for this property
	 * @return tip text for this property suitable for
	 * displaying in the explorer/experimenter gui
	 */
	public String RMreplacementTipText() {
		// Text shown by the GUI for the RMreplacement property
		final String tip = "Whether replacement is performed to generate the set of samples.";
		return tip;
	}
	
	/**
	 * Get the value of RMreplacement
	 *
	 * @return Value of RMreplacement
	 */
	public boolean getRMreplacement() {
		// Plain accessor: hands back the stored replacement flag
		return this.m_RMreplacement;
	}

	/**
	 * Set the value of RMreplacement.
	 * Checks the combinations of the options RMreplacement, RMbagSizePercent and RMnewDistrMinClass
	 *  
	 * @param v  Value to assign to RMreplacement.
	 * @throws Exception if an option is not supported
	 */
	public void setRMreplacement(boolean v) throws Exception {
		// Delegate to the two-argument overload with the combination check enabled
		final boolean checkCombination = true;
		setRMreplacement(v, checkCombination);
	}

	/**
	 * Set the value of RMreplacement, but, optionally,
	 * checks the combinations of the options RMreplacement, RMbagSizePercent and RMnewDistrMinClass.
	 * This makes it possible to run the check only once, in the last setter call made by setOptions().
	 *  
	 * @param v  Value to assign to RMreplacement.
	 * @param checkComb true to check some combinations of options
	 * @throws Exception if an option is not supported
	 */
	public void setRMreplacement(boolean v, boolean checkComb) throws Exception {

		// When requested, validate the candidate value together with the other two
		// RM options first; the field is only written if the check does not throw
		if (checkComb) {
			checkBagSizePercentAndReplacementAndNewDistrMinClassOptions(
					v, this.m_RMbagSizePercent, this.m_RMnewDistrMinClass);
		}
		this.m_RMreplacement = v;
	}

	/**
	 * Returns the tip text for this property
	 * @return tip text for this property suitable for
	 * displaying in the explorer/experimenter gui
	 */
	public String RMbagSizePercentTipText() {
		// Assemble the multi-line tip text one line at a time
		StringBuilder tip = new StringBuilder();
		tip.append("Size of each sample(bag), as a percentage of the training set size/-1=sizeOfMinClass/-2=maxSize.\n");
		tip.append("Combined with the option <distribution minority class> accepts:\n");
		tip.append(" * -1 (sizeOfMinClass): The size of the minority class\n");
		tip.append(" * -2 (maxSize): Maximum size taking into account <distribution minority class>\n");
		tip.append("             and using no replacement.");
		return tip.toString();
	}

	/**
	 * Get the value of RMbagSizePercent.
	 *
	 * @return Value of RMbagSizePercent.
	 */
	public int getRMbagSizePercent() {
		// Plain accessor: hands back the stored bag size (percentage or special code)
		return this.m_RMbagSizePercent;
	}

	/**
	 * Set the value of RMbagSizePercent.
	 * Checks the combinations of the options RMreplacement, RMbagSizePercent and RMnewDistrMinClass
	 *
	 * @param v  Value to assign to RMbagSizePercent.
	 * @throws Exception if an option is not supported
	 */
	public void setRMbagSizePercent(int v) throws Exception {
		// Delegate to the two-argument overload with the combination check enabled
		final boolean checkCombination = true;
		setRMbagSizePercent(v, checkCombination);
	}
	
	/**
	 * Set the value of RMbagSizePercent, but, optionally,
	 * checks the combinations of the options RMreplacement, RMbagSizePercent and RMnewDistrMinClass.
	 * This makes it possible to run the check only once, in the last setter call made by setOptions().
	 *  
	 * @param v  Value to assign to RMbagSizePercent.
	 * @param checkComb true to check some combinations of options
	 * @throws Exception if an option is not supported
	 */
	public void setRMbagSizePercent(int v, boolean checkComb) throws Exception {

		// Reject anything outside [-2, 100]
		boolean outOfRange = (v < -2) || (v > 100);
		if (outOfRange) {
			throw new Exception("Size of sample (%) has to be greater than zero and smaller " +
					"than or equal to 100 " +
					"(or combining with the option <distribution minority class> -1 for 'sizeOfMinClass' " +
					"or -2 for 'maxSize')!");
		}
		// Zero is inside that range but is not a usable size either
		if (v == 0) {
			throw new Exception("Size of sample (%) has to be greater than zero and smaller "
					+ "than or equal to 100!");
		}

		// When requested, validate the candidate value together with the other two
		// RM options first; the field is only written if the check does not throw
		if (checkComb) {
			checkBagSizePercentAndReplacementAndNewDistrMinClassOptions(
					this.m_RMreplacement, v, this.m_RMnewDistrMinClass);
		}
		this.m_RMbagSizePercent = v;
	}
	
	/**
	 * Checks the combinations of the options RMreplacement, RMbagSizePercent and RMnewDistrMinClass 
	 *
	 * @throws Exception if an option is not supported
	 */
	private void checkBagSizePercentAndReplacementAndNewDistrMinClassOptions(
			boolean replacement, int bagSizePercent, float newDistrMinClass) throws Exception{

		// A value strictly between 0 and 100 asks for a new class distribution;
		// that mode cannot be combined with replacement
		boolean changesDistribution = (newDistrMinClass > 0f) && (newDistrMinClass < 100f);
		if (changesDistribution && replacement) {
			throw new Exception("Using replacement isn't contempled to change the distribution of minority class!");
		}

		// The remaining checks only apply to the special codes -1 (free) and -2 (stratified)
		boolean freeOrStratified = (newDistrMinClass == -1f) || (newDistrMinClass == -2f);
		if (!freeOrStratified) {
			return;
		}

		// With those codes the bag size must be a real percentage, not a special code
		if (bagSizePercent < 0) {
			throw new Exception("Size of sample (%) has to be greater than zero and smaller " +
					  "than or equal to 100!");
		}
		// Questionable but tolerated combination: only a warning on stderr
		if (!replacement && bagSizePercent == 100) {
			System.err.println("Doesn't make sense that size of sample (%) is 100, when replacement is false!");
		}
	}

	/**
	 * Returns the tip text for this property
	 * @return tip text for this property suitable for
	 * displaying in the explorer/experimenter gui
	 */
	public String RMnewDistrMinClassTipText() {
		// Assemble the multi-line tip text one line at a time
		StringBuilder tip = new StringBuilder();
		tip.append("Determines the new value of the distribution of the minority class, if we want to change it/-1=free/-2=stratified.\n");
		tip.append("It can be one of the following values:\n");
		tip.append(" * A value between 0 and 100 to change the portion of minority class instances in the new samples\n");
		tip.append("   (this option can only be used with binary problems (two classes datasets))\n");
		tip.append(" * -1 (free): Works with the instances without taking into account their class.\n");
		tip.append(" * -2 (stratified): Maintains the original class distribution in the new samples.\n");
		tip.append(" (default: 50)");
		return tip.toString();
	}

	/**
	 * Get the value of RMnewDistrMinClass
	 * 
	 * @return Value of RMnewDistrMinClass
	 */
	public float getRMnewDistrMinClass() {
		// Plain accessor: hands back the stored distribution value (percentage or special code)
		return this.m_RMnewDistrMinClass;
	}

	/**
	 * Set the value of RMnewDistrMinClass
	 * Checks the combinations of the options RMreplacement, RMbagSizePercent and RMnewDistrMinClass
	 * 
	 * @param v Value to assign to RMnewDistrMinClass
	 * @throws Exception if an option is not supported
	 */
	public void setRMnewDistrMinClass(float v) throws Exception {
		// Delegate to the two-argument overload with the combination check enabled
		final boolean checkCombination = true;
		setRMnewDistrMinClass(v, checkCombination);
	}

	/**
	 * Set the value of RMnewDistrMinClass, but, optionally,
	 * checks the combinations of the options RMreplacement, RMbagSizePercent and RMnewDistrMinClass.
	 * This makes it possible to run the check only once, in the last setter call made by setOptions().
	 * 
	 * @param v Value to assign to RMnewDistrMinClass
	 * @param checkComb true to check some combinations of options
	 * @throws Exception if an option is not supported
	 */
	public void setRMnewDistrMinClass(float v, boolean checkComb) throws Exception {

		// Accepted values are the special codes -1 (free) and -2 (stratified), or a
		// percentage strictly between 0 and 100. The test is written positively so
		// that NaN (for which every comparison is false) and fractional values in
		// (-2, 0) other than -1, e.g. -1.5 or -0.5, are rejected instead of being
		// stored as a meaningless distribution.
		boolean specialCode = (v == -1f) || (v == -2f);
		boolean percentage = (v > 0f) && (v < 100f);
		if (!specialCode && !percentage) {
			throw new Exception("Distribution minority class has to be greater than zero and smaller " +
					"than 100 (or -1 for 'free' or -2 for 'stratified')!");
		}

		// When requested, validate the candidate value together with the other two
		// RM options first; the field is only written if the check does not throw
		if (checkComb) {
			checkBagSizePercentAndReplacementAndNewDistrMinClassOptions(
					m_RMreplacement, m_RMbagSizePercent, v);
		}
		m_RMnewDistrMinClass = v;
	}

	/**
	 * Returns the tip text for this property
	 * (Rewritten to indicate this option is not implemented for J48Consolidated)
	 * 
	 * @return tip text for this property suitable for
	 * displaying in the explorer/experimenter gui
	 */
	public String reducedErrorPruningTipText() {
		// Replaces the inherited tip: the option is not available in this classifier
		final String tip = "J48 option not implemented for J48Consolidated";
		return tip;
	}

	/**
	 * Set the value of reducedErrorPruning. Turns
	 * unpruned trees off if set.
	 * (Rewritten to maintain the default value of J48)
	 *
	 * @param v  Value to assign to reducedErrorPruning.
	 * @throws Exception if an option is not supported
	 */
	public void setReducedErrorPruning(boolean v) throws Exception {

		// The field is always forced to false, the only supported value
		m_reducedErrorPruning = false;
		// Only an attempt to enable the option is an error; re-assigning false
		// (e.g. when a value is reverted or copied back through the setter)
		// is a no-op and must not fail
		if (v)
			throw new Exception("J48 option not implemented for J48Consolidated");
	}

	/**
	 * Returns the tip text for this property
	 * (Rewritten to indicate this option is not implemented for J48Consolidated)
	 * 
	 * @return tip text for this property suitable for
	 * displaying in the explorer/experimenter gui
	 */
	public String numFoldsTipText() {
		// Replaces the inherited tip: the option is not available in this classifier
		final String tip = "J48 option not implemented for J48Consolidated";
		return tip;
	}

	/**
	 * Set the value of numFolds.
	 * (Rewritten to maintain the default value of J48)
	 *
	 * @param v  Value to assign to numFolds.
	 * @throws Exception if an option is not supported
	 */
	public void setNumFolds(int v) throws Exception {

		// The field is always forced to 3, the only supported value
		m_numFolds = 3;
		// Only an attempt to change the value is an error; re-assigning 3
		// is a no-op and must not fail
		if (v != 3)
			throw new Exception("J48 option not implemented for J48Consolidated");
	}
	 
	/**
	 * Returns the tip text for this property
	 * (Rewritten to indicate this option is not implemented for J48Consolidated)
	 * 
	 * @return tip text for this property suitable for
	 * displaying in the explorer/experimenter gui
	 */
	public String binarySplitsTipText() {
		// Replaces the inherited tip: the option is not available in this classifier
		final String tip = "J48 option not implemented for J48Consolidated";
		return tip;
	}

	/**
	 * Set the value of binarySplits.
	 * (Rewritten to maintain the default value of J48)
	 *
	 * @param v  Value to assign to binarySplits.
	 * @throws Exception if an option is not supported
	 */
	public void setBinarySplits(boolean v) throws Exception {

		// The field is always forced to false, the only supported value
		m_binarySplits = false;
		// Only an attempt to enable the option is an error; re-assigning false
		// is a no-op and must not fail
		if (v)
			throw new Exception("J48 option not implemented for J48Consolidated");
	}

	/**
	 * Returns the tip text for this property
	 * (Rewritten to indicate the true using of the seed in this class)
	 * 
	 * @return tip text for this property suitable for
	 * displaying in the explorer/experimenter gui
	 */
	public String seedTipText() {
		// Replaces the inherited tip to describe how this class uses the seed
		final String tip = "Seed for random data shuffling in the generation of samples";
		return tip;
	}

	/**
	 * Main method for testing this class
	 *
	 * @param argv the commandline options
	 */
	public static void main(String [] argv){
		// Build a fresh classifier and hand it, with the command line, to runClassifier
		J48Consolidated classifier = new J48Consolidated();
		runClassifier(classifier, argv);
	}
}
