#ifndef __contin_cpp__
#define __contin_cpp__

#include "contin.h"//



namespace BIOS {
/* File: contin.cpp */




/*************************************************************************/
/*                                                                       */
/*  Evaluation of a test on a continuous valued attribute                */
/*  -----------------------------------------------------                */
/*                                                                       */
/*************************************************************************/


/*************************************************************************/
/*                                                                       */
/*  Continuous attributes are treated as if they have possible values    */
/*      0 (unknown), 1 (less than cut), 2 (greater than cut)             */
/*  This routine finds the best cut for items Fp through Lp and sets     */
/*  Info[Att], Gain[Att], EmpRisk[Att] and, when a cut is selected,      */
/*  Bar[Att].                                                            */
/*                                                                       */
/*  Parameters                                                           */
/*      Att        attribute whose cut point is being evaluated          */
/*      Fp, Lp     first and last item index of the range examined       */
/*      criterion  selects how the cut is chosen; it is also forwarded   */
/*                 to ComputeRisk.  The labels in brackets are the       */
/*                 original author's; their expansion is not given in    */
/*                 this file (SRM is presumably "structural risk         */
/*                 minimisation" - TODO confirm).                        */
/*          -5  [LDM, optimal point SRM]   smallest bounded risk         */
/*          -4  [SRM, optimal point Fayyad] largest penalised gain;      */
/*              the bounded risk of that cut is recorded                 */
/*          -3  [LDM, optimal point LDM]   largest penalised gain;       */
/*              EmpRisk[Att] is left at maxreal                          */
/*          -2  [SRM, optimal point SRM]   smallest bounded risk         */
/*              (the selection code is the same as for -5)               */
/*          -1  [LDM, midpoint]  only the middle index of the            */
/*              known-value range is tested, by penalised gain           */
/*           0  [SRM, midpoint]  only the middle index is tested, by     */
/*              bounded risk                                             */
/*          any other value matches no case, so no cut is selected       */
/*                                                                       */
/*  "Penalised gain" is SplitGain[i] - ThreshCost and "bounded risk"     */
/*  is SplitEmpRisk[i] + SRMComplexity, both as computed below.          */
/*                                                                       */
/*  Side effects: the items in Fp..Lp are rearranged through Swap and    */
/*  Quicksort, and the shared tables Freq, ValFreq, UnknownRate[Att],    */
/*  SplitGain, SplitInfo and SplitEmpRisk are overwritten.               */
/*                                                                       */
/*************************************************************************/


 void   EvalContinuousAtt(AttributeC45 Att, ItemNo Fp, ItemNo Lp, short int criterion)
/*  -----------------  */
{
    /*  i      : running item index
	BestI  : index of the selected cut (None if no cut is selected)
	Xp     : first item with a known value once unknowns are moved up
	Tries  : number of candidate cuts actually evaluated  */

    ItemNo i, BestI, Xp, Tries=0;
    ItemCount Items, KnownItems, LowItems, MinSplit;
	ItemCount CountItems(ItemNo, ItemNo);
    ClassNo c;

    /*  NOTE(review): AvGain is accumulated below but never read  */

    float AvGain=0, Val, BestVal, LowestRisk, BaseInfo, ThreshCost, SRMComplexity, margen;
     float ComputeGain(float, float, short, ItemCount);
	 float TotalInfo(ItemCount*, short, short);
	 float Worth(float, float, float);

     /*  NOTE(review): TotalRisk is declared but not called in this routine  */

     float TotalRisk (ItemCount*, short, short);
    void Swap(ItemNo, ItemNo);

    Verbosity(2) printf("\tAtt %s", AttName[Att]);
    Verbosity(3) printf("\n");

    /*  Presumably clears the Freq rows for values 0..2 - TODO confirm  */

    ResetFreq(2);

    /*  Omit and count unknown values: every item whose value is Unknown
	is tallied in Freq[0] by class and swapped to position Xp, so that
	on exit the known-valued items occupy Xp..Lp  */

    Items = CountItems(Fp, Lp);
    Xp = Fp;
    ForEach(i, Fp, Lp)
    {
	if ( CVal(Item[i],Att) == Unknown )
	{
	    Freq[ 0 ][ Class(Item[i]) ] += Weight[i];
	    Swap(Xp, i);
	    Xp++;
	}
    }

    /*  Total weight of the items with unknown value  */

    ValFreq[0] = 0;
    ForEach(c, 0, MaxClass)
    {
	ValFreq[0] += Freq[0][c];
    }

    /*  UnknownRate is the unknown share of the total; this assumes
	ItemCount is a floating type so the division does not truncate
	- TODO confirm  */

    KnownItems = Items - ValFreq[0];
    UnknownRate[Att] = 1.0 - KnownItems / Items;

    /*  Special case when very few known values: report "no usable test"
	through the sentinel values and stop  */

    if ( KnownItems < 2 * MINOBJS )
    {
	Verbosity(2) printf("\tinsufficient cases with known values\n");

	Gain[Att] = -Epsilon;
	Info[Att] = 0.0;
        EmpRisk[Att]=maxreal;
	return;
    }

    /*  Presumably orders items Xp..Lp by their value of Att; the cut
	search below compares neighbouring items - TODO confirm  */

    Quicksort(Xp, Lp, Att, Swap);

    /*  Count base values and determine base information.  All known
	items start on the "greater than cut" side (value 2), and every
	position starts with the "not a usable cut" sentinels  */

    ForEach(i, Xp, Lp)
    {
	Freq[ 2 ][ Class(Item[i]) ] += Weight[i];
	SplitGain[i] = -Epsilon;
	SplitInfo[i] = 0;
        SplitEmpRisk[i] = maxreal;
    }

      BaseInfo = TotalInfo(Freq[2], 0, MaxClass) / KnownItems;

    /*  Earlier experiment, kept disabled:
	    OldRisk =
	    ThisRisk=(float)ThisRisk/TotalItems;
	    margen = ((double)2*Log MaxClass+Log TotalItems -Log (0.1))/(float)(2*TotalItems);
	    ThisRisk = (ThisRisk+margen/2*(1+sqrt((1+(4*ThisRisk/margen)))-
	               OldRisk;
    */

    /*  Try possible cuts between items i and i+1, and determine the
	information and gain of the split in each case.  We have to be wary
	of splitting a small number of items off one end, as we can always
	split off a single item, but this has little predictive power.  */

    /*  Minimum weight required on each side of a cut: 10% of the known
	weight divided by (MaxClass + 1), kept within MINOBJS..25  */

    MinSplit = 0.10 * KnownItems / (MaxClass + 1);
    if ( MinSplit <= MINOBJS ) MinSplit = MINOBJS;
    else
    if ( MinSplit > 25 ) MinSplit = 25;

    LowItems = 0;
    ForEach(i, Xp, Lp - 1)
    {
	/*  Move item i from the high side (2) to the low side (1)  */

	c = Class(Item[i]);
	LowItems   += Weight[i];
	Freq[1][c] += Weight[i];
	Freq[2][c] -= Weight[i];

	/*  Too little weight below the cut yet: keep accumulating.
	    Too little weight left above it: no later cut can qualify  */

	if ( LowItems < MinSplit ) continue;
	else
	if ( LowItems > KnownItems - MinSplit ) break;

	/*  A cut is only possible between two distinct values (the 1E-5
	    margin treats nearly equal values as the same)  */

	if ( CVal(Item[i],Att) < CVal(Item[i+1],Att) - 1E-5 )
	{
	    ValFreq[1] = LowItems;
	    ValFreq[2] = KnownItems - LowItems;

	    /*  ComputeGain and ComputeRisk are not handed the frequency
		tables, so they presumably read Freq/ValFreq as set just
		above - TODO confirm  */

	    SplitGain[i] = ComputeGain(BaseInfo, UnknownRate[Att], 2, KnownItems);
	    SplitInfo[i] = TotalInfo(ValFreq, 0, 2) / Items;

            /*  ComputeRisk is declared in info.h  */

            SplitEmpRisk[i] = ComputeRisk(2, KnownItems, criterion);
	    AvGain += SplitGain[i];
	    Tries++;

	    Verbosity(3)
	    {	printf("\t\tCut at %.3f  (gain %.3f, val %.3f):",
	               ( CVal(Item[i],Att) + CVal(Item[i+1],Att) ) / 2,
	    	       SplitGain[i],
                       Worth(SplitInfo[i], SplitGain[i], Epsilon));
	    	       PrintDistribution(Att, 2, true);
	    }
	}
    }

    /*  Find the best splitting point according to the given criterion  */

    BestVal = 0;
    BestI   = None;

    /*  Penalty subtracted from every gain; the original note says it was
	added in C4.5 release 8 as a complexity measure for the
	discretisation  */

    ThreshCost = Log(Tries) / Items;

    /*  Any bounded risk must fall below maxreal to be selected  */

    LowestRisk=maxreal;

    /*  Margin used by the SRM complexity term.  Disabled alternative:
	    margen = (float)2*((float)2*(Log(MaxClass)+Log(Tries))-Log(0.1))/(float)(Items);
    */

margen = (float)2*(2*Log(MaxClass)+Log(Tries-1)-Log(0.1))/(float)(Items);

    /*  Wherever it appears below,
	    SRMComplexity = (margen/2) * (1 + sqrt(1 + 4*SplitEmpRisk[i]/margen))
	and the bounded risk of cut i is SplitEmpRisk[i] + SRMComplexity  */

    switch (criterion)
    {
   case -5:
   /*  [LDM, optimal point SRM]  keep the cut with the smallest bounded
       risk; its penalised gain is reported as the gain  */
   ForEach(i, Xp, Lp - 1)
    {
        SRMComplexity = (margen/ (float)2)*( (float)1+sqrt((float)1+((float)4*SplitEmpRisk[i]/margen)));

    if (( SplitEmpRisk[i]+SRMComplexity)  < LowestRisk)
	{
	    BestI   = i;
            LowestRisk = SplitEmpRisk[i]+SRMComplexity;
            BestVal = SplitGain[i] - ThreshCost;
	}
    }
   break;
    case -4:
    /*  [SRM, optimal point Fayyad]  keep the cut with the largest
	penalised gain and record the bounded risk of that cut  */
    ForEach(i, Xp, Lp - 1)
    {
        SRMComplexity = (margen/ (float)2)*( (float)1+sqrt((float)1+((float)4*SplitEmpRisk[i]/margen)));

	if ( (Val = SplitGain[i] - ThreshCost) > BestVal )
	{
	    BestI   = i;
            LowestRisk = SplitEmpRisk[i]+SRMComplexity;
	    BestVal = Val;
	}
    }
   break;
    case -3:
    /*  [LDM, optimal point LDM]  keep the cut with the largest penalised
	gain; LowestRisk is not touched and so stays at maxreal  */
    ForEach(i, Xp, Lp - 1)
    {
	/*  Disabled alternative penalty:
	    ThreshCost = (margen/ (double)2)*( (double)1+sqrt((double)1+((double)4*SplitEmpRisk[i]/margen)));
	*/

	if ( (Val = SplitGain[i] - ThreshCost) > BestVal )
	{
	    BestI   = i;
	    BestVal = Val;
	}
    }
    break;
    case -2:
   /*  [SRM, optimal point SRM]  keep the cut with the smallest bounded
       risk; its penalised gain is reported as the gain  */
   ForEach(i, Xp, Lp - 1)
    {
        SRMComplexity = (margen/ (float)2)*( (float)1+sqrt((float)1+((float)4*SplitEmpRisk[i]/margen)));

        if ( (SplitEmpRisk[i]+ SRMComplexity) < LowestRisk )
	{
	    BestI   = i;
            LowestRisk = SplitEmpRisk[i]+SRMComplexity;
            BestVal = SplitGain[i] - ThreshCost;
	}
    }
   break;
    case -1:
    /*  [LDM, midpoint]  test only the middle index of the known-value
	range, by penalised gain.  If that index was not evaluated as a
	cut above it still holds the initial SplitGain of -Epsilon  */
    i=(Lp-1+Xp)/2;
	if ( (Val = SplitGain[i] - ThreshCost) > BestVal )
	{
	    BestI   = i;
	    BestVal = Val;
	}
    break;
    case 0:
    /*  [SRM, midpoint]  test only the middle index of the known-value
	range, by bounded risk.  If that index was not evaluated as a cut
	above it still holds the initial SplitEmpRisk of maxreal  */
    i=(Lp-1+Xp)/2;
        SRMComplexity = (margen/ (float)2)*( (float)1+sqrt((float)1+((float)4*SplitEmpRisk[i]/margen)));

        if ( (SplitEmpRisk[i]+SRMComplexity) < LowestRisk )
	{
	    BestI   = i;
            LowestRisk = SplitEmpRisk[i]+SRMComplexity;
            BestVal = SplitGain[i] - ThreshCost;
	}
    break;
   }

    /*  If a cut was selected, set the best break point, gain, information
	and risk; otherwise store the "no usable test" sentinels  */

    if ( BestI == None )
    {
	Gain[Att] = -Epsilon;
	Info[Att] = 0.0;
        EmpRisk [Att] = maxreal;

	Verbosity(2) printf("\tno gain\n");
    }
    else
    {
	/*  The threshold is placed halfway between the two neighbouring
	    values at the selected cut  */

	Bar[Att]  = (CVal(Item[BestI],Att) + CVal(Item[BestI+1],Att)) / 2;
	Gain[Att] = BestVal;
	Info[Att] = SplitInfo[BestI];

        /*  Original author's note: this should really be called
	    BoundedRisk, since it holds risk plus complexity term  */

        EmpRisk [Att]=LowestRisk;

	Verbosity(2)
            printf("\tcut=%.3f, inf %.3f, gain %.3f, risk %.3f\n",
                   Bar[Att], Info[Att], Gain[Att], EmpRisk[Att]);
    }
}
// 
// 
// 
/*************************************************************************/
/*                                                                       */
/*  Change a leaf into a test on a continuous attribute                  */
/*                                                                       */
/*  Node is turned into a threshold test on attribute Att.  The cut is   */
/*  not Bar[Att] itself but the value GreatestValueBelow returns for     */
/*  it; Cut, Lower and Upper all receive that same value and the         */
/*  node's error count is reset to zero.                                 */
/*                                                                       */
/*      Node   tree node to convert (modified in place)                  */
/*      Att    continuous attribute tested; Bar[Att] must already hold   */
/*             the cut point chosen for it                               */
/*                                                                       */
/*************************************************************************/


  void  ContinTest(TreeC45 Node, AttributeC45 Att)
/*  ----------  */
{
    /*  Local prototype: GreatestValueBelow is defined later in this file  */
    float GreatestValueBelow(AttributeC45, float);
    float Thresh;

    /*  Presumably gives the node two branches - TODO confirm against
	the definition of Sprout  */
    Sprout(Node, 2);

    Thresh = GreatestValueBelow(Att, Bar[Att]);

    Node->NodeType	= ThreshContin;
    Node->Tested	= Att;
    Node->Cut		=
    Node->Lower		=
    Node->Upper		= Thresh;
    Node->Errors        = 0;
}
// 
// 
// 
/*************************************************************************/
/*                                                                       */
/*  Return the greatest value of attribute Att below threshold t         */
/*                                                                       */
/*  Items 0 through MaxItem are scanned; unknown values and values       */
/*  above t are ignored.  When nothing qualifies an error message is     */
/*  written to cout and -maxreal is returned.                            */
/*                                                                       */
/*************************************************************************/


float GreatestValueBelow(AttributeC45 Att, float t)
/*    ------------------  */
{
    float Highest = -maxreal;
    Boolean Found = false;
    ItemNo Idx;

    ForEach(Idx, 0, MaxItem)
    {
	const float Candidate = CVal(Item[Idx], Att);

	/*  Skip unknown values and anything that is not at or below t  */
	if ( Candidate == Unknown || !(Candidate <= t) ) continue;

	/*  The first acceptable value is always taken; later ones
	    must be strictly greater than the current best  */
	if ( Found && !(Candidate > Highest) ) continue;

	Highest = Candidate;
	Found   = true;
    }

    if ( !Found ) cout <<"Error, no values found below threshold " << t;

    return Highest;
}

}
#endif
