/* File: discr.cpp */


#ifndef __discr_cpp__
#define __discr_cpp__


#include "discr.h"// //



namespace BIOS {

/*************************************************************************/// 
/*									 */// 
/*	Evaluation of a test on a discrete valued attribute		 */// 
/*      ---------------------------------------------------		 */// 
/*									 */// 
/*************************************************************************/// 
// 
// 
// 
/*************************************************************************/
/*                                                                       */
/*  Fill the frequency tables Freq[][] and ValFreq[] for attribute Att   */
/*  using items Fp to Lp, then record UnknownRate[Att].                  */
/*                                                                       */
/*    Att     discrete attribute whose values are tallied                */
/*    Fp, Lp  first and last item of the range (both visited)            */
/*                                                                       */
/*  Freq[v][c] receives the summed Weight[] of items having value v and  */
/*  class c; ValFreq[v] is the sum of Freq[v][*].  Value 0 is the slot   */
/*  whose total is used as the "unknown" count.                          */
/*                                                                       */
/*************************************************************************/

void ComputeFrequencies(AttributeC45 Att, ItemNo Fp, ItemNo Lp)
{
    ItemCount CountItems(ItemNo, ItemNo);

    ItemNo itemIdx;
    DiscrValue valueIdx;
    ClassNo classIdx;
    Description thisCase;
    ItemCount totalItems;

    /*  Tables are prepared for values 0..MaxAttVal[Att] before tallying
        (presumably cleared to zero -- ResetFreq is defined elsewhere)  */

    ResetFreq(MaxAttVal[Att]);

    /*  Accumulate the weight of every item into the cell selected by
        its attribute value and its class  */

    ForEach(itemIdx, Fp, Lp)
    {
        thisCase = Item[itemIdx];
        Freq[ DVal(thisCase, Att) ][ Class(thisCase) ] += Weight[itemIdx];
    }

    /*  Row totals: how much weight each attribute value carries  */

    ForEach(valueIdx, 0, MaxAttVal[Att])
    {
        ForEach(classIdx, 0, MaxClass)
        {
            ValFreq[valueIdx] += Freq[valueIdx][classIdx];
        }
    }

    /*  Fraction of the range whose value for Att falls in slot 0  */

    totalItems = CountItems(Fp, Lp);
    UnknownRate[Att] = ValFreq[0] / totalItems;
}
// 
// 
// 
// 
// 
/*************************************************************************/
/*                                                                       */
/*  Set Info[Att], Gain[Att] and EmpRisk[Att] for the partition of      */
/*  items Fp to Lp on discrete attribute Att.                            */
/*                                                                       */
/*    Att        attribute being evaluated                               */
/*    Fp, Lp     item range, forwarded to ComputeFrequencies             */
/*    Items      item count for the range; compared with ValFreq[0] to   */
/*               detect the case where no item has a known value         */
/*    criterion  forwarded unchanged to ComputeRisk                      */
/*                                                                       */
/*  If Items <= ValFreq[0] the attribute is unusable: Gain[Att] is set   */
/*  to -Epsilon, Info[Att] to 0 and EmpRisk[Att] to maxreal.             */
/*                                                                       */
/*************************************************************************/

void EvalDiscreteAtt(AttributeC45 Att, ItemNo Fp, ItemNo Lp, ItemCount Items,
                     short int criterion)
{
    /*  Helpers used below, declared locally as elsewhere in this file
        (ComputeGain: see info.h)  */

    float DiscrKnownBaseInfo(ItemCount, DiscrValue);
    float ComputeGain(float, float, DiscrValue, ItemCount);
    float TotalInfo(ItemCount*, short, DiscrValue);

    ComputeFrequencies(Att, Fp, Lp);

    /*  Special case when no known values of the attribute  */

    if ( Items <= ValFreq[0] )
    {
        Verbosity(2) printf("\tAtt %s: no known values\n", AttName[Att]);

        Gain[Att] = -Epsilon;
        Info[Att] = 0.0;
        EmpRisk[Att] = maxreal;
        return;
    }

    /*  Only needed once the attribute is known to have usable values;
        strictly positive here because of the guard above  */

    ItemCount KnownItems = Items - ValFreq[0];

    /*  Gain and Info are always computed.  A guard that restricted them
        to criterion values -1, -3, -5 and 1 is disabled.  */

    Gain[Att] = ComputeGain(DiscrKnownBaseInfo(KnownItems, MaxAttVal[Att]),
                            UnknownRate[Att], MaxAttVal[Att], KnownItems);
    Info[Att] = TotalInfo(ValFreq, 0, MaxAttVal[Att]) / Items;

    /*  ComputeRisk: see info.h  */

    EmpRisk[Att] = ComputeRisk(MaxAttVal[Att], KnownItems, criterion);

    /*  NOTE: a complexity penalty for SRM (presumably structural risk
        minimisation -- confirm) is disabled.  It was:
            margin = 2 * (MaxAttVal[Att] * Log(MaxClass) - Log(0.1)) / Items
            EmpRisk[Att] += (margin / 2)
                            * (1 + sqrt(1 + 4 * EmpRisk[Att] / margin))  */

    Verbosity(2)
    {
        printf("\tAtt %s", AttName[Att]);
        Verbosity(3) PrintDistribution(Att, MaxAttVal[Att], true);
        printf("\tinf %.3f, gain %.3f\n", Info[Att], Gain[Att]);
    }
}
// 
// 
/*************************************************************************/
/*                                                                       */
/*  Base information of the items that have a known value for a          */
/*  discrete attribute, read from the frequency table Freq[][].          */
/*                                                                       */
/*    KnownItems  count of items with a known value (divisor; callers    */
/*                must pass a non-zero value)                            */
/*    MaxVal      highest attribute value; values 1..MaxVal are summed,  */
/*                value 0 is skipped                                     */
/*                                                                       */
/*  Returns (K * Log(K) - sum over classes of n * Log(n)) / K, where K   */
/*  is KnownItems and n is the class total over values 1..MaxVal.        */
/*                                                                       */
/*************************************************************************/

float DiscrKnownBaseInfo(ItemCount KnownItems, DiscrValue MaxVal)
{
    double weightedLogs=0;
    ClassNo classIdx;
    DiscrValue valueIdx;
    ItemCount classTotal;

    ForEach(classIdx, 0, MaxClass)
    {
        /*  Total for this class over the known attribute values only  */

        classTotal = 0;
        ForEach(valueIdx, 1, MaxVal)
        {
            classTotal += Freq[valueIdx][classIdx];
        }

        weightedLogs += classTotal * Log(classTotal);
    }

    return (KnownItems * Log(KnownItems) - weightedLogs) / KnownItems;
}
// 
// 
// 
/*************************************************************************/
/*                                                                       */
/*  Set up Node, in place, as a test on discrete attribute Att.          */
/*                                                                       */
/*    Node  tree node to configure; nothing is returned                  */
/*    Att   attribute the node will test                                 */
/*                                                                       */
/*  Sprout is called with MaxAttVal[Att] (presumably one branch per      */
/*  attribute value -- Sprout is defined elsewhere), then the node is    */
/*  tagged BrDiscr, its tested attribute recorded and Errors cleared.    */
/*                                                                       */
/*************************************************************************/

void DiscreteTest(TreeC45 Node, AttributeC45 Att)
{
    Sprout(Node, MaxAttVal[Att]);

    Node->NodeType = BrDiscr;
    Node->Tested   = Att;
    Node->Errors   = 0;
}
}
#endif
