/* File: PhylogeneticDistance.cpp */
#include "PhylogeneticDistance.h"


#ifndef __PhylogeneticDistance_cpp__
#define __PhylogeneticDistance_cpp__


// List of methods
//
// The two name tables below are parallel arrays: entry i of each table names
// the pair-distance method with id i. The ids line up with the function table
// built in calculate() (0 = DistanceOfPairByDiffPos, 1 = DistanceOfPairByDiffBinary).
// NOTE(review): these definitions sit before the `namespace BIOS {` block opened
// further down - confirm the header makes the class name visible at this scope.

// Number of enabled methods; matches the number of active entries in each table.
int PhylogeneticDistance::TOTAL_NUMBER_OF_METHODS = 2;

// Descriptive method names, indexed by method id.
string PhylogeneticDistance::METHOD_NAME_LONG [] = {
	"Method: Count mutations", //0
	"Method: Binary" //1
	//"Method: Count recombinations", // 2 (disabled)

};
// Short method tags, indexed by method id (same order as METHOD_NAME_LONG).
string PhylogeneticDistance::METHOD_NAME_SHORT [] = {
	"CNTM", // 0
	"CNTB" //1
	//"CNTR", // 2 (disabled)

};

// End of the list of methods

// List of iterators
//
// Parallel name tables for the pairing strategies ("iterators"); entry i of
// each table names the iterator with id i.
// NOTE(review): presumably ITER_TYPE_ALL == 0 and ITER_TYPE_MOST_FREQ == 1 so
// that these indices match the switch in calculate() - confirm against the header.

// Descriptive iterator names, indexed by iterator id.
string PhylogeneticDistance::ITER_NAME_LONG [] = {
	"Iterator: All pairs", //0
	"Iterator: Pair up with most frequent", // 1 
};
// Short iterator tags, indexed by iterator id (same order as ITER_NAME_LONG).
string PhylogeneticDistance::ITER_NAME_SHORT [] = {
	"ALL", // 0
	"MFR", // 1
};

// End of the list of iterators


//std::vector <CMP_METHOD> PhylogeneticDistance::methods_vector_2 = {};

namespace BIOS {

// Builds a calculator bound to a haplotype list and to the key -> haplotype table.
//
// hapList        - list of (frequency, key) entries; read by getWithinDistance()
//                  and returned by getHapList().
// haplotypeArray - table queried through getPosList(key) to obtain a haplotype.
//
// Both pointers are stored as given. The method and iterator selectors start
// at 0, the same defaults the single-argument constructor uses.
PhylogeneticDistance::PhylogeneticDistance(FreqAndKeyVector* hapList, MultidimensionalEmptyTable<int>* haplotypeArray)
{
	this->hapList = hapList;
	this->haplotypeArray = haplotypeArray;

	// calculate() reads both selectors; leaving them unset would make it index
	// its function table with an indeterminate value.
	this->method = 0;
	this->iterator = 0;
}

// This is the constructor currently in use.
//
// Builds a calculator bound only to the key -> haplotype table; the haplotype
// list is supplied later, per call, to calculate().
//
// haplotypeArray - table queried through getPosList(key) to obtain a haplotype.
//
// The method and iterator selectors start at 0.
PhylogeneticDistance::PhylogeneticDistance( MultidimensionalEmptyTable<int>* haplotypeArray)
{
	// No list is bound by this constructor. Set the member explicitly so that
	// getHapList() and getWithinDistance() never read an indeterminate pointer.
	this->hapList = NULL;
	this->haplotypeArray = haplotypeArray;
	this->method = 0;
	this->iterator = 0;
}

// Empty destructor: the pointers handed to the constructors are not released here.
PhylogeneticDistance::~PhylogeneticDistance()
{
}
  
// Computes the distance for a haplotype list using the currently selected
// pair-distance method (this->method) and pairing strategy (this->iterator).
//
// hapList - list of (frequency, key) entries to evaluate.
//
// Returns the value produced by calcByIterAll() or calcByIterMostFreq(), or -1
// when the selected method or iterator is not a known id (the same "unknown
// method" value used by getBetweenDistance() and getWithinDistance()).
float PhylogeneticDistance::calculate(FreqAndKeyVector* hapList)
{
	typedef float (PhylogeneticDistance::*PairDistanceFn)(intList*, intList*);

	// Pair-distance functions, indexed by method id.
	const PairDistanceFn methods[] = {
		&PhylogeneticDistance::DistanceOfPairByDiffPos,    // 0
		&PhylogeneticDistance::DistanceOfPairByDiffBinary  // 1
	};
	const int method_count = static_cast<int>(sizeof(methods) / sizeof(methods[0]));

	// Reject ids outside the table instead of reading past its end.
	const int selected = this->method;
	if (selected < 0 || selected >= method_count)
		return -1.0f;

	const PairDistanceFn distance_function = methods[selected];

	// Dispatch on the pairing strategy.
	switch (this->iterator) {
	case ITER_TYPE_ALL:
		return calcByIterAll(distance_function, hapList);

	case ITER_TYPE_MOST_FREQ:
		return calcByIterMostFreq(distance_function, hapList);

	default:
		break;
	}

	// Unknown iterator: return a defined value rather than falling off the end.
	return -1.0f;
}

  // Returns whatever the haplotype table's getPosList() yields for the given key.
  intList* PhylogeneticDistance::getHaplotype(long long int key)
  {
    intList* const positions = this->haplotypeArray->getPosList(key);
    return positions;
  }

    // Position-based lookup: reads the key stored in entry `listPosition` of
    // `hapList` and returns the haplotype table's getPosList() result for it.
    intList* PhylogeneticDistance::getHaplotypeAt(long int listPosition, FreqAndKeyVector* hapList)
    {
        int key_at_position = hapList->getElement(listPosition)->getSecond();
        return this->haplotypeArray->getPosList(key_at_position);
    }
/*
  intList* PhylogeneticDistance::getHaplotype(int listPosition, FreqAndKeyVector* hapList2)
  { 
        // hapList->getElement( i )->getSecond() 
        // gives the ith key in the haplotype array
        if (hapList==NULL)
            return haplotypeArray->getPosList(hapList->getElement(listPosition)->getSecond());
        else 
            return haplotypeArray->getPosList(hapList2->getElement(listPosition)->getSecond());
  }
*/
  // Dispatches a between-list distance computation by method id.
  //
  // method   - 0 selects getBetweenDistanceCountMut(), 1 selects
  //            getBetweenDistanceCountRecomb().
  // hapList2 - list forwarded unchanged to the selected helper.
  //
  // Returns the helper's result, or -1 for an unknown method id.
  float PhylogeneticDistance::getBetweenDistance(int method, FreqAndKeyVector* hapList2)
  {
      switch (method) {
          case 0:
              // The result must be returned; otherwise control would flow off
              // the end of this non-void function.
              return getBetweenDistanceCountMut(hapList2);
          case 1:
              return getBetweenDistanceCountRecomb(hapList2);
          default:
              // unknown method
              return -1;
      }
  }

 // Placeholder: ignores its argument and always reports a distance of 0.
 float PhylogeneticDistance::getBetweenDistanceCountMut(FreqAndKeyVector* hapList2)
 {
     (void)hapList2;  // intentionally unused
     return 0.0f;
 }

 // Placeholder: ignores its argument and always reports a distance of 0.
 float PhylogeneticDistance::getBetweenDistanceCountRecomb(FreqAndKeyVector* hapList2)
 {
     (void)hapList2;  // intentionally unused
     return 0.0f;
 }



    


    // Fraction of positions at which two haplotypes differ.
    //
    // haplotype1, haplotype2 - haplotypes to compare, element by element.
    //
    // Returns (number of differing positions) / (haplotype length).
    // Returns -1 (after printing a diagnostic) when the lengths differ, and 0
    // when both haplotypes are empty.
    float PhylogeneticDistance::DistanceOfPairByDiffPos(intList* haplotype1, intList* haplotype2){
        // Check haplotypes have the same length
        if (haplotype1->size() != haplotype2->size()){
            printf("DistanceOfPairByDiffPos: Different haplotype sizes");
            return -1;
        }

        const int length = haplotype1->size();

        // Two empty haplotypes have no differing position; returning here also
        // avoids the 0/0 division below, which would yield NaN.
        if (length <= 0)
            return 0.0f;

        // On each location compare the haplotype value
        int number_of_differences = 0;
        for (int i = 0; i < length; i++){
            if (haplotype1->getElement(i) != haplotype2->getElement(i))
                number_of_differences++;
        }

        return (float)number_of_differences / length;
    }

    // Binary distance between two haplotypes.
    //
    // haplotype1, haplotype2 - haplotypes to compare, element by element.
    //
    // Returns 0 when every position matches, 1 as soon as one position differs,
    // and -1 (after printing a diagnostic) when the lengths differ.
    float PhylogeneticDistance::DistanceOfPairByDiffBinary(intList* haplotype1, intList* haplotype2){
        // Check haplotypes have the same length
        if (haplotype1->size() != haplotype2->size()){
            // The diagnostic names this function; it previously carried the
            // name of DistanceOfPairByDiffPos.
            printf("DistanceOfPairByDiffBinary: Different haplotype sizes");
            return -1;
        }

        // Stop at the first mismatch: one difference is enough for distance 1.
        for (int i = 0; i < haplotype1->size(); i++){
            if (haplotype1->getElement(i) != haplotype2->getElement(i))
                return 1;
        }

        return 0;
    }

// Frequency-weighted mean distance over all unordered pairs of list entries.
//
// distance_function - member function giving the distance between two haplotypes.
// hapList           - list of entries; getFirst() is read as the frequency and
//                     getSecond() as the key passed to getHaplotype().
//
// For every pair (a, b) with a before b, the pair distance is multiplied by
// freq_a * freq_b and summed, and (int)(freq_a * freq_b) comparisons are counted.
// For every entry, (int)(freq * (freq - 1) / 2) comparisons of the haplotype
// with itself are counted too; they add nothing to the sum.
//
// Returns sum / comparison count, or 0 when no comparison was counted.
//
// NOTE(review): a negative value from distance_function (the visible distance
// functions return -1 on a length mismatch) is added to the sum like any other
// distance - confirm this is intended.
float PhylogeneticDistance::calcByIterAll( float (PhylogeneticDistance::*distance_function)(intList*, intList*) , FreqAndKeyVector* hapList)
{
	float weighted_sum = 0;    // running sum of distance * freq_a * freq_b
	int comparison_count = 0;  // running number of comparisons represented

	for (int first = 0; first < hapList->size(); ++first) {

		// First member of the pair
		FreqAndKey* const entry_a = hapList->getElement(first);
		const int key_a = entry_a->getSecond();
		const float freq_a = entry_a->getFirst();
		intList* const hap_a = getHaplotype(key_a);

		#ifdef _PRINT_COMPARISON_NUMBER_
		cout << endl << "Comparing key " << key_a << endl;
		#endif

		// Only entries after `first`, so each unordered pair is visited once
		for (int second = first + 1; second < hapList->size(); ++second) {

			// Second member of the pair
			FreqAndKey* const entry_b = hapList->getElement(second);
			const int key_b = entry_b->getSecond();
			const float freq_b = entry_b->getFirst();
			intList* const hap_b = getHaplotype(key_b);

			// Distance of this pair, weighted by both frequencies
			const float pair_distance = (this->*distance_function)(hap_a, hap_b);
			const float weighted_distance = pair_distance * (freq_a * freq_b);
			weighted_sum += weighted_distance;

			// Comparisons this pair stands for
			const int cross_comparisons = (int)(freq_a * freq_b);
			comparison_count += cross_comparisons;

			#ifdef _PRINT_COMPARISON_NUMBER_
			cout << endl<< "\tDistance (" << key_a << ", " << key_b << ") = " << pair_distance << ". Freqs = (" << freq_a  <<  "," << freq_b  <<  ")" << endl;
			cout << "\tHaplotypes: \n" << "\t" << *hap_a << "\t"<< *hap_b;
			cout << "\tNumber of comparisonsd= " << cross_comparisons << endl;
			#endif
		}

		// Comparisons of the haplotype with its own copies: all of distance 0
		const int self_comparisons = (int)(freq_a * (freq_a - 1.0f) / 2.0f);
		comparison_count += self_comparisons;

		#ifdef _PRINT_COMPARISON_NUMBER_
		cout << endl << "\tNumber of comparisons same = " << self_comparisons  << endl;
		#endif
	}

	// Nothing was compared: report 0 rather than dividing by zero
	if (comparison_count <= 0)
		return 0.0f;

	return weighted_sum / (float)comparison_count;
}

// Frequency-weighted mean distance between the most frequent haplotype and
// every entry of the list (the most frequent entry itself included).
//
// distance_function - member function giving the distance between two haplotypes.
// hapList           - list of entries; getFirst() is read as the frequency and
//                     getSecond() as the key passed to getHaplotype().
//
// Returns sum(distance(reference, h) * freq(h)) / sum(freq(h)), or 0 when the
// list is empty or the frequencies do not add up to a positive value.
float PhylogeneticDistance::calcByIterMostFreq( float (PhylogeneticDistance::*distance_function)(intList*, intList*) , FreqAndKeyVector* hapList)
{
	// An empty list has no reference entry: get_most_freq_haplotype() would
	// return index 0 and getElement(0) would be asked for a missing element.
	// Return 0, as calcByIterAll() does when nothing can be compared.
	if (hapList->size() == 0)
		return 0.0f;

	// Reference: the most frequent haplotype (MFH)
	const int reference_index = get_most_freq_haplotype(hapList);
	const int reference_key = hapList->getElement(reference_index)->getSecond();
	intList* const reference = getHaplotype(reference_key);

	float weighted_sum = 0;     // sum of distance * frequency
	float total_frequency = 0;  // number of haplotypes, repetitions included

	// Pair up the MFH with every haplotype in the list
	for (int i = 0; i < hapList->size(); i++) {
		FreqAndKey* const entry = hapList->getElement(i);
		const float frequency = entry->getFirst();
		const int key = entry->getSecond();
		intList* const candidate = getHaplotype(key);

		weighted_sum += (this->*distance_function)(reference, candidate) * frequency;
		total_frequency += frequency;
	}

	// Guard the division: a zero total would otherwise yield NaN
	if (total_frequency > 0)
		return weighted_sum / total_frequency;

	return 0.0f;
}

// Finds the list position of the entry with the highest frequency.
//
// hapList - list of entries; getFirst() is read as the frequency.
//
// Returns the index of the first entry whose frequency is strictly greater
// than every earlier one. Returns 0 when the list is empty or when no
// frequency is greater than 0.
int  PhylogeneticDistance::get_most_freq_haplotype( FreqAndKeyVector* hapList ){

    int index_most_frequent = 0;
    double value_most_frequent = 0;

    for (int i = 0; i < hapList->size(); i++){
        const double current_value = hapList->getElement(i)->getFirst();

        // Strict comparison: on ties the earliest entry stays selected
        if (current_value > value_most_frequent){
            index_most_frequent = i;
            value_most_frequent = current_value;
        }
    }

    return index_most_frequent;
}

  float PhylogeneticDistance::getWithinDistance(int method)
  {
         float (PhylogeneticDistance::*m)(intList*, intList*);
        switch(method){
            case 0:
               //getWithinDistanceCountMut();
                m = &PhylogeneticDistance::DistanceOfPairByDiffPos;
                return getWithinDistance( m );
                break;
            case 1:
                //getWithinDistanceCountRecomb();
                m = &PhylogeneticDistance::DistanceOfPairByRecomb;
                getWithinDistance( m );
                break;
            default:
                // unknown method
                return -1;
                break;
        }
}




// Sums the pair distance over the entries of the member list `hapList` and
// divides the sum by the number of entries.
//
// distance_function - member function giving the distance between two haplotypes.
//
// Every pair (i, j) with j >= i is visited, so each entry is also compared
// with itself. Frequencies are not used here.
//
// Returns total distance / list size, or 0 when no list is bound to this
// object or the list is empty.
float PhylogeneticDistance::getWithinDistance( float (PhylogeneticDistance::*distance_function)(intList*, intList*) )
{
	// No list, or nothing in it: avoid the null dereference / division by zero
	if (!hapList || hapList->size() == 0)
		return 0.0f;

	//  Sum of distance between pairs
	float total_distance = 0;

	for (int i = 0; i < hapList->size(); i++) {
		// i and j are list positions, not haplotype keys, so they are resolved
		// through getHaplotypeAt(), which reads the key stored at that position.
		// getHaplotype() would treat the position itself as a key.
		intList* const haplotype1 = getHaplotypeAt(i, hapList);

		for (int j = i; j < hapList->size(); j++) {
			intList* const haplotype2 = getHaplotypeAt(j, hapList);

			// For each haplotypes pair calculate distance using the given function
			total_distance += (this->*distance_function)(haplotype1, haplotype2);
		}
	}

	return total_distance / (float)hapList->size();
}


/*
 float PhylogeneticDistance::getWithinDistanceCountMut()
    {
        //  Sum of distance between pairs
        float total_distance = 0;
        // Pair of haplotypes to compare
        intList *haplotype1;
        intList * haplotype2; 

        // Extract pairs of haplotypes. Only the ones that have not been compared
        for(int i=0; i < hapList->size(); i++)
            for(int j=i; j < hapList->size(); j++){
                haplotype1 = getHaplotype(i);
                haplotype2 = getHaplotype(j);
            }            

            // For each haplotypes pair calculate distance
            total_distance += DistanceOfPairByDiffPos(haplotype1, haplotype2);
        
    }
*/


    // Recombination-based pair distance - currently a stub that always returns 0.
    //
    // The disabled implementation checked that both haplotypes have the same
    // length, then walked them position by position and counted every change
    // between "values equal" and "values different". None of that runs, and
    // both arguments are ignored.
    float PhylogeneticDistance::DistanceOfPairByRecomb(intList* haplotype1, intList* haplotype2){
        // Nothing increments the counter while the scan is disabled.
        const int switch_count = 0;

        return static_cast<float>(switch_count);
    }
/*
 float PhylogeneticDistance::getWithinDistanceCountRecomb()
    {
        return 0;
    }
*/


  // Accessor for the haplotype list pointer stored in this object.
  FreqAndKeyVector* PhylogeneticDistance::getHapList()
  {
    return this->hapList;
  }


};  // End of Namespace

#endif

/* End of file: PhylogeneticDistance.cpp */




