/*----------------------------------------------------------------------+
|       Title:  Phyltree.java                                           |
|               Java Class                                              |
|               (used in Phylap.java applet)                            |
|                                                                       |
|       Author: David E. Joyce                                          |
|               Department of Mathematics and Computer Science          |
|               Clark University                                        |
|               Worcester, MA 01610-1477                                |
|               U.S.A.                                                  |
|                                                                       |
|               http://aleph0.clarku.edu/~djoyce/                       |
|                                                                       |
|       Date:   January, 1996, updated November, 2002.                  |
+----------------------------------------------------------------------*/

import java.awt.*;
import java.lang.*;

/*
 * Description of the data structure. A tree has a number of leaves, n,
 * and a number of other nodes, at most 2n nodes in all.  Each node has a
 * height.  The leaves have height 0.0. The variable treeHeight indicates
 * the maximum height of the nodes in the tree, which will be the height
 * of a root for a completed tree. Each node in the tree has a parent node,
 * except the root, of course.  Nodes that are parents of other nodes
 * point to one of those nodes, called the eldest node. The eldest node
 * points to the next sibling of the node; thus, the children of the parent
 * are kept in a linked list.
 *
 * As a tree is constructed, many of the nodes will not have parents.  But
 * as new nodes are added to the tree, they will receive parents.  Eventually
 * when the number of parentless nodes gets down to 1, the construction is
 * finished.
 */

public class Phyltree {
  private int n;                // the number of leaves on the tree
  private int assigned;         // the number of assigned nodes
  private int parentless;       // the number of parentless nodes
  private double treeHeight;    // the greatest height of any node
  private double height[];      // the heights of the nodes
  private int parent[];         // the parents of the nodes
  private int eldest[];         // the eldest children of the nodes
  private int sib[];            // the next siblings of the nodes

  public Phyltree (int nval) {
    setSize(nval);
  }

 /* setSize sets up an incomplete tree with n leaves, all parentless.
  * Later, the rest of the nodes of the tree will be created.
  */
  public void setSize(int nval) {
    n = nval;
    int two_n = 2*n;
    assigned = n;
    parentless = n;
    treeHeight = 0;
    height = new double[two_n];
    parent = new int[two_n];
    eldest = new int[two_n];
    sib = new int[two_n];
    for (int i=0; i<two_n; ++i) {
      height[i] = 0.0;
      parent[i] = -1;
      eldest[i] = -1;
      sib[i]    = -1;
    }
  } // setSize

  public double getTreeHeight() { return treeHeight;}

  public int getLeaves() { return n;}

  public int getAssigned() { return assigned;}

  public int getParent(int i) {
    if (i < 0 || i >= assigned) return -1;
    return parent[i];
  }

  public int getEldest(int i) {
    if (i < 0 || i >= assigned) return -1;
    return eldest[i];
  }

  public int getSib(int i) {
    if (i < 0 || i >= assigned) return -1;
    return sib[i];
  }

  public double getHeight(int i) {
    if (i < 0 || i >= assigned) return 0.0;
    return height[i];
  }

 /* newNode creates a new node of a specified height.  It is unconnected
  * to the tree as yet.
  */
  private int newNode(double h) {
    // return the next available node after setting its height
    height[assigned] = h;
    if (h > treeHeight)
      treeHeight = h;
    ++parentless;
    return assigned++;
  } // newNode

 /* a child node is connected to a parent node.  The parent pointer of
  * the child is set, and the list of children of the parent is updated.
  */
  private void assertParentage (int child, int paren) {
    if (parent[child] == -1) {
      parent[child] = paren;
      --parentless;
      sib[child] = eldest[paren];
      eldest[paren] = child;
    }
  } // assertParentage

 /* find the most remote ancestor of a given node. Run up the parent
  * pointers until one without a parent is found.
  */
  private int remoteAncestor (int i) {
    int j;
    while ((j=parent[i]) != -1)
      i = j;
    return i;
  }

 /* given two nodes i and j, find the most recent common ancestor if there
  * is one, otherwise indicate they have no common ancestor.
  */
  private int commonAncestor (int i, int j) {
    int p,q;
    while (i!=-1 && j!=-1 && i!=j) {
      if (height[i] < height[j])      i = parent[i];
      else if (height[i] > height[j]) j = parent[j];
      else if (i<j)                   i = parent[i];
      else                            j = parent[j];
    }
    return (i==j)? i : -1;
  } // commonAncestor

 /* getRoot and nextNode can be used for a depth-first traversal through
  * all the nodes. Thus, root() returns the root of the tree (assuming
  * the tree has been completed), while nextNode either goes to the
  * eldest child, if there is one, otherwise the next
  * sibling node, if there is one, or backtracks up the tree until it
  * finds a sibling or runs out of tree.
  */
  private int getRoot() { return remoteAncestor(0); }

  private int nextNode (int i) {
    if (eldest[i] != -1)
      return eldest[i];
    while (i != -1 && sib[i] == -1)
      i = parent[i];
    return (i == -1)? -1 : sib[i];
  }

 /* eldestLeaf and nextLeaf can be used to step through all the leaves
  * which are the descendants of a given node.  Thus, eldestLeaf finds
  * a leaf by travelling down the eldest pointers starting with the node,
  * while nextLeaf either goes to the next sibling leaf, if there is one,
  * or backtracks up the tree and back down to get the next leaf.
  */
  private int eldestLeaf (int i) {
    while (eldest[i] != -1)
      i = eldest[i];
    return i;
  }

  private int nextLeaf (int j, int i) {
    while (j != i && sib[j] == -1)
      j = parent[j];
    if (j==i) return -1;
    return eldestLeaf(sib[j]);
  }

  /* determine the number of leaves under the given node */
  private int numberLeaves (int i) {
    if (i < n) return 1;
    int count = 0;
    for (int j= eldestLeaf(i); j!= -1; j=nextLeaf(j,i))
      ++count;
    return count;
  } // numberLeaves

 /* For each clade of this tree, determine how many more leaves there
  * are in the encompassing clade of the other tree (-1 indicates that
  * there is no encompassing clade of the other tree).
  */
  public int[] compareWith (Phyltree other) {
    int[] compare;        // the resulting comparison to another tree
    compare = new int[assigned-n];
    for (int i=n; i<assigned; ++i) {
      int iother = eldestLeaf(i);
      for (int j=iother; j!=-1; j=nextLeaf(j,i)) {
        iother = other.commonAncestor(iother,j);
        if (iother == -1) break;
      }
      if (iother == -1)         // no encompassing clade
        compare[i-n] = -1;
      else
        compare[i-n] = other.numberLeaves(iother) - numberLeaves(i);
    }  // for i
    return compare;
  } //compare

 /* Create a random tree starting with given Phyltree.  The tree may
  * has a number of existing nodes to begin with, namely assigned, and
  * of these parentless of them have no parents.  In a newly created
  * tree, assigned equals parentless.
  *
  * Usually a parent will have two children, but with probability
  * extra, an extra will be added to make it three, and with that
  * same probability a fourth will be added to three, etc.  This
  * parameter extra can be set to 0.0 to make sure a binary tree
  * results.
  */
  public void random (double extra) {
    // first, create a random permutation of the parentless nodes
    int who[] = new int[parentless];
    int j = 0;
    for (int i=0; i<assigned; ++i)
      if (parent[i] == -1) {
        int m = (int)((j+1)*Math.random());
        who[j++] = who[m];
        who[m] = i;
      }
    // now start creating parents, one parent at a time, each with
    // 2 or more children which did not have parents.
    while (parentless > 1) {
      // select some number of nodes to merge
      int numberToMerge = 2;
      while (Math.random() < extra)
        ++numberToMerge;
      if (numberToMerge > parentless)
        numberToMerge = parentless;
      // Next merge that many parentless nodes.  Before that's done
      // the height of the new node has to be determined.  It will
      // be at least 0.2 plus the maximum height of the children, but
      // no more than 1.0 more than that.
      double maxHeight = 0.0;
      for (j=0; j<numberToMerge; ++j) {
        int i = who[parentless-1-j];
        if (height[i] > maxHeight)
          maxHeight = height[i];
      }
      // Now create the node and link it up
      int k = newNode(0.2+Math.random()+maxHeight);
      for (j=0; j<numberToMerge; ++j) {
        int i = who[parentless-2];
        assertParentage(i,k);
      }
      int m = (int)(parentless*Math.random());
      who[parentless-1] = who[m];
      who[m] = k;
    } // while
  } // random

 /* Compute the distances between every pair of leaves in the tree.
  * Return the results as a triangular matrix.
  */
  private double[][] getDistanceMatrix(){
    double d[][] = new double[n][];
    for (int i=1; i<n; ++i) {
      d[i] = new double[i];
      for (int j=0; j<i; ++j) {
        int k = commonAncestor(i,j);
        d[i][j] = (k==-1)? Double.POSITIVE_INFINITY
                : (2.0*height[k] - height[i] - height[j]);
      } // for j
    } // for i
    return d;
  } // distanceMatrix

  /* The probability that a gene mutates depends on the elapsed time and the
   * mutation rate.  If the elapsed time is t, and the mutation rate is r, then
   * the probability p(t) that the gene is the same at the end of the elapsed
   * time t as it was at the beginning is
   *
   *              m-1
   *   p(t) = 1 - --- (1-e^(-rt))
   *               m
   *
   * where m is the number of alternative values for the gene. This formula takes
   * into account that there may be multiple mutations during the time interval.
   * Also, the probability that the gene changes to a different particular value
   * is
   *          1
   *   q(t) = - (1-e^(-rt))
   *          m
   */

 /* Create a random genome for each of the assigned nodes.  Each genome is
  * an array of length traits, each gene being one of alts many alternate
  * values.  The root of the tree is given a genome of all zero values, and
  * its children will have the same genome except for mutations according to
  * a specified rate.
  */
  public int[][] createRandomGenome(int traits, int alts, double rate) {
    int gene[][] = new int[assigned][traits];
    int r = getRoot();
    for (int t=0; t<traits; ++t)
      gene[r][t] = 0;
    for (int j=nextNode(r); j!=-1; j=nextNode(j)) {
      int i = getParent(j);
      double diff = height[i] - height[j];
      double prob = 1.0 - (alts-1)*(1.0-Math.exp(-rate*diff))/alts;
      for (int t=0; t<traits; ++t) {
        if (Math.random() < prob)
          gene[j][t] = gene[i][t];
        else {
          int v = (int)(Math.random()*(alts-1));
          gene[j][t] = (v<gene[i][t])? v : v+1;
        } //else
      } // for t
    } // for j
    return gene;
  }


 /* Take an array of genomes for the assigned nodes and produce a triangular
  * difference array holding the number of differences in the genomes for
  * each pair of nodes.
  */
  public static double[][] differenceMatrix(int[][] gene){
    int traits = gene[0].length;
    double mut[][] = new double[gene.length][];
    for (int i=1; i<gene.length; ++i) {
      mut[i] = new double[i];
      for (int j=0; j<i; ++j) {
        mut[i][j] = 0.0;
        for (int t=0; t<traits; ++t)
          if (gene[i][t] != gene[j][t])
            mut[i][j]++;
      } //for j
    } // for i
    return mut;
  } // differenceMatrix

  private double compByMethod (double x, double y, int method) {
    if (method == -1)
      return Math.min(x,y);
    else if (method == +1)
      return Math.max(x,y);
    else // method == 0
      return (x + y) / 2.0;
  }

  /* reconstruct a tree from the distance matrix using one
   * of three methods--
   *   -1: minimum method, 0: average method, 1: maximum method
   */
  public void construct (double dist[][], int method) {
    int i,j,k,p;
    int owner[] = new int[n];
    for (i=0; i<n; ++i)
      owner[i] = i;
    while (parentless > 1) {
      // first find the closest pair
      double close_d = Double.POSITIVE_INFINITY;
      int close_i=-2, close_j=-2;
      for (i=1; i<n; ++i) if (owner[i] != -1)
        for (j=0; j<i; ++j) if (owner[j] != -1)
          if (dist[i][j] < close_d) {
            close_d = dist[i][j];
            close_i = i; close_j = j;
          }
      if (close_d == Double.POSITIVE_INFINITY)
        return; // The top of the tree is too remote to reconstruct
      i = close_i; j = close_j;
      double height_k = close_d/2.0;
      if (owner[i]>=n && height[owner[i]]>=height_k) {
        k = owner[i];
        assertParentage(owner[j],k);
      } else if (owner[j]>=n && height[owner[j]]>=height_k) {
        k = owner[j];
        assertParentage(owner[i],k);
      } else {
        k = newNode(height_k);
        assertParentage(owner[i],k);
        assertParentage(owner[j],k);
      } // if/else
      owner[i] = k;
      owner[j] = -1;
      for (p=0; p<j; ++p) if (owner[p] != -1)
        dist[i][p] = compByMethod(dist[i][p],dist[j][p],method);
      for (p=j+1; p<i; ++p) if (owner[p] != -1)
        dist[i][p] = compByMethod(dist[i][p],dist[p][j],method);
      for (p=i+1; p<n; ++p) if (owner[p] != -1)
        dist[p][i] = compByMethod(dist[p][i],dist[p][j],method);
    } // while
  } // construct

} // Phyltree
