/*****************************************************************************
******************************************************************************
******************************************************************************
*****                                                                    *****
*****                        PROGRAM NJTREE v2.0                         *****
*****    A program for building Neighbor-Joining trees (Saitou & Nei,    *****
*****                     Mol. Biol. Evol. 4:406).                       *****
*****    Copyright (c) 1990 by Joyce C. Miller.  All Rights Reserved.    *****
*****                                                                    *****
***** This program  constructs Neighbor-Joining trees  from  a matrix of *****
***** genetic  distances.  It  takes  the  name  of an  ASCII  text file *****
***** containing a distance  matrix, the name  of an  output file, and a *****
***** one-letter  code  describing the  type of matrix in the input file *****
***** ('U'pper-fight triangle  or 'D'own --  lower-left triangle).   The *****
***** matrix  file is opened, the list of OTU names is read in, followed *****
***** by the matrix of genetic  distances.  If the distance  matrix type *****
***** is 'D', then the matrix is converted to an upper-right type.  Then *****
***** the tree is constructed.  For  each X-Y combination, a value (Sij) *****
***** is  computed.  The X-Y  combination that  produces  the lowest Sij *****
***** value is  the one chosen to join at that round of clustering.  The *****
***** branch  lengths leading  from the  current  node to  each  OTU are *****
***** computed.  Then  the  distance matrix is then reduced by averaging *****
***** the  distances from X and Y to each  remaining OTU.  Then the next *****
***** cycle of clustering begins.  The results of each cycle are printed *****
***** to  both the screen and the  output file.  After the clustering is *****
***** completed, a tree  diagram  is made.  This is also printed to both *****
***** the screen and the output file.                                    *****
*****    The  format  of  the  distance  matrix  file  can be  either an *****
***** upper-right triangle, or a lower-left triangle:                    *****
*****                                                                    *****
*****           OTU2NAME                    OTU2NAME                     *****
*****           OTU3NAME                    OTU3NAME                     *****
*****           OTU4NAME                    OTU4NAME                     *****
*****                                                                    *****
*****           D1x2 D1x3 D1x4              D1x2                         *****
*****                D2x3 D2x4              D1x3 D2x3                    *****
*****                     D3x4              D1x4 D2x4 D3x4               *****
*****                                                                    *****
***** There can be more than 4 OTUs, of course.                          *****
*****                                                                    *****
***** There must be a blank line  between the list of OTU  names and the *****
***** start  of the distance matrix, and  NO blank lines before the list *****
***** of OTU names.                                                      *****
*****                                                                    *****
***** Reference:                                                         *****
*****                                                                    *****
***** Saitou, N., and M. Nei  (1987)  The Neighbor-Joining method: A new *****
*****    method for reconstructing phylogenetic trees.  Mol. Biol. Evol. *****
*****    4:406-425.                                                      *****
*****                                                                    *****
*****                                                                    *****
***** List of C functions used in this program:                          *****
*****                                                                    *****
*****     FUNCTION         LIBRARY          FUNCTION         LIBRARY     *****
*****     asctime          time.h           fclose           stdio.h     *****
*****     fgets            stdio.h          fopen            stdio.h     *****
*****     fprintf          stdio.h          free             stdlib.h    *****
*****     fscanf           stdio.h          localtime        time.h      *****
*****     malloc           stdlib.h         nowtime          time.h      *****
*****     printf           stdio.h          rewind           stdio.h     *****
*****     sscanf           stdio.h          strcat           string.h    *****
*****     strcpy           string.h         strlen           string.h    *****
*****     time             time.h           toupper          ctype.h     *****
*****                                                                    *****
******************************************************************************
******************************************************************************
*****************************************************************************/

/*****************************************************************************
**                             INCLUDE FILES                                **
*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <rstypes.h>        /* file containing type definitions & constants */
#include <rserrors.h>                     /* file containing error messages */
#include <rsfuncs.h>        /* file containing some commonly-used functions */
/*****************************************************************************
**                            TYPE DEFINITIONS                              **
*****************************************************************************/
typedef struct {                       /* structure to hold X,Y coordinates */
  int x;                                      /* location of OTU X in array */
  int y;                                      /* location of OTU Y in array */
} coord;                                                  /* size = 4 bytes */
typedef struct {           /* structure to hold characteristics of one node */
  otuname nodename;                                 /* name of node in tree */
  otuname otuX;                                            /* name of OTU X */
  float   distX;                                /* distance of OTU X branch */
  otuname otuY;                                            /* name of OTU Y */
  float   distY;                                /* distance of OTU Y branch */
} node;                                                  /* size = 72 bytes */
typedef struct {        /* structure to hold one branch (text line) of tree */
  otuname name;                                   /* name of current branch */
  float brlen;                    /* length (in genetic distance) of branch */
  char br[75];                                    /* text content of branch */
} branch;                                               /* size = 102 bytes */
/*****************************************************************************
**                            SYMBOLIC CONSTANTS                            **
*****************************************************************************/
#define VNUM 2.0                          /* version number of this program */
#define YEAR 1990                /* calendar year this version was released */
/*****************************************************************************
**                          FUNCTION PROTOTYPES                             **
*****************************************************************************/
/****************** EXTRACT DATA FROM DISTANCE MATRIX FILE ******************/
void getotus(char infl[]);                     /* gets otus from input file */
void getdist(FILE *fpd);                  /* gets distances from input file */
/******************* FLIP MATRIX TO UPPER-RIGHT TRIANGLE ********************/
void flipmatrix();             /* flips matrix from 'D' to 'U' if necessary */
/***************************** NEIGHBOR-JOINING *****************************/
void cluster(char infl[], char outfl[]);   /* operates clustering algorithm */
FILE *printheader(FILE *fpt, char infl[], char outfl[]);       /* prog info */
void reducematrix(coord xy);             /* reduces matrix after each cycle */
void reduceotulist(int node,int x,int y);  /* reduces otulist after a cycle */
/******************************** DRAW TREE *********************************/
void drawtree();                             /* draws a diagram of the tree */
int  orderotus(int nlines, int otulen);     /* order otus and nodes in tree */
float treelength(int nlines, float totlen);     /* get total length of tree */
void printtree(int nlines, int otulen, float conv, float totlen);
void printscale(int otulen, float conv);    /* prints dist scale under tree */
/*****************************************************************************
**                           GLOBAL VARIABLES                               **
*****************************************************************************/
otuname otulist[MAXOTU];                 /* list of the OTUs to be compared */
float   branchlen[MAXOTU];         /* list of the cumulative branch lengths */
float  *dist;               /* list of the genetic distances to be analyzed */
node   *nodetable;                             /* list of nodes in the tree */
branch *tree;                                       /* text diagram of tree */
FILE   *fpt;                                /* file pointer for output file */
int     notu   = 0;                     /* total number of OTUs in data set */
int     cotu   = 0;      /* current number of OTUs remaining to be compared */
int     ndist  = 0;            /* total number of distances in the data set */
int     cdist  = 0;                /* cuurent number of distances remaining */
int     nnodes = 0;                                 /* number of nodes made */
int     nlines = 0;                 /* number of lines used to make up tree */
/*****************************************************************************
******************************************************************************
******************************************************************************
******                                                                  ******
******                          MAIN PROGRAM                            ******
******                                                                  ******
******************************************************************************
******************************************************************************
******************************************************************************
***** This  section of the program prints a message to the user (program *****
***** name and copyright  message), and  reads  in the command line.  If *****
***** either the  input file name, output file  name, or the matrix type *****
***** ('U' or 'D') are missing, an  error  message is  printed, and  the *****
***** program aborts.  If the  command line is complete, then  the input *****
***** file  is read, a tree  constructed, and the results are printed to *****
***** the output file.                                                   *****
*****                                                                    *****
***** Functions called:                                                  *****
***** getotus        -- gets the list of OTUs.                           *****
***** flipmatrix     -- flips matrix from lower-left to upper-right      *****
*****                   triangle.                                        *****
***** cluster        -- contructs the NJ tree.                           *****
*****************************************************************************/
void main(int argc, char *argv[])
{
  printf("\r\nProgram NJTREE v%3.1f\r\n",VNUM);          /* message to user */
  printf("A program for creating trees from distance matrix files via the\r");
  printf("\nNeighbor-Joining method (Saitou & Nei, Mol. Biol. Evol. 4:406).");
  printf("\r\nCopyright (c) %d by Joyce C. Miller.  ",YEAR);
  printf("All Rights Reserved.\r\n");

  /* error-check the command line */
  if (argc < 4) {              /* error if not enough items on command line */
    printf("\a\r\nError 101:  The proper format of the command line should ");
    printf("be:\r\n\n            NJTREE MATRIXFILE OUTPUTFILE MATRIXTYPE\r\n");
    printf("\n            All of these items are necessary.  Refer to the ");
    printf("documentation\r\n            if you require more information.\r\n");
    exit(0);
  }

  getotus(argv[1]);                 /* get the list of OTUs (and distances) */
  if (toupper(argv[3][0]) == 'D')          /* if matrix is lower-left type, */
    flipmatrix();                            /* flip it to upper-right type */

  cluster(argv[1],argv[2]);                                /* make the tree */
  free(dist);                         /* free memory used by distance array */

  drawtree();                   /* draw diagram of tree from nodes in table */
  free(tree);                           /* free memory used by tree diagram */
}                                                    /* END OF MAIN PROGRAM */
/*****************************************************************************
**                                                                          **
**                            FUNCTION GETOTUS                              **
**                                                                          **
** This function is called from FUNCTION MAIN, and reads in the list of OTU **
** names.  First, the  slots in the  list are initialized.  Then the matrix **
** file (RDFL) is opened, and  the OTUs  are read in, line by line, until a **
** blank  line  is reached.  Then  control is  passed  to FUNCTION GETDIST, **
** which reads in the distance values from the matrix file.                 **
**                                                                          **
** Functions called:                                                        **
** opntrdfl       --  opens a text file for reading.                        **
** rserror311     --  error message is error reading datafile.              **
** getdist        --  reads values from distance matrix file into memory.   **
*****************************************************************************/
void getotus(char infl[])
{
  register int i;                                  /* loop control variable */
  FILE *fpd;                                          /* input file pointer */
  char done = 'N';             /* boolean -- finished reading in OTU names? */
  char tempstr[MAXSTRLEN];                  /* line read in from input file */
  char tn[MAXSTRLEN];                           /* word from the above line */

  printf("\r\nReading list of OTUs.....\r\n");           /* message to user */

  for (i=0; i<MAXOTU; ++i) otulist[i][0] = '\0';      /* initialize OTULIST */
  fpd = opntrdfl(fpd,infl);                              /* open input file */
  while (done == 'N') {                            /* read until blank line */
    if ((fgets(tempstr,MAXSTRLEN,fpd)) == NULL) rserror311(infl);
    sscanf(tempstr,"%s",tn);                      /* scan line for OTU name */
    if (strlen(tn) < 1) done = 'Y';          /* if a blank line, we're done */
    else strcpy(otulist[notu++],tn);         /* copy OTU name into OTU list */
  }
  cotu = notu;                               /* number of otus to be joined */
  ndist = notu*(notu-1)/2;                           /* number of distances */
  getdist(fpd);                             /* go & read in those distances */
}                                                /* END OF FUNCTION GETOTUS */
/*****************************************************************************
**                                                                          **
**                            FUNCTION GETDIST                              **
**                                                                          **
** This function is called from FUNCTION GETOTUS.  It reads distance values **
** from the distance  matrix  file (RDFL)  and into the  memory array DIST. **
** When  done, it closes  the distance matrix file, and passes control back **
** to  FUNCTION  GETOTUS, which immediately passes control back to FUNCTION **
** MAIN.                                                                    **
**                                                                          **
** Functions called: none                                                   **
*****************************************************************************/
void getdist(FILE *fpd)
{
  register int i;                                  /* loop control variable */
  int tn;                                             /* pointer for fscanf */

  printf("Reading list of distances.....\r\n");          /* message to user */

  /* allocate memory to hold that number of distances */
  if ((dist=(float *)malloc((ndist+1)*sizeof(float))) == NULL) rserror400();
  for (i=0; i<ndist; ++i) dist[i] = -5;        /* initialize distance array */
  for (i=0; i<ndist; ++i) tn = fscanf(fpd,"%f",&dist[i]); /* read distances */

  rewind(fpd);                                       /* close up input file */
  fclose(fpd);
}                                                /* END OF FUNCTION GETDIST */
/*****************************************************************************
**                                                                          **
**                           FUNCTION FLIPMATRIX                            **
**                                                                          **
** This function is called  from  FUNCTION MAIN.  The loops in this program **
** operate on the  assumption that the distance matrix is in the form of an **
** upper-right triangle.  If it is a  lower-left triangle, this function is **
** called to invert the matrix.  It accomplishes this  by creating a second **
** distance  array  called 'U'.  It  goes  to the first  slot  of 'U', then **
** locates the  corresponding  slot in the distance  array ('DIST'), copies **
** the distance into 'U', then goes to  the second slot in 'U', etc.  After **
** all of the  distances  have  been  copied  out  of 'dist' and  into  the **
** appropriate  slots  in  'U', 'U' is  copied back into 'dist', and 'U' is **
** erased.  Control then passes back to MAIN.                               **
**                                                                          **
** Functions called: none                                                   **
*****************************************************************************/
void flipmatrix(char infl[])
{
  register int k,m;                /* loop control variables for 'D' matrix */
  int i,j;                         /* loop control variables for 'U' matrix */
  int d,u;      /* linear array counter for the 'D'own and the 'U' matrices */
  float *U;                            /* temporary array for 'U' distances */

  /* allocate memory for 'U'p distance array */
  if ((U=(float *)malloc((ndist+1)*sizeof(float))) == NULL) rserror400();
  for (i=0; i<ndist; ++i) U[i] = -5;           /* initialize distance array */

  u = 0;                           /* initialize 'U' distance array counter */
  for (i=0; i<notu-1; ++i)       /* begin to fill the 'U' distance array... */
    for (j=i+1; j<notu; ++j) {
      d = 0;                                      /* initialize 'D' counter */
      for (k=1; k<notu; ++k)           /* go through the 'D' distance array */
        for (m=0; m<k; ++m)       /* look for match to current 'U' distance */
          if ((k!=j) || (m!=i)) d++;       /* if not a match, keep counting */
          else {                                 /* if found, stop counting */
            k = notu;                     /* get out of two innermost loops */
            m = k;
            break;
          }
      U[u++] = dist[d];          /* copy 'D' distance into current 'U' slot */
    }

  for (i=0; i<ndist; ++i) dist[i] = U[i];     /* copy U array to dist array */
  free(U);                                            /* get rid of U array */
}                                             /* END OF FUNCTION FLIPMATRIX */
/*****************************************************************************
**                                                                          **
**                            FUNCTION CLUSTER                              **
**                                                                          **
** This  function is  called  from  FUNCTION MAIN.  It is the function that **
** actually coordinates the tree-making process.  It  prepares  the  output **
** file, and  then  begins  joining  pairs of  OTUs at  nodes (printing the **
** results of each cycle), until  there  are no more OTUs to be joined.  To **
** join two OTUs, the program goes through all of the X-Y combinations, and **
** finds the  one that will  produce the  shortest tree (and has the lowest **
** Sij  (equation 4) ).  Once that  combination has been  found, the branch **
** lengths from the current node to OTUs X and Y are computed via equations **
** 6a and 6b.  The results  are  printed to  the  output file, and then the **
** distance matrix and the OTU list are reduced by one OTU.                 **
**                                                                          **
** Function called:                                                         **
** opntwtfl      --  opens a text file for writing.                         **
** printheader   --  prints column headings to the output file.             **
** reducematrix  --  reduces the distance matrix by (N-1) distances.        **
** reduceotulist --  reduces the otulist by 1 OTU.                          **
*****************************************************************************/
void cluster(char infl[], char outfl[])
{
  register int l,m;                     /* innermost loop control variables */
  int   i,j,k;                                    /* loop control variables */
  int   n;                                              /* counting integer */
  int   z;                     /* counting cariable to find lowest distance */
  char  nodestr[20];                      /* the node number in string form */
  char *ns;                                           /* pointer to nodestr */
  coord xy;                             /* x-y coordinates of that distance */
  float Dxy;                                        /* current X-Y distance */
  float Dxk;                     /* sum of distances between X & all others */
  float Dyk;                     /* sum of distances between Y & all others */
  float Dij;                                    /* sum of non-X-Y distances */
  float Sij;                              /* Sij for current X-Y comparison */
  float lowSij;                                               /* lowest Sij */
  float lowDxy;                              /* X-Y distance for lowest Sij */
  float lowDxk;                      /* sum of X-k distances for lowest Sij */
  float lowDyk;                      /* sum of Y-k distances for lowest Sij */
  float Lx;                                           /* length of X branch */
  float Ly;                                           /* length of Y branch */
  int lowx;                                              /* slot # of OTU X */
  int lowy;                                              /* slot # of OTU Y */

  printf("Clustering.....\r\n");                         /* message to user */

  fpt = opntwtfl(fpt,outfl);                        /* open the output file */
  fpt = printheader(fpt,infl,outfl);         /* print header to output file */

  /* print column headings to screen and output file */
  printf("\r\nNODE: OTU1               (branch length)  & OTU2");
  printf("                (branch length)\r\n");
  fprintf(fpt,"NODE: OTU1               (branch length)  & OTU2");
  fprintf(fpt,"               (branch length)\n");

  /* allocate memory for node table */
  if ((nodetable=(node *)malloc((notu)*sizeof(node))) == NULL) rserror400();

  for (i=0; i<cotu; ++i) {                         /* initialize node table */
    nodetable[i].nodename[0] = '\0';
    nodetable[i].otuX[0]     = '\0';
    nodetable[i].distX       =   0;
    nodetable[i].otuY[0]     = '\0';
    nodetable[i].distY       =   0;
  }

  /* begin building tree */
  for (i=0; i<MAXOTU; ++i) branchlen[i] = 0;       /* initialize b.len list */

  for (i=1; ((i<=notu) && (cotu>3)); ++i) {             /* for each node... */
    cdist = cotu*(cotu-1)/2;                    /* find number of distances */
    z = 0;                                       /* initialize slot counter */
    for (j=0; j<cotu-1; ++j) {            /* go through all X-Y comparisons */
      for (k=j+1; k<cotu; ++k) {
        xy.x = j;                            /* get current X-Y coordinates */
        xy.y = k;
        Dxy = 0;                                          /* initialize D's */
        Dxk = 0;
        Dyk = 0;
        Dij = 0;
        Dxy = dist[z];                              /* get current distance */
        n = 0;
        Dxy = dist[z];
        for (l=0; l<cotu-1; ++l) {                          /* total up D's */
          for (m=l+1; m<cotu; ++m) {
            if ((l==j) || (m==j)) Dxk += dist[n];
            if ((l==k) || (m==k)) Dyk += dist[n];
            Dij += dist[n];
            n++;
          }
        }                                                /* now compute Sij */
        Dxk -= Dxy;
        Dyk -= Dxy;
        Dij -= (Dxk + Dyk + Dxy);
        Sij = ((Dxk+Dyk)/(2*(cotu-2))) + (Dxy/2) + (Dij/(cotu-2));
        if ((z == 0) || (Sij < lowSij)) {           /* if it's a new low... */
          lowSij  = Sij;                                        /* copy Sij */
          lowDxy  = Dxy;                               /* copy X-Y distance */
          lowDxk  = Dxk;                       /* copy sum of X-k distances */
          lowDyk  = Dyk;                       /* copy sum of Y-k distances */
          lowx    = j;                              /* copy X-Y coordinates */
          lowy    = k;
        }
        z++;                                            /* increase counter */
      }                                                  /* end of 'k' loop */
    }                                                    /* end of 'j' loop */
    xy.x = lowx;                                    /* copy X-Y coordinates */
    xy.y = lowy;
    lowDxk = lowDxk / (cotu-2);                               /* compute DXz */
    lowDyk = lowDyk / (cotu-2);                               /* compute DYz */
    /* compute branch lengths via equations 6a & 6b */
    Lx = ((lowDxy + lowDxk - lowDyk) / 2) - (branchlen[lowx]);   /* b.len X */
    Ly = ((lowDxy + lowDyk - lowDxk) / 2) - (branchlen[lowy]);   /* b.len Y */
    branchlen[lowx] += (branchlen[lowy] + Lx + Ly);   /* cumulative b.len X */
    branchlen[lowx] = branchlen[lowx] / 2;

    /* print out results */
    printf("%3.0d   %-20s",i,otulist[xy.x]);                /* node & OTU X */
    fprintf(fpt,"%3.0d   %-20s",i,otulist[xy.x]);
    printf("  (%8.6f)      %-20s",Lx,otulist[xy.y]);       /* BL(X) & OTU Y */
    fprintf(fpt,"  (%8.6f)      %-20s",Lx,otulist[xy.y]);
    printf("  (%8.6f)\r\n",Ly);                                    /* BL(Y) */
    fprintf(fpt,"  (%8.6f)\n",Ly);

    /* put results of this cycle into the node table */
    strcat(nodetable[i-1].nodename,"Node ");  /* make 1st part of node name */
    ns = inttoalph(i,nodestr);           /* convert node number to a string */
    strcpy(nodestr,ns);
    strcat(nodetable[i-1].nodename,nodestr);     /* combine "Node" & node # */
    strcpy(nodetable[i-1].otuX,otulist[xy.x]);           /* copy OTU X name */
    strcpy(nodetable[i-1].otuY,otulist[xy.y]);           /* copy OTU Y name */
    nodetable[i-1].distX = Lx;                    /* length of OTU X branch */
    nodetable[i-1].distY = Ly;                    /* length of OTU Y branch */
    nnodes++;                         /* increment the number of nodes done */

    reducematrix(xy);                          /* reduce the distance array */
    reduceotulist(i,xy.x,xy.y);                       /* reduce the otulist */
  }

  if (cotu == 3) {                          /* join the three remaining OTUs */
    Lx  = ((dist[0] + dist[1] - dist[2]) / 2) - branchlen[0];      /* BL(0) */
    Ly  = ((dist[0] + dist[2] - dist[1]) / 2) - branchlen[1];      /* BL(1) */
    Dxy = ((dist[1] + dist[2] - dist[0]) / 2) - branchlen[2];      /* BL(2) */

    /* print out results of last node */
    printf("Node %3.0d joins:\n      %-20s",i,otulist[0]);    /* Node & OTU */
    fprintf(fpt,"Node %3.0d joins:\n      %-20s",i,otulist[0]);
    printf("  (%8.6f),\n",Lx);                                     /* BL(0) */
    fprintf(fpt,"  (%8.6f),\n",Lx);
    printf("      %-20s",otulist[1]);                                /* OTU */
    fprintf(fpt,"      %-20s",otulist[1]);
    printf("  (%8.6f), and ",Ly);                                  /* BL(1) */
    fprintf(fpt,"  (%8.6f), and ",Ly);
    printf("%-20s",otulist[2]);                                      /* OTU */
    fprintf(fpt,"%-20s",otulist[2]);
    printf("  (%8.6f).\r\n",Dxy);                                  /* BL(2) */
    fprintf(fpt,"  (%8.6f).\r\n",Dxy);

    /* put results of this cycle into the node table */
    strcat(nodetable[i-1].nodename,"Node "); /* make next-to-last node name */
    ns = inttoalph(i,nodestr);
    strcpy(nodestr,ns);
    strcat(nodetable[i-1].nodename,nodestr);           /* put into nodename */
    strcpy(nodetable[i-1].otuX,otulist[0]);              /* copy OTU X name */
    strcpy(nodetable[i-1].otuY,otulist[1]);              /* copy OTU Y name */
    nodetable[i-1].distX = Lx;                    /* length of OTU X branch */
    nodetable[i-1].distY = Ly;                    /* length of OTU Y branch */
    nnodes++;                         /* increment the number of nodes done */

    strcat(nodetable[i].nodename,"Node ");           /* make last node name */
    ns = inttoalph(i+1,nodestr);
    strcpy(nodestr,ns);
    strcat(nodetable[i].nodename,nodestr);
    strcpy(nodetable[i].otuY,otulist[2]);                 /* put into OTU Y */
    strcpy(nodetable[i].otuX,nodetable[i-1].nodename);
    nodetable[i].distY = Dxy;                           /* copy in distance */
    nnodes++;
  }                                               /* now done with all OTUs */
}                                                /* END OF FUNCTION CLUSTER */
/*****************************************************************************
**                                                                          **
**                          FUNCTION PRINTHEADER                            **
**                                                                          **
** This function is called  from  FUNCTION CLUSTER.  It  prints a header to **
** the  output  file, including  the  data and time, the program name and a **
** copyright notice, and the names of the input and output files.           **
**                                                                          **
** Functions called: none                                                   **
*****************************************************************************/
FILE *printheader(FILE *fpt, char infl[], char outfl[])
{
  int i;                                           /* loop control variable */
  struct tm *nowtime;     /* time and clock structures to get date and time */
  time_t aclock;

  for (i=0; i<strlen(infl); ++i) {       /* convert input file name to caps */
    if (i<FILELEN) infl[i] = toupper(infl[i]);
    else break;
  }
  infl[i] = '\0';

  for (i=0; i<strlen(outfl); ++i) {     /* convert output file name to caps */
    if (i<FILELEN) outfl[i] = toupper(outfl[i]);
    else break;
  }
  outfl[i] = '\0';

  time(&aclock);
  nowtime = localtime(&aclock);
  fprintf(fpt,"%s",asctime(nowtime));
  fprintf(fpt,"Results from program NJTREE v%3.1f\r\n",VNUM);
  fprintf(fpt,"A program for creating trees from distance matrix files ");
  fprintf(fpt,"via the\r\nNeighbor-Joining method (Saitou & Nei, Mol. ");
  fprintf(fpt,"Biol. Evol. 4:406).\r\n");
  fprintf(fpt,"Copyright (c) %d by Joyce C. Miller.  ",YEAR);
  fprintf(fpt,"All Rights Reserved.\n");
  fprintf(fpt,"This file, %s, contains analysis of file ",outfl);
  fprintf(fpt,"%s:\n\n",infl);
  return(fpt);
}                                            /* END OF FUNCTION PRINTHEADER */
/*****************************************************************************
**                                                                          **
**                         FUNCTION REDUCEMATRIX                            **
**                                                                          **
** This  function is called  from  FUNCTION CLUSTER, and  its purpose is to **
** average and  reduce the distance matrix  when two  OTUs are to be joined **
** into  a single cluster.  It  does this by receiving the  X-Y coordinates **
** (within the  distance matrix) of the distance value at which the present **
** cycle of joining occurred (e.g., OTU 3  by 5).  It then counts down from **
** the  Nth OTU  to zero.  Basically, the OTU X x OTU Y distance is erased. **
** For each  non-X and  non-Y  OTU, the  OTU N x OTU X distance is averaged **
** with the OTU N x OTU Y distance.                                         **
** For example:  There  are  five OTUs, and #3 and #5 are being joined this **
** cycle:                                                                   **
**                                                                          **
** D1x2 D1x3 D1x4 D1x5               D1x2  (D1x3+D1x5)/2       D1x4         **
**      D2x3 D2x4 D2x5      --->           (D2x3+D2x5)/2       D2x4         **
**           D3x4 D3x5                                     (D3x4+D4x5)/2    **
**                D4x5                                                      **
**                                                                          **
** Functions called: none                                                   **
*****************************************************************************/
void reducematrix(coord xy)
{
  int i,j;                                        /* loop control variables */
  coord xy1;                 /* coordinates of the OTU X vs. OTU k distance */
  coord xy2;                 /* coordinates of the OTU Y vs. OTU k distance */
  int L1;                    /* linear slot of the OTU X vs. OTU k distance */
  int L2;                    /* linear slot of the OTU Y vs. OTU k distance */

  for (i=(cotu-1); i>=0; --i) {                 /* go from last to first OTU */
    if (i < xy.x) {          /* arrange the "OTU X vs. OTU k" pair in order */
      xy1.x = i;
      xy1.y = xy.x;
    }
    else if (i > xy.x) {
      xy1.x = xy.x;
      xy1.y = i;
    }
    else if (i == xy.x) {            /* delete the OTU X vs. OTU Y distance */
      L1 = (cotu*(xy.x+1))-(xy.x*(xy.x+1)/2+(xy.x+1))-(cotu-(xy.y+1))-1;
      for (j=L1; j<cdist; ++j) dist[j] = dist[j+1];
      dist[cdist--] = -5;
      continue;
    }
    if (i < xy.y) {          /* arrange the "OTU Y vs. OTU k" pair in order */
      xy2.x = i;
      xy2.y = xy.y;
    }
    else if (i > xy.y) {
      xy2.x = xy.y;
      xy2.y = i;
    }
    else if (i == xy.y) continue;

    if ((i != xy.x) && (i != xy.y)) {
      L1 = (cotu*(xy1.x+1))-(xy1.x*(xy1.x+1)/2+(xy1.x+1))-(cotu-(xy1.y+1))-1;
      L2 = (cotu*(xy2.x+1))-(xy2.x*(xy2.x+1)/2+(xy2.x+1))-(cotu-(xy2.y+1))-1;
      dist[L1] = (dist[L1] + dist[L2]) / 2;    /* average the two distances */
      for (j=L2; j<cdist; ++j) dist[j] = dist[j+1];   /* move the others up */
      dist[cdist--] = -5;                           /* initialize last slot */
    }
  }
}                                           /* END OF FUNCTION REDUCEMATRIX */
/*****************************************************************************
**                                                                          **
**                         FUNCTION REDUCEOTULIST                           **
**                                                                          **
** This  function is  called  from  FUNCTION  CLUSTER, and  it  reduces the **
** OTUlist by one after  each cycle of clustering.  It receives the current **
** node  number, and the location in the OTUlist of  OTUs X and Y  (the two **
** OTUs that were just joined).  The name of OTU X is replaced by "Node #", **
** where "#" is  the node number (e.g. "Node 1", "Node 2", etc.).  OTU Y is **
** erased, and all remaining OTU names are moved up one.                    **
**                                                                          **
** Functions called: none                                                   **
*****************************************************************************/
void reduceotulist(int node, int x, int y)
{
  int i;                                           /* loop control variable */
  char *n;                                          /* pointer to node name */
  char nodename[20];                      /* strings for building node name */
  char nodestr[20];

  nodestr[0] = '\0';                                  /* initialize strings */
  nodename[0] = '\0';
  strcat(nodename,"Node ");                                   /* add in 'N' */
  n = inttoalph(node,nodestr);           /* convert node number to a string */
  strcpy(nodestr,n);
  strcat(nodename,nodestr);                                 /* add onto 'N' */
  strcpy(otulist[x],nodename);                        /* copy into OTU list */
  for (i=y; i<cotu; ++i) {  /* delete old OTU y and move up remaining OTUs  */
    strcpy(otulist[i],otulist[i+1]);         /* move up remaining OTU names */
    branchlen[i] = branchlen[i+1];      /* move up remaining branch lengths */
  }
  otulist[cotu-1][0] = '\0';                     /* initialize last OTU name */
  branchlen[cotu--] = 0;                    /* initialize last branch length */
}                                         /* END OF FUNCTION REDUCE OTULIST */
/*****************************************************************************
**                                                                          **
**                            FUNCTION DRAWTREE                             **
**                                                                          **
** This  function is  called from FUNCTION MAIN.  It draws a diagram of the **
** tree from the information in nodetable.  It does this by making an array **
** of 2*cotu-1 text lines (one for each OTU and a blank line in between). A **
** call to  FUNCTION ORDEROTUS puts  the  OTUs into the order in which they **
** will appear on the tree.  Then, the  branches are drawn.  For each node, **
** each  line  of the tree  diagram  is examined.  If a line corresponds to **
** either OTU X or OTU Y, then  the appropriate number of dashes is printed **
** to form the branch, and the "node switch" is turned "on".  From then on, **
** text lines in  the tree will be  padded out to that distance with blanks **
** and a "|", until the other OTU is found, when a branch of dashes will be **
** printed, and the node switch turned off.  Then the tree is printed.      **
**                                                                          **
** Functions called:                                                        **
** orderotus     --  puts the OTUs into the order they will appear in tree. **
** treelength    --  gets the width (in genetic distance) of the tree.      **
** roundnum      --  rounds off a number, e.g. 3.5 -> 4, 3.49 -> 3.         **
** printtree     --  prints the completed tree diagram to screen and file.  **
*****************************************************************************/
void drawtree()
{
  register int j,k;                               /* loop control variables */
  int i;                                           /* loop control variable */
  float totlen = 0;                         /* total length of tree diagram */
  float trlen  = 0;                      /* length of tree diagram thus far */
  float conv   = 0;                      /* char/distance conversion factor */
  int otulen   = 0;            /* length (in chars) of the longest OTU name */
  int swtch    = 0;                              /* "node switch" on or off */
  int s,fX,fY;                          /* start & finish of branch drawing */

  printf("\r\nConstructing tree diagram.....");          /* message to user */
  printf("\r\n\nNote! If many of the distances are very small relative to ");
  printf("other distances,\r\n      then many of the branches on the tree ");
  printf("will appear to be zero, and the\r\n      tree will be difficult ");
  printf("to read.\r\n");
  fprintf(fpt,"\n\nNote! If many of the distances are very small relative ");
  fprintf(fpt,"to other distances,\n      then many of the branches on the ");
  fprintf(fpt,"tree will appear to be zero, and the\n      tree will be ");
  fprintf(fpt,"difficult to read.\n");

  nlines = 2*notu-1;                             /* number of lines in tree */

  /* allocate memory for the tree diagram */
  if ((tree=(branch *)malloc((nlines)*sizeof(branch))) == NULL) rserror400();
  for (i=0; i<nlines; ++i) {                     /* initialize tree diagram */
    tree[i].name[0] = '\0';
    tree[i].brlen   =   0;
    tree[i].br[0]   = '\0';
  }

  otulen = orderotus(nlines,otulen);   /* get OTUs in correct order on tree */
  totlen = treelength(nlines,totlen); /* get width of tree in genetic dist. */
  conv   = totlen/(80-otulen-5);    /* character/distance conversion factor */

  for (i=nnodes-1; i>=0; --i) {                     /* go through each node */
    if (nodetable[i].distX == 0) fX = 0; /* convert branch lengths to chars */
    else fX = roundnum(nodetable[i].distX/conv,0);
    if (nodetable[i].distY == 0) fY = 0;
    else fY = roundnum(nodetable[i].distY/conv,0);

    for (j=0; j<nlines; ++j) {                /* find this node on the tree */
      if (strcmp(nodetable[i].nodename,tree[j].name) == 0)
      s = strlen(tree[j].br);     /* get starting point of current branches */
    }

    swtch = 0;                                      /* turn off node switch */
    for (j=0; j<nlines; ++j) {               /* go through branches on tree */
      if (strcmp(tree[j].name,nodetable[i].otuX)==0)  {       /* it's OTU X */
        for (k=strlen(tree[j].br); k<s; ++k) strcat(tree[j].br," ");
        if (tree[j].br[strlen(tree[j].br)-1] == '|') fX -= 1;
        for (k=0; k<fX; ++k) strcat(tree[j].br,"-");          /* add branch */
        if (swtch == 1) swtch = 0;         /* if we're now outside the node */
        else swtch = 1;                       /* if we're now inside a node */
      }
      else if (strcmp(tree[j].name,nodetable[i].otuY)==0) {   /* it's OTU Y */
        for (k=strlen(tree[j].br); k<s; ++k) strcat(tree[j].br," ");
        if (tree[j].br[strlen(tree[j].br)-1] == '|') fY -= 1;
        for (k=0; k<fY; ++k) strcat(tree[j].br,"-");          /* add branch */
        if (swtch == 1) swtch = 0;         /* if we're now outside the node */
        else swtch = 1;                       /* if we're now inside a node */
      }
      else {                                /* if current OTU is not X or Y */
        if (swtch == 1) {                     /* if currently inside a node */
          for (k=strlen(tree[j].br); k<s; ++k) strcat(tree[j].br," ");
          if (tree[j].br[strlen(tree[j].br)-1] != '|')
            strcat(tree[j].br,"|");                         /* add crossbar */
        }
      }
    }                                                    /* end of "j" loop */
  }                                                      /* end of "i" loop */
  free(nodetable);
  printtree(nlines,otulen,conv,totlen);      /* print tree to screen & file */
}                                               /* END OF FUNCTION DRAWTREE */
/*****************************************************************************
**                                                                          **
**                            FUNCTION ORDEROTUS                            **
**                                                                          **
** This  function is  called from FUNCTION DRAWTREE.  It puts the OTUs into **
** the  order  in  which  they  will  occur on  the tree.  This  is done by **
** allocating memory  for a list  of 2*cotu-1  text lines (one for each OTU **
** and a blank line between each).  Then, the nodetable is read  backwards. **
** The OTUs that make up the last node  are  inserted  into the tree (these **
** "OTUs" are  sometimes  previous  nodes).  Then  the next to last node is **
** read, and that node's place on the tree is replaced by the two OTUs that **
** make up that node.  This is done until the beginning of the nodetable is **
** reached; each node being replaced in  the list by the two OTUs that make **
** up that cluster.  After the  ordering is completed, the program looks at **
** all of the OTU names, and  finds the length of the longest one.  This is **
** used later to decide how wide the tree can be.                           **
**                                                                          **
** Functions called:  none                                                  **
*****************************************************************************/
int orderotus(int nlines, int otulen)
{
  int i,j = 0;                                    /* loop control variables */
  int numbranches = 0;                       /* number of branches done yet */

  cotu = notu;                             /* get number of taxa to be done */

  strcpy(tree[0].name,nodetable[nnodes-1].nodename);  /* copy in first node */

  for (i=nnodes-1; i>=0; --i) {        /* go through each node in nodetable */
    for (j=numbranches; j>=0; --j) {                 /* look at each branch */
      if (strcmp(tree[j].name,nodetable[i].nodename) == 0) { /* locate node */
        strcpy(tree[j+2].name,nodetable[i].otuY);              /* add otu Y */
        strcpy(tree[j+1].name,nodetable[i].nodename);   /* add in node name */
        strcpy(tree[j].name,nodetable[i].otuX);                /* add otu X */
        numbranches+=2;               /* increase number of branches by two */
        j = 0;                                           /* get out of loop */
      }
      else strcpy(tree[j+2].name,tree[j].name);         /* copy in otu name */
    }                                                    /* end of "j" loop */
  }                                                      /* end of "i" loop */

  for (i=0; i<nlines; ++i)               /* find length of longest OTU name */
    if (strncmp(tree[i].name,"Node ",5) != 0)
      if (strlen(tree[i].name) > otulen) otulen = strlen(tree[i].name);

  return(otulen);                      /* return length of longest OTU name */
}                                              /* END OF FUNCTION ORDEROTUS */
/*****************************************************************************
**                                                                          **
**                           FUNCTION TREELENGTH                            **
**                                                                          **
** This  function is  called  from  FUNCTION DRAWTREE.  It goes through the **
** NODETABLE, and determines the total length  (in genetic distance) of the **
** tree  diagram  that is to be built.  It  does  this by reading the first **
** node, and  computing the distance  contained in OTU X (its branch length **
** plus any  previous  branch lengths) and OTU Y.  It then goes through the **
** TREE diagram, finds that node, and puts the distance into its brlen.     **
** For example:                                                             **
** NODETABLE:                                TREE:                          **
** Node 1   OTU 1  (0.25) & OTU 2 (0.17)     OTU 1                          **
** Node 2   Node 1 (0.27) & OTU 3 (0.23)     Node 1    0.25                 **
**                                           OTU 2                          **
**                                           Node 2    0.27 + 0.25          **
**                                           OTU 3                          **
**                                                                          **
** Functions called:  none                                                  **
*****************************************************************************/
float treelength(int nlines, float totlen)
{
  int i,j;                                        /* loop control variables */
  float dX,dY;       /* length of current & previous branches in OTUs X & Y */

  for (i=0; i<nnodes; ++i) {                        /* go through each node */
    dX = nodetable[i].distX;                     /* get OTU X branch length */
    dY = nodetable[i].distY;                     /* get OTU Y branch length */
    for (j=0; j<nlines; ++j) {              /* add in any previous branches */
      if (strcmp(tree[j].name,nodetable[i].otuX) == 0) dX += tree[j].brlen;
      if (strcmp(tree[j].name,nodetable[i].otuY) == 0) dY += tree[j].brlen;
    }

    for (j=0; j<nlines; ++j)                   /* find current node in tree */
      if (strcmp(tree[j].name,nodetable[i].nodename) == 0)
        if (dX > dY) tree[j].brlen = dX;            /* get larger of 2 OTUs */
        else tree[j].brlen = dY;
  }

  for (i=0; i<nnodes; ++i) {
    for (j=0; j<nlines; ++j) {
      if (strncmp(nodetable[i].otuX,"Node ",5) != 0)
        if (strcmp(nodetable[i].otuX,tree[j].name) == 0)
          tree[j].brlen = nodetable[i].distX;
      if (strncmp(nodetable[i].otuY,"Node ",5) != 0)
        if (strcmp(nodetable[i].otuY,tree[j].name) == 0)
          tree[j].brlen = nodetable[i].distY;
    }
  }

  totlen = 0;                                    /* determine width of tree */
  for (i=0; i<nlines; ++i) if (tree[i].brlen > totlen) totlen = tree[i].brlen;

  return(totlen);
}                                             /* END OF FUNCTION TREELENGTH */
/*****************************************************************************
**                                                                          **
**                            FUNCTION PRINTTREE                            **
**                                                                          **
** This function is called from FUNCTION DRAWTREE.  It  prints out the OTUs **
** and their respective branches to form a tree diagram.                    **
**                                                                          **
** Functions called:  none                                                  **
*****************************************************************************/
void printtree(int nlines, int otulen, float conv, float totlen)
{
  int i;                                          /* loop control variables */

  printf("\r\n\n");
  fprintf(fpt,"\n\n");
  for (i=0; i<nlines; ++i) {           /* for each text line of the tree... */
    printf("%s ",tree[i].br);                   /* print out text of branch */
    fprintf(fpt,"%s ",tree[i].br);

    if (strncmp(tree[i].name,"Node ",5) != 0) {       /* if it's not a node */
      printf("%s\r\n",tree[i].name);                  /* print the OTU name */
      fprintf(fpt,"%s\n",tree[i].name);
    }
    else {
      printf("\r\n");
      fprintf(fpt,"\n");
    }
  }
  printf("\r\n");                         /* carriage return at end of line */
  fprintf(fpt,"\n");

  printscale(otulen,conv);        /* print distance scale at bottom of tree */

  rewind(fpt);                                      /* close up output file */
  fclose(fpt);
}                                              /* END OF FUNCTION PRINTTREE */
/*****************************************************************************
**                                                                          **
**                           FUNCTION PRINTSCALE                            **
**                                                                          **
** This  function  is  called  from  FUNCTION PRINTTREE.  It  prints  out a **
** distance scale beneath the tree diagram.                                 **
**                                                                          **
** Functions called:  none                                                  **
*****************************************************************************/
void printscale(int otulen, float conv)
{
  int i,j;                                        /* loop control variables */
  int k;                                          /* loop control variables */
  int linelen = 0;                             /* length (in char) of scale */

  linelen = 80 - otulen - 5;                 /* get maximum length of scale */

  printf("|");                           /* print out bar for zero distance */
  fprintf(fpt,"|");
  j = 1;
  k = 1;
  for (i=1; i<linelen; ++i) {                            /* construct scale */
    if (j == 10) {                         /* print bar every 10 characters */
      printf("|");
      fprintf(fpt,"|");
      ++k;                                          /* count number of bars */
      j = 1;
    }
    else {                                      /* between bars, print line */
      printf("_");
      fprintf(fpt,"_");
      ++j;
    }
  }
  printf("\r\n");                                              /* next line */
  fprintf(fpt,"\n");

  printf("        ");
  fprintf(fpt,"        ");
  for (i=1; i<k; ++i) {                   /* print a distance for every bar */
    printf("%5.3f     ",i*10*conv);
    fprintf(fpt,"%5.3f     ",i*10*conv);
  }
}                                             /* END OF FUNCTION PRINTSCALE */
/****************************************************************************/

