/*****************************************************************************
******************************************************************************
******************************************************************************
*****                                                                    *****
*****                         PROGRAM UPGMA v2.0                         *****
***** A program for building binary trees out of distance matrix files.  *****
*****    Copyright (c) 1990 by Joyce C. Miller.  All Rights Reserved.    *****
*****                                                                    *****
***** This program performs the simple  arithmatic clustering  algorithm *****
***** of  Sneath  and Sokal (1972).  It takes  the name of an ASCII text *****
***** file containing a distance matrix, and the name of an output file. *****
***** The  matrix  file is opened, and a list  of OTU  names is read in, *****
***** followed by the matrix of genetic distances.  The tree is produced *****
***** in the following way.  The lowest distance is found, and those two *****
***** OTUs  are joined into a single  OTU.  The distances from these two *****
***** OTUs to each remaining OTU are then averaged.  Once the matrix has *****
***** been  reduced in this way, the  next cycle of clustering begins by *****
***** searching for  the new lowest distance.  The results of each cycle *****
***** are printed to both the screen and the output file.  The format of *****
***** the distance  matrix  file must be either an upper-right triangle, *****
***** or a lower-left triangle:                                          *****
*****                                                                    *****
*****           OTU2NAME                    OTU2NAME                     *****
*****           OTU3NAME                    OTU3NAME                     *****
*****           OTU4NAME                    OTU4NAME                     *****
*****                                                                    *****
*****           D1x2 D1x3 D1x4              D1x2                         *****
*****                D2x3 D2x4              D1x3 D2x3                    *****
*****                     D3x4              D1x4 D2x4 D3x4               *****
*****                                                                    *****
***** There can be more than 4 OTUs, of course.                          *****
*****                                                                    *****
***** There must be a blank line  between the list of OTU  names and the *****
***** start  of the distance matrix, and  NO blank lines before the list *****
***** of OTU names.                                                      *****
*****                                                                    *****
***** Reference:                                                         *****
*****                                                                    *****
***** Sneath, P.H.A., and R.R. Sokal  (1973)  Numerical  Taxonomy.  W.H. *****
***** Freeman and Company, San Fransisco.  Pgs. 230-234.                 *****
*****                                                                    *****
*****                                                                    *****
***** List of C functions used in this program:                          *****
*****                                                                    *****
*****     FUNCTION         LIBRARY          FUNCTION         LIBRARY     *****
*****     asctime          time.h           fclose           stdio.h     *****
*****     fgets            stdio.h          fopen            stdio.h     *****
*****     fprintf          stdio.h          free             stdlib.h    *****
*****     fscanf           stdio.h          localtime        time.h      *****
*****     malloc           stdlib.h         printf           stdio.h     *****
*****     rewind           stdio.h          sscanf           stdio.h     *****
*****     strcat           string.h         strcpy           string.h    *****
*****     strlen           string.h         strncmp          string.h    *****
*****     strncpy          string.h         time             time.h      *****
*****     toupper          ctype.h                                       *****
*****                                                                    *****
******************************************************************************
******************************************************************************
*****************************************************************************/

/*****************************************************************************
**                             INCLUDE FILES                                **
*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <ctype.h>
#include <rstypes.h>        /* file containing type definitions & constants */
#include <rserrors.h>                     /* file containing error messages */
#include <rsfuncs.h>        /* file containing some commonly-used functions */
/*****************************************************************************
**                            TYPE DEFINITIONS                              **
*****************************************************************************/
typedef struct {                       /* structure to hold X,Y coordinates */
  int x;                                      /* location of OTU X in array */
  int y;                                      /* location of OTU Y in array */
} coord;                                                  /* size = 4 bytes */
typedef struct {           /* structure to hold characteristics of one node */
  otuname nodename;                                 /* name of node in tree */
  float   dist;                                         /* distance of node */
  otuname otuX;                                            /* name of OTU X */
  otuname otuY;                                            /* name of OTU Y */
} node;                                                  /* size = 68 bytes */
typedef char brnch[75];                       /* "branch" -- a line of text */
typedef struct {        /* structure to hold one branch (text line) of tree */
  otuname name;                                   /* name of current branch */
  brnch br;                                       /* text content of branch */
} branch;                                                /* size = 96 bytes */
/*****************************************************************************
**                            SYMBOLIC CONSTANTS                            **
*****************************************************************************/
#define VNUM 2.0                          /* version number of this program */
#define YEAR 1990                /* calendar year this version was released */
/*****************************************************************************
**                          FUNCTION PROTOTYPES                             **
*****************************************************************************/
/****************** EXTRACT DATA FROM DISTANCE MATRIX FILE ******************/
void getotus(char infl[]);                     /* gets otus from input file */
void getdist(FILE *fpd);                  /* gets distances from input file */
/******************* FLIP MATRIX TO UPPER-RIGHT TRIANGLE ********************/
void flipmatrix();                /* flips matrix from 'D' type to 'U' type */
/***************************** NEIGHBOR-JOINING *****************************/
void cluster(char infl[], char outfl[]);   /* operates clustering algorithm */
void printheader(char infl[], char outfl[]);      /* prints column headings */
void reducematrix(coord xy);             /* reduces matrix after each cycle */
void reduceotulist(int nodenumber,int x,int y);          /* reduces otulist */
/******************************** DRAW TREE *********************************/
void drawtree();                             /* draws a diagram of the tree */
int  orderotus(int nlines, int otulen);     /* order otus and nodes in tree */
void printtree(int nlines, int otulen);     /* prints tree to screen & file */
void fliptree(int nlines);          /* turns tree around for easier reading */
void printscale(int otulen);          /* prints a distance scale under tree */
/*****************************************************************************
**                           GLOBAL VARIABLES                               **
*****************************************************************************/
otuname otulist[MAXOTU];                 /* list of the OTUs to be compared */
float  *dist;               /* list of the genetic distances to be analyzed */
node   *nodetable;                             /* list of nodes in the tree */
branch *tree;                                       /* text diagram of tree */
FILE   *fpt;                              /* pointer for output (tree) file */
int     notu   = 0;                     /* total number of OTUs in data set */
int     cotu   = 0;                     /* current number of OTUs remaining */
int     ndist  = 0;                /* total number of distances in data set */
int     cdist  = 0;                /* total number of distances in data set */
int     nnodes = 0;                                 /* number of nodes made */
/*****************************************************************************
******************************************************************************
******************************************************************************
******                                                                  ******
******                          MAIN PROGRAM                            ******
******                                                                  ******
******************************************************************************
******************************************************************************
******************************************************************************
***** This  section of the program  prints  out the  program name  and a *****
***** copyright  message, then  error-checks the command line.  The name *****
***** of the matrix file and the output  file are  read in, and the type *****
***** up  matrix  ("U"pper-right, or "D", lower-left).  The  the list of *****
***** OTUs and distances are read in, and the UPGMA tree is constructed. *****
*****                                                                    *****
***** Functions called:                                                  *****
***** getotus        -- gets the list of OTUs.                           *****
***** flipmatrix     -- flips matrix from lower-left to upper-right      *****
*****                   triangle.                                        *****
***** cluster        -- contructs the UPGMA tree.                        *****
*****************************************************************************/
void main(int argc, char *argv[])
{
  printf("\r\nProgram UPGMA v%3.1f\r\n",VNUM);           /* message to user */
  printf("A program for creating trees from distance matrix files via the");
  printf("\r\nUnweighted Pair-Group Method of Sneath and Sokal (1973).\r\n");
  printf("Copyright (c) %d by Joyce C. Miller.  ",YEAR);
  printf("All Rights Reserved.\r\n");

  if (argc < 4) {              /* error if not enough items on command line */
    printf("\a\r\nError 101:  The proper format of the command line should ");
    printf("be:\r\n\n            UPGMA MATRIXFILE OUTPUTFILE MATRIXTYPE\r\n");
    printf("\n            All of these items are necessary.  Refer to the ");
    printf("documentation\r\n            if you require more information.\r\n");
    exit(0);
  }

  getotus(argv[1]);                   /* get the list of OTUs and distances */

  if (toupper(argv[3][0]) == 'D')
    flipmatrix();                               /* flip matrix if necessary */

  cluster(argv[1],argv[2]);                                /* make the tree */
  free(dist);                 /* release memory used for array of distances */

  drawtree();                                       /* draw diagram of tree */
  free(nodetable);                    /* release memory used to store nodes */
}                                                    /* END OF MAIN PROGRAM */
/*****************************************************************************
**                                                                          **
**                            FUNCTION GETOTUS                              **
**                                                                          **
** This function is called from FUNCTION MAIN, and reads in the list of OTU **
** names.  First, the  slots in the  list are initialized.  Then the matrix **
** file (RDFL) is opened, and  the OTUs  are read in, line by line, until a **
** blank  line  is reached.  Then  control is  passed  to FUNCTION GETDIST, **
** which reads in the distance values from the matrix file.                 **
**                                                                          **
** Functions called:                                                        **
** opntrdfl       --  opens a text file for reading.                        **
** rserror311     --  error message is error reading datafile.              **
** getdist        --  reads values from distance matrix file into memory.   **
*****************************************************************************/
void getotus(char infl[])
{
  register int i;                                  /* loop control variable */
  char  tempstr[MAXSTRLEN];                             /* temporary string */
  char  tn[MAXSTRLEN];                       /* pointer to temporary string */
  char  done = 'N';                                    /* are we there yet? */
  FILE *fpd;                            /* pointer for distance matrix file */

  printf("\r\nReading list of OTUs.....\r\n");           /* message to user */

  for (i=0; i<MAXOTU; ++i) otulist[i][0] = '\0';      /* initialize OTULIST */
  fpd = opntrdfl(fpd,infl);                    /* open distance matrix file */
  while (done == 'N') {    /* read in OTU names until blank line is reached */
    if ((fgets(tempstr,MAXSTRLEN,fpd)) == NULL) rserror311(infl);
    sscanf(tempstr,"%s",tn);                  /* extract OTU name from line */
    if (strlen(tn) < 1) done = 'Y';     /* if it's a blank line, we're done */
    /* if not, add it to the OTU list */
    else if(strlen(tn) <= 20) strcpy(otulist[notu++],tn);  /* copy OTU name */
    else {                   /* cut name down to 20 characters if necessary */
      strncpy(otulist[notu],tn,20);
      otulist[notu++][21] = '\0';           /* increase number of OTUs by 1 */
    }
  }
  cotu  = notu;                                 /* get total number of taxa */
  ndist = notu*(notu-1)/2;                 /* get total number of distances */
  cdist = ndist;                  /* get number of distances yet to be done */
  getdist(fpd);                               /* go on to read in distances */
}                                                /* END OF FUNCTION GETOTUS */
/*****************************************************************************
**                                                                          **
**                            FUNCTION GETDIST                              **
**                                                                          **
** This function is called from FUNCTION GETOTUS.  It reads distance values **
** from the distance  matrix  file (RDFL)  and into the  memory array DIST. **
** When  done, it closes  the distance matrix file, and passes control back **
** to  FUNCTION  GETOTUS, which immediately passes control back to FUNCTION **
** MAIN.                                                                    **
**                                                                          **
** Functions called: none                                                   **
*****************************************************************************/
void getdist(FILE *fpd)
{
  register int i;                                  /* loop control variable */
  int tn;                                       /* integer needed by fscanf */

  printf("Reading list of distances.....\r\n");          /* message to user */

  if ((dist=(float *)malloc((ndist+1)*sizeof(float))) == NULL) rserror400();
  for (i=0; i<ndist; ++i) dist[i] = -5;        /* initialize distance array */
  for (i=0; i<ndist; ++i) tn = fscanf(fpd,"%f",&dist[i]); /* read distances */

  rewind(fpd);                             /* close up distance matrix file */
  fclose(fpd);
}                                                /* END OF FUNCTION GETDIST */
/*****************************************************************************
**                                                                          **
**                           FUNCTION FLIPMATRIX                            **
**                                                                          **
** This function is called  from  FUNCTION MAIN.  The loops in this program **
** operate on the  assumption that the distance matrix is in the form of an **
** upper-right triangle.  If it is a  lower-left triangle, this function is **
** called to invert the matrix.  It accomplishes this  by creating a second **
** distance  array  called 'U'.  It  goes  to the first  slot  of 'U', then **
** locates the  corresponding  slot in the distance  array ('DIST'), copies **
** the distance into 'U', then goes to  the second slot in 'U', etc.  After **
** all of the  distances  have  been  copied  out  of 'dist' and  into  the **
** appropriate  slots  in  'U', 'U' is  copied back into 'dist', and 'U' is **
** erased.  Control then passes back to MAIN.                               **
**                                                                          **
** Functions called: none                                                   **
*****************************************************************************/
void flipmatrix()
{
  register int k,m;                /* loop control variables for 'D' matrix */
  int  d,u;     /* linear array counter for the 'D'own and the 'U' matrices */
  int  i,j;                        /* loop control variables for 'U' matrix */
  float *U;                            /* temporary array for 'U' distances */

  /* allocate memory for 'U'p distance array */
  if ((U=(float *)malloc((ndist+1)*sizeof(float))) == NULL) rserror400();
  for (i=0; i<ndist; ++i) U[i] = -5;           /* initialize distance array */

  u = 0;     /* initialize 'U' distance array counter */
  for (i=0; i<notu-1; ++i)       /* begin to fill the 'U' distance array... */
    for (j=i+1; j<notu; ++j) {
      d = 0;                                      /* initialize 'D' counter */
      for (k=1; k<notu; ++k)           /* go through the 'D' distance array */
	for (m=0; m<k; ++m)       /* look for match to current 'U' distance */
	  if ((k!=j) || (m!=i)) d++;       /* if not a match, keep counting */
	  else {                                 /* if found, stop counting */
	    k = notu;                     /* get out of two innermost loops */
	    m = k;
	    break;
	  }
      U[u++] = dist[d];          /* copy 'D' distance into current 'U' slot */
    }

  for (i=0; i<ndist; ++i) dist[i] = U[i];     /* copy U array to dist array */
  free(U);                                            /* get rid of U array */
}                                             /* END OF FUNCTION FLIPMATRIX */
/*****************************************************************************
**                                                                          **
**                           FUNCTION CLUSTER                               **
**                                                                          **
** This  function is  called  from  FUNCTION MAIN.  It is the function that **
** actually coordinates the tree-making process.  It  prepares  the  output **
** file, and  then begins joining pairs of OTUs at nodes until there are no **
** more  OTUs to be  joined.  To  join  two  OTUs, the  distance  array  is **
** scanned, and the lowest  value is located.  Its  position  in the linear **
** array is converted to X-Y coordinates, so that the names of the two OTUs **
** involved can  be  determined.  The  results  are then  put into the node **
** table, which holds the node number, the distance, and the name of OTUs X **
** and Y.  Then FUNCTION REDUCEMATRIX is called, which reduces the distance **
** matrix by (notu-1) distances.  Then FUNCTION REDUCEOTULIST is  called to **
** reduce the otulist by 1 OTU (the node number becomes the other OTU).     **
**                                                                          **
** Function called:                                                         **
** reducematrix  --  reduces the distance matrix by (N-1) distances.        **
** reduceotulist --  reduces the otulist by 1 OTU.                          **
*****************************************************************************/
void cluster(char infl[], char outfl[])
{
  register int j,k;                               /* loop control variables */
  int   i;                                         /* loop control variable */
  float lowdist;               /* lowest genetic distance in distance array */
  int   lowslot;                             /* location of lowest distance */
  char  nodestr[20];                      /* the node number in string form */
  char *n;                                            /* pointer to nodestr */
  coord xy;                             /* x-y coordinates of that distance */
  int   z;                                /* slot number of lowest distance */

  printf("Clustering.....\r\n");

  fpt = opntwtfl(fpt,outfl);                        /* open the output file */
  printheader(infl,outfl);                       /* print header to outfile */

  /* allocate memory for node table */
  if ((nodetable=(node *)malloc((notu)*sizeof(node))) == NULL) rserror400();
  for (i=0; i<notu; ++i) {                         /* initialize node table */
    nodetable[i].nodename[0] = '\0';
    nodetable[i].dist        =   0;
    nodetable[i].otuX[0]     = '\0';
    nodetable[i].otuY[0]     = '\0';
  }

  /* print column headings to screen and output file */
  printf("\r\nNODE   OCCURS AT  and joins   OTU X      and       OTU Y\r\n");
  fprintf(fpt,"NODE   OCCURS AT  and joins   OTU X      and       OTU Y\n");

  for (i=1; cotu>1; ++i) {     /* do as long as there are OTUs to be joined */
    lowdist = dist[0];            /* seed value for finding lowest distance */
    lowslot = 0;                                  /* location of seed value */
    cdist   = cotu*(cotu-1)/2;       /* find number of distances left to do */
    for (j=0; j<cdist; ++j) {               /* locate lowest distance value */
      if (dist[j] < lowdist) {       /* see if we've got a new low distance */
        lowdist = dist[j];                              /* get low distance */
        lowslot = j;                          /* get its slot in dist array */
      }
    }                               /* smallest distance has now been found */

    z = 0;                                   /* set low slot number to zero */
    for (j=0; j<cotu-1; ++j) {           /* for each comparison of OTU X... */
      for (k=j+1; k<cotu; ++k) {                         /* ... to OTU Y... */
        if (z != lowslot) z++;               /* count up to the lowest slot */
        else {         /* if this is the slot number of the lowest distance */
          xy.x = j;                  /* x coordinate is equal to outer loop */
          xy.y = k;                  /* y coordinate is equal to inner loop */
	  j    = cotu;    /* set LCV to max value to stop execution of loop */
	  k    = cotu;    /* set LCV to max value to stop execution of loop */
          break;                                                 /* get out */
        }                                                    /* end of else */
      }                                                  /* end of "k" loop */
    }                                                    /* end of "j" loop */

    /* put results of this cycle into the node table */
    strcat(nodetable[i-1].nodename,"Node ");  /* make 1st part of node name */
    n = inttoalph(i,nodestr);            /* convert node number to a string */
    strcpy(nodestr,n);
    strcat(nodetable[i-1].nodename,nodestr);   /* combine "N" & node number */
    nodetable[i-1].dist = dist[lowslot];
    strcpy(nodetable[i-1].otuX,otulist[xy.x]);
    strcpy(nodetable[i-1].otuY,otulist[xy.y]);
    nnodes++;                         /* increment the number of nodes done */
    reducematrix(xy);                          /* reduce the distance array */
    reduceotulist(nnodes,xy.x,xy.y);                  /* reduce the otulist */

    /* print results to screen */
    printf("%3.0d     ",i);                                  /* node number */
    printf("%8.6f              ",nodetable[i-1].dist);          /* distance */
    printf("%s",nodetable[i-1].otuX);  /* print out name of OTU X to screen */
    for (j=strlen(nodetable[i-1].otuX); j<KEYLEN; ++j) printf(" ");
    printf("%s",nodetable[i-1].otuY);  /* print out name of OTU Y to screen */
    for (j=strlen(nodetable[i-1].otuY); j<KEYLEN; ++j) printf(" ");
    printf("\r\n");

    fprintf(fpt,"%3.0d     ",i);                             /* node number */
    fprintf(fpt,"%8.6f              ",nodetable[i-1].dist);     /* distance */
    fprintf(fpt,"%s",nodetable[i-1].otuX);              /* print OTU X name */
    for (j=strlen(nodetable[i-1].otuX); j<KEYLEN; ++j) fprintf(fpt," ");
    fprintf(fpt,"%s",nodetable[i-1].otuY);              /* print OTU Y name */
    for (j=strlen(nodetable[i-1].otuY); j<KEYLEN; ++j) fprintf(fpt," ");
    fprintf(fpt,"\n");
  }                                              /* all done with all nodes */
  printf("\r\n");
  fprintf(fpt,"\n\n");
}                                                /* END OF FUNCTION CLUSTER */
/*****************************************************************************
**                                                                          **
**                         FUNCTION REDUCEMATRIX                            **
**                                                                          **
** This  function is called  from  FUNCTION CLUSTER, and its  purpose is to **
** average and  reduce the distance matrix  when two  OTUs are to be joined **
** into  a single cluster.  It  does this by receiving the  X-Y coordinates **
** (within the  distance matrix) of the distance value at which the present **
** cycle of joining occurred (e.g., OTU 3  by 5).  It then counts down from **
** the  Nth OTU  to zero.  Basically, the OTU X x OTU Y distance is erased. **
** For each  non-X and  non-Y  OTU, the  OTU N x OTU X distance is averaged **
** with the OTU N x OTU Y distance.                                         **
** For example:  There  are  five OTUs, and #3 and #5 are being joined this **
** cycle:                                                                   **
**                                                                          **
** D1x2 D1x3 D1x4 D1x5               D1x2  (D1x3+D1x5)/2       D1x4         **
**      D2x3 D2x4 D2x5      --->           (D2x3+D2x5)/2       D2x4         **
**           D3x4 D3x5                                     (D3x4+D4x5)/2    **
**                D4x5                                                      **
**                                                                          **
** Functions called: none                                                   **
*****************************************************************************/
void reducematrix(coord xy)
{
  int   i,j;                                      /* loop control variables */
  coord xy1;                           /* X-Y coordinates for OTU ? x OTU X */
  coord xy2;                           /* X-Y coordinates for OTU ? x OTU Y */
  int   L1;                              /* array location of OTU ? x OTU Y */
  int   L2;                              /* array location of OTU ? x OTU X */

  /** BEGIN LOCATING THE DISTANCES TO BE AVERAGED:  e.g., IF OTU 1 & 2 ARE **/
  /** BEING JOINED, THEN GET  LOCATIONS OF 1xN  AND 2xN, AND AVERAGE THEM, **/
  /************************ THEN GO ON TO OTU N-1, ETC. *********************/
  for (i=(cotu-1); i>=0; --i) {       /* count down through all of the OTUs */
    if (i < xy.x) {           /* locate the distance for this OTU vs. OTU X */
      xy1.x = i;
      xy1.y = xy.x;
    }
    else if (i > xy.x) {
      xy1.x = xy.x;
      xy1.y = i;
    }
    else if (i == xy.x) {     /* if this is one of the OTUs being joined... */
      L1 = (cotu*(xy.x+1))-(xy.x*(xy.x+1)/2+(xy.x+1))-(cotu-(xy.y+1))-1;
      for (j=L1; j<cdist; ++j) dist[j] = dist[j+1];  /* erase that distance */
      dist[cdist--] = -5;                     /* and move all others up one */
      continue;                       /* find the other: this OTU vs. OTU Y */
    }
    if (i < xy.y) {           /* locate the distance for this OTU vs. OTU Y */
      xy2.x = i;
      xy2.y = xy.y;
    }
    else if (i > xy.y) {
      xy2.x = xy.y;
      xy2.y = i;
    }
    else if (i == xy.y) continue;

    /* we now have the X-Y coordinates for OTU N x OTU X, and OTU N x OTU Y */
    if ((i != xy.x) && (i != xy.y)) {
      L1 = (cotu*(xy1.x+1))-(xy1.x*(xy1.x+1)/2+(xy1.x+1))-(cotu-(xy1.y+1))-1;
      L2 = (cotu*(xy2.x+1))-(xy2.x*(xy2.x+1)/2+(xy2.x+1))-(cotu-(xy2.y+1))-1;
      dist[L1] = (dist[L1] + dist[L2]) / 2;    /* average the two distances */
      for (j=L2; j<cdist; ++j) dist[j] = dist[j+1];   /* move dist's up one */
      dist[cdist--] = -5;                         /* set the last one to -5 */
    }                                                 /* done with this OTU */
  }                                            /* done with all of the OTUs */
}                                           /* END OF FUNCTION REDUCEMATRIX */
/*****************************************************************************
**                                                                          **
**                         FUNCTION REDUCEOTULIST                           **
**                                                                          **
** This  function is  called  from  FUNCTION CLUSTER, and  it  reduces  the **
** OTUlist by one after  each cycle of clustering.  It receives the current **
** node  number, and the location in the OTUlist of  OTUs X and Y  (the two **
** OTUs that  were just  joined).  The  name of  OTU X is replaced by "N#", **
** where "#" is  the node number (e.g. "N1", "N2", etc.).  OTU Y is erased, **
** and all remaining OTU names are moved up one.                            **
**                                                                          **
** Functions called: none                                                   **
*****************************************************************************/
void reduceotulist(int nodenumber, int x, int y)
{
  char  nodename[20];        /* "Node name" (N1, N2, etc.) to replace OTU X */
  char  nodestr[20];                      /* the node number in string form */
  char *n;                                            /* pointer to nodestr */
  int   i;                                         /* loop control variable */

  nodestr[0] = '\0';                                          /* initialize */
  nodename[0] = '\0';                                         /* initialize */
  strcat(nodename,"Node ");               /* create first part of node name */
  n = inttoalph(nodenumber,nodestr);     /* convert node number to a string */
  strcpy(nodestr,n);
  strcat(nodename,nodestr);                  /* combine "N" and node number */
  strcpy(otulist[x],nodename);              /* replace OTU X name with "N#" */
  for (i=y; i<cotu; ++i) strcpy(otulist[i],otulist[i+1]);  /* remove OTU Y, */
  otulist[cotu--][0] = '\0';                    /* and move the rest up one */
}                                          /* END OF FUNCTION REDUCEOTULIST */
/*****************************************************************************
**                                                                          **
**                          FUNCTION PRINTHEADER                            **
**                                                                          **
** This function is called  from  FUNCTION CLUSTER.  It  prints a header to **
** the  output  file, including  the  data and time, the program name and a **
** copyright notice, and the names of the input and output files.           **
** Functions called: none                                                   **
*****************************************************************************/
void printheader(char infl[], char outfl[])
{
  int  i;                                          /* loop control variable */
  struct tm *nowtime;     /* time and clock structures to get date and time */
  time_t aclock;

  /* convert input file name to uppercase */
  for (i=0; i<strlen(infl); ++i) {
    if (i<FILELEN) infl[i] = toupper(infl[i]);
    else break;
  }
  infl[i] = '\0';

  /* convert output file name to uppercase */
  for (i=0; i<strlen(outfl); ++i) {
    if (i<FILELEN) outfl[i] = toupper(outfl[i]);
    else break;
  }
  outfl[i] = '\0';

  time(&aclock);                          /* get time from computer's clock */
  nowtime = localtime(&aclock);
  fprintf(fpt,"%s",asctime(nowtime));                /* print time and date */
  fprintf(fpt,"Results from program UPGMA v%3.1f\n",VNUM);
  fprintf(fpt,"A program for creating trees from distance matrix files via ");
  fprintf(fpt,"the\nUnweighted Pair-Group Method of Sneath and Sokal (1973).");
  fprintf(fpt,"\nCopyright (c) %d by Joyce C. Miller.  ",YEAR);
  fprintf(fpt,"All Rights Reserved.\n");
  fprintf(fpt,"This file, %s, contains analysis of file ",outfl);
  fprintf(fpt,"%s:\n\n",infl);
}                                            /* END OF FUNCTION PRINTHEADER */
/*****************************************************************************
**                                                                          **
**                            FUNCTION DRAWTREE                             **
**                                                                          **
** This  function is  called from FUNCTION MAIN.  It draws a diagram of the **
** tree from the information in nodetable.  It does this by making an array **
** of 2*notu-1 text lines (one for each OTU and a blank line in between). A **
** call to  FUNCTION ORDEROTUS puts  the  OTUs into the order in which they **
** will appear on the tree.  Then, the  branches are drawn.  For each node, **
** each  line  of the tree  diagram  is examined.  If a line corresponds to **
** either OTU X or OTU Y, then  the appropriate number of dashes is printed **
** to form the branch, and the "node switch" is turned "on".  From then on, **
** text lines in  the tree will be  padded out to that distance with blanks **
** and a "|", until the other OTU is found, when a branch of dashes will be **
** printed, and the node switch turned off.  Then the tree is printed.      **
**                                                                          **
** Functions called:                                                        **
** orderotus     --  puts the OTUs into the order they will appear in tree. **
** roundnum      --  rounds off a number, e.g. 3.5 -> 4, 3.49 -> 3.         **
** printtree     --  prints the completed tree diagram to screen and file.  **
*****************************************************************************/
void drawtree()
{
  register int j,k;                               /* loop control variables */
  int i;                                           /* loop control variable */
  int nlines = 0;                                /* number of lines in tree */
  otuname otux;                                     /* short name for OTU X */
  otuname otuy;                                     /* short name for OTU Y */
  float conv = 0;                        /* char/distance conversion factor */
  int otulen = 0;              /* length (in chars) of the longest OTU name */
  int swtch = 0;                                 /* "node switch" on or off */
  int s,f;                              /* start & finish of branch drawing */

  printf("Constructing tree diagram.....\r\n");          /* message to user */

  nlines = 2*notu-1;                             /* number of lines in tree */

  /* allocate memory for the tree diagram */
  if ((tree=(branch *)malloc((nlines)*sizeof(branch))) == NULL) rserror400();
  for (i=0; i<nlines; ++i) {                     /* initialize tree diagram */
    tree[i].name[0] = '\0';
    tree[i].br[0]   = '\0';
  }

  otulen = orderotus(nlines,otulen);   /* get OTUs in correct order on tree */
  conv = nodetable[nnodes-1].dist/(80-otulen-5);      /* char/dist conversn */

  for (i=0; i<nnodes; ++i) {                        /* go through each node */
    if (i==0) f = roundnum(nodetable[i].dist/conv,0);     /* get # of chars */
    else f = roundnum(nodetable[i].dist/conv,0);
    swtch = 0;                                      /* turn off node switch */
    for (j=0; j<nlines; ++j) {            /* go through each branch of tree */
      strcpy(otux,nodetable[i].otuX);                     /* get OTU X name */
      strcpy(otuy,nodetable[i].otuY);                     /* get OTU Y name */
      s = strlen(tree[j].br);               /* get length of current branch */
      if ((strcmp(tree[j].name,otux)==0) || (strcmp(tree[j].name,otuy)==0)) {
	  for (k=s; k<f; ++k) strcat(tree[j].br,"-");         /* add branch */
	  if (swtch == 1) swtch = 0;       /* if we're now outside the node */
	  else swtch = 1;                     /* if we're now inside a node */
      }
      else {                                /* if current OTU is not X or Y */
	if (swtch == 1) {                     /* if currently inside a node */
	  for (k=s; k<f-1; ++k) strcat(tree[j].br," ");       /* pad branch */
	  strcat(tree[j].br,"|");                           /* add crossbar */
	}
      }
    }                                                    /* end of "j" loop */
  }                                                      /* end of "i" loop */

  fliptree(nlines);
  printtree(nlines,otulen);                  /* print tree to screen & file */
  free(tree);
}                                               /* END OF FUNCTION DRAWTREE */
/*****************************************************************************
**                                                                          **
**                            FUNCTION ORDEROTUS                            **
**                                                                          **
** This  function is  called from FUNCTION DRAWTREE.  It puts the OTUs into **
** the  order  in  which  they  will  occur on  the tree.  This  is done by **
** allocating memory  for a list of  2*notu-1  text lines (one for each OTU **
** and a blank line between each).  Then, the nodetable  is read backwards. **
** The OTUs that make up the  last node  are  inserted into the tree (these **
** "OTUs" are  sometimes  previous  nodes).  Then the next to  last node is **
** read, and that node's place on the tree is replaced by the two OTUs that **
** make up that node.  This is done until the beginning of the nodetable is **
** reached; each node being replaced in  the list by the two OTUs that make **
** up that cluster.  After the  ordering is completed, the program looks at **
** all of the OTU names, and  finds the length of the longest one.  This is **
** used later to decide how wide the tree can be.                           **
**                                                                          **
** Functions called:  none                                                  **
*****************************************************************************/
int orderotus(int nlines, int otulen)
{
  int numbranches = 0;                       /* number of branches done yet */
  int i,j = 0;                                    /* loop control variables */

  cotu = notu;                             /* get number of taxa to be done */

  strcpy(tree[0].name,nodetable[nnodes-1].nodename);  /* copy in first node */

  for (i=nnodes-1; i>=0; --i) {        /* go through each node in nodetable */
    for (j=numbranches; j>=0; --j) {                 /* look at each branch */
      if (strcmp(tree[j].name,nodetable[i].nodename) == 0) { /* locate node */
	strcpy(tree[j+2].name,nodetable[i].otuY);              /* add otu Y */
	strcpy(tree[j+1].name,nodetable[i].nodename);   /* add in node name */
	strcpy(tree[j].name,nodetable[i].otuX);                /* add otu X */
	numbranches+=2;               /* increase number of branches by two */
	j = 0;                                           /* get out of loop */
      }
      else strcpy(tree[j+2].name,tree[j].name);         /* copy in otu name */
    }                                                    /* end of "j" loop */
  }                                                      /* end of "i" loop */

  for (i=0; i<nlines; ++i)               /* find length of longest OTU name */
    if (strncmp(tree[i].name,"Node ",5) != 0)
      if (strlen(tree[i].name) > otulen) otulen = strlen(tree[i].name);

  return(otulen);                      /* return length of longest OTU name */
}                                              /* END OF FUNCTION ORDEROTUS */
/*****************************************************************************
**                                                                          **
**                            FUNCTION FLIPTREE                             **
**                                                                          **
** This  function  is  called  from  FUNCTION DRAWTREE.  It takes the tree, **
** which was drawn with the OTUs on the left, and  the origin on the right, **
** and turns it around.  This  way, the  tree "evolves" from left to right, **
** which is more easily understood by the user.                             **
**                                                                          **
** Functions called:  none                                                  **
*****************************************************************************/
void fliptree(int nlines)
{
  int i,j,k;                                      /* loop control variables */
  brnch tmp;             /* temporary string to hold branch during flipping */
  char t1;                                       /* character being flipped */

  k = strlen(tree[0].br);                  /* find length of longest branch */
  for (i=0; i<nlines; ++i) if (strlen(tree[i].br) > k) k = strlen(tree[i].br);

  for (i=0; i<nlines; ++i)               /* pad all branches to same length */
    for (j=strlen(tree[i].br); j<k; ++j) strcat(tree[i].br," ");

  for (i=0; i<nlines; ++i) {                            /* flip each branch */
    k = strlen(tree[i].br) - 1;                     /* get length of branch */
    strcpy(tmp,tree[i].br);                                  /* copy branch */
    for (j=0; j<k; ++j) {                            /* go from ends inward */
      t1 = tmp[j];                                   /* get first character */
      tmp[j] = tmp[k];                    /* copy last char into first char */
      tmp[k--] = t1;                      /* copy first char into last char */
    }
    strcpy(tree[i].br,tmp);           /* copy flipped branch back into tree */
  }
}                                               /* END OF FUNCTION FLIPTREE */
/*****************************************************************************
**                                                                          **
**                            FUNCTION PRINTTREE                            **
**                                                                          **
** This function is called from FUNCTION DRAWTREE.  It  prints out the OTUs **
** and their respective branches to form a tree diagram.                    **
**                                                                          **
** Functions called:  none                                                  **
*****************************************************************************/
void printtree(int nlines, int otulen)
{
  int i,j;                                        /* loop control variables */

  for (i=0; i<nlines; ++i) {           /* for each text line of the tree... */
    printf("   %s ",tree[i].br);                /* print out text of branch */
    fprintf(fpt,"   %s ",tree[i].br);

    if (strncmp(tree[i].name,"Node ",5) != 0) {       /* if it's not a node */
      printf("%s",tree[i].name);                      /* print the OTU name */
      fprintf(fpt,"%s",tree[i].name);
    }
    printf("\r\n");
    fprintf(fpt,"\n");
  }
  printf("\r\n");                         /* carriage return at end of line */
  fprintf(fpt,"\n");

  printscale(otulen);             /* print distance scale at bottom of tree */

  rewind(fpt);                                      /* close up output file */
  fclose(fpt);
}                                              /* END OF FUNCTION PRINTTREE */
/*****************************************************************************
**                                                                          **
**                           FUNCTION PRINTSCALE                            **
**                                                                          **
** This  function  is  called  from  FUNCTION PRINTTREE.  It  prints  out a **
** distance scale beneath the tree diagram.                                 **
**                                                                          **
** Functions called:  none                                                  **
*****************************************************************************/
void printscale(int otulen)
{
  brnch tmp;             /* temporary string to hold branch during flipping */
  char t1;                                       /* character being flipped */
  float conv    = 0;                     /* distance/char conversion factor */
  int   linelen = 0;                           /* length (in char) of scale */
  int   i,j,k;                                    /* loop control variables */

  linelen = 80 - otulen - 5;                 /* get maximum length of scale */
  conv = nodetable[nnodes-1].dist / linelen;   /* char/dist conversn factor */

  tree[0].br[0] = '\0';             /* initialize tree branch to hold scale */

  strcat(tree[0].br,"|");
  j = 1;
  k = 1;
  for (i=1; i<linelen; ++i) {                            /* construct scale */
    if (j == 10) {                         /* print bar every 10 characters */
      strcat(tree[0].br,"|");
      ++k;                                          /* count number of bars */
      j = 1;
    }
    else {                                      /* between bars, print line */
      strcat(tree[0].br,"_");
      ++j;
    }
  }

  /* flip scale around to match tree */
  j = strlen(tree[0].br) - 1;                       /* get length of branch */
  strcpy(tmp,tree[0].br);                                    /* copy branch */
  for (i=0; i<j; ++i) {                              /* go from ends inward */
    t1 = tmp[i];                                     /* get first character */
    tmp[i] = tmp[j];                      /* copy last char into first char */
    tmp[j--] = t1;                        /* copy first char into last char */
  }
  strcpy(tree[0].br,tmp);                                    /* copy branch */

  printf("   %s\n",tree[0].br);                              /* print scale */
  fprintf(fpt,"   %s\n",tree[0].br);
  for (i=(k-1)*10; i<strlen(tree[0].br); ++i) {
    printf(" ");
    fprintf(fpt," ");
  }
  for (i=k-1; i>=0; --i) {               /* print a distance for every tick */
    printf("%5.3f     ",i*10*conv);
    fprintf(fpt,"%5.3f     ",i*10*conv);
  }
}                                             /* END OF FUNCTION PRINTSCALE */
/****************************************************************************/

