/*****************************************************************************
******************************************************************************
******************************************************************************
*****                                                                    *****
*****                       PROGRAM RS_TO_01 v1.0                        *****
*****        A program for converting RESTSITE data to 0/1 matrix.       *****
*****    Copyright (c) 1991 by Joyce C. Miller.  All Rights Reserved.    *****
*****                                                                    *****
***** This  program  converts data  from  the RESTSITE  program to a 0/1 *****
***** matrix  (used  by PAUP, MacClade, and Principal Component Analysis *****
***** programs).  From  the  command  line, it  reads in the name of the *****
***** ASCII text file for the 0/1 data to be written to, the name of the *****
***** file  for the  OTU and character names, and the type of 0/1 matrix *****
***** ("1" for OTUs by characters, and "2" for characters by OTUs).  For *****
***** more  information on the  formats  of these files, see the program *****
***** documentation.                                                     *****
*****                                                                    *****
***** PAUP by  David  Swofford,  Copyright (c) 1989  by Illinois Natural *****
***** History Survey.  All Rights Reserved.                              *****
***** MacClade Copyright (c) 1989 by Wayne  Maddison and David Maddison. *****
***** All Rights Reserved.                                               *****
*****                                                                    *****
***** List of C functions used in this program:                          *****
*****                                                                    *****
*****     FUNCTION         LIBRARY          FUNCTION         LIBRARY     *****
*****     fclose           stdio.h          feof             stdio.h     *****
*****     fgetc            stdio.h          fopen            stdio.h     *****
*****     fprintf          stdio.h          fread            stdio.h     *****
*****     fseek            stdio.h          fwrite           stdio.h     *****
*****     printf           stdio.h          qsort            stdlib.h    *****
*****     rewind           stdio.h          strcat           string.h    *****
*****     strcpy           string.h         system           stdlib.h    *****
*****                                                                    *****
******************************************************************************
******************************************************************************
*****************************************************************************/

/*****************************************************************************
**                             INCLUDE FILES                                **
*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <rstypes.h>        /* file containing type definitions & constants */
#include <rserrors.h>                     /* file containing error messages */
#include <rsfuncs.h>        /* file containing some commonly-used functions */
/*****************************************************************************
**                  TYPE DEFINITIONS & SYMBOLIC CONSTANTS                   **
*****************************************************************************/
#define VNUM 1.0                          /* version number of this program */
#define YEAR 1991                /* calendar year this version was released */
#define ALLFS 500                 /* max # of different frags/sites per PEC */
/*****************************************************************************
**                          FUNCTION PROTOTYPES                             **
*****************************************************************************/
void getninp(char chrfl[]);             /* get number of individuals & PECs */
/* fs_sort is the comparison routine handed to qsort() by getninp; its      */
/* parameter list is left unspecified in this declaration.                  */
int  fs_sort();                       /* sort the fragments/sites in fslist */
void convert(char outfl[], char chrfl[]);         /* convert RS to 0/1 data */
void flipmat(char matfl[]);              /* flip matrix to type 2 if needed */
/*****************************************************************************
**                           GLOBAL VARIABLES                               **
*****************************************************************************/
/* ni and np are read from the file "00.$$$" by getninp().                  */
int ni = 0;                            /* number of individuals in data set */
int np = 0;       /* number of PECs (probe/enzyme combinations) in data set */
/* totchar is accumulated by getninp() and then read by convert() (written  */
/* to the character file) and by flipmat() (row length of the matrix).      */
int totchar = 0;                              /* total number of characters */
/* fslist is refilled for each PEC; slots that are not in use hold -5.      */
float fslist[ALLFS];               /* array to hold list of fragments/sites */
/*****************************************************************************
******************************************************************************
******************************************************************************
******                                                                  ******
******                          MAIN PROGRAM                            ******
******                                                                  ******
******************************************************************************
******************************************************************************
******************************************************************************
***** This  section of the program  prints  out the  program name  and a *****
***** copyright  message, then  error-checks the command line.  Then the *****
***** rest of the program is executed.                                   *****
*****                                                                    *****
***** Functions called:                                                  *****
***** getninp        -- gets the number of OTUs and PECs.                *****
***** convert        -- converts RESTSITE data to type 1 0/1 matrix.     *****
***** flipmat        -- flips 0/1 matrix from type 1 to 2 if needed.     *****
*****************************************************************************/
int main(int argc, char *argv[])
{
  /*
   * Expected command line:  RS_TO_01 01FILE OTUFILE MATRIXTYPE
   *   argv[1]  file the 0/1 matrix is written to
   *   argv[2]  file the OTU and character names are written to
   *   argv[3]  matrix type; a leading '2' requests the flipped matrix
   *
   * Returns 0 after a normal run.  Exits with EXIT_FAILURE when the
   * command line is incomplete.
   */
  printf("\r\nProgram RS_TO_01 v%3.1f\r\n",VNUM);        /* message to user */
  printf("A program for converting RESTSITE data to 0/1 matrices.");
  printf("\r\nCopyright (c) %d by Joyce C. Miller.  ",YEAR);
  printf("All Rights Reserved.\r\n");

  /* All three arguments are required: argv[3] is dereferenced below, so   */
  /* anything short of four argv entries must be rejected here.            */
  if (argc < 4) {                       /* error if command line incomplete */
    printf("\a\r\nError 101:  The proper format of the command line should ");
    printf("be:\r\n\n            RS_TO_01 01FILE OTUFILE MATRIXTYPE");
    printf("\r\n\n            All of these items are necessary. ");
    printf("  Refer to the\r\n            documentation if you require ");
    printf("more information.\r\n");
    exit(EXIT_FAILURE);
  }

  getninp(argv[2]);                     /* get number of individuals & PECs */
  convert(argv[1],argv[2]);                       /* convert RS to 0/1 data */
  remove("000.$$$");           /* delete file of fslists (result not needed) */
  if (argv[3][0] == '2') flipmat(argv[1]);         /* flip matrix to type 2 */
  return 0;
}                                                   /* END OF FUNCTION MAIN */
/*****************************************************************************
**                                                                          **
**                            FUNCTION GETNINP                              **
**                                                                          **
** This  function is called from FUNCTION MAIN, and opens the "00.$$$" file **
** to get the  number of OTUs (ni) and PECs (np).  It then goes through all **
** of  the  pooled  data  files  ("#.$$$" files), and  compiles  a  list of **
** fragments/sites  for  each  of  the  PECs.  These lists are written to a **
** temporary  file.  Control then passes back to FUNCTION MAIN.             **
**                                                                          **
** Functions called:                                                        **
** opnbrdfl       --  opens a binary file for reading.                      **
** opnbwtfl       --  opens a binary file for writing.                      **
** fs_sort        --  sorts the fragments/sites in fslist.                  **
** rserror303     --  error message if FSEEK error.                         **
** rserror311     --  error message if error reading datafile.              **
** rserror323     --  error message if error writing to temp file.          **
*****************************************************************************/
void getninp(char chrfl[])
{
  /*
   * Reads ni and np from "00.$$$".  Then, for every PEC, collects the
   * distinct fragment/site values found in the OTU files, sorts them with
   * fs_sort, and appends the list to "000.$$$" as exactly ALLFS floats
   * (unused slots hold the -5 sentinel).  totchar is increased by the
   * number of distinct values of each PEC.
   *
   * chrfl is not used by this function.
   *
   * fread/fwrite report success as an item count and fseek as 0, so their
   * results are compared with integers.
   */
  FILE *fpr = NULL, *fpw = NULL;                           /* file pointers */
  char flnm[20], *f;   /* string & pointer for making sequential file names */
  int i,j;                                        /* loop control variables */
  int k,l;                                        /* loop control variables */
  long s;                                         /* long integer for fseek */
  pooldat pd;                                         /* pooled data record */
  otuname id;                                                /* name of OTU */
  int nfs;                   /* number of distinct frags/sites in this PEC */

  (void)chrfl;

  printf("\r\nRecovering list of OTUs and characters");

  fpr = opnbrdfl(fpr,"00.$$$");             /* open file with # OTUs & PECs */
  fpw = opnbwtfl(fpw,"000.$$$");       /* open file for lists of characters */

  if (fread(&ni,sizeof(int),1,fpr) != 1) rserror311("00.$$$");    /* get ni */
  if (fread(&np,sizeof(int),1,fpr) != 1) rserror311("00.$$$");    /* get np */
  if (fseek(fpr,(long)sizeof(int),SEEK_CUR) != 0) rserror303("00.$$$");
  for (i=0; i<ni; ++i)                            /* skip over list of OTUs */
    if (fread(id,sizeof(otuname),1,fpr) != 1) rserror311("00.$$$");
  fclose(fpr);

  for (i=0; i<np; ++i) {                      /* go through all of the PECs */
    printf(".");
    nfs = 0;                                  /* nothing found for this PEC */
    for (j=0; j<ALLFS; ++j) fslist[j] = -5;    /* initialize frag/site list */
    for (j=0; j<ni; ++j) {                    /* go through all of the OTUs */
      flnm[0] = '\0';                          /* create name of OTU's file */
      f = inttoalph(j,flnm);
      strcpy(flnm,f);
      strcat(flnm,".$$$");
      fpr = opnbrdfl(fpr,flnm);                            /* open OTU file */
      /* offset is computed in long so it cannot wrap in a narrower type */
      s = (long)i * (long)sizeof(pooldat);       /* compute location of PEC */
      if (fseek(fpr,s,SEEK_CUR) != 0) rserror303(flnm);         /* go there */
      if (fread(&pd,sizeof(pooldat),1,fpr) != 1) rserror311(flnm);
      fclose(fpr);
      for (k=0; (k<MAXFS)&&(pd.fs[k].f!=-5); ++k) {      /* go thru pooldat */
        for (l=0; l<nfs; ++l)                          /* compare to fslist */
          if (pd.fs[k].f == fslist[l]) break;
        if (l < nfs) continue;                            /* already listed */
        if (nfs >= ALLFS) {        /* list is full: adding would overrun it */
          printf("\a\r\nError:  More than %d different fragments/sites ",ALLFS);
          printf("were found for one\r\n            probe/enzyme ");
          printf("combination.\r\n");
          exit(EXIT_FAILURE);
        }
        fslist[nfs++] = pd.fs[k].f;                        /* add frag/site */
      }                                        /* end of "k" (pooldat) loop */
    }                                              /* end of "j" (OTU) loop */
    /* only the nfs real entries are sorted; the -5 padding stays behind   */
    qsort(fslist,(size_t)nfs,sizeof(float),fs_sort);         /* sort fslist */
    if (fwrite(fslist,sizeof(float),ALLFS,fpw) != ALLFS) rserror323();
    totchar += nfs;
  }
  if (fclose(fpw) != 0) rserror323();       /* close (and flush) fslist file */
}                                                /* END OF FUNCTION GETNINP */
/*****************************************************************************
**                                                                          **
**                            FUNCTION FS_SORT                              **
**                                                                          **
** This  function is called from FUNCTION GETNINP.  It receives an array of **
** fragments/sites,  and  sorts  them  in  ascending order.  It then passes **
** control back to FUNCTION GETNINP.                                        **
**                                                                          **
** Functions called:  none.                                                 **
*****************************************************************************/
int fs_sort(const void *x, const void *y)
{
  /*
   * qsort() comparison routine for an array of float fragments/sites.
   * x and y each point at one float.  Returns a negative value when *x
   * sorts first, a positive value when *y sorts first, and 0 when they are
   * equal.  Real values are ordered ascending; the -5 "empty slot"
   * sentinel sorts after every real value.
   *
   * The result comes from comparisons, not from the float difference: a
   * difference smaller than 1.0 would truncate to 0 when converted to int.
   */
  const float a = *(const float *)x;
  const float b = *(const float *)y;

  if (a == b)  return 0;
  if (b == -5) return -1;
  if (a == -5) return 1;
  return (a < b) ? -1 : 1;
}                                                /* END OF FUNCTION FS_SORT */
/*****************************************************************************
**                                                                          **
**                            FUNCTION CONVERT                              **
**                                                                          **
** This  function  is  called  from  FUNCTION  MAIN.  Once all the lists of **
** characters (fragments/sites) have been made, it  converts the data found **
** in the pooled data files to a 0/1 matrix.  It retrieves an  fslist, then **
** goes through all of the pooled data files for the  current PEC, printing **
** out zeros or ones, depending on whether each fragment/site was absent or **
** present in that OTU.  In this  way, a  type 1  0/1 data matrix is built. **
** As  each  fslist  is  retrieved, the characters are printed to the ASCII **
** list of OTU names and characters.  Control  then passes back to FUNCTION **
** MAIN.                                                                    **
**                                                                          **
** Functions called:                                                        **
** opntwtfl       --  opens a text file for writing.                        **
** opnbrdfl       --  opens a binary file for reading.                      **
** inttoalph      --  builds the name of an OTU's data file.                **
** rserror303     --  error message if FSEEK error.                         **
** rserror311     --  error message if error reading datafile.              **
** rserror313     --  error message if error reading temp file.             **
*****************************************************************************/
void convert(char outfl[], char chrfl[])
{
  /*
   * outfl  name of the text file that receives the type 1 0/1 matrix
   *        (one line per OTU).
   * chrfl  name of the text file that receives the OTU count, the
   *        character count, the OTU names and one line per character.
   *
   * Uses the globals ni, np and totchar set by getninp, and reads the
   * per-PEC lists that getninp stored in "000.$$$".
   *
   * The scan of pd.fs stops at the first value greater than the one being
   * looked for (NOTE(review): this presumably relies on pd.fs being in
   * ascending order -- confirm against the code that writes the records).
   *
   * fread reports success as an item count and fseek as 0, so their
   * results are compared with integers.
   */
  int k,l;                                        /* loop control variables */
  int i,j;                                        /* loop control variables */
  FILE *fpi = NULL;                         /* pointer for pooled data file */
  FILE *fpf = NULL;                              /* pointer for fslist file */
  FILE *fp0 = NULL;                              /* pointer for output file */
  FILE *fpr = NULL;                            /* pointer for "00.$$$" file */
  FILE *fpc = NULL;                           /* pointer for character file */
  char flnm[20], *f;   /* string & pointer for making sequential file names */
  pooldat pd;                                         /* pooled data record */
  otuname id;                                                /* name of OTU */
  int swtch = 0;                                           /* on/off switch */

  printf("\r\n\nConverting site/fragment data to zeros and ones");

  fpr = opnbrdfl(fpr,"00.$$$");             /* open file with # OTUs & PECs */
  fpc = opntwtfl(fpc,chrfl);            /* open file for list of characters */
  fprintf(fpc,"%d %d\n",ni,totchar);   /* write number of OTUs & characters */

  /* skip to beginning of OTU list in file "00.$$$" */
  if (fseek(fpr,(long)sizeof(int)*3,SEEK_CUR) != 0) rserror303("00.$$$");

  for (i=0; i<ni; ++i) {                       /* read & write list of OTUs */
    if (fread(id,sizeof(otuname),1,fpr) != 1) rserror311("00.$$$");
    fprintf(fpc,"%s\n",id);
  }
  fclose(fpr);                                    /* close up "00.$$$" file */

  fpf = opnbrdfl(fpf,"000.$$$");                             /* open fslist */
  fp0 = opntwtfl(fp0,outfl);                     /* open/create output file */

  for (i=0; i<ni; ++i) {                      /* go through all of the OTUs */
    flnm[0] = '\0';                            /* create name of OTU's file */
    f = inttoalph(i,flnm);
    strcpy(flnm,f);
    strcat(flnm,".$$$");
    fpi = opnbrdfl(fpi,flnm);                           /* open ".$$$" file */
    rewind(fpf);          /* every OTU re-reads the lists from the beginning */
    for (j=0; j<np; ++j) {                    /* go through all of the PECs */
      printf(".");
      if (fread(fslist,sizeof(float),ALLFS,fpf) != ALLFS) rserror313();
      if (fread(&pd,sizeof(pooldat),1,fpi) != 1) rserror311(flnm);
      for (k=0; (k<ALLFS)&&(fslist[k]!=-5); ++k) {     /* go through fslist */
        if (i == 0)             /* character names are written only once */
          fprintf(fpc,"%s %s %.2f %.2f\n",pd.prb,pd.enz,pd.rv,fslist[k]);
        swtch = 0;                                        /* set switch off */
        for (l=0; l<MAXFS; ++l) {                /* go through frags in PEC */
          if (pd.fs[l].f >  fslist[k]) break;
          if (pd.fs[l].f == fslist[k]) swtch = 1;            /* frags match */
          if (pd.fs[l].f == -5) break;
        }
        if (swtch == 1) fprintf(fp0,"1");           /* print "1" if present */
        else fprintf(fp0,"0");                                /* "0" if not */
      }                                    /* finished going through fslist */
    }                                              /* end of "j" (PEC) loop */
    fprintf(fp0,"\n");
    fclose(fpi);                                    /* close up ".$$$" file */
  }                                                /* end of "i" (OTU) loop */
  fclose(fpf);                                      /* close "000.$$$" file */
  fclose(fp0);                                         /* close output file */
  fclose(fpc);             /* close character file (was left open before) */
}                                                /* END OF FUNCTION CONVERT */
/*****************************************************************************
**                                                                          **
**                            FUNCTION FLIPMAT                              **
**                                                                          **
** This function is  called  from  FUNCTION  MAIN, and  flips  the 0/1 data **
** matrix  from  type 1 to type 2, if  necessary.  In a type 1 matrix, each **
** row  represents one individual, and the columns are the characters found **
** in   that  individual.  In  a  type  2  matrix,  the   columns  are  the **
** individuals,  and the rows are the characters.  First, the 0/1 matrix is **
** read into a binary file.  This removes  any  newline  or carriage return **
** characters.  Then, it is read back (during which it is "flipped") into a **
** new text file that replaces the original matrix file.  Control then      **
** passes back to FUNCTION MAIN.                                            **
**                                                                          **
** Functions called:                                                        **
** opntrdfl       --  opens a text file for reading.                        **
** opntwtfl       --  opens a text file for writing.                        **
** opnbrdfl       --  opens a binary file for reading.                      **
** opnbwtfl       --  opens a binary file for writing.                      **
** rserror303     --  error message if FSEEK error.                         **
** rserror311     --  error message if error reading datafile.              **
** rserror313     --  error message if error reading temp file.             **
** rserror323     --  error message if error writing to temp file.          **
*****************************************************************************/
void flipmat(char matfl[])
{
  /*
   * matfl  name of the text file holding the type 1 matrix; it is read,
   *        then reopened for writing and filled with the flipped matrix.
   *
   * Pass 1 copies only the '0' and '1' characters of matfl into the
   * temporary file "!@!.0", dropping everything else.  Pass 2 writes one
   * output line per character (totchar lines of ni digits each), fetching
   * the digit of OTU i, character c from offset i*totchar+c of the
   * temporary file.  The temporary file is removed at the end.
   *
   * Uses the globals ni and totchar.
   */
  int i,c;                                        /* loop control variables */
  FILE *fp0 = NULL, *fpd = NULL;                           /* file pointers */
  long p;                                          /* long number for FSEEK */
  int ch;         /* fgetc result: kept as int so EOF differs from any byte */
  char x;                                          /* character (0's & 1's) */

  fp0 = opntrdfl(fp0,matfl);                        /* open 0/1 matrix file */
  fpd = opnbwtfl(fpd,"!@!.0");                        /* create binary file */

  while ((ch=fgetc(fp0)) != EOF) {          /* convert 0/1 matrix to binary */
    if ((ch=='0') || (ch=='1')) {           /* keep only the matrix digits */
      x = (char)ch;
      if (fwrite(&x,sizeof(char),1,fpd) != 1) rserror323();
    }
  }
  if (!feof(fp0)) rserror311(matfl);                  /* error reading file */

  fclose(fp0);                                       /* close up both files */
  fclose(fpd);

  fp0 = opntwtfl(fp0,matfl);              /* create file for flipped matrix */
  fpd = opnbrdfl(fpd,"!@!.0");

  for (c=0; c<totchar; ++c) {    /* copy chars in correct order to new file */
    for (i=0; i<ni; ++i) {
      p = (long)i*totchar + c;          /* computed in long to avoid wrap */
      /* the seek is on the temporary file, so that is the name reported */
      if (fseek(fpd,p,SEEK_SET) != 0) rserror303("!@!.0");
      if (fread(&x,sizeof(char),1,fpd) != 1) rserror313();
      fprintf(fp0,"%c",x);
    }
    fprintf(fp0,"\n");
  }

  fclose(fpd);                                            /* close up files */
  fclose(fp0);
  remove("!@!.0");                 /* delete temp file (result not needed) */
}                                                /* END OF FUNCTION FLIPMAT */
/****************************************************************************/

