hierclust    <- function(a, method=1, bign=FALSE, diagnostics=FALSE, sim=" ",
                    movie=FALSE, option="thru",alpha=1.0,repres="chull",
                   show="change")
{


# Function to manage the calls to 'hc' and 'hclus', and to establish 
# appropriate attributes assoc. with resulting hierarchical clusterings.
# 'hclus': already-present S-Plus function; performs hier. clust. of 
#          distance or similarity input.  
# 'hc:     new function, performs hier. clust. of raw input matrix.  Eucl. 
#          dist. is used. Many criteria and options are available.  One 
#          criterion (Ward's) is supported with storage-efficient algorithm, 
#          i.e. just orig. matrix stored, and not dist. matrix.

# Author:  F. Murtagh, May 1992

#    Check whether we're dealing with input dissimilarities or an array
     if (is.matrix(a)) dat  <- "matdat"
     if (!is.matrix(a)) dat <- "dissdat"
     

     if (!movie && dat == "dissdat") {

#       Distances as input; use function 'hclust'
        
#       'hclust' assumes 'method' = 'connected', 'compact', or 'average';
#       establish appropriate default:

        if (method==1) method <- "compact"

        hier <- hclust(a, method)

#       Note: there is a bug in S+ vis-a-vis plotting of sim.-based result.
#       We need to do the following:
#           hier <- hclust(sim=a, method) 
#           hier$height <- max(hier$height) - hier$height  }
#       Here, we will ignore this.


     }

    else if (!movie)  {

#       Non-distances as input, - i.e. raw data
#       In the help file, warn the user to normalize (etc.) data first!

        if (method==1) 
           meth   <- "Ward's min. variance or error sum of squares method."
        if (method==2) 
           meth   <- "Single linkage or nearest neighbor method."
        if (method=="connected") {
           meth   <- "Single linkage or nearest neighbor method."
           method <- 2 }
        if (method==3) 
           meth   <- "Complete linkage or diameter method."
        if (method=="compact") { 
           meth   <- "Complete linkage or diameter method."
           method <- 3 }
        if (method==4) 
           meth   <- "Average linkage (group average or UPGMA) method."
        if (method=="average") { 
           meth   <- "Average linkage (group average or UPGMA) method."
           method <- 4 }
        if (method==5)  
           meth   <- "McQuitty's or WPGMA method."
        if (method==6) 
           meth   <- "Median (Gower's or WPGMC) method."
        if (method==7) 
           meth   <- "Centroid or UPGMC method."
        if (diagnostics == TRUE && method >= 1 && method <= 7) cat(meth,"\n")
        if (method >7 || method <1) 
           stop("method must be an integer between 1 and 7.") 


        hier <- hc(a, method, bign)    


    }

    else if (movie) {

#   Hierarchical clustering with real-time representation of agglomerations.


  if (!is.matrix(a)) stop("First input argument must be a matrix.\n")
  n                    <- nrow(a)
  m                    <- ncol(a)
#-----------------------------------------------------------------------------
# Input parameters: description and preliminary tests on validity.
#
#
# a is input data matrix, - no NAs allowed.
#
# method = 1 (default), 2, 3, or 4:
# 1 = minimum variance method (with the further possibility to prioritize
# linearity through use of alpha < 1.0); 2 = single linkage method; 3 =
# complete linkage method; 4 = average linkege method.
  if (!(method==1 || method==2 || method==3 || method==4))
     stop("method must be 1 (default), 2, 3, or 4.")
#
# option = "thru" (default) or "prompt":
# Go right thru all n-1 agglomerations, or ask the user whether to continue
# at each agglomeration?  Here is what we recommend: On the first occassion,
# go right through.  Check out the cluster criterion values reported on in the
# command window.  Then using the 'prompt' option, go as far as the desired
# partition, and print it out.
#
  if (!(option=="thru"||option=="prompt"))
     stop("option must be `thru` (default) or `prompt`.")
#
# alpha = 1.0 (default) or 0.0 < alpha <= 1.0:
# Only used by the minimum variance agglomerative criterion. Alpha downweights
# the variance component on the prinipal axis of the cluster, and thereby
# redefines the cluster criterion such that linear clusters are preferred.  A
# value of alpha = 1.0 gives exactly the traditional Ward's minimum variance
# method.  See Murtagh and Raftery (1984) for more on this altered criterion.
#
  if (alpha <= 0.0 || alpha > 1.0)
     stop("coefficient alpha must be > 0, <= 1. (Default 1.0).")
#
# repres = "chull" (default), "lines", "gauss":
# Representation of clusters: For input data which has dimensionality greater
# than 2, only "chull" is allowed (since the representations were found to be
# too problematic, given that the clustering takes place in m-dimensions, and
# the the representation is necessarily a 2-dimensional projection of this).
# Representation "chull" = convex hull, or straight line in case
# of colinear points.  (Note that we test for collinearity, using principal
# components analysis, since otherwise the S "hull" routine blows up.)
# Representation "lines" = lines of best fit (least squares fit is used,
# and the extremities are defined by the intersection of the min. and max. x

# values on this LS fit line). "gauss" = 1-sigma circles.  Side-effect of
# latter: warning messages indicate whenever circles overlap plot boundaries.
#
  if (!(repres=="chull"||repres=="lines"||repres=="gauss"))
     stop("repres must be one of: chull, lines, gauss.")
  if (m>2 && repres!="chull")
  warning("Only `chull' representation permitted for dim. > 2. Using this.\n")
#
# show = "change" or "all":
# In this method's present implementation, the clusters at each stage are
# 'highlighted' by means of a different symbol.  Option show = "change" has
# this highlighting changed at each agglomeration, so that only the most recent
# agglomerands are shown.  Option show = "all" does not turn off the highlight-
# ing in this way.
#
  if (!(show=="change"||show=="all"))
     stop("show option must be `change` (default) or `all`.")
#-----------------------------------------------------------------------------
#
  storage.mode(a)      <- "double"
  iklass               <- matrix(0.0, n, n) # Will store cluster assgmnts.
  storage.mode(iklass) <- "integer"
  membr                <- double(n)         # Singleton, cluster cardinalities
  flag                 <- logical(n)        # Bookkeeping: agglomerated yet?
  ia                   <- integer(n)        # Subnodes of a given node in the
  ib                   <- integer(n)        # ... hierarchy.
  crit                 <- double(n)         # Criterion val. at each level.
  potcl                <- integer(n)        # Bookkeeping: potential clus. mems
  potcl1               <- integer(n)        # Ditto
  potcl2               <- integer(n)        # Ditto
  vect1                <- double(m)
  vect2                <- double(m)
  array1               <- matrix(0.0, m, m)
  storage.mode(array1) <- "double"
  array2               <- matrix(0.0, m, m)
  storage.mode(array2) <- "double"
  centr                <- double(m)
  dat2                 <- double(n*m)
  ileast               <- 0                 # Agglomerand
  jleast               <- 0                 # Agglomerand
  dleast               <- 0.0               # Criterion value
  classx               <- 0                 # X-values corresp. to a cluster
  classy               <- 0                 # Y-values corresp. to a cluster
#  if (method==1) cat("   Alpha: ",round(as.single(alpha),5),"\n")
#
  if (method==1) meth <- "minimum variance"
  if (method==2) meth <- "single linkage"
  if (method==3) meth <- "complete linkage"
  if (method==4) meth <- "average linkage"

  ejn.col <-rainbow(10)   # EJN in
#
# x and y labels for plot; use principal components if dim. > 2
  if (m == 2)
     {
               xvalues              <- a[,1]
               yvalues              <- a[,2]
               xlabel               <- "coordinate 1"
               ylabel               <- "coordinate 2"
               if (method==1) {
                   titl                 <- paste("Hier. clust. ",
                                             "- crit.: ",meth,
                                             "- alpha: ",
                                              round(as.double(alpha),5) ) }
                   else       {
                   titl                 <- paste("Hier. clust. ",
                                             "- crit.: ",meth)  }
      }
  else
     {
               prco                 <- pca(a, 2)        # PCA on covariances
               xvalues              <- prco$rproj[,1]
               yvalues              <- prco$rproj[,2]
               pc1                  <- prco$evals[1]*100.0/sum(prco$evals)
               pc2                  <- prco$evals[2]*100.0/sum(prco$evals)
               xlabel               <- paste("principal component 1 (",
                                             round(as.double(pc1),2),
                                             "% of variance)")
               ylabel               <- paste("principal component 2 (",
                                             round(as.double(pc2),2),
                                             "% of variance)")
               if (method==1) {
                   titl                 <- paste("Hier. clust. ",
                                             "- crit.: ",meth,
                                             "- alpha: ",
                                              round(as.double(alpha),5),
                                              " - dim.: ",
                                              m)  }
                   else       {
                   titl                 <- paste("Hier. clust. ",
                                             "- crit.: ",meth,
                                              " - dim.: ",
                                              m)  }

     }
#
#  Set up plot
   plot(xvalues, yvalues, type="n", xlab=xlabel,ylab=ylabel,main=titl)
   points(xvalues, yvalues, type="n") # EJN in
   text(xvalues, yvalues, 1:10)       # EJN in
   # EJN out: points(xvalues, yvalues, type = "p")
#
#  init: Initialize
   initx <- .Fortran("init",
                     membr  = as.double(membr),
                     flag   = as.logical(flag),
                     iklass = as.matrix(iklass),
                     n      = as.integer(n),
                     m      = as.integer(m),
                     PACKAGE = "multiv")
#
  membr   <- initx$membr
  flag    <- initx$flag
  iklass  <- initx$iklass
#
  nmin1   <- n-1
  col.ind <- 1   # EJN in
  for (ncl in nmin1:1)
            {
#
             if (method == 1)
                {
#                gbd: Get best dissimilarity (method==1 case)
                 gbdx <- .Fortran("gbd",
                                  membr  = as.double(membr),
                                  flag   = as.logical(flag),
                                  dleast = as.double(dleast),
                                  ileast = as.integer(ileast),
                                  jleast = as.integer(jleast),
                                  ncl    = as.integer(ncl),
                                  ia     = as.integer(ia),
                                  ib     = as.integer(ib),
                                  iklass = as.matrix(iklass),
                                  potcl  = as.integer(potcl),
                                  array1 = as.matrix(array1),
                                  array2 = as.matrix(array2),
                                  vect1  = as.double(vect1),
                                  vect2  = as.double(vect2),
                                  dat2   = as.double(dat2),
                                  n      = as.integer(n),
                                  m      = as.integer(m),
                                  alpha  = as.double(alpha),
                                  a      = as.matrix(a),
                                  centr  = as.double(centr),
                                  PACKAGE = "multiv")
                  }
             else
                 {
#                 gbd2: Get best dissimilarity (method!=1 case)
                  gbdx <- .Fortran("gbd2",
                                   membr  = as.double(membr),
                                   flag   = as.logical(flag),
                                   dleast = as.double(dleast),
                                   ileast = as.integer(ileast),
                                   jleast = as.integer(jleast),
                                   ncl    = as.integer(ncl),
                                   ia     = as.integer(ia),
                                   ib     = as.integer(ib),
                                   iklass = as.matrix(iklass),
                                   potcl1 = as.integer(potcl1),
                                   potcl2 = as.integer(potcl2),
                                   array1 = as.matrix(array1),
                                   array2 = as.matrix(array2),
                                   vect1  = as.double(vect1),
                                   vect2  = as.double(vect2),
                                   dat2   = as.double(dat2),
                                   n      = as.integer(n),
                                   m      = as.integer(m),
                                   a      = as.matrix(a),
                                   method = as.integer(method),
                                   PACKAGE = "multiv")
                 }
#
       ileast <- gbdx$ileast
       jleast <- gbdx$jleast
       dleast <- gbdx$dleast
       membr  <- gbdx$membr
       flag   <- gbdx$flag
       ia     <- gbdx$ia
       ib     <- gbdx$ib
       iklass <- gbdx$iklass
       array1 <- gbdx$array1
       array2 <- gbdx$array2
       vect1  <- gbdx$vect1
       vect2  <- gbdx$vect2
       dat2   <- gbdx$dat2
       if (method == 1) {
           potcl  <- gbdx$potcl
           centr  <- gbdx$centr  }
       else  {
           potcl1 <- gbdx$potcl1
           potcl2 <- gbdx$potcl2 }
#
       lev    <- n-ncl
       cat("   Cluster ",lev," criterion value: ",round(dleast,5),"\n")
#
#      agg: Agglomerate
       aggx <- .Fortran("agg",
                        ileast = as.integer(ileast),
                        jleast = as.integer(jleast),
                        dleast = as.double(dleast),
                        ncl    = as.integer(ncl),
                        ia     = as.integer(ia),
                        ib     = as.integer(ib),
                        crit   = as.double(crit),
                        membr  = as.double(membr),
                        flag   = as.logical(flag),
                        n      = as.integer(n),
                        PACKAGE = "multiv")
                                        #
             ileast <- aggx$ileast
             jleast <- aggx$jleast
       dleast <- aggx$dleast
       ia     <- aggx$ia
       ib     <- aggx$ib
       crit   <- aggx$crit
       membr  <- aggx$membr
       flag   <- aggx$flag
#
#      gncm: Get new cluster memberships
       gncmx <- .Fortran("gncm",
                         ileast = as.integer(ileast),
                         jleast = as.integer(jleast),
                         ncl    = as.integer(ncl),
                         ia     = as.integer(ia),
                         ib     = as.integer(ib),
                         membr  = as.double(membr),
                         iklass = as.matrix(iklass),
                         flag   = as.logical(flag),
                         n      = as.integer(n),
                         PACKAGE = "multiv")
                                        #
       ileast <- gncmx$ileast
       jleast <- gncmx$jleast
       ia     <- gncmx$ia
       ib     <- gncmx$ib
       membr  <- gncmx$membr
       iklass <- gncmx$iklass
       flag   <- gncmx$flag
#
       if (ncl < nmin1 && show == "change")   {
#              Remove highlighting on last-agglomerated points:
               if (length(classx) > 1) {
                  lines(classx, classy, type="p", pch=4, col=0) }
#              In the case of "lines", remove them - else they cloud up things:
               if (length(classx) > 1 && repres=="lines") {
                  lines(c(xmin,xmax),c(ymin,ymax),col=0) }
#              In the case of "gauss", remove circles:
               if (length(classx) > 1 && repres=="gauss") {
                  symbols(mean(classx),mean(classy),
                  circles=sigma,add=TRUE,inches=FALSE,col=0)    }    }
       classvect <- iklass[n-ncl,]
       classx    <- xvalues[classvect==1]
       classy    <- yvalues[classvect==1]
       lines(classx, classy, type="p", pch=4)
#
       if (repres=="chull")  {
          if (length(classx)==2) {
   # EJN out         lines(classx, classy, type="l")
           lines(classx, classy, col=ejn.col[col.ind])    # EJN in


          } else if (length(classx)>2)  {
#            Assess linearity using PCA on covariances
             prco <- pca(cbind(classx,classy), 2)
#            Collinearity (arbitrarily!) defined as three thousandths of 2nd
#            eigenvalue (would be zero if data precisely collinear; 0.003
#            allows for small tolerance.)
             collin <- prco$evals[2]*1000.0
             if (collin <= 3)  {
                xmn <- min(classx)
                xmx <- max(classx)
                ymn <- min(classy)
                ymx <- max(classy)
                lines(c(xmn,xmx),c(ymx,ymn),type="l", col=ejn.col[col.ind])   #EJN in
       # EJN out    lines(c(xmn,xmx),c(ymn,ymx),type="l")
              }
             else  {
                hull <- chull(classx, classy)
                polygon(classx[hull], classy[hull], border=ejn.col[col.ind]) # EJN in
   # EJN out                polygon(classx[hull], classy[hull])
             }
           }
        }
        if (m==2 && repres=="gauss") {
           classn <- length(classx)
#          Use var in population terms - looks better for small 'classn':
           sigma <- sqrt(var(classx)*(classn-1)/classn+
                         var(classy)*(classn-1)/classn)
           symbols(mean(classx),mean(classy),circles=sigma,add=TRUE,inches=FALSE)  }
        if (m==2 && repres=="lines")  {
           z<-lsfit(classx,classy)
           xmin   <- min(classx)
           interc <- z$coef[1]
           slope  <- z$coef[2]
           ymin   <- slope*xmin+interc
           xmax   <- max(classx)
           ymax   <- slope*xmax+interc
   # EJN out        lines(c(xmin,xmax),c(ymin,ymax))
           lines(c(xmin,xmax),c(ymin,ymax), col=ejn.col[col.ind])    # EJN in
         }
#
        if (ncl > 1 && option == "prompt") {
           cat("Proceed? [y]\n")
           resp <- scan("",what=character(),1)
        if (length(resp)==0) resp <- "y"
        if (resp=="n" || resp=="N") {
           list(lev=lev,class=iklass[1:lev,])
           return()   }
        }
        col.ind <- col.ind + 1   # EJN in
             
#
# End of "for (ncl in nmin1:1)" loop:
   }
#
#
    part <- list(lev=lev,class=iklass[1:lev,])

# End of movie case:
  }

#   Finally return with what we came for.
#   For movie option, don't bother generating any output.

    if (movie) invisible(part)
    if (!movie) hier

}