n.baskets <- 2733
numb <- 5                            # Number of items in baskets
   # Simulation of baskets
set.seed(12345, "default")           # Try to make replication possible
   # Create a matrix to represent the basket (holds 5 items) 
baskets <- matrix(0, n.baskets, numb)
heading <- c("Milk", "Peanut.Butter", "Bread", "Cereal", "Jelly")
dimnames(baskets) <- list (NULL, heading)
   # Put "Peanut.Butter", "Bread","Jelly" in baskets 1-82
baskets[1:82, c("Peanut.Butter", "Bread","Jelly")] <- 1
   # and "Peanut.Butter","Jelly" in 83 to 100
baskets[83:100,c("Peanut.Butter","Jelly")] <- 1
   # (P)eanut Butter & (J)elly can only appear together
   #        in the first 100 baskets
   # n.@, b.@, e.@ represent the number of the item @,
   #    the starting and ending baskets
   # sample(100,n.@) produces n.@ random integers from 1-100
   # Scatter some (M)ilk and (C)ereal in the first 100 baskets
n.M <- 60
baskets[sample(100, n.M), c("Milk")] <- 1
n.C <- 55
baskets[sample(100, n.C), c("Cereal")] <- 1
   # sample(e.@ - b.@, n.@) + b.@
   # e.@ - b.@ gives the range [1, e.@ - b.@] from which to choose n.J
   # adding b.@ makes the ramge [b.@+1, e.@ - b.@ + b.@] 
n.J <- 800; b.J <- 100;  e.J <- 1503
baskets[sample(e.J - b.J, n.J) + b.J, c("Jelly")] <- 1

n.P <- 700; b.P <- 1514;  e.P <- 2733
baskets[sample(e.P - b.P, n.P) + b.P, c("Peanut.Butter")] <- 1
   # Now put more Milk, Cereal, and Bread in some baskets
n.M <- 500; b.M <- 100;  e.M <- 1103
baskets[sample(e.M - b.M, n.M) + b.M, c("Milk")] <- 1

n.M <- 450; b.M <- 200;  e.M <- 2733
baskets[sample(e.M - b.M, n.M) + b.M, c("Milk")] <- 1

n.C <- 780; b.C <- 600; e.C <- 1600
baskets[sample(e.C - b.C, n.C) + b.C, c("Cereal")] <- 1
n.B <- 1200; b.B <- 100;  e.B <- 2733
baskets[sample(e.B - b.B, n.B) + b.B, c("Bread")] <- 1
   # End of simulation
# write.table(baskets, quote=F, file="I:/2016-17/STAT5703/Data/baskets.dat")
   # 
baskets <- read.table("I:/2016-17/STAT5703/Data/baskets.dat")


(item.in.basket <- apply (baskets, 2, sum))
(percent.in.basket <- round(item.in.basket/n.baskets*100, 2))

# All unique pairs
n.comb.2 <- choose(numb,2)
double <- matrix(0, n.comb.2, 2)
ind <- 1
for (i in 1:numb) {
   j <- i+1
   while (j <= numb) {  # for can run 6:5 ...
      double[ind,] <- c(i,j)
      ind <- ind + 1
      j <- j + 1
   }
}

    # double
matrix(heading[double], n.comb.2, 2)

double.counts <- matrix(0, n.comb.2, 3)
for (i in 1:dim(double)[1]){
   double.counts[i,] <-c(double[i,],sum(floor(apply(baskets[,double[i,]],1,sum)/2)))
}
matrix(c(heading[double.counts[,1:2]], double.counts[,3]), n.comb.2, 3)

# All unique triples
n.comb.3 <- choose(numb,3)
triple <- matrix(0, n.comb.3, 3)
ind <- 1
for (i in 1:numb) {
   j <- i+1
   while (j <= numb) {  # for can run 6:5 ...
      k <- j + 1
      while (k <= numb) { 
         triple[ind, ] <- c(i,j,k)
         ind <- ind + 1
         k <- k + 1
      }
      j <- j + 1
   }
}
   # triple
matrix(heading[triple], n.comb.3, 3)

triple.counts <- matrix(0, n.comb.3, 4)
for (i in 1:dim(triple)[1]){
   triple.counts[i,] <-c(triple[i,],sum(floor(apply(baskets[,triple[i,]],1,sum)/3)))
}
matrix(c(heading[triple.counts[,1:3]],triple.counts[,4]), n.comb.3, 4)


  # quads
matrix(heading[quad], n.comb.4, 5)

quad.counts <- matrix(0, n.comb.4, 5)
for (i in 1:dim(quad)[1]){
   qu
# All unique triples
n.comb.3 <- choose(numb,3)
triple <- matrix(0, n.comb.3, 3)
ind <- 1
for (i in 1:numb) {
   j <- i+1
   while (j <= numb) {  # for can run 6:5 ...
      k <- j + 1
      while (k <= numb) { 
         triple[ind, ] <- c(i,j,k)
         ind <- ind + 1
         k <- k + 1
      }
      j <- j + 1
   }
}
   # triple
matrix(heading[triple], n.comb.3, 3)

triple.counts <- matrix(0, n.comb.3, 4)
for (i in 1:dim(triple)[1]){
   triple.counts[i,] <-c(triple[i,],sum(floor(apply(baskets[,triple[i,]],1,sum)/3)))
}
matrix(c(heading[triple.counts[,1:3]],triple.counts[,4]), n.comb.3, 4)

# All unique quads
n.comb.4 <- choose(numb,4)
quad <- matrix(0, n.comb.4, 4)
ind <- 1
for (i in 1:numb) {
   j <- i+1
   while (j <= numb) {  # for can run 6:5 ...
      k <- j + 1
      while (k <= numb) {
         l <- k + 1
         while (l <= numb) { 
            quad[ind, ] <- c(i,j,k,l)
            ind <- ind + 1
            l <- l + 1
          }
          k <- k + 1
      }
      j <- j + 1
   }
}
ad.counts[i,] <-c(quad[i,],sum(floor(apply(baskets[,quad[i,]],1,sum)/4)))
}

matrix(c(heading[quad.counts[,1:4]],quad.counts[,5]), n.comb.4, 5)

   # Look for influences
for (i in 1:length(item.in.basket)){    # Run through all the singles
   for (j in 1:nrow(triple)){      # For each single, look at each double
      one.in <- i == double[j,]
      if (any(one.in)) {               # Test to see if single in current double
         s.c <- item.in.basket[i]     # It is, so we get the appropriate counts.
         d.c <- double.counts[j, 3]
         item <- double[j, which(!one.in)]
         other <- heading[i]
         cat("When ", other,
               " is purchased (", s.c, "/", n.baskets, " = ",
               round(100*s.c/n.baskets, 2),"%) Support(", other, ")\n   ",
               heading[item], " was purchased (", d.c, "/", s.c,
               " = ", round(100*d.c/s.c, 2),"%)  Confidence\n", sep = "")
         cat("   Overall ", heading[item], " purchase rate is ", percent.in.basket[item],
                  "%)  Support(", heading[item], "),\n", sep = "")
         cat("   Lift ", round(10000*d.c/s.c, 0)/percent.in.basket[item], "%\n", sep = "")
      }
   }
}

for (i in 1:nrow(double)){    # Run through all the doubles
   for (j in 1:nrow(triple)){ # For each double, look at each triple
      one.in <- double[i,1] == triple[j,]
      two.in <- double[i,2] == triple[j,]
      if (sum(two.in*1 + one.in*1) == 2) { # Test to see if double in current triple
         d.c <- double.counts[i, 3]     # It is, so we get the appropriate counts.
         t.c <- triple.counts[j, 4]
         item <- triple[j, which(!(two.in|one.in))]
         others <- paste(heading[double[i,1]],heading[double[i,2]], sep = "/")
         cat("When ", others,
               " are purchased (", d.c, "/", n.baskets, " = ",
               round(100*d.c/n.baskets, 2),"%) Support(", others, ")\n   ",
               heading[item], " was purchased (", t.c,"/",d.c,
               " = ", round(100*t.c/d.c, 2),"%)  Confidence\n", sep = "")
         cat("   Overall ", heading[item], " purchase rate is ", percent.in.basket[item],
                  "%)  Support(", heading[item], "),\n", sep = "")
         cat("   Lift ", round(10000*t.c/d.c, 0)/percent.in.basket[item], "%\n", sep = "")
      }
   }
}

for (i in 1:nrow(triple)){    # Run through all the triples
   for (j in 1:nrow(quad)){ # For each triple, look at each quad
      one.in <- triple[i, 1] == quad[j,]
      two.in <- triple[i, 2] == quad[j,]
      three.in <- triple[i, 3] == quad[j,]
      if (sum(two.in*1 + one.in*1 + three.in*1) == 3) { # Test to see if triple in current quad
         t.c <- triple.counts[i, 4]     # It is, so we get the appropriate counts.
         q.c <- quad.counts[j, 5]
         item <- quad[j, which(!(three.in|two.in|one.in))]
         others <- paste(heading[triple[i,]], collapse = "/")
         cat("When ", others,
               " are purchased (", t.c, "/", n.baskets, " = ",
               round(100*t.c/n.baskets, 2),"%) Support(", others, ")\n   ",
               heading[item], " was purchased (", q.c,"/", t.c,
               " = ", round(100*q.c/t.c, 2),"%)  Confidence\n", sep = "")
         cat("   Overall ", heading[item], " purchase rate is ", percent.in.basket[item],
                  "%)  Support(", heading[item], "),\n", sep = "")
         cat("   Lift ", round(10000*q.c/t.c, 0)/percent.in.basket[item], "%\n", sep = "")
      }
   }
}

library(arules)
rules <- apriori(apply(baskets, 2, as.numeric), parameter = list(supp = 0.01, conf = 0.01, target = "rules"))

summary(rules)
inspect(rules)


   # For use by apriori
Heading <- c("Milk", "Peanut.Butter", "Bread", "Cereal", "Jelly")
write(Heading[which(baskets[1,]==1)],
      ncolumns = 5,
      file = "I:/2016-17/STAT5703/Data/MB.tab",                     
      append = FALSE)
for(i in 2:n.baskets) {
  if (sum(baskets[i,]==1)>0){
    write(Heading[which(baskets[i,]==1)],
          ncolumns = 5,
          file = "I:/2016-17/STAT5703/Data/MB.tab",
          append = TRUE)
  } else {  # Put meat in any empty baskets
    write("Meat",
          ncolumns = 5,
          file = "I:/2016-17/STAT5703/Data/MB.tab",
            append = TRUE)
  }
}