import java.io.*;
import java.util.*;
public class Apriori {
public static void
main(String[] args) {
AprioriCalculation ap =
new AprioriCalculation();
ap.aprioriProcess();
}
}
class AprioriCalculation
Vector<String>
candidates=new Vector<String>();
String
configFile="config.txt";
String
transaFile="transa.txt";
String
outputFile="apriori-output.txt";
int numItems;
int numTransactions;
double minSup;
String oneVal[];
String itemSep = " ";
public void aprioriProcess()
{
Date d;
long start, end;
int itemsetNumber=0;
//get config
getConfig();
System.out.println("Apriori algorithm has started.\n");
//start timer
d = new Date();
start = d.getTime();
//while not complete
do
{
itemsetNumber++;
generateCandidates(itemsetNumber);
calculateFrequentItemsets(itemsetNumber);
if(candidates.size()!=0)
{
System.out.println("Frequent " + itemsetNumber +
"-itemsets");
System.out.println(candidates);
}
}while(candidates.size()>1);
//end timer
d = new Date();
end = d.getTime();
System.out.println("Execution time is:
"+((double)(end-start)/1000) + " seconds.");
}
public static String getInput()
{
String
input="";
BufferedReader reader =
new BufferedReader(new InputStreamReader(System.in));
try
{
input =
reader.readLine();
}
catch (Exception e)
{
System.out.println(e);
}
return input;
}
private void getConfig()
{
FileWriter fw;
BufferedWriter file_out;
String
input="";
System.out.println("Default Configuration: ");
System.out.println("\tRegular transaction file with '" +
itemSep + "' item separator.");
System.out.println("\tConfig
File: " + configFile);
System.out.println("\tTransa File: " + transaFile);
System.out.println("\tOutput File: " + outputFile);
System.out.println("\nPress 'C' to change the item separator,
configuration file and transaction files");
System.out.print("or
any other key to continue. ");
input=getInput();
if(input.compareToIgnoreCase("c")==0)
{
System.out.print("Enter new transaction filename (return for
'"+transaFile+"'): ");
input=getInput();
if(input.compareToIgnoreCase("")!=0)
transaFile=input;
System.out.print("Enter new configuration filename (return for
'"+configFile+"'): ");
input=getInput();
if(input.compareToIgnoreCase("")!=0)
configFile=input;
System.out.print("Enter new output filename (return for
'"+outputFile+"'): ");
input=getInput();
if(input.compareToIgnoreCase("")!=0)
outputFile=input;
System.out.println("Filenames changed");
System.out.print("Enter the separating character(s) for items
(return for '"+itemSep+"'): ");
input=getInput();
if(input.compareToIgnoreCase("")!=0)
itemSep=input;
}
try
{
FileInputStream
file_in = new FileInputStream(configFile);
BufferedReader
data_in = new BufferedReader(new InputStreamReader(file_in));
//number of items
numItems=Integer.valueOf(data_in.readLine()).intValue();
//number of
transactions
numTransactions=Integer.valueOf(data_in.readLine()).intValue();
//minsup
minSup=(Double.valueOf(data_in.readLine()).doubleValue());
//output config info
to the user
System.out.print("\nInput configuration: "+numItems+"
items, "+numTransactions+" transactions, ");
System.out.println("minsup = "+minSup+"%");
System.out.println();
minSup/=100.0;
oneVal = new
String[numItems];
System.out.print("Enter 'y' to change the value each row recognizes
as a '1':");
if(getInput().compareToIgnoreCase("y")==0)
{
for(int i=0;
i<oneVal.length; i++)
{
System.out.print("Enter value for column #" + (i+1) + ":
");
oneVal[i] =
getInput();
}
}
else
for(int i=0; i<oneVal.length;
i++)
oneVal[i]="1";
fw= new
FileWriter(outputFile);
file_out = new
BufferedWriter(fw);
file_out.write(numTransactions + "\n");
file_out.write(numItems
+ "\n******\n");
file_out.close();
}
catch(IOException
e)
{
System.out.println(e);
}
}
private void
generateCandidates(int n)
{
Vector<String>
tempCandidates = new Vector<String>();
String str1, str2;
StringTokenizer st1, st2;
if(n==1)
{
for(int i=1;
i<=numItems; i++)
{
tempCandidates.add(Integer.toString(i));
}
}
else if(n==2) {
for(int i=0;
i<candidates.size(); i++)
{
st1 = new
StringTokenizer(candidates.get(i));
str1 =
st1.nextToken();
for(int j=i+1;
j<candidates.size(); j++)
{
st2 = new
StringTokenizer(candidates.elementAt(j));
str2 =
st2.nextToken();
tempCandidates.add(str1 + " " + str2);
}
}
}
else
{
for(int i=0;
i<candidates.size(); i++)
{
for(int j=i+1;
j<candidates.size(); j++)
{
str1 = new
String();
str2 = new
String();
st1 = new
StringTokenizer(candidates.get(i));
st2 = new
StringTokenizer(candidates.get(j));
for(int s=0;
s<n-2; s++)
{
str1 =
str1 + " " + st1.nextToken();
str2 =
str2 + " " + st2.nextToken();
}
if(str2.compareToIgnoreCase(str1)==0)
tempCandidates.add((str1 + " " + st1.nextToken() + "
" + st2.nextToken()).trim());
}
}
}
candidates.clear();
candidates = new
Vector<String>(tempCandidates);
tempCandidates.clear();
}
private void
calculateFrequentItemsets(int n)
{
Vector<String>
frequentCandidates = new Vector<String>();
FileInputStream
file_in;
BufferedReader data_in;
FileWriter fw;
BufferedWriter file_out;
StringTokenizer st,
stFile;
boolean match;
boolean trans[] = new
boolean[numItems];
int count[] = new
int[candidates.size()];
try
{
//output file
fw= new
FileWriter(outputFile, true);
file_out = new
BufferedWriter(fw);
//load the
transaction file
file_in = new
FileInputStream(transaFile);
data_in = new
BufferedReader(new InputStreamReader(file_in));
for(int i=0;
i<numTransactions; i++)
{
stFile = new
StringTokenizer(data_in.readLine(), itemSep); //read a line from the file to
the tokenizer
for(int j=0;
j<numItems; j++)
{
trans[j]=(stFile.nextToken().compareToIgnoreCase(oneVal[j])==0); //if it
is not a 0, assign the value to true
}
for(int c=0;
c<candidates.size(); c++)
{
match =
false;
st = new
StringTokenizer(candidates.get(c));
while(st.hasMoreTokens())
{
match
= (trans[Integer.valueOf(st.nextToken())-1]);
if(!match)
break;
}
if(match)
count[c]++;
}
}
for(int i=0;
i<candidates.size(); i++)
{
if((count[i]/(double)numTransactions)>=minSup)
{
frequentCandidates.add(candidates.get(i));
file_out.write(candidates.get(i) + "," +
count[i]/(double)numTransactions + "\n");
}
}
file_out.write("-\n");
file_out.close();
}
catch(IOException e)
{
System.out.println(e);
}
candidates.clear();
candidates = new
Vector<String>(frequentCandidates);
frequentCandidates.clear();
}
}
/*OUTPUT:
Default Configuration:
Regular transaction file
with ' ' item separator.
Config File: config.txt
Transa File: transa.txt
Output File:
apriori-output.txt
Press 'C' to change the item separator, configuration file and
transaction files
or any other key to continue.
Input configuration: 5 items, 4 transactions, minsup = 50.0%
Enter 'y' to change the value each row recognizes as a '1':
Apriori algorithm has started.
Frequent 1-itemsets
[1, 2, 3, 5]
Frequent 2-itemsets
[1 3, 2 3, 2 5, 3 5]
Frequent 3-itemsets
[2 3 5]
Execution time is: 0.021 seconds.
*/