import java.io.*;
import java.util.*;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.util.*;
import org.apache.hadoop.mapreduce.lib.input.*;   // for FileInputFormat and FileSplit
import org.apache.hadoop.mapreduce.lib.output.*;

public class mapcounter extends Mapper {

    int tid;      // task id within the job - used as part of the output key
    int filenum;  // number of the file this mapper is reading from

    // Override the setup function to read additional configuration:
    protected void setup(Context context) throws IOException, InterruptedException {
        Configuration config = context.getConfiguration();
        String taskid = config.get("mapred.task.partition");   // get the task id
        tid = Integer.parseInt(taskid);

        // Another possibility is to get the current file name, but
        // config.get("map.input.file") is not working (Hadoop 0.20.2 bug).
        // Instead, one has to call:
        String filename = ((FileSplit) context.getInputSplit()).getPath().getName();
        filenum = Integer.parseInt(filename.substring(3, 5));   // two-digit file number embedded in the name

        super.setup(context);   // default setup
    } // setup

    public static long countcats(String line) {
        long count = 0;   // local counter
        int i = 0;        // position counter
        while (i