datafu.pig.bags
Class CountEach

java.lang.Object
  extended by org.apache.pig.EvalFunc<T>
      extended by org.apache.pig.AccumulatorEvalFunc<org.apache.pig.data.DataBag>
          extended by datafu.pig.bags.CountEach
All Implemented Interfaces:
org.apache.pig.Accumulator<org.apache.pig.data.DataBag>

public class CountEach
extends org.apache.pig.AccumulatorEvalFunc<org.apache.pig.data.DataBag>

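Generates a count of the number of times each distinct tuple appears in a bag. By default each output tuple nests the distinct tuple alongside its count; when constructed with the 'flatten' argument, the fields of the distinct tuple are emitted directly, followed by the count (see the example below).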

Example:

 DEFINE CountEach datafu.pig.bags.CountEach();
 DEFINE CountEachFlatten datafu.pig.bags.CountEach('flatten');
 
 -- input: 
 -- ({(A),(A),(C),(B)})
 input = LOAD 'input' AS (B: bag {T: tuple(alpha:CHARARRAY)});
 
 -- output: 
 -- ({((A),2),((C),1),((B),1)})
 output = FOREACH input GENERATE CountEach(B); 
 
 -- output_flatten: 
 -- ({(A,2),(C,1),(B,1)})
 output_flatten = FOREACH input GENERATE CountEachFlatten(B);
  
 


Field Summary
 
Fields inherited from class org.apache.pig.EvalFunc
log, pigLogger, reporter, returnType
 
Constructor Summary
CountEach()
           
CountEach(java.lang.String arg)
           
 
Method Summary
 void accumulate(org.apache.pig.data.Tuple input)
           
 void cleanup()
           
 org.apache.pig.data.DataBag getValue()
           
 org.apache.pig.impl.logicalLayer.schema.Schema outputSchema(org.apache.pig.impl.logicalLayer.schema.Schema input)
           
 
Methods inherited from class org.apache.pig.AccumulatorEvalFunc
exec
 
Methods inherited from class org.apache.pig.EvalFunc
finish, getArgToFuncMapping, getCacheFiles, getInputSchema, getLogger, getPigLogger, getReporter, getReturnType, getSchemaName, isAsynchronous, progress, setInputSchema, setPigLogger, setReporter, setUDFContextSignature, warn
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Constructor Detail

CountEach

public CountEach()

CountEach

public CountEach(java.lang.String arg)
Method Detail

accumulate

public void accumulate(org.apache.pig.data.Tuple input)
                throws java.io.IOException
Specified by:
accumulate in interface org.apache.pig.Accumulator<org.apache.pig.data.DataBag>
Specified by:
accumulate in class org.apache.pig.AccumulatorEvalFunc<org.apache.pig.data.DataBag>
Throws:
java.io.IOException

getValue

public org.apache.pig.data.DataBag getValue()
Specified by:
getValue in interface org.apache.pig.Accumulator<org.apache.pig.data.DataBag>
Specified by:
getValue in class org.apache.pig.AccumulatorEvalFunc<org.apache.pig.data.DataBag>

cleanup

public void cleanup()
Specified by:
cleanup in interface org.apache.pig.Accumulator<org.apache.pig.data.DataBag>
Specified by:
cleanup in class org.apache.pig.AccumulatorEvalFunc<org.apache.pig.data.DataBag>

outputSchema

public org.apache.pig.impl.logicalLayer.schema.Schema outputSchema(org.apache.pig.impl.logicalLayer.schema.Schema input)
Overrides:
outputSchema in class org.apache.pig.EvalFunc<org.apache.pig.data.DataBag>


Matthew Hayes, Sam Shah