public class DistinctBy
extends org.apache.pig.AccumulatorEvalFunc<org.apache.pig.data.DataBag>
Example (Pig Latin):
define DistinctBy datafu.pig.bags.DistinctBy('0');
-- input:
-- ({(a,1),(a,1),(b,2),(b,22),(c,3),(d,4)})
input = LOAD 'input' AS (B: bag {T: tuple(alpha:CHARARRAY, numeric:INT)});
output = FOREACH input GENERATE DistinctBy(B);
-- output:
-- ({(a,1),(b,2),(c,3),(d,4)})
| Constructor and Description |
|---|
| DistinctBy(java.lang.String... fields) |
| Modifier and Type | Method and Description |
|---|---|
| void | accumulate(org.apache.pig.data.Tuple input) |
| void | cleanup() |
| org.apache.pig.data.DataBag | getValue() |
| org.apache.pig.impl.logicalLayer.schema.Schema | outputSchema(org.apache.pig.impl.logicalLayer.schema.Schema input) |
allowCompileTimeCalculation, finish, getArgToFuncMapping, getCacheFiles, getInputSchema, getLogger, getPigLogger, getReporter, getReturnType, getSchemaName, getSchemaType, getShipFiles, isAsynchronous, progress, setInputSchema, setPigLogger, setReporter, setUDFContextSignature, warn
public void accumulate(org.apache.pig.data.Tuple input) throws java.io.IOException
accumulate in interface org.apache.pig.Accumulator<org.apache.pig.data.DataBag>
accumulate in class org.apache.pig.AccumulatorEvalFunc<org.apache.pig.data.DataBag>
Throws: java.io.IOException
public void cleanup()
cleanup in interface org.apache.pig.Accumulator<org.apache.pig.data.DataBag>
cleanup in class org.apache.pig.AccumulatorEvalFunc<org.apache.pig.data.DataBag>
public org.apache.pig.data.DataBag getValue()
getValue in interface org.apache.pig.Accumulator<org.apache.pig.data.DataBag>
getValue in class org.apache.pig.AccumulatorEvalFunc<org.apache.pig.data.DataBag>
public org.apache.pig.impl.logicalLayer.schema.Schema outputSchema(org.apache.pig.impl.logicalLayer.schema.Schema input)
outputSchema in class org.apache.pig.EvalFunc<org.apache.pig.data.DataBag>