datafu.pig.stats
Class VAR

java.lang.Object
  extended by org.apache.pig.EvalFunc<java.lang.Double>
      extended by datafu.pig.stats.VAR
All Implemented Interfaces:
org.apache.pig.Accumulator<java.lang.Double>, org.apache.pig.Algebraic

public class VAR
extends org.apache.pig.EvalFunc<java.lang.Double>
implements org.apache.pig.Algebraic, org.apache.pig.Accumulator<java.lang.Double>

Generates the variance of a set of values. This UDF uses the fact that variance(x) = average(x^2) - average(x)^2. This class implements Algebraic, so if possible the execution will be performed in a distributed fashion. VAR implements the Accumulator interface as well. Input: Bag of int, long, double, float or bytearray. Output: Double.

Example:

 define VAR datafu.pig.stats.VAR();
 
 -- input: 1,2,3,4,10,5,6,7,8,9
 input = LOAD 'input' AS (val:int);
 grouped = GROUP input ALL;
 variance = FOREACH grouped GENERATE VAR(input.val) AS variance;
 


Nested Class Summary
static class VAR.Final
           
static class VAR.Initial
           
static class VAR.Intermediate
           
 
Field Summary
 
Fields inherited from class org.apache.pig.EvalFunc
log, pigLogger, reporter, returnType
 
Constructor Summary
VAR()
           
 
Method Summary
 void accumulate(org.apache.pig.data.Tuple b)
           
 void cleanup()
           
protected static org.apache.pig.data.Tuple combine(org.apache.pig.data.DataBag values)
           
protected static long count(org.apache.pig.data.Tuple input)
           
 java.lang.Double exec(org.apache.pig.data.Tuple input)
           
 java.util.List<org.apache.pig.FuncSpec> getArgToFuncMapping()
           
 java.lang.String getFinal()
           
 java.lang.String getInitial()
           
 java.lang.String getIntermed()
           
 java.lang.Double getValue()
           
 org.apache.pig.impl.logicalLayer.schema.Schema outputSchema(org.apache.pig.impl.logicalLayer.schema.Schema input)
           
protected static java.lang.Double sum(org.apache.pig.data.Tuple input)
           
protected static java.lang.Double sumSquare(org.apache.pig.data.Tuple input)
           
 
Methods inherited from class org.apache.pig.EvalFunc
finish, getCacheFiles, getInputSchema, getLogger, getPigLogger, getReporter, getReturnType, getSchemaName, isAsynchronous, progress, setInputSchema, setPigLogger, setReporter, setUDFContextSignature, warn
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Constructor Detail

VAR

public VAR()
Method Detail

exec

public java.lang.Double exec(org.apache.pig.data.Tuple input)
                      throws java.io.IOException
Specified by:
exec in class org.apache.pig.EvalFunc<java.lang.Double>
Throws:
java.io.IOException

getInitial

public java.lang.String getInitial()
Specified by:
getInitial in interface org.apache.pig.Algebraic

getIntermed

public java.lang.String getIntermed()
Specified by:
getIntermed in interface org.apache.pig.Algebraic

getFinal

public java.lang.String getFinal()
Specified by:
getFinal in interface org.apache.pig.Algebraic

combine

protected static org.apache.pig.data.Tuple combine(org.apache.pig.data.DataBag values)
                                            throws org.apache.pig.backend.executionengine.ExecException
Throws:
org.apache.pig.backend.executionengine.ExecException

count

protected static long count(org.apache.pig.data.Tuple input)
                     throws org.apache.pig.backend.executionengine.ExecException
Throws:
org.apache.pig.backend.executionengine.ExecException

sum

protected static java.lang.Double sum(org.apache.pig.data.Tuple input)
                               throws org.apache.pig.backend.executionengine.ExecException,
                                      java.io.IOException
Throws:
org.apache.pig.backend.executionengine.ExecException
java.io.IOException

sumSquare

protected static java.lang.Double sumSquare(org.apache.pig.data.Tuple input)
                                     throws org.apache.pig.backend.executionengine.ExecException,
                                            java.io.IOException
Throws:
org.apache.pig.backend.executionengine.ExecException
java.io.IOException

outputSchema

public org.apache.pig.impl.logicalLayer.schema.Schema outputSchema(org.apache.pig.impl.logicalLayer.schema.Schema input)
Overrides:
outputSchema in class org.apache.pig.EvalFunc<java.lang.Double>

getArgToFuncMapping

public java.util.List<org.apache.pig.FuncSpec> getArgToFuncMapping()
                                                            throws org.apache.pig.impl.logicalLayer.FrontendException
Overrides:
getArgToFuncMapping in class org.apache.pig.EvalFunc<java.lang.Double>
Throws:
org.apache.pig.impl.logicalLayer.FrontendException

accumulate

public void accumulate(org.apache.pig.data.Tuple b)
                throws java.io.IOException
Specified by:
accumulate in interface org.apache.pig.Accumulator<java.lang.Double>
Throws:
java.io.IOException

cleanup

public void cleanup()
Specified by:
cleanup in interface org.apache.pig.Accumulator<java.lang.Double>

getValue

public java.lang.Double getValue()
Specified by:
getValue in interface org.apache.pig.Accumulator<java.lang.Double>


Matthew Hayes, Sam Shah