datafu.pig.linkanalysis
Class PageRankImpl

java.lang.Object
  extended by datafu.pig.linkanalysis.PageRankImpl

public class PageRankImpl
extends java.lang.Object

An implementation of PageRank, used by the PageRank UDF. It is not intended to be used directly.


Constructor Summary
PageRankImpl()
           
 
Method Summary
 void addNode(java.lang.Integer sourceId, java.util.ArrayList<java.util.Map<java.lang.String,java.lang.Object>> sourceEdges)
           
 void addNode(java.lang.Integer sourceId, java.util.ArrayList<java.util.Map<java.lang.String,java.lang.Object>> sourceEdges, float bias)
           
 void clear()
           
 void commit(datafu.pig.linkanalysis.ProgressIndicator progressIndicator)
           
 void disableDanglingNodeHandling()
          Disables dangling node handling (dangling node handling is disabled by default).
 void disableEdgeDiskCaching()
          Disables the disk caching of edges that occurs once there are too many (edge disk caching is disabled by default).
 void disableNodeBiasing()
           
 void distribute(datafu.pig.linkanalysis.ProgressIndicator progressIndicator)
           
 long edgeCount()
           
 void enableDanglingNodeHandling()
          Enables dangling node handling (disabled by default).
 void enableEdgeDiskCaching()
          Enables disk caching of edges once there are too many (edge disk caching is disabled by default).
 void enableNodeBiasing()
           
 float getAlpha()
          Gets the page rank alpha value.
 long getEdgeCachingThreshold()
          Gets the number of edges past which they will be cached on disk instead of in memory.
 float getNodeBias(int nodeId)
           
 it.unimi.dsi.fastutil.ints.Int2IntMap.FastEntrySet getNodeIds()
           
 float getNodeRank(int nodeId)
           
 float getTotalRankChange()
           
 void init()
           
 void init(datafu.pig.linkanalysis.ProgressIndicator progressIndicator)
           
 boolean isEdgeDiskCachingEnabled()
          Gets whether edge disk caching is enabled.
 boolean isNodeBiasingEnabled()
           
 boolean isUsingEdgeDiskCache()
          Gets whether disk is being used to cache edges.
 float nextIteration()
           
 float nextIteration(datafu.pig.linkanalysis.ProgressIndicator progressIndicator)
           
 long nodeCount()
           
 void setAlpha(float alpha)
          Sets the page rank alpha value (default is 0.85).
 void setEdgeCachingThreshold(long count)
          Sets the number of edges past which edges will be cached on disk instead of in memory.
 void setNodeBias(int nodeId, float bias)
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Constructor Detail

PageRankImpl

public PageRankImpl()
Method Detail

clear

public void clear()
           throws java.io.IOException
Throws:
java.io.IOException

getAlpha

public float getAlpha()
Gets the page rank alpha value.

Returns:
The page rank alpha value

setAlpha

public void setAlpha(float alpha)
Sets the page rank alpha value (default is 0.85).

Parameters:
alpha - The page rank alpha value

isNodeBiasingEnabled

public boolean isNodeBiasingEnabled()

enableNodeBiasing

public void enableNodeBiasing()

disableNodeBiasing

public void disableNodeBiasing()

isUsingEdgeDiskCache

public boolean isUsingEdgeDiskCache()
Gets whether disk is being used to cache edges.

Returns:
True if the edges are cached on disk.

enableEdgeDiskCaching

public void enableEdgeDiskCaching()
Enables disk caching of edges once there are too many (edge disk caching is disabled by default).


disableEdgeDiskCaching

public void disableEdgeDiskCaching()
Disables the disk caching of edges that occurs once there are too many (edge disk caching is disabled by default).


isEdgeDiskCachingEnabled

public boolean isEdgeDiskCachingEnabled()
Gets whether edge disk caching is enabled.

Returns:
True if edge disk caching is enabled.

getEdgeCachingThreshold

public long getEdgeCachingThreshold()
Gets the number of edges past which they will be cached on disk instead of in memory. Edge disk caching must be enabled for this to have any effect.

Returns:
Edge count past which caching occurs

setEdgeCachingThreshold

public void setEdgeCachingThreshold(long count)
Sets the number of edges past which edges will be cached on disk instead of in memory. Edge disk caching must be enabled for this to have any effect.

Parameters:
count - Edge count past which caching occurs

enableDanglingNodeHandling

public void enableDanglingNodeHandling()
Enables dangling node handling (disabled by default).


disableDanglingNodeHandling

public void disableDanglingNodeHandling()
Disables dangling node handling (dangling node handling is disabled by default).


nodeCount

public long nodeCount()

edgeCount

public long edgeCount()

getNodeIds

public it.unimi.dsi.fastutil.ints.Int2IntMap.FastEntrySet getNodeIds()

getNodeRank

public float getNodeRank(int nodeId)

getTotalRankChange

public float getTotalRankChange()

getNodeBias

public float getNodeBias(int nodeId)

setNodeBias

public void setNodeBias(int nodeId,
                        float bias)

addNode

public void addNode(java.lang.Integer sourceId,
                    java.util.ArrayList<java.util.Map<java.lang.String,java.lang.Object>> sourceEdges)
             throws java.io.IOException
Throws:
java.io.IOException

addNode

public void addNode(java.lang.Integer sourceId,
                    java.util.ArrayList<java.util.Map<java.lang.String,java.lang.Object>> sourceEdges,
                    float bias)
             throws java.io.IOException
Throws:
java.io.IOException

init

public void init()
          throws java.io.IOException
Throws:
java.io.IOException

init

public void init(datafu.pig.linkanalysis.ProgressIndicator progressIndicator)
          throws java.io.IOException
Throws:
java.io.IOException

nextIteration

public float nextIteration(datafu.pig.linkanalysis.ProgressIndicator progressIndicator)
                    throws java.io.IOException
Throws:
java.io.IOException

nextIteration

public float nextIteration()
                    throws java.io.IOException
Throws:
java.io.IOException

distribute

public void distribute(datafu.pig.linkanalysis.ProgressIndicator progressIndicator)
                throws java.io.IOException
Throws:
java.io.IOException

commit

public void commit(datafu.pig.linkanalysis.ProgressIndicator progressIndicator)


Matthew Hayes, Sam Shah