public class OrcFlowFileWriter
extends java.lang.Object
implements org.apache.hadoop.hive.ql.io.orc.Writer, org.apache.hadoop.hive.ql.io.orc.MemoryManager.Callback
This class is synchronized so that multi-threaded access is ok. In particular, because the MemoryManager is shared between writers, this class assumes that checkMemory may be called from a separate thread.
| Constructor and Description |
|---|
| OrcFlowFileWriter(java.io.OutputStream flowFileOutputStream, org.apache.hadoop.fs.Path path, org.apache.hadoop.conf.Configuration conf, org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector inspector, long stripeSize, org.apache.hadoop.hive.ql.io.orc.CompressionKind compress, int bufferSize, int rowIndexStride, org.apache.hadoop.hive.ql.io.orc.MemoryManager memoryManager, boolean addBlockPadding, org.apache.hadoop.hive.ql.io.orc.OrcFile.Version version, org.apache.hadoop.hive.ql.io.orc.OrcFile.WriterCallback callback, org.apache.hadoop.hive.ql.io.orc.OrcFile.EncodingStrategy encodingStrategy, org.apache.hadoop.hive.ql.io.orc.OrcFile.CompressionStrategy compressionStrategy, float paddingTolerance, long blockSizeValue, java.lang.String bloomFilterColumnNames, double bloomFilterFpp) |
| Modifier and Type | Method and Description |
|---|---|
| void | addRow(java.lang.Object row) |
| void | addRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch batch) |
| void | addUserMetadata(java.lang.String name, java.nio.ByteBuffer value) |
| void | appendStripe(byte[] stripe, int offset, int length, org.apache.hadoop.hive.ql.io.orc.StripeInformation stripeInfo, org.apache.hadoop.hive.ql.io.orc.OrcProto.StripeStatistics stripeStatistics) |
| void | appendUserMetadata(java.util.List<org.apache.hadoop.hive.ql.io.orc.OrcProto.UserMetadataItem> userMetadata) |
| boolean | checkMemory(double newScale) |
| void | close() |
| static org.apache.hadoop.hive.ql.io.orc.CompressionCodec | createCodec(org.apache.hadoop.hive.ql.io.orc.CompressionKind kind) |
| long | getNumberOfRows() The row count is updated when stripes are flushed. |
| long | getRawDataSize() The raw data size is computed when the file footer is written. |
| java.io.OutputStream | getStream() |
| long | writeIntermediateFooter() |
public OrcFlowFileWriter(java.io.OutputStream flowFileOutputStream, org.apache.hadoop.fs.Path path, org.apache.hadoop.conf.Configuration conf, org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector inspector, long stripeSize, org.apache.hadoop.hive.ql.io.orc.CompressionKind compress, int bufferSize, int rowIndexStride, org.apache.hadoop.hive.ql.io.orc.MemoryManager memoryManager, boolean addBlockPadding, org.apache.hadoop.hive.ql.io.orc.OrcFile.Version version, org.apache.hadoop.hive.ql.io.orc.OrcFile.WriterCallback callback, org.apache.hadoop.hive.ql.io.orc.OrcFile.EncodingStrategy encodingStrategy, org.apache.hadoop.hive.ql.io.orc.OrcFile.CompressionStrategy compressionStrategy, float paddingTolerance, long blockSizeValue, java.lang.String bloomFilterColumnNames, double bloomFilterFpp) throws java.io.IOException
Throws:
java.io.IOException
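As a point of reference, the sketch below shows one way a caller might invoke this constructor. The schema, compression, and sizing values are illustrative assumptions rather than documented defaults, and the shared MemoryManager and ObjectInspector are taken as parameters because how they are obtained is environment-specific. The import for OrcFlowFileWriter itself is omitted since its package depends on the bundle that ships it.

```java
import java.io.IOException;
import java.io.OutputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.orc.CompressionKind;
import org.apache.hadoop.hive.ql.io.orc.MemoryManager;
import org.apache.hadoop.hive.ql.io.orc.OrcFile;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
// plus an import for OrcFlowFileWriter from its defining package

public class OrcFlowFileWriterExample {

    /**
     * Opens a writer over an already-open FlowFile OutputStream. The memoryManager is the
     * instance shared between writers (see the class description); creating or locating it
     * is left to the caller in this sketch.
     */
    static OrcFlowFileWriter openWriter(OutputStream flowFileOut,
                                        Path path,
                                        Configuration conf,
                                        ObjectInspector inspector,
                                        MemoryManager memoryManager) throws IOException {
        return new OrcFlowFileWriter(
                flowFileOut,
                path,
                conf,
                inspector,
                64L * 1024 * 1024,                    // stripeSize: illustrative 64 MB
                CompressionKind.ZLIB,                 // compress
                256 * 1024,                           // bufferSize: illustrative 256 KB
                10_000,                               // rowIndexStride
                memoryManager,                        // shared MemoryManager
                true,                                 // addBlockPadding
                OrcFile.Version.V_0_12,               // version
                null,                                 // callback: no WriterCallback
                OrcFile.EncodingStrategy.SPEED,       // encodingStrategy
                OrcFile.CompressionStrategy.SPEED,    // compressionStrategy
                0.05f,                                // paddingTolerance
                256L * 1024 * 1024,                   // blockSizeValue: illustrative 256 MB
                null,                                 // bloomFilterColumnNames: none
                0.05);                                // bloomFilterFpp
    }
}
```

Unlike OrcFile.WriterOptions-based factory methods, this constructor applies no defaults, so every parameter must be supplied explicitly as above.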
public static org.apache.hadoop.hive.ql.io.orc.CompressionCodec createCodec(org.apache.hadoop.hive.ql.io.orc.CompressionKind kind)
public boolean checkMemory(double newScale) throws java.io.IOException
Specified by:
checkMemory in interface org.apache.hadoop.hive.ql.io.orc.MemoryManager.Callback
Throws:
java.io.IOException
public java.io.OutputStream getStream() throws java.io.IOException
Throws:
java.io.IOException
public void addUserMetadata(java.lang.String name, java.nio.ByteBuffer value)
Specified by:
addUserMetadata in interface org.apache.hadoop.hive.ql.io.orc.Writer
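A minimal sketch of attaching user metadata to the file footer; the key "writer.origin" and its UTF-8 encoded value are purely illustrative choices, not names defined by this API.

```java
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;

class MetadataSketch {
    // Attach an application-defined key/value pair; the pair is carried in the ORC file footer.
    static void tagWriter(OrcFlowFileWriter writer) {
        writer.addUserMetadata("writer.origin",
                ByteBuffer.wrap("nifi-flowfile".getBytes(StandardCharsets.UTF_8)));
    }
}
```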
public void addRow(java.lang.Object row) throws java.io.IOException
Specified by:
addRow in interface org.apache.hadoop.hive.ql.io.orc.Writer
Throws:
java.io.IOException
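A sketch of the row-writing loop, assuming the writer was constructed with a reflection-based ObjectInspector over the hypothetical Sensor class shown here; whatever is passed to addRow must match the inspector the writer was built with.

```java
import java.io.IOException;
import java.util.List;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;

// Hypothetical row type used only for this sketch.
class Sensor {
    String id;
    double reading;
    Sensor(String id, double reading) { this.id = id; this.reading = reading; }
}

class RowWritingSketch {
    // Inspector matching Sensor; pass this as the inspector argument of the constructor.
    static ObjectInspector sensorInspector() {
        return ObjectInspectorFactory.getReflectionObjectInspector(
                Sensor.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }

    static void writeAll(OrcFlowFileWriter writer, List<Sensor> rows) throws IOException {
        for (Sensor row : rows) {
            writer.addRow(row);   // each row must match the writer's ObjectInspector
        }
        writer.close();           // flushes remaining stripes and writes the footer
    }
}
```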
public void addRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch batch) throws java.io.IOException
Throws:
java.io.IOException
public void close() throws java.io.IOException
Specified by:
close in interface org.apache.hadoop.hive.ql.io.orc.Writer
Throws:
java.io.IOException
public long getRawDataSize()
Specified by:
getRawDataSize in interface org.apache.hadoop.hive.ql.io.orc.Writer
public long getNumberOfRows()
Specified by:
getNumberOfRows in interface org.apache.hadoop.hive.ql.io.orc.Writer
public long writeIntermediateFooter() throws java.io.IOException
Specified by:
writeIntermediateFooter in interface org.apache.hadoop.hive.ql.io.orc.Writer
Throws:
java.io.IOException
public void appendStripe(byte[] stripe, int offset, int length, org.apache.hadoop.hive.ql.io.orc.StripeInformation stripeInfo, org.apache.hadoop.hive.ql.io.orc.OrcProto.StripeStatistics stripeStatistics) throws java.io.IOException
Specified by:
appendStripe in interface org.apache.hadoop.hive.ql.io.orc.Writer
Throws:
java.io.IOException
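For appendStripe, a rough sketch of copying one raw stripe from an existing ORC file into this writer. How the matching OrcProto.StripeStatistics is obtained varies by Hive version, so it is passed in as a caller-supplied assumption rather than derived here.

```java
import java.io.IOException;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.orc.OrcProto;
import org.apache.hadoop.hive.ql.io.orc.StripeInformation;

class StripeCopySketch {
    /**
     * Copies one stripe's raw bytes from sourceFile into the writer. stripeStats must be the
     * statistics that correspond to this stripe in the source file's metadata; obtaining them
     * is left to the caller in this sketch.
     */
    static void copyStripe(OrcFlowFileWriter writer,
                           FileSystem fs,
                           Path sourceFile,
                           StripeInformation stripe,
                           OrcProto.StripeStatistics stripeStats) throws IOException {
        int length = (int) stripe.getLength();
        byte[] buffer = new byte[length];
        try (FSDataInputStream in = fs.open(sourceFile)) {
            in.readFully(stripe.getOffset(), buffer, 0, length);  // raw stripe bytes (index + data + footer)
        }
        writer.appendStripe(buffer, 0, length, stripe, stripeStats);
    }
}
```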
public void appendUserMetadata(java.util.List<org.apache.hadoop.hive.ql.io.orc.OrcProto.UserMetadataItem> userMetadata)
Specified by:
appendUserMetadata in interface org.apache.hadoop.hive.ql.io.orc.Writer