@TriggerWhenEmpty @InputRequirement(value=INPUT_FORBIDDEN) @Tags(value={"hadoop","HDFS","get","fetch","ingest","source","filesystem","restricted"}) @CapabilityDescription(value="Fetch files from Hadoop Distributed File System (HDFS) into FlowFiles. This Processor will delete the file from HDFS after fetching it.") @WritesAttribute(attribute="filename",description="The name of the file that was read from HDFS.") @WritesAttribute(attribute="path",description="The path is set to the relative path of the file's directory on HDFS. For example, if the Directory property is set to /tmp, then files picked up from /tmp will have the path attribute set to "./". If the Recurse Subdirectories property is set to true and a file is picked up from /tmp/abc/1/2/3, then the path attribute will be set to "abc/1/2/3".") @SeeAlso(value={PutHDFS.class,ListHDFS.class}) @Restricted(value="Provides operator the ability to retrieve and delete any file that NiFi has access to in HDFS or the local filesystem.") public class GetHDFS extends AbstractHadoopProcessor
Modifier and Type | Class and Description |
---|---|
protected static class |
GetHDFS.ProcessorConfiguration
Holder for a snapshot in time of some processor properties that are passed around.
|
AbstractHadoopProcessor.HdfsResources, AbstractHadoopProcessor.ValidationResources
Modifier and Type | Field and Description |
---|---|
static PropertyDescriptor |
BATCH_SIZE |
static PropertyDescriptor |
BUFFER_SIZE |
static int |
BUFFER_SIZE_DEFAULT |
static java.lang.String |
BUFFER_SIZE_KEY |
static PropertyDescriptor |
FILE_FILTER_REGEX |
static PropertyDescriptor |
FILTER_MATCH_NAME_ONLY |
static PropertyDescriptor |
IGNORE_DOTTED_FILES |
static PropertyDescriptor |
KEEP_SOURCE_FILE |
static PropertyDescriptor |
MAX_AGE |
static int |
MAX_WORKING_QUEUE_SIZE |
static PropertyDescriptor |
MIN_AGE |
static PropertyDescriptor |
POLLING_INTERVAL |
protected GetHDFS.ProcessorConfiguration |
processorConfig |
static PropertyDescriptor |
RECURSE_SUBDIRS |
static Relationship |
REL_SUCCESS |
ABSOLUTE_HDFS_PATH_ATTRIBUTE, ADDITIONAL_CLASSPATH_RESOURCES, COMPRESSION_CODEC, DIRECTORY, HADOOP_CONFIGURATION_RESOURCES, KERBEROS_RELOGIN_PERIOD, kerberosProperties, properties
Constructor and Description |
---|
GetHDFS() |
Modifier and Type | Method and Description |
---|---|
protected java.util.Collection<ValidationResult> |
customValidate(ValidationContext context)
Allows subclasses to perform their own validation on the already set
properties.
|
java.util.Set<Relationship> |
getRelationships() |
protected java.util.List<PropertyDescriptor> |
getSupportedPropertyDescriptors()
Allows subclasses to register which property descriptor objects are
supported.
|
void |
onScheduled(ProcessContext context) |
void |
onTrigger(ProcessContext context,
ProcessSession session) |
protected java.util.Set<org.apache.hadoop.fs.Path> |
performListing(ProcessContext context)
Do a listing of HDFS if the POLLING_INTERVAL has elapsed.
|
protected void |
processBatchOfFiles(java.util.List<org.apache.hadoop.fs.Path> files,
ProcessContext context,
ProcessSession session) |
protected java.util.Set<org.apache.hadoop.fs.Path> |
selectFiles(org.apache.hadoop.fs.FileSystem hdfs,
org.apache.hadoop.fs.Path dir,
java.util.Set<org.apache.hadoop.fs.Path> filesVisited)
Poll HDFS for files to process that match the configured file filters.
|
abstractOnScheduled, abstractOnStopped, checkHdfsUriForTimeout, getCompressionCodec, getConfiguration, getFileSystem, getFileSystem, getFileSystemAsUser, getKerberosProperties, getPathDifference, getUserGroupInformation, init, isTicketOld, preProcessConfiguration, tryKerberosRelogin
onTrigger
getControllerServiceLookup, getIdentifier, getLogger, getNodeTypeProvider, initialize, isConfigurationRestored, isScheduled, toString, updateConfiguredRestoredTrue, updateScheduledFalse, updateScheduledTrue
equals, getPropertyDescriptor, getPropertyDescriptors, getSupportedDynamicPropertyDescriptor, hashCode, onPropertyModified, validate
clone, finalize, getClass, notify, notifyAll, wait, wait, wait
getPropertyDescriptor, getPropertyDescriptors, onPropertyModified, validate
public static final java.lang.String BUFFER_SIZE_KEY
public static final int BUFFER_SIZE_DEFAULT
public static final int MAX_WORKING_QUEUE_SIZE
public static final Relationship REL_SUCCESS
public static final PropertyDescriptor RECURSE_SUBDIRS
public static final PropertyDescriptor KEEP_SOURCE_FILE
public static final PropertyDescriptor FILE_FILTER_REGEX
public static final PropertyDescriptor FILTER_MATCH_NAME_ONLY
public static final PropertyDescriptor IGNORE_DOTTED_FILES
public static final PropertyDescriptor MIN_AGE
public static final PropertyDescriptor MAX_AGE
public static final PropertyDescriptor BATCH_SIZE
public static final PropertyDescriptor POLLING_INTERVAL
public static final PropertyDescriptor BUFFER_SIZE
protected GetHDFS.ProcessorConfiguration processorConfig
public java.util.Set<Relationship> getRelationships()
getRelationships
in interface Processor
getRelationships
in class AbstractSessionFactoryProcessor
protected java.util.List<PropertyDescriptor> getSupportedPropertyDescriptors()
AbstractConfigurableComponent
getSupportedPropertyDescriptors
in class AbstractHadoopProcessor
protected java.util.Collection<ValidationResult> customValidate(ValidationContext context)
AbstractConfigurableComponent
customValidate
in class AbstractHadoopProcessor
context
- provides a mechanism for obtaining externally
managed values, such as property values, and supplies convenience methods
for operating on those values
@OnScheduled public void onScheduled(ProcessContext context) throws java.io.IOException
java.io.IOException
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException
onTrigger
in class AbstractProcessor
ProcessException
protected void processBatchOfFiles(java.util.List<org.apache.hadoop.fs.Path> files, ProcessContext context, ProcessSession session)
protected java.util.Set<org.apache.hadoop.fs.Path> performListing(ProcessContext context) throws java.io.IOException
context
- context
java.io.IOException
- ex
protected java.util.Set<org.apache.hadoop.fs.Path> selectFiles(org.apache.hadoop.fs.FileSystem hdfs, org.apache.hadoop.fs.Path dir, java.util.Set<org.apache.hadoop.fs.Path> filesVisited) throws java.io.IOException
hdfs
- hdfs
dir
- dir
filesVisited
- filesVisited
java.io.IOException
- ex