@TriggerWhenEmpty
@InputRequirement(value=INPUT_FORBIDDEN)
@Tags(value={"hadoop","HDFS","get","fetch","ingest","source","filesystem","restricted"})
@CapabilityDescription(value="Fetch files from Hadoop Distributed File System (HDFS) into FlowFiles. This Processor will delete the file from HDFS after fetching it.")
@WritesAttribute(attribute="filename", description="The name of the file that was read from HDFS.")
@WritesAttribute(attribute="path", description="The path is set to the relative path of the file's directory on HDFS. For example, if the Directory property is set to /tmp, then files picked up from /tmp will have the path attribute set to \"./\". If the Recurse Subdirectories property is set to true and a file is picked up from /tmp/abc/1/2/3, then the path attribute will be set to \"abc/1/2/3\".")
@SeeAlso(value={PutHDFS.class, ListHDFS.class})
@Restricted(value="Provides operator the ability to retrieve and delete any file that NiFi has access to in HDFS or the local filesystem.")
public class GetHDFS extends AbstractHadoopProcessor
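Assuming the nifi-mock and nifi-hdfs-processors artifacts are on the classpath, a minimal sketch of exercising GetHDFS with the TestRunner mock framework might look like the following. The source directory and filter regex are illustrative placeholders, and with no Hadoop Configuration Resources set the default Hadoop Configuration typically falls back to the local filesystem.

```java
import java.util.List;

import org.apache.nifi.processors.hadoop.GetHDFS;
import org.apache.nifi.util.MockFlowFile;
import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;

public class GetHDFSExample {
    public static void main(String[] args) {
        // Build a mock flow around the processor. The directory below is a
        // hypothetical placeholder, not a value from the documentation.
        final TestRunner runner = TestRunners.newTestRunner(GetHDFS.class);

        runner.setProperty(GetHDFS.DIRECTORY, "/tmp/nifi-input");   // hypothetical source directory
        runner.setProperty(GetHDFS.RECURSE_SUBDIRS, "true");
        runner.setProperty(GetHDFS.FILE_FILTER_REGEX, ".*\\.csv");  // illustrative filter
        runner.setProperty(GetHDFS.KEEP_SOURCE_FILE, "true");       // avoid deleting sources while experimenting

        // The processor is @TriggerWhenEmpty and forbids incoming connections,
        // so it is simply scheduled and run.
        runner.run();

        final List<MockFlowFile> fetched = runner.getFlowFilesForRelationship(GetHDFS.REL_SUCCESS);
        fetched.forEach(ff -> System.out.println(ff.getAttribute("path") + "/" + ff.getAttribute("filename")));
    }
}
```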
| Modifier and Type | Class and Description |
|---|---|
| protected static class | GetHDFS.ProcessorConfiguration: Holder for a snapshot in time of some processor properties that are passed around. |

Nested classes inherited from class AbstractHadoopProcessor: AbstractHadoopProcessor.HdfsResources, AbstractHadoopProcessor.ValidationResources

| Modifier and Type | Field and Description |
|---|---|
| static PropertyDescriptor | BATCH_SIZE |
| static PropertyDescriptor | BUFFER_SIZE |
| static int | BUFFER_SIZE_DEFAULT |
| static java.lang.String | BUFFER_SIZE_KEY |
| static PropertyDescriptor | FILE_FILTER_REGEX |
| static PropertyDescriptor | FILTER_MATCH_NAME_ONLY |
| static PropertyDescriptor | IGNORE_DOTTED_FILES |
| static PropertyDescriptor | KEEP_SOURCE_FILE |
| static PropertyDescriptor | MAX_AGE |
| static int | MAX_WORKING_QUEUE_SIZE |
| static PropertyDescriptor | MIN_AGE |
| static PropertyDescriptor | POLLING_INTERVAL |
| protected GetHDFS.ProcessorConfiguration | processorConfig |
| static PropertyDescriptor | RECURSE_SUBDIRS |
| static Relationship | REL_SUCCESS |

Fields inherited from class AbstractHadoopProcessor: ABSOLUTE_HDFS_PATH_ATTRIBUTE, ADDITIONAL_CLASSPATH_RESOURCES, COMPRESSION_CODEC, DIRECTORY, HADOOP_CONFIGURATION_RESOURCES, KERBEROS_RELOGIN_PERIOD, kerberosProperties, properties

| Constructor and Description |
|---|
| GetHDFS() |
| Modifier and Type | Method and Description |
|---|---|
| protected java.util.Collection<ValidationResult> | customValidate(ValidationContext context): Allows subclasses to perform their own validation on the already set properties. |
| java.util.Set<Relationship> | getRelationships() |
| protected java.util.List<PropertyDescriptor> | getSupportedPropertyDescriptors(): Allows subclasses to register which property descriptor objects are supported. |
| void | onScheduled(ProcessContext context) |
| void | onTrigger(ProcessContext context, ProcessSession session) |
| protected java.util.Set<org.apache.hadoop.fs.Path> | performListing(ProcessContext context): Do a listing of HDFS if the POLLING_INTERVAL has lapsed. |
| protected void | processBatchOfFiles(java.util.List<org.apache.hadoop.fs.Path> files, ProcessContext context, ProcessSession session) |
| protected java.util.Set<org.apache.hadoop.fs.Path> | selectFiles(org.apache.hadoop.fs.FileSystem hdfs, org.apache.hadoop.fs.Path dir, java.util.Set<org.apache.hadoop.fs.Path> filesVisited): Poll HDFS for files to process that match the configured file filters. |
Methods inherited from class AbstractHadoopProcessor: abstractOnScheduled, abstractOnStopped, checkHdfsUriForTimeout, getCompressionCodec, getConfiguration, getFileSystem, getFileSystem, getFileSystemAsUser, getKerberosProperties, getPathDifference, getUserGroupInformation, init, isTicketOld, preProcessConfiguration, tryKerberosRelogin

Methods inherited from class AbstractProcessor: onTrigger

Methods inherited from class AbstractSessionFactoryProcessor: getControllerServiceLookup, getIdentifier, getLogger, getNodeTypeProvider, initialize, isConfigurationRestored, isScheduled, toString, updateConfiguredRestoredTrue, updateScheduledFalse, updateScheduledTrue

Methods inherited from class AbstractConfigurableComponent: equals, getPropertyDescriptor, getPropertyDescriptors, getSupportedDynamicPropertyDescriptor, hashCode, onPropertyModified, validate

Methods inherited from class java.lang.Object: clone, finalize, getClass, notify, notifyAll, wait, wait, wait

Methods inherited from interface ConfigurableComponent: getPropertyDescriptor, getPropertyDescriptors, onPropertyModified, validate

public static final java.lang.String BUFFER_SIZE_KEY
public static final int BUFFER_SIZE_DEFAULT
public static final int MAX_WORKING_QUEUE_SIZE
public static final Relationship REL_SUCCESS
public static final PropertyDescriptor RECURSE_SUBDIRS
public static final PropertyDescriptor KEEP_SOURCE_FILE
public static final PropertyDescriptor FILE_FILTER_REGEX
public static final PropertyDescriptor FILTER_MATCH_NAME_ONLY
public static final PropertyDescriptor IGNORE_DOTTED_FILES
public static final PropertyDescriptor MIN_AGE
public static final PropertyDescriptor MAX_AGE
public static final PropertyDescriptor BATCH_SIZE
public static final PropertyDescriptor POLLING_INTERVAL
public static final PropertyDescriptor BUFFER_SIZE
protected GetHDFS.ProcessorConfiguration processorConfig
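For context on how descriptors such as BATCH_SIZE and MIN_AGE listed above are typically declared, the sketch below uses NiFi's PropertyDescriptor.Builder. The display names, descriptions, defaults, and validators shown are assumptions for illustration and may not match the values GetHDFS actually uses.

```java
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.processor.util.StandardValidators;

final class ExampleDescriptors {
    // Illustrative only: names, defaults, and validators are assumptions,
    // not copied from the GetHDFS source.
    static final PropertyDescriptor BATCH_SIZE_EXAMPLE = new PropertyDescriptor.Builder()
            .name("Batch Size")
            .description("Maximum number of files to pull in each iteration of onTrigger")
            .required(true)
            .defaultValue("100")
            .addValidator(StandardValidators.POSITIVE_INTEGER_VALIDATOR)
            .build();

    static final PropertyDescriptor MIN_AGE_EXAMPLE = new PropertyDescriptor.Builder()
            .name("Minimum File Age")
            .description("Files younger than this are skipped, so files still being written are not picked up")
            .required(true)
            .defaultValue("0 sec")
            .addValidator(StandardValidators.TIME_PERIOD_VALIDATOR)
            .build();
}
```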
public java.util.Set<Relationship> getRelationships()
Specified by: getRelationships in interface Processor
Overrides: getRelationships in class AbstractSessionFactoryProcessor

protected java.util.List<PropertyDescriptor> getSupportedPropertyDescriptors()
Description copied from class: AbstractConfigurableComponent
Allows subclasses to register which property descriptor objects are supported.
Overrides: getSupportedPropertyDescriptors in class AbstractHadoopProcessor

protected java.util.Collection<ValidationResult> customValidate(ValidationContext context)
Description copied from class: AbstractConfigurableComponent
Allows subclasses to perform their own validation on the already set properties.
Overrides: customValidate in class AbstractHadoopProcessor
Parameters:
context - provides a mechanism for obtaining externally managed values, such as property values, and supplies convenience methods for operating on those values

@OnScheduled
public void onScheduled(ProcessContext context) throws java.io.IOException
Throws:
java.io.IOException

public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException
Specified by: onTrigger in class AbstractProcessor
Throws:
ProcessException

protected void processBatchOfFiles(java.util.List<org.apache.hadoop.fs.Path> files, ProcessContext context, ProcessSession session)

protected java.util.Set<org.apache.hadoop.fs.Path> performListing(ProcessContext context) throws java.io.IOException
Do a listing of HDFS if the POLLING_INTERVAL has lapsed.
Parameters:
context - context
Throws:
java.io.IOException - ex
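performListing only re-lists HDFS once the configured Polling Interval has elapsed. A minimal sketch of that kind of elapsed-time guard is shown below; the class, field, and method names are hypothetical and not part of GetHDFS.

```java
import java.util.concurrent.atomic.AtomicLong;

// Hypothetical helper, not part of GetHDFS: decides whether enough time has
// passed since the last HDFS listing to justify performing a new one.
final class PollingGuard {
    private final AtomicLong lastListingTime = new AtomicLong(0L);

    boolean pollingIntervalLapsed(final long pollingIntervalMillis) {
        final long now = System.currentTimeMillis();
        final long last = lastListingTime.get();
        // Only one caller wins the compare-and-set, so concurrent onTrigger
        // threads do not all start a listing at the same moment.
        return now - last >= pollingIntervalMillis && lastListingTime.compareAndSet(last, now);
    }
}
```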
protected java.util.Set<org.apache.hadoop.fs.Path> selectFiles(org.apache.hadoop.fs.FileSystem hdfs, org.apache.hadoop.fs.Path dir, java.util.Set<org.apache.hadoop.fs.Path> filesVisited) throws java.io.IOException
Poll HDFS for files to process that match the configured file filters.
Parameters:
hdfs - hdfs
dir - dir
filesVisited - filesVisited
Throws:
java.io.IOException - ex
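The sketch below illustrates the kind of recursive selection selectFiles describes: walking a directory with the public Hadoop FileSystem API, honoring a file filter regex, skipping dotted files, and applying a minimum/maximum age window. It is an illustration under those assumptions, not the processor's actual implementation, and its signature differs from the method documented above.

```java
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Pattern;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

final class HdfsSelectionSketch {

    // Walks 'dir' and collects paths whose names match 'filter', skipping dotted
    // files and files outside the [minAgeMillis, maxAgeMillis] window. This mirrors
    // the behavior described for selectFiles but is not the processor's own code.
    static Set<Path> selectFiles(final FileSystem hdfs, final Path dir, final Pattern filter,
                                 final long minAgeMillis, final long maxAgeMillis,
                                 final boolean recurse) throws IOException {
        final Set<Path> selected = new HashSet<>();
        final long now = System.currentTimeMillis();

        for (final FileStatus status : hdfs.listStatus(dir)) {
            if (status.isDirectory()) {
                if (recurse) {
                    selected.addAll(selectFiles(hdfs, status.getPath(), filter,
                            minAgeMillis, maxAgeMillis, recurse)); // Recurse Subdirectories
                }
                continue;
            }

            final String name = status.getPath().getName();
            if (name.startsWith(".")) {
                continue; // Ignore Dotted Files
            }
            if (!filter.matcher(name).matches()) {
                continue; // File Filter Regex (matching on the name only)
            }

            final long age = now - status.getModificationTime();
            if (age < minAgeMillis || age > maxAgeMillis) {
                continue; // Minimum/Maximum File Age window
            }
            selected.add(status.getPath());
        }
        return selected;
    }
}
```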