|
The SAX Parser Locator Facility
|
This is yet another working sample of maintaining location in a SAX parsing handler—that is, getting accurate line/column numbers out of it. Scraping this to start from should give you a leg up on your SAX parser, whatever its purpose.
What's missing? Well, pretty quickly—depending on what you need a SAX parser for—you'll need some kind of stack storage and management. I have some examples of that elsewhere.
package com.windofkeltia.processor;
import java.io.IOException;
import java.io.InputStream;
import java.util.Map;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.SAXException;
import com.windofkeltia.sax.Position;
/**
 * Couples a JAXP {@link SAXParser} with an {@link AnalyzerHandler} for a single input stream
 * and exposes the start/end positions the handler collected.
 */
public class Analyzer
{
  /** One factory shared by every instance; each instance asks it for its own parser. */
  private static final SAXParserFactory PARSER_FACTORY = SAXParserFactory.newInstance();

  private final InputStream     inputStream;
  private final SAXParser       parser;
  private final AnalyzerHandler handler;

  /**
   * Prepare to scan a document for one element: the handler starts displaying output
   * (which is just parsing status) once that element opens and stops again when the
   * element's close is found.
   *
   * @param flowfile       stream holding the XML to parse; it is only read when {@link #parse()} is called.
   * @param contentElement qualified name of the element that switches output on and off.
   * @throws ParserConfigurationException if the factory cannot create a parser.
   * @throws SAXException                 if the factory reports a SAX error while creating the parser.
   */
  public Analyzer( InputStream flowfile, final String contentElement )
      throws ParserConfigurationException, SAXException
  {
    // the parser is created before the handler so that a factory failure happens
    // before the handler's constructor has printed anything
    this.parser      = PARSER_FACTORY.newSAXParser();
    this.handler     = new AnalyzerHandler( contentElement );
    this.inputStream = flowfile;
  }

  /**
   * Run the parser over the stream given at construction, feeding events to the handler.
   *
   * @throws IOException  if reading the stream fails.
   * @throws SAXException if the parser reports an error.
   */
  public void parse() throws IOException, SAXException
  {
    parser.parse( inputStream, handler );
  }

  /** @return whatever start position the handler has recorded (may be null). */
  public Position getStart()
  {
    return handler.getStart();
  }

  /** @return whatever end position the handler has recorded (may be null). */
  public Position getEnd()
  {
    return handler.getEnd();
  }
}
Each entry point (handler method) prints out status including line- and column number and the element name (and attributes if relevant).
package com.windofkeltia.processor;
import java.util.HashMap;
import java.util.Map;
import static java.util.Objects.isNull;
import org.xml.sax.Attributes;
import org.xml.sax.Locator;
import org.xml.sax.helpers.DefaultHandler;
import com.windofkeltia.sax.Position;
import com.windofkeltia.utilities.StringUtilities;
/**
 * SAX handler that keeps track of line/column positions and prints parsing status to
 * standard output for everything nested inside one named "content" element.
 *
 * <p>Output is switched on when the content element opens and off again when it closes;
 * the content element's own tags are never printed. While output is on, each start tag,
 * end tag and non-blank run of text is printed as {@code line,column text}.</p>
 *
 * <p>{@link #getStart()} yields the position recorded at the first element seen inside the
 * content element; {@link #getEnd()} yields the locator position reported at the most recent
 * element close inside it. Both are {@code null} until such an element has been seen.</p>
 */
public class AnalyzerHandler extends DefaultHandler
{
  private Locator  locator;
  private Position position = new Position(); // starting element position we maintain
  private Position start    = null;           // position at the first element inside the content element
  private Position end      = null;           // locator position at the latest close inside the content element
  private boolean  outputOn = false;          // allows us to start/stop some (output) action
  private final String contentElement;

  /**
   * @param contentElement qualified name of the element whose contents are to be reported.
   */
  public AnalyzerHandler( final String contentElement )
  {
    super();
    this.contentElement = contentElement;
    System.out.println( "AnalyzerHandler():" );
  }

  /**
   * Turn output on when the content element opens; otherwise, if output is on, print the
   * maintained position, the element name and any attributes.
   *
   * NOTE(review): the maintained position is not advanced here, only in characters() and
   * endElement(); two start tags with nothing between them would print the same position.
   * Confirm whether that is intended.
   */
  @Override
  public void startElement( String uri, String localName, String elementName, Attributes attributes )
  {
    if( elementName.equals( contentElement ) )
    {
      outputOn = true;
      return;
    }

    if( !outputOn )
      return;

    // remember where the very first reported element began
    if( isNull( start ) )
      start = position;

    StringBuilder sb = new StringBuilder();
    sb.append( position.line ).append( ',' ).append( position.column ).append( ' ' );
    sb.append( elementName );

    // previously the attribute map was built and then thrown away; print it
    String attributeText = javaAttributesAsString( getAttributesAsJavaMap( attributes ) );
    if( !attributeText.isEmpty() )
      sb.append( ' ' ).append( attributeText );

    System.out.println( sb );
  }

  /**
   * Turn output off when the content element closes; otherwise, if output is on, print the
   * maintained position and the element name, record the end position and advance the
   * maintained position.
   */
  @Override
  public void endElement( String uri, String localName, String elementName )
  {
    if( elementName.equals( contentElement ) )
      outputOn = false;
    if( !outputOn )
      return;

    StringBuilder sb = new StringBuilder();
    sb.append( position.line ).append( ',' ).append( position.column ).append( ' ' );
    sb.append( elementName );
    System.out.println( sb );

    // Record the end in the FIELD. Formerly two locals named start/end shadowed the
    // fields, so the field was never assigned and getEnd() always returned null.
    if( locator != null )
      end = new Position( locator.getLineNumber(), locator.getColumnNumber() );

    // update the starting point for the next element
    updateElementPoint( locator );
  }

  /**
   * Advance the maintained position and, if output is on and the text is not blank,
   * print the position followed by the trimmed text in double quotes.
   *
   * Note that the {@code start} parameter here is the offset into {@code ch}; it hides
   * the {@code start} field inside this method.
   */
  @Override
  public void characters( char[] ch, int start, int length )
  {
    updateElementPoint( locator ); // now update the starting point
    if( !outputOn )
      return;

    String characters = new String( ch, start, length ).trim();
    if( StringUtilities.isEmpty( characters ) )
      return;

    StringBuilder sb = new StringBuilder();
    sb.append( position.line ).append( ',' ).append( position.column ).append( ' ' );
    sb.append( '\"' ).append( characters ).append( '\"' );
    System.out.println( sb );
  }

  /**
   * Print a comment's trimmed text while output is on.
   *
   * NOTE(review): this has the shape of org.xml.sax.ext.LexicalHandler.comment(), which
   * DefaultHandler does not declare; nothing in this class registers it as a lexical
   * handler, so the parser presumably never calls it as things stand.
   */
  public void comment( char[] ch, int start, int length )
  {
    if( !outputOn )
      return;
    String comment = new String( ch, start, length );
    System.out.println( " comment(): \"" + comment.trim() + "\"" );
  }

  @Override
  public void startDocument() { if( outputOn ) System.out.println( " startDocument():" ); }

  @Override
  public void endDocument() { if( outputOn ) System.out.println( " endDocument():" ); }

  /** Keep the parser's locator so that line/column numbers can be read during later events. */
  @Override
  public void setDocumentLocator( Locator location ) { locator = location; }

  /** @return position recorded at the first element inside the content element, or null if none was seen. */
  public Position getStart() { return start; }

  /** @return locator position at the latest element close inside the content element, or null if none was seen. */
  public Position getEnd() { return end; }

  /**
   * Move the maintained position forward to the locator's current line/column; it is
   * never moved backward. Does nothing when no locator was supplied.
   */
  private void updateElementPoint( Locator locator )
  {
    if( locator == null )
      return;

    Position location = new Position( locator.getLineNumber(), locator.getColumnNumber() );
    if( position.compareTo( location ) < 0 )
      position = location;
  }

  /**
   * Here's how to make SAX' attributes "Java-useful." If we had uri (namespaces) defined,
   * we'd have to get a lot more serious about how to use uri, localName and qName.
   *
   * @param saxAttributes attributes as handed to startElement().
   * @return map of qualified attribute name to value; a HashMap, so iteration order is
   *         not necessarily document order.
   */
  private Map< String, String > getAttributesAsJavaMap( Attributes saxAttributes )
  {
    int attrLength = saxAttributes.getLength();
    Map< String, String > javaAttributes = new HashMap<>( attrLength );

    for( int attr = 0; attr < attrLength; attr++ )
    {
      String attribute = saxAttributes.getQName( attr );
      String value     = saxAttributes.getValue( attr );
      javaAttributes.put( attribute, value );
    }

    return javaAttributes;
  }

  /**
   * Render attributes as {@code name="value", name="value"}, with each value passed
   * through StringUtilities.smash().
   *
   * @param javaAttributes map as built by getAttributesAsJavaMap().
   * @return the rendered list, or "" when there are no attributes.
   */
  private String javaAttributesAsString( Map< String, String > javaAttributes )
  {
    if( javaAttributes.isEmpty() )
      return "";

    StringBuilder sb = new StringBuilder();

    for( Map.Entry< String, String > attribute : javaAttributes.entrySet() )
      sb.append( attribute.getKey() )
        .append( "=\"" )
        .append( StringUtilities.smash( attribute.getValue() ) )
        .append( "\", " );

    sb.setLength( sb.length()-2 ); // drop the trailing ", "
    return sb.toString();
  }
}
package com.windofkeltia.sax;
/**
 * A mutable line/column pair, ordered first by line and then by column.
 *
 * <p>The fields are public and also reachable through getters and setters. The
 * no-argument constructor yields line 1, column 1.</p>
 */
public class Position implements Comparable< Position >
{
  public int line;
  public int column;

  /** Create a position at line 1, column 1. */
  public Position() { this.line = 1; this.column = 1; }

  /** Create a position at the given line and column. */
  public Position( int line, int column ) { this.line = line; this.column = column; }

  public void setLine( int line )     { this.line = line; }
  public void setColumn( int column ) { this.column = column; }
  public int  getLine()               { return line; }
  public int  getColumn()             { return column; }

  /**
   * Compare this position with another in document order.
   *
   * @param position the position to compare against; must not be null.
   * @return -1 if this position comes before the argument (earlier line, or same line and
   *         earlier column), 0 if both line and column match, 1 otherwise.
   */
  @Override
  public int compareTo( Position position )
  {
    // line decides first...
    if( getLine() != position.getLine() )
      return ( getLine() < position.getLine() ) ? -1 : 1;

    // ...and column only breaks a tie on the same line
    if( getColumn() != position.getColumn() )
      return ( getColumn() < position.getColumn() ) ? -1 : 1;

    return 0;
  }

  /** Two positions are equal when both line and column match, in agreement with compareTo(). */
  @Override
  public boolean equals( Object other )
  {
    if( this == other )
      return true;
    if( !( other instanceof Position ) )
      return false;

    Position that = ( Position ) other;
    return line == that.line && column == that.column;
  }

  /** Hash built from line and column. Because this class is mutable, do not change one while it is a hash key. */
  @Override
  public int hashCode()
  {
    return 31 * line + column;
  }

  /** @return this position rendered as {@code line,column}. */
  @Override
  public String toString()
  {
    return line + "," + column;
  }
}
/**
 * Tell whether a string holds nothing at all.
 *
 * @param string the string to examine; may be null.
 * @return true for null or for a zero-length string; false otherwise (whitespace counts as content).
 */
public static boolean isEmpty( String string )
{
  if( string == null )
    return true;

  return string.length() == 0;
}
/**
 * Neutralize the white space in a string. This is helpful when comparing test results
 * where white space doesn't count.
 *
 * <ul>
 * <li>Line breaks (line feed and carriage return) are removed outright; they are not
 *     replaced by a space.</li>
 * <li>Each tab is treated as a space.</li>
 * <li>Every run of adjacent spaces collapses to a single space. Single spaces, including
 *     a leading or trailing one, are kept.</li>
 * </ul>
 *
 * @param string the text to neutralize; may be null or empty.
 * @return the neutralized text; null or "" is returned unchanged.
 */
public static String smash( String string )
{
  if( string == null || string.isEmpty() )
    return string;

  int           length        = string.length();
  StringBuilder result        = new StringBuilder( length );
  boolean       previousSpace = false;   // was the last character emitted a space?

  for( int index = 0; index < length; index++ )
  {
    char c = string.charAt( index );

    // line breaks vanish without breaking up a run of spaces around them
    if( c == '\n' || c == '\r' )
      continue;

    if( c == '\t' )
      c = ' ';

    if( c == ' ' )
    {
      // only the first space of a run is emitted
      if( !previousSpace )
        result.append( ' ' );
      previousSpace = true;
    }
    else
    {
      result.append( c );
      previousSpace = false;
    }
  }

  return result.toString();
}
Much transformation (to simplify) went into this, so I don't guarantee it works as-is. Anyway, the output is supposed to be (simply) what's in CCD_BODY.
package com.windofkeltia.processor;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.List;
import javax.xml.parsers.ParserConfigurationException;
import org.xml.sax.SAXException;
import org.junit.After;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TestName;
import static org.junit.Assert.assertEquals;
/**
 * Smoke test for {@link Analyzer}: echoes a small embedded XML document with line numbers,
 * then parses it with "ccdmessage" as the content element so that the handler prints
 * status for what is nested inside it. No assertions are made on the output.
 */
public class AnalyzerTest
{
  // setUp() referred to "name" without any declaration; the Rule import suggests the
  // usual JUnit 4 TestName rule was intended -- presumably what TestUtilities.setUp()
  // expects (TODO confirm against TestUtilities).
  @Rule   public TestName name = new TestName();
  @After  public void tearDown() { }
  @Before public void setUp() { TestUtilities.setUp( name ); }

  private static final boolean VERBOSE = true;

  /** The payload whose elements the analyzer is expected to report. */
  private static final String CCD_BODY = ""
    + "<ClinicalDocument xmlns=\"urn:hl7-org:v3\" xmlns:sdtc=\"urn:hl7-org:sdtc\"\n"
    + " xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n"
    + " xsi:schemaLocation=\"urn:hl7-org:v3\n"
    + " http://xreg2.nist.gov:8080/hitspValidation/schema/cdar2c32/infrastructure/cda/C32_CDA.xsd\">\n"
    + " <realmCode code=\"US\"/>\n"
    + " <typeId root=\"2.16.840.1.113883.1.3\" extension=\"POCD_HD000040\"/>\n"
    + " <component>\n"
    + " <stuff>\n"
    + " ...\n"
    + " </stuff>\n"
    + " </component>\n"
    + "</ClinicalDocument>\n";

  /** The full document: CCD_BODY wrapped in a ccdmessage element inside a record. */
  private static final String FROM_JDBCTOXML = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
    + "<record>\n"
    + " <justforfun>\n"
    + " ...\n"
    + " </justforfun>\n"
    + " <ccdmessage>\n"
    + CCD_BODY
    + " </ccdmessage>\n"
    + "</record>\n";

  @Test
  public void testAnalyzer() throws ParserConfigurationException, SAXException, IOException
  {
    // echo the input with 1-based line numbers to compare against the handler's output
    String[] content = FROM_JDBCTOXML.split( "\n" );
    System.out.println( "Input:" );
    int lineNumber = 1;
    for( String line : content )
      System.out.println( lineNumber++ + " " + line );
    System.out.println();

    // the document declares encoding="UTF-8", so encode it that way rather than with
    // the platform default charset
    byte[] bytes = FROM_JDBCTOXML.getBytes( StandardCharsets.UTF_8 );

    Analyzer analyzer = new Analyzer( new ByteArrayInputStream( bytes ), "ccdmessage" );
    analyzer.parse();
  }
}