The SAX Parser locator Facility
Part 2

Russell Bateman
last update:




This details how to enhance SampleHandler, introduced in another note, to make use of ParserHandlerPrinter to output lines. We're not going to show columns, but only lines.

Caveat

As previously noted, we read in SAX documentation that...

The return value from the method is intended only as an approximation for the sake of diagnostics; it is not intended to provide sufficient information to edit the character content of the original XML document. For example, when lines contain combining character sequences, wide characters, surrogate pairs, or bi-directional text, the value may not correspond to the column in a text editor's display.

More accurately, the Locator records the current line number at the end of parsing the construct, which simply means that the line number to associate with any given construct corresponds to the file line on which that construct ended. See the very simple example below.

Adaptation of SampleHandler...

This is the original SAX parser-handler sample code given elsewhere, enhanced so that, depending upon a verbosity setting, it prints output by way of ParserHandlerPrinter, a utility that parallels any SAX parser handler and prints out status information, in particular, the line number.

The integration of ParserHandlerPrinter happens initially at the very bottom of this code and meaningfully at the end of each of the principal SAX parser-handler methods.

As will be observed, the SAX parser handler here must be written to exploit the SAX locator carefully.

Note class Position in this code. Its utility isn't shown in our example because it goes beyond basic Locator, but it attempts to record the line number at which a construct begins (rather than, as natively by Locator, the finishing line of the construct).

SampleHandler.java:
Click to show code.
import org.xml.sax.Attributes;
import org.xml.sax.Locator;
import org.xml.sax.helpers.DefaultHandler;

/**
 * Sample SAX parser handler that, when a verbosity greater than 0 has been set through
 * {@link #setPrinterVerbosity(int)}, forwards each parsing event to a ParserHandlerPrinter.
 * It also keeps, in {@link Position}, the furthest line/column the SAX locator has reported
 * at the end of an element, a run of characters or a run of ignorable whitespace.
 */
public class SampleHandler extends DefaultHandler
{
  private Locator  locator;                   // null until the parser calls setDocumentLocator()
  private Position position = new Position(); // starting element position we maintain

  /** The SAX parser will call this to update locator as needed. */
  @Override
  public void setDocumentLocator( Locator location ) { locator = location; }

  @Override
  public void startDocument() { if( printerVerbosity > 0 ) printer.startDocument(); }

  @Override
  public void endDocument()   { if( printerVerbosity > 0 ) printer.endDocument();   }

  @Override
  public void startElement( String uri, String localName, String qName, Attributes attributes )
  {
    // do startElement() work here...
    if( printerVerbosity > 0 )
      printer.startElement( locator, qName, attributes );
  }

  @Override
  public void endElement( String uri, String localName, String qName )
  {
    // We could get our source position--at the end. These two locals exist for the benefit
    // of the work to be done below. A parser is not obliged to supply a locator; without
    // one, the end falls back to the last position recorded.
    Position start = position;
    Position end   = ( locator == null )
                       ? position
                       : new Position( locator.getLineNumber(), locator.getColumnNumber() );

    // do endElement() work here

    // update the starting point for the next element
    updateElementPoint( locator );

    if( printerVerbosity > 0 )
      printer.endElement( locator, qName );
  }

  @Override
  public void characters( char[] ch, int start, int length )
  {
    updateElementPoint( locator );  // now update the starting point

    if( printerVerbosity > 0 )
      printer.characters( locator, new String( ch, start, length ).trim() );
  }

  @Override
  public void ignorableWhitespace( char[] ch, int start, int length )
  {
    updateElementPoint( locator ); // now update the starting point
  }

  /**
   * Advance the recorded position to where the locator now stands, but only if that is
   * further along than what is already recorded.
   *
   * @param locator the SAX locator; when null (none was supplied), nothing is recorded.
   */
  private void updateElementPoint( Locator locator )
  {
    if( locator == null )
      return;

    Position location = new Position( locator.getLineNumber(), locator.getColumnNumber() );
    if( position.compareTo( location ) < 0 )
      position = location;
  }

  /** Wrap and maintain the SAX locator to make it more accurate. */
  static class Position
  {
    private int line;
    private int column;

    public Position()                       { this.line   = 1;    this.column = 1; }
    public Position( int line, int column ) { this.line   = line; this.column = column; }
    public void setLine( int line )         { this.line   = line; }
    public void setColumn( int column )     { this.column = column; }

    public int getLine()                    { return line; }
    public int getColumn()                  { return column; }

    /**
     * Order two positions by line first, then by column.
     *
     * @param position the position to compare this one against.
     * @return -1 if this position lies before the argument, 0 if both are at the same line
     *         and column, 1 if this position lies after the argument.
     */
    public int compareTo( Position position )
    {
      if( getLine() != position.getLine() )
        return Integer.compare( getLine(), position.getLine() );

      // same line: the column decides
      return Integer.compare( getColumn(), position.getColumn() );
    }
  }

  private ParserHandlerPrinter printer;       // only created once a verbosity > 0 is set

  private int printerVerbosity = 0;

  /** Use this from JUnit tests to set the level of debug verbosity. If not done, printer will be quiet. */
  public void setPrinterVerbosity( int verbosity )
  {
    printerVerbosity = verbosity;

    if( printerVerbosity > 0 )
      printer = new ParserHandlerPrinter( printerVerbosity );
  }
}

Awakening the ParserHandlerPrinter utility from JUnit

We modify SampleHandlerTest, not shown here, to incorporate code to awaken the verbose printing of handler status during the SAX parse—demonstrating using ParserHandlerPrinter from SampleHandler which consumes the SAX Locator for reporting of line numbers.

ParserHandlerPrinterTest.java
/**
 * Parses the sample XML file with a SampleHandler whose printer verbosity is set to 3 so
 * that the handler's status output, including line numbers, appears on the console.
 */
public class ParserHandlerPrinterTest
{
  private static final boolean VERBOSE      = true;//TestUtilities.VERBOSE;
  private static final String  CONTENT_PATH = TestUtilities.TEST_FODDER + "sample.xml";

  /**
   * Optionally print the input file, then run the SAX parse; this test makes no assertions
   * and its result is the console output.
   *
   * @throws Exception if the parser cannot be created or the parse fails.
   */
  @Test
  public void test() throws Exception
  {
    if( VERBOSE )
      System.out.println( "" + TestUtilities.getLinesInFile( CONTENT_PATH ) );

    System.out.println( "Note that line numbers correspond with the end of the identified construct:\n" );

    SAXParserFactory factory = SAXParserFactory.newInstance();
    SAXParser        parser  = factory.newSAXParser();
    SampleHandler    handler = new SampleHandler();

    // must be set before parsing; otherwise the handler stays quiet
    handler.setPrinterVerbosity( 3 );

    parser.parse( CONTENT_PATH, handler );
  }
}

Input to the JUnit test case above

<medication startdate="202205180900"
         enddate="202205180900"
         dose="50"
         unit="mg">
  This was the patient's Vicodin.
</medication>

Output from the JUnit test case

Note that line numbers correspond with the end of the identified construct:

[start of document]
  4 <medication dose="50" unit="mg" enddate="202205180900" startdate="202205180900">
  6   This was the patient's Vicodin.
  6 </medication>
[end of document]

The printer utility code...

This utility has not been updated to make use of Position maintained in SampleHandler. (We'll integrate that elsewhere when we get around to it.)

ParserHandlerPrinter.java
import java.util.HashMap;
import java.util.Map;

import org.xml.sax.Attributes;
import org.xml.sax.Locator;

import com.windofkeltia.utilities.StringUtilities;

/**
* Conceived as JUnit-only. Use setPrinterVerbosity() in whatever SAX parser
* handler consumes this to set the level before instantiating the parser.
* Everything is written to System.out.
*/
public class ParserHandlerPrinter
{
  private final int verbosity;

  /** Quiet version of this utility--utters nothing to the console. */
  public ParserHandlerPrinter() { verbosity = 0; }

  /**
  * Enable this utility at any of several levels:
  * 0 - no output (quiet mode)
  * 1 - minimal output
  * 2 - verbose output without line numbers
  * 3 - verbose output including line numbers
  * As implemented, levels 1 and 2 print the same thing, the document start/end markers
  * are printed only above level 2, and the element and character methods print nothing
  * for any value other than 1, 2 or 3.
  */
  public ParserHandlerPrinter( int verbosity ) { this.verbosity = verbosity; }

  /**
  * Print an opening tag with its attributes, preceded at level 3 by the line number.
  *
  * @param locator     the SAX locator from which the line number is read (level 3 only).
  * @param elementName name to print between the angle brackets.
  * @param attributes  the element's attributes; printed only if there is at least one.
  */
  public void startElement( Locator locator, final String elementName, Attributes attributes )
  {
    StringBuilder sb = new StringBuilder();
    switch( verbosity )
    {
      case 3 :
        sb.append( getLineNumber( locator ) );
        // deliberate fall-through: level 3 is the lower levels' output plus the line number
      case 2 :
      case 1 :
        sb.append( " <" ).append( elementName );
        if( attributes.getLength() > 0 )
          sb.append( javaAttributesAsString( attributes ) );
        sb.append( ">" );
        System.out.println( sb );
        break;
      case 0 :
      default :
        break;
    }
  }

  /**
  * Print character content, preceded at level 3 by the line number. Empty content is
  * skipped altogether.
  *
  * @param locator    the SAX locator from which the line number is read (level 3 only).
  * @param characters the text to print, after the configured indent.
  */
  public void characters( Locator locator, final String characters )
  {
    if( characters.isEmpty() )
      return;

    StringBuilder sb = new StringBuilder();
    switch( verbosity )
    {
      case 3 :
        sb.append( getLineNumber( locator ) );
        // deliberate fall-through
      case 2 :
      case 1 :
        sb.append( LINE_NUMBER_INDENT ).append( characters );
        System.out.println( sb );
        break;
      case 0 :
      default :
        break;
    }
  }

  /**
  * Print a closing tag, preceded at level 3 by the line number.
  *
  * @param locator     the SAX locator from which the line number is read (level 3 only).
  * @param elementName name to print in the closing tag.
  */
  public void endElement( Locator locator, final String elementName )
  {
    StringBuilder sb = new StringBuilder();
    switch( verbosity )
    {
      case 3 :
        sb.append( getLineNumber( locator ) );
        // deliberate fall-through
      case 2 :
      case 1 :
        // mirror startElement(): a space, then the tag itself
        sb.append( " </" ).append( elementName ).append( ">" );
        System.out.println( sb );
        break;
      case 0 :
      default :
        break;
    }
  }

  /**
  * Print a closing tag exactly as {@link #endElement(Locator, String)} does.
  *
  * @param locator     the SAX locator from which the line number is read (level 3 only).
  * @param elementName name to print in the closing tag.
  * @param text        accepted but not printed. NOTE(review): confirm whether the
  *                    element's accumulated text was meant to be output here.
  */
  public void endElement( Locator locator, final String elementName, StringBuilder text )
  {
    endElement( locator, elementName );
  }

  public void startDocument() { if( verbosity > 2 ) System.out.println( "[start of document]" ); }
  public void endDocument()   { if( verbosity > 2 ) System.out.println( "[end of document]" );   }

  // Shared by every instance; adjustable through the two static setters below.
  private static int    LINE_NUMBER_PLACES = 3;
  private static String LINE_NUMBER_INDENT = "   ";

  /** Set the width passed to the padding utility when formatting a line number. */
  public static void setLineNumberPlaces( int places ) { LINE_NUMBER_PLACES = places; }

  /** Set the indent printed before character content to the given number of spaces. */
  public static void setLineNumberIndent( int indent )
  {
    StringBuilder sb = new StringBuilder();
    while( indent-- > 0 )
      sb.append( ' ' );
    LINE_NUMBER_INDENT = sb.toString();
  }

  /**
  * Format the locator's current line number through StringUtilities.padStringLeft().
  * A parser is not obliged to supply a locator; when there is none, "?" is padded instead
  * rather than failing on a null reference.
  */
  private static String getLineNumber( Locator locator )
  {
    String number = ( locator == null ) ? "?" : String.valueOf( locator.getLineNumber() );
    return StringUtilities.padStringLeft( number, LINE_NUMBER_PLACES );
  }

  /** Render the attributes as a string of the form: space, name, ="value", repeated. */
  private static String javaAttributesAsString( Attributes attributes )
  {
    Map< String, String > javaAttributes = getAttributesAsJavaMap( attributes );

    if( javaAttributes.isEmpty() )
      return "";

    StringBuilder sb = new StringBuilder();

    for( Map.Entry< String, String > attribute : javaAttributes.entrySet() )
      sb.append( " " ).append( attribute.getKey() ).append( "=\"" ).append( attribute.getValue() ).append( "\"" );

    return sb.toString();
  }

  /**
  * Copy the SAX attributes, keyed by qualified name, into a map. The map is a HashMap,
  * so iterating over it need not follow the order in which the attributes were written.
  */
  private static Map< String, String > getAttributesAsJavaMap( Attributes attributes )
  {
    int                   attributeLength = attributes.getLength();
    Map< String, String > javaAttributes  = new HashMap<>( attributeLength );

    for( int count = 0; count < attributeLength; count++ )
    {
      String attribute = attributes.getQName( count );
      String value     = attributes.getValue( count );
      javaAttributes.put( attribute, value );
    }

    return javaAttributes;
  }
}