E-mail address notes


Regular expressions to validate e-mail addresses

Here's the pattern for matching a single address. It does not handle every possible combination of valid characters, only the most common ones.

[\w-]+(\.[\w-]+)*@[\w-]+(\.[\w-]+)+

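This isn't a "real" JUnit test, and one case "fails to fail". I haven't had time to look closely, but the likely cause is Matcher.find(), which succeeds as soon as any part of the input matches the pattern; Matcher.matches() requires the entire input to match: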

package com.etretatlogiciels.fun;

import org.junit.Test;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Demonstrates validating a single e-mail address, or a comma-separated list of
 * addresses, with one regular expression.
 *
 * <p>The address pattern covers only the most common address forms. The list
 * pattern is built by substituting the address pattern into
 * {@code address(,\s*address)*} with {@link String#format}.
 *
 * <p>This is not a real unit test: {@link #test()} asserts nothing and only
 * prints, for each sample, whether it passed validation.
 */
public class EmailAddressValidator
{
  // @formatter:off
  // some e-mail addresses and address lists to test:
  // NOTE(review): the "[email protected]" literals below look like addresses that were
  // redacted by an e-mail-obfuscation filter. They contain no '@' and so cannot
  // match the pattern; restore real sample addresses before relying on the output.
  private static final String[] ATTEMPTS =
      {
          "xyz",                            // should fail
          "[email protected]",                    // should pass
          "[email protected], xyz",               // should fail
          "[email protected], [email protected]",  // should pass
          "fø[email protected], [email protected]"   // should pass
      };

  // basic patterns that will match an e-mail address and a list of things:
  private static final String EMAIL_ADDRESS_PATTERN = "[\\w-]+(\\.[\\w-]+)*@[\\w-]+(\\.[\\w-]+)+";
  private static final String LIST_PATTERN          = "%1$s(,\\s*%1$s)*";

  // use String.format() to tuck the e-mail address pattern into the greater list pattern:
  private static final String EMAIL_LIST_PATTERN    = String.format( LIST_PATTERN, EMAIL_ADDRESS_PATTERN );

  // now compile the whole pattern; UNICODE_CHARACTER_CLASS makes \w accept
  // non-ASCII letters, since we're counting on Unicode addresses:
  private static final Pattern emailListPattern = Pattern.compile( EMAIL_LIST_PATTERN, Pattern.UNICODE_CHARACTER_CLASS );

  /**
   * Runs every sample in {@code ATTEMPTS} through the list pattern and prints
   * whether the sample, taken as a whole, is a valid address list.
   */
  @Test
  public void test()
  {
    // let's test...
    for( String attempt : ATTEMPTS )
    {
      // create a matcher to use for this attempt:
      Matcher matcher = emailListPattern.matcher( attempt );

      // matches() requires the ENTIRE input to fit the pattern. find() would
      // accept any input that merely contains a valid address somewhere, which
      // let a list with an invalid trailing entry (", xyz") slip through.
      boolean valid = matcher.matches();

      System.out.println( "  " + attempt + ( valid
                                             ? " matches our validator"
                                             : " does not pass our validator" ) );
    }
  }
}

Here's the output:

xyz does not pass our validator
[email protected] matches our validator
[email protected], xyz matches our validator ✓ should have failed
[email protected], [email protected] matches our validator
fø[email protected], [email protected] matches our validator