import java.util.*;

/**
    DomainNameChecker - validation of domain names.

    This is part of the Backus-Naur notation of the spec for a hostname, in RFC 1738:<BR>
    <blockquote><pre>
        hostport       = host [ ":" port ]
        host           = hostname | hostnumber
        hostname       = *[ domainlabel "." ] toplabel
        domainlabel    = alphadigit | alphadigit *[ alphadigit | "-" ] alphadigit
        toplabel       = alpha | alpha *[ alphadigit | "-" ] alphadigit
        alphadigit     = alpha | digit
        hostnumber     = digits "." digits "." digits "." digits
        port = digits
        alpha = [a..z] | [A..Z]
        digit = [0..9]

        Additional notes:
        Portnumbers are restricted to [0..65535] (16 bit)
        Each number in a numeric IP is restricted to [0..255] (8 bit). This grammar covers
        IPv4 only; IPv6 addresses (128 bit, written as hexadecimal groups) are not handled.
    </pre></blockquote>


      In plain English:<BR>
      (a) A domain name can only have alphabetic, digit, hyphen and period characters in it. <BR>
      (b) Each domain label (e.g. in www.cnn.com, 'www' and 'cnn' are domain labels) must
          contain at least one alpha or digit character and MUST start and end with an alpha
          or digit character. Hyphens are allowed if they are not the first or last character.<BR>
      (c) The top label (e.g. in www.cnn.com, 'com' is the top label) MUST start with an alpha
          character. Succeeding characters can be alpha, digit, or hyphen characters, but the
          last character must be an alpha or digit.<BR>
      (d) The IP number (see the hostnumber spec above) must have 3 periods minimum/maximum, and can
          only contain digits and periods. It must also contain 4 numbers. <BR>
      (e) If the hostname only contains one word, it is subject to the restrictions for the top label.
          (i.e. machine names) <BR>


     *************************************************************************
     
     DomainNameChecker - validation of domain names.
     
     Copyright (C) 1999  Shazron Abdullah

     * This library is free software; you can redistribute it and/or
     * modify it under the terms of the GNU Lesser General Public
     * License as published by the Free Software Foundation; either
     * version 2.1 of the License, or (at your option) any later version.
     * 
     * This library is distributed in the hope that it will be useful,
     * but WITHOUT ANY WARRANTY; without even the implied warranty of
     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     * Lesser General Public License for more details.
     * 
     * You should have received a copy of the GNU Lesser General Public
     * License along with this library; if not, write to the Free Software
     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

     @author Shazron Abdullah
 */
public class DomainNameChecker
{
    private static final String HYPHEN = "-";
    private static final String PERIOD = ".";
    private static final String ALPHAS = "abcdefghijklmnopqrstuvwxyz";
    private static final String DIGITS = "0123456789";
    private static final String ALPHADIGITS = ALPHAS + DIGITS;
    private static final String LABELCHARS = ALPHADIGITS + HYPHEN;
    private static final String IPCHARS = PERIOD + DIGITS;
    private static final String LEGALCHARS = HYPHEN + PERIOD + ALPHAS + DIGITS;

    /** Largest value allowed for one number of a numeric IP (8 bit). */
    private static final int MAX_IP_NUMBER = 255;

    /** Largest legal port number (16 bit). */
    private static final int MAX_PORT_NUMBER = 65535;

    // Plain static field without synchronization: it is shared by every caller
    // of this class and is only overwritten when a check fails (never cleared
    // on success).
    private static String lastError = "";

    /**
     * Get the last error (string) encountered.
     *
     * @return the message recorded by the most recent failed check, or the
     *         empty string if no check has failed yet
     */
    public static String getLastError()
    {
        return lastError;
    }

    /**
     * Domain names are legal based on Note (a)
     * i.e. in www.cnn.com, "www.cnn.com" is a domain name.
     *
     * A name made up solely of digits and periods is validated as a numeric
     * IP. Otherwise every label except the last must be a legal host name and
     * the last label must be a legal top label. Empty labels (leading,
     * trailing or doubled periods) are rejected.
     *
     * @param theString the domain name to check; may be null
     * @return true if the name is legal; false otherwise (see getLastError())
     */
    public static boolean isLegalDomainName(String theString)
    {
        if ( theString == null || theString.length() == 0 )
        {
            lastError = "Domain name is null or empty. --> " + theString;
            return false;
        }

        theString = toAsciiLowerCase(theString);

        if ( consistsOf(theString, IPCHARS) ) // possible numeric IP
        {
            return isLegalNumericIP(theString);
        }

        // next check if it contains legal chars
        if ( ! consistsOf(theString, LEGALCHARS) )
        {
            lastError = "Domain name does not have legal chars. --> " + theString;
            return false;
        }

        // A negative limit keeps empty labels, so "a..b", ".a" and "a." are
        // seen (and rejected) instead of being silently skipped.
        String[] labels = theString.split("\\.", -1);
        int topIndex = labels.length - 1;

        for ( int i = 0; i < topIndex; i++ )
        {
            if ( ! isLegalHostName(labels[i]) )
            {
                return false;
            }
        }

        // The last label (or the only one, e.g. a machine name) is the top label.
        return isLegalTopLabel(labels[topIndex]);
    }

    /**
     * Host names are legal based on Note (b)
     * i.e. in www.cnn.com, 'www' and 'cnn' are host names.
     *
     * A host name is a single label: it must start and end with an alpha or
     * digit, and may contain hyphens in between.
     *
     * @param theString the label to check; may be null
     * @return true if the label is legal; false otherwise (see getLastError())
     */
    public static boolean isLegalHostName(String theString)
    {
        if ( theString == null || theString.length() == 0 )
        {
            lastError = "Host name is null or empty. --> " + theString;
            return false;
        }

        theString = toAsciiLowerCase(theString);

        if ( ALPHADIGITS.indexOf(theString.charAt(0)) == -1 )
        {
            lastError = "First char of host name is not an alpha/digit. --> " + theString;
            return false;
        }

        if ( ! consistsOf(theString, LABELCHARS) )
        {
            lastError = "Host name does not have legal chars. --> " + theString;
            return false;
        }

        if ( ALPHADIGITS.indexOf(theString.charAt(theString.length() - 1)) == -1 )
        {
            lastError = "Last char of host name is not an alpha/digit. --> " + theString;
            return false;
        }

        return true;
    }


    /**
     * Top labels are legal based on Note (c)
     * i.e. in www.cnn.com, 'com' is the top label
     *
     * A top label must start with an alpha, end with an alpha or digit, and
     * may contain hyphens in between.
     *
     * @param theString the label to check; may be null
     * @return true if the label is legal; false otherwise (see getLastError())
     */
    public static boolean isLegalTopLabel(String theString)
    {
        if ( theString == null || theString.length() == 0 )
        {
            lastError = "Top label name is null or empty. --> " + theString;
            return false;
        }

        theString = toAsciiLowerCase(theString);

        if ( ALPHAS.indexOf(theString.charAt(0)) == -1 )
        {
            lastError = "First char of top label name is not an alpha. --> " + theString;
            return false;
        }

        if ( ! consistsOf(theString, LABELCHARS) )
        {
            lastError = "Top label name does not have legal chars. --> " + theString;
            return false;
        }

        if ( ALPHADIGITS.indexOf(theString.charAt(theString.length() - 1)) == -1 )
        {
            lastError = "Last char of top label name is not an alpha/digit. --> " + theString;
            return false;
        }

        return true;
    }

    /**
     * IP addresses are legal based on Note (d)
     * Numeric IPs are the form '198.168.0.1': exactly four non-empty numbers
     * separated by three periods, each number in [0..255].
     *
     * @param theString the numeric IP to check; may be null
     * @return true if the IP is legal; false otherwise (see getLastError())
     */
    public static boolean isLegalNumericIP(String theString)
    {
        if ( theString == null )
        {
            lastError = "Numeric IP is null.";
            return false;
        }

        // check that it only contains periods and digits
        if ( ! consistsOf(theString, IPCHARS) )
        {
            lastError = "Numeric IP does not have legal chars. --> " + theString;
            return false;
        }

        // check for 4 numbers (and consequently, 3 periods); the negative
        // limit keeps empty parts so "1..2.3.4" and "1.2.3.4." are not
        // mistaken for four numbers.
        String[] numbers = theString.split("\\.", -1);
        if ( numbers.length != 4 )
        {
            lastError = "Numeric IP does not have 4 numbers (or 3 periods). --> " + theString;
            return false;
        }

        for ( int i = 0; i < numbers.length; i++ )
        {
            if ( ! isNumberInRange(numbers[i], MAX_IP_NUMBER) )
            {
                lastError = "Numeric IP has an empty number or one outside [0..255]. --> " + theString;
                return false;
            }
        }
        return true;
    }

    /**
     * Port numbers are legal between 0 and 65535 inclusive.
     * Only the digits [0..9] are accepted; signs and whitespace are not.
     *
     * @param theString the port number to check; may be null
     * @return true if the port is legal; false otherwise (see getLastError())
     */
    public static boolean isLegalPortNumber(String theString)
    {
        if ( theString == null || ! isNumberInRange(theString, MAX_PORT_NUMBER) )
        {
            lastError = "Port number is not a number in [0..65535]. --> " + theString;
            return false;
        }
        return true;
    }

    /**
     * Alphas are defined as [a..z] or [A..Z]
     *
     * @param theString the string to check; must not be null
     * @return true if every character is an ASCII letter (vacuously true for
     *         the empty string)
     */
    public static boolean isAlpha(String theString)
    {
        // Can't use java.lang.Character.isLetter()
        return consistsOf(toAsciiLowerCase(theString), ALPHAS);
    }

    /**
     * Digits are defined as [0..9]
     *
     * @param theString the string to check; must not be null
     * @return true if every character is an ASCII digit (vacuously true for
     *         the empty string)
     */
    public static boolean isDigit(String theString)
    {
        // Can't use java.lang.Character.isDigit()
        return consistsOf(theString, DIGITS);
    }

    /**
     * Goes through the characters in 'testString', and will return true if all
     * the characters in testString occur in the character set 'set'.
     * testString is lower-cased first, using locale-independent rules so the
     * result does not depend on the default locale.
     *
     * @param testString the string whose characters are tested; must not be null
     * @param set        the allowed characters; must not be null
     * @return true if every lower-cased character of testString occurs in set
     */
    public static boolean isSubsetOf(String testString, String set)
    {
        return consistsOf(testString.toLowerCase(Locale.ROOT), set);
    }

    /**
     * Goes through the characters in testString, and will return true once it finds
     * any character that occurs in the character set illegalCharSet.
     * testString is lower-cased first, using locale-independent rules.
     *
     * @param testString     the string whose characters are tested; must not be null
     * @param illegalCharSet the characters to look for; must not be null
     * @return true if any lower-cased character of testString occurs in illegalCharSet
     */
    public static boolean containsCharsInSet(String testString, String illegalCharSet)
    {
        String lowered = testString.toLowerCase(Locale.ROOT);

        for ( int i = 0; i < lowered.length(); i++ )
        {
            if ( illegalCharSet.indexOf(lowered.charAt(i)) != -1 ) // the char occurs in the set
            {
                return true;
            }
        }
        return false;
    }

    /**
     * Lower-cases only the ASCII letters [A..Z]; every other character is
     * left untouched. Used by the validators so that non-ASCII characters
     * which Unicode case mapping would fold onto an ASCII letter are not
     * mistaken for one.
     */
    private static String toAsciiLowerCase(String text)
    {
        char[] chars = text.toCharArray();
        for ( int i = 0; i < chars.length; i++ )
        {
            if ( chars[i] >= 'A' && chars[i] <= 'Z' )
            {
                chars[i] = (char) (chars[i] + ('a' - 'A'));
            }
        }
        return new String(chars);
    }

    /**
     * Returns true if every character of text occurs in set, comparing
     * characters exactly (no case folding).
     */
    private static boolean consistsOf(String text, String set)
    {
        for ( int i = 0; i < text.length(); i++ )
        {
            if ( set.indexOf(text.charAt(i)) == -1 ) // the char doesn't occur in the set
            {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns true if digits is a non-empty run of [0..9] whose decimal value
     * is at most max.
     */
    private static boolean isNumberInRange(String digits, int max)
    {
        if ( digits.length() == 0 )
        {
            return false;
        }

        int value = 0;
        for ( int i = 0; i < digits.length(); i++ )
        {
            int digit = DIGITS.indexOf(digits.charAt(i));
            if ( digit == -1 )
            {
                return false;
            }
            value = value * 10 + digit;
            if ( value > max ) // stop early so long inputs cannot overflow
            {
                return false;
            }
        }
        return true;
    }

}