/*
 * Copyright 1997-1999 Sun Microsystems, Inc. All Rights Reserved.
 */

package javax.mail.internet;

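/**
 * This class tokenizes RFC822 and MIME headers into the basic symbols
 * specified by RFC822 and MIME.<p>
 * 
 * This class handles folded headers (i.e. headers containing embedded
 * CRLF SPACE sequences). The folds are removed in the returned tokens.
 */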
public final class HeaderTokenizer {

	/**
	 * A single token returned by the enclosing <code>HeaderTokenizer</code>.
	 * Instances are immutable.
	 */
	public static final class Token {

		private final int type;
		private final String value;

		/**
		 * Token type indicating an ATOM.
		 */
		public static final int ATOM = -1;

		/**
		 * Token type indicating a quoted string.
		 * The value holds the string without the surrounding quotes.
		 */
		public static final int QUOTEDSTRING = -2;

		/**
		 * Token type indicating a comment.
		 * The value holds the comment text without the outermost
		 * start and end markers.
		 */
		public static final int COMMENT = -3;

		/**
		 * Token type indicating the end of the input.
		 */
		public static final int EOF = -4;

		/**
		 * Creates a token.
		 * 
		 * @param type the token type
		 * @param value the token value
		 */
		public Token(final int type, final String value) {
			this.type = type;
			this.value = value;
		}

		/**
		 * Returns the type of this token. If the token is a delimiter or a
		 * control character, the type is that character itself, converted
		 * to an integer. Otherwise it is one of the following:
		 * <ul>
		 * <li><code>ATOM</code> a sequence of ASCII characters delimited by
		 * SPACE, CTL, "(", &lt;"&gt; or the specified SPECIALS
		 * <li><code>QUOTEDSTRING</code> a sequence of ASCII characters within quotes
		 * <li><code>COMMENT</code> a sequence of ASCII characters within "(" and ")"
		 * <li><code>EOF</code> end of header
		 * </ul>
		 * 
		 * @return the token type
		 */
		public int getType() {
			return type;
		}

		/**
		 * Returns the value of this token.
		 * For a quoted string this is the body of the string, without the
		 * quotes. For a comment this is the body of the comment. For a
		 * delimiter or control character it is that single character.
		 * The EOF token produced by the tokenizer carries a null value.
		 * 
		 * @return the token value
		 */
		public String getValue() {
			return value;
		}

	}

	private String string;			// the string to be tokenized
	private boolean skipComments;	// should comments be skipped ?
	private String delimiters;		// delimiter string
	private int currentPos;		// current parse position
	private int maxPos;			// string length
	private int nextPos;			// track start of next Token for next()
	private int peekPos;			// track start of next Token for peek()

	/**
	 * The delimiter set for RFC822 headers.
	 */
	public final static String RFC822 = "()<>@,;:\\\"\t .[]";

	/**
	 * The delimiter set for MIME headers.
	 */
	public final static String MIME = "()<>@,;:\\\"\t []/?=";

	// Shared EOF token; safe to share because Token is immutable.
	private final static Token EOFToken = new Token(Token.EOF, null);

	/**
	 * Creates a tokenizer over the given header.
	 * 
	 * @param header the header to tokenize; null is treated as an empty header
	 * @param delimiters the set of delimiter characters used to delimit ATOMS,
	 * usually <code>RFC822</code> or <code>MIME</code>; must not be null
	 * @param skipComments if true, comments are skipped and not returned as tokens
	 */
	public HeaderTokenizer(
		final String header,
		final String delimiters,
		final boolean skipComments) {

		string = (header == null) ? "" : header; // paranoia ?!
		this.skipComments = skipComments;
		this.delimiters = delimiters;
		currentPos = nextPos = peekPos = 0;
		maxPos = string.length();
	}

	/**
	 * Creates a tokenizer over the given header. Comments are skipped
	 * and not returned as tokens.
	 * 
	 * @param header the header to tokenize
	 * @param delimiters the delimiter characters to use
	 */
	public HeaderTokenizer(final String header, final String delimiters) {
		this(header, delimiters, true);
	}

	/**
	 * Creates a tokenizer over the given header, using the predefined
	 * <code>RFC822</code> delimiters to delimit ATOMS. Comments are skipped
	 * and not returned as tokens.
	 * 
	 * @param header the header to tokenize
	 */
	public HeaderTokenizer(final String header)  {
		this(header, RFC822);
	}

	/**
	 * Parses the next token from the header.<p>
	 * 
	 * Clients typically call this method in a loop, consuming tokens
	 * until an EOF token is returned.
	 * 
	 * @return the next Token
	 * @throws ParseException if the parse fails
	 */
	public Token next() throws ParseException { 
		currentPos = nextPos; // setup currentPos
		Token tk = getNext();
		nextPos = peekPos = currentPos; // update currentPos and peekPos
		return tk;
	}

	/**
	 * Peeks at the next token without removing it from the parse stream.
	 * Calling this method repeatedly returns successive tokens, until
	 * <code>next()</code> is called, which resets the peek position.<p>
	 * 
	 * @return the next Token
	 * @throws ParseException if the parse fails
	 */
	public Token peek() throws ParseException {
		currentPos = peekPos; // setup currentPos
		Token tk = getNext();
		peekPos = currentPos; // update peekPos
		return tk;
	}

	/**
	 * Returns the part of the header that has not yet been consumed by
	 * <code>next()</code>. Tokens that were only peeked at are still
	 * included.
	 * 
	 * @return the rest of the header, or null if the tokenizer is already
	 * positioned at the end of the header
	 */
	public String getRemainder() {
		// Honor the documented contract: nothing left means null,
		// not an empty string.
		if (nextPos >= maxPos)
			return null;
		return string.substring(nextPos);
	}

	/*
	 * Return the next token starting from 'currentPos'. After the
	 * parse, 'currentPos' is updated to point to the start of the 
	 * next token.
	 */
	private Token getNext() throws ParseException {
		// If we're already at end of string, return EOF
		if (currentPos >= maxPos)
			return EOFToken;

		// Skip white-space, position currentPos beyond the space
		if (skipWhiteSpace() == Token.EOF)
			return EOFToken;

		int start; 
		// Set when the token body contains an escape or a CR and therefore
		// has to be cleaned up by filterToken() before being returned.
		boolean filter = false;

		char c = string.charAt(currentPos);

		// Check or Skip comments and position currentPos
		// beyond the comment
		while (c == '(') {
			// Parsing comment; 'nesting' tracks the depth of nested parentheses.
			int nesting;
			for (start = ++currentPos, nesting = 1; nesting > 0 && currentPos < maxPos; currentPos++) {
				c = string.charAt(currentPos);
				if (c == '\\') {  // Escape sequence
					currentPos++; // skip the escaped character
					filter = true;
				} else if (c == '\r')
					filter = true;
				else if (c == '(')
					nesting++;
				else if (c == ')')
					nesting--;
			}
			if (nesting != 0)
				throw new ParseException("Unbalanced comments");

			if (!skipComments) {
				// Return the comment, if we are asked to.
				// Note that the comment start & end markers are ignored.
				String s;
				if (filter) // need to go thru the token again.
					s = filterToken(string, start, currentPos-1);
				else
					s = string.substring(start,currentPos-1);

				return new Token(Token.COMMENT, s);
			}

			// Skip any whitespace after the comment.
			if (skipWhiteSpace() == Token.EOF)
				return EOFToken;
			c = string.charAt(currentPos);
		}

		// Check for quoted-string and position currentPos 
		//  beyond the terminating quote
		if (c == '"') {
			for (start = ++currentPos; currentPos < maxPos; currentPos++) {
				c = string.charAt(currentPos);
				if (c == '\\') { // Escape sequence
					currentPos++; // skip the escaped character
					filter = true;
				} else if (c == '\r')
					filter = true;
				else if (c == '"') {
					currentPos++;
					String s;

					if (filter)
						s = filterToken(string, start, currentPos-1);
					else
						s = string.substring(start,currentPos-1);

					return new Token(Token.QUOTEDSTRING, s);
				}
			}
			throw new ParseException("Unbalanced quoted string");
		}

		// Check for SPECIAL or CTL. 040 is SPACE and 0177 is DEL (octal);
		// such a character is returned as a token whose type is the
		// character itself.
		if (c < 040 || c >= 0177 || delimiters.indexOf(c) >= 0) {
			currentPos++; // re-position currentPos
			return new Token(c, String.valueOf(c));
		}

		// Check for ATOM
		for (start = currentPos; currentPos < maxPos; currentPos++) {
			c = string.charAt(currentPos);
			// ATOM is delimited by either SPACE, CTL, "(", <"> 
			// or the specified SPECIALS
			if (c < 040 || c >= 0177 || c == '(' || c == ' ' || c == '"' || delimiters.indexOf(c) >= 0)
				break;
		}
		return new Token(Token.ATOM, string.substring(start, currentPos));
	}

	/*
	 * Skip SPACE, HT, CR and NL starting at 'currentPos'. Returns the
	 * position of the first non-whitespace character, or Token.EOF if
	 * the end of the string was reached.
	 */
	private int skipWhiteSpace() {
		for (; currentPos < maxPos; currentPos++) {
			char c;
			if (((c = string.charAt(currentPos)) != ' ') && (c != '\t') && (c != '\r') && (c != '\n'))
				return currentPos;
		}
		return Token.EOF;
	}

	/* Process escape sequences and embedded LWSPs from a comment or
	 * quoted string. Characters of 's' in the range [start, end) are
	 * copied, except that an unescaped '\' is dropped (the character
	 * after it is copied verbatim), an unescaped CR is dropped, and an
	 * LF immediately following such a CR is dropped as well.
	 */
	private static String filterToken(final String s, final int start, final int end) {
		StringBuffer sb = new StringBuffer();
		char c;
		boolean gotEscape = false;
		boolean gotCR = false;

		for (int i = start; i < end; i++) {
			c = s.charAt(i);
			if (c == '\n' && gotCR) {
				// This LF is part of an unescaped 
				// CRLF sequence (i.e, LWSP). Skip it.
				gotCR = false;
				continue;
			}

			gotCR = false;
			if (!gotEscape) {
				// Previous character was NOT '\'
				if (c == '\\') // skip this character
					gotEscape = true;
				else if (c == '\r') // skip this character
					gotCR = true;
				else // append this character
					sb.append(c);
			} else {
				// Previous character was '\'. So no need to 
				// bother with any special processing, just 
				// append this character
				sb.append(c);
				gotEscape = false;
			}
		}
		return sb.toString();
	}

}
