/*******************************************************************************
 * blancoCsv
 * Copyright (C) 2005-2012 Toshiki IGA
 * 
 * This library is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this library.  If not, see <http://www.gnu.org/licenses/>.
 *******************************************************************************/
/*******************************************************************************
 * Copyright (c) 2005-2012 Toshiki IGA and others.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 * 
 * Contributors:
 *      Toshiki IGA - initial API and implementation
 *******************************************************************************/
/*******************************************************************************
 * Copyright 2005-2012 Toshiki IGA and others.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *******************************************************************************/
package blanco.csv.runtime;

import java.io.BufferedReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Lexical parser that reads CSV input one record at a time and returns each
 * record as a list of field strings.
 * 
 * <UL>
 * <LI>The grammar is based on RFC 4180 (<code>file</code>, <code>record</code>,
 * <code>field</code>, <code>escaped</code>, <code>non-escaped</code>).</LI>
 * <LI>Parsing is deliberately lenient: a bare CR or a bare LF ends a record,
 * blank lines between records are skipped, quoted and unquoted text that
 * directly follow each other are concatenated into one field, and a quoted
 * section that is still open at the end of the input is accepted as is.</LI>
 * <LI>NOTE(review): the original Japanese class comment was mis-encoded; it
 * appears to say that this class is kept for compatibility with earlier
 * blancoCsv versions - confirm against the upstream source.</LI>
 * </UL>
 * 
 * @author Toshiki Iga
 * @see <a href="http://www.ietf.org/rfc/rfc4180.txt">RFC 4180</a>
 */
public class BlancoCsvLexicalParser {
	/**
	 * The reader this parser consumes. It becomes <code>null</code> once
	 * {@link #close()} has been called.
	 */
	protected BufferedReader reader = null;

	/**
	 * The field delimiter.
	 * 
	 * Defaults to a comma; the two-argument constructor allows another
	 * character to be used instead.
	 */
	protected char delimiter = ',';

	/**
	 * Creates a parser that uses a comma as the field delimiter.
	 * 
	 * @param reader
	 *            the reader to parse CSV data from.
	 */
	public BlancoCsvLexicalParser(final BufferedReader reader) {
		this.reader = reader;
	}

	/**
	 * Creates a parser that uses the given character as the field delimiter.
	 * 
	 * @param reader
	 *            the reader to parse CSV data from.
	 * @param delimiter
	 *            the character that separates fields within a record.
	 */
	public BlancoCsvLexicalParser(final BufferedReader reader,
			final char delimiter) {
		this.reader = reader;
		this.delimiter = delimiter;
	}

	/**
	 * Closes the underlying reader, if there still is one, and detaches it
	 * from this parser. Calling this method again afterwards does nothing.
	 * 
	 * @throws IOException
	 *             if closing the reader fails.
	 */
	public void close() throws IOException {
		if (reader != null) {
			reader.close();
			reader = null;
		}
	}

	/**
	 * Reads the next record and returns its fields.
	 * 
	 * Line breaks in front of the record are skipped, so blank lines never
	 * produce a record. Reaching the end of the input does not detach the
	 * reader: further calls keep returning <code>null</code> and
	 * {@link #close()} still closes the underlying reader.
	 * 
	 * @return the fields of the next record, or <code>null</code> when the
	 *         end of the input has been reached.
	 * @throws IOException
	 *             if reading from the underlying reader fails.
	 * @throws IllegalArgumentException
	 *             if this parser has no reader (it was constructed with
	 *             <code>null</code> or has already been closed).
	 */
	public final List<String> read() throws IOException {
		if (reader == null) {
			throw new IllegalArgumentException(
					"read() was called while reader is null."); //$NON-NLS-1$
		}

		// file = [header CRLF] record *(CRLF record) [CRLF]

		for (;;) {
			final int next = peek();
			if (next < 0) {
				// End of input. The reader is intentionally kept so that
				// close() can still release it.
				return null;
			}

			if (isLineBreak(next)) {
				// Consume the record separator (or a blank line) and retry.
				reader.read();
				continue;
			}

			final List<String> record = parseRecord();
			if (record.isEmpty()) {
				return null;
			}
			return record;
		}
	}

	/**
	 * Parses one record starting at the current reader position.
	 * 
	 * The header line, if any, is parsed by this method as well. The line
	 * break that ends the record is left unread.
	 * 
	 * @return the fields of the record; an empty string is added for every
	 *         position where a delimiter, a line break or the end of the
	 *         input is not preceded by field text.
	 * @throws IOException
	 *             if reading from the underlying reader fails.
	 */
	List<String> parseRecord() throws IOException {
		// record = field *(COMMA field)

		final List<String> fieldList = new ArrayList<String>();

		// True when the token consumed last was a field rather than a
		// delimiter; used to detect empty fields.
		boolean previousWasField = false;

		for (;;) {
			final int next = peek();
			if (next < 0 || isLineBreak(next)) {
				// End of the record. A record that is empty so far or that
				// ends in a delimiter still owes one empty field.
				if (!previousWasField) {
					fieldList.add(""); //$NON-NLS-1$
				}
				return fieldList;
			}

			if (next == delimiter) {
				reader.read();
				if (!previousWasField) {
					// Two delimiters in a row, or a leading delimiter.
					fieldList.add(""); //$NON-NLS-1$
				}
				previousWasField = false;
			} else {
				fieldList.add(parseField());
				previousWasField = true;
			}
		}
	}

	/**
	 * Parses one field starting at the current reader position.
	 * 
	 * Quoted and unquoted sections that follow each other without a
	 * delimiter in between are concatenated. The delimiter or line break
	 * that ends the field is left unread for the caller.
	 * 
	 * @return the text of the field.
	 * @throws IOException
	 *             if reading from the underlying reader fails.
	 */
	String parseField() throws IOException {
		// field = (escaped / non-escaped)

		final StringBuilder field = new StringBuilder();

		for (;;) {
			final int next = peek();
			if (next < 0 || isLineBreak(next) || next == delimiter) {
				break;
			}

			if (next == '"') {
				field.append(parseEscaped());
			} else {
				field.append(parseNonEscaped());
			}
		}

		return field.toString();
	}

	/**
	 * Parses an unquoted section: every character up to, but not including,
	 * the next delimiter, line break or the end of the input. Double quotes
	 * met here are copied literally.
	 * 
	 * @return the text that was read.
	 * @throws IOException
	 *             if reading from the underlying reader fails.
	 */
	String parseNonEscaped() throws IOException {
		final StringBuilder text = new StringBuilder();

		for (;;) {
			final int next = peek();
			if (next < 0 || isLineBreak(next) || next == delimiter) {
				break;
			}
			text.append((char) reader.read());
		}

		return text.toString();
	}

	/**
	 * Parses a section enclosed in double quotes.
	 * 
	 * The reader must be positioned on the opening quote. Inside the quotes
	 * delimiters and line breaks are ordinary text and two consecutive
	 * double quotes stand for one literal double quote. Parsing stops after
	 * the closing quote, or at the end of the input if the quote is never
	 * closed.
	 * 
	 * @return the text between the quotes with doubled quotes collapsed.
	 * @throws IOException
	 *             if reading from the underlying reader fails.
	 * @throws IllegalArgumentException
	 *             if the next character is not a double quote.
	 */
	String parseEscaped() throws IOException {
		final StringBuilder text = new StringBuilder();

		// Consume the opening quote; the end of the input (-1) is rejected
		// by the same comparison.
		final int opening = reader.read();
		if (opening != '"') {
			throw new IllegalArgumentException(
					"A quoted CSV field must start with a double quote."); //$NON-NLS-1$
		}

		for (;;) {
			final int current = reader.read();
			if (current < 0) {
				// Unterminated quoted section: keep what was read so far.
				break;
			}

			if (current != '"') {
				text.append((char) current);
				continue;
			}

			if (peek() == '"') {
				// A doubled quote is an escaped literal quote.
				reader.read();
				text.append('"');
				continue;
			}

			// A lone quote closes the section; whatever follows is left
			// unread for the caller.
			break;
		}

		return text.toString();
	}

	/**
	 * Returns the next character without consuming it.
	 * 
	 * @return the next character, or a negative value at the end of the
	 *         input.
	 * @throws IOException
	 *             if reading from the underlying reader fails.
	 */
	private int peek() throws IOException {
		reader.mark(1);
		final int next = reader.read();
		reader.reset();
		return next;
	}

	/**
	 * Tells whether the given character is a carriage return or a line feed.
	 * 
	 * @param c
	 *            the character to test, as returned by the reader.
	 * @return <code>true</code> for CR (0x0d) or LF (0x0a).
	 */
	private static boolean isLineBreak(final int c) {
		return c == 0x0d || c == 0x0a;
	}
}
