/*
 * Copyright 2000-2004 The Apache Software Foundation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.marevol.utils.logparser;

import java.util.ArrayList;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * {@link LogParser} implementation that splits a log line into fields according
 * to a format string made of two-character <code>%x</code> field codes separated
 * by literal text.
 *
 * <p>{@link #setFormat(String)} must be called before {@link #parse(String)}.
 * The parsed format is kept in plain instance fields without any
 * synchronization.</p>
 */
public class DefaultLogParser implements LogParser
{
    /**
     * Logger for this class
     */
    private static final Log log = LogFactory.getLog(DefaultLogParser.class);

    /** Length of a field code: the '%' sign plus the one character after it. */
    private static final int CODE_LENGTH = 2;

    /** Format string last passed to {@link #setFormat(String)}; null until then. */
    private String format;

    /** Offset (Integer) of every field code inside the format string, in format order. */
    private ArrayList codeIndexList;

    /** Field codes (String) such as "%h", in format order. */
    private ArrayList codeList;

    /** Literal text (String) that follows each field code; "" when nothing follows it. */
    private ArrayList codeSeparatorList;

    public DefaultLogParser()
    {
    }

    /**
     * Sets the log format and pre-computes the field codes and the literal
     * separators between them.
     *
     * <p>Every '%' together with the single character that follows it is taken
     * as one field code. The text between a code and the next '%' (or the end
     * of the format) is the literal separator that terminates that field in a
     * log line. A '%' that is the very last character of the format has no code
     * character; it is reported through the logger and ignored.</p>
     *
     * <p>This class does not interpret the codes: {@link #parse(String)} simply
     * stores each extracted value under its code string. In particular
     * <code>%%</code>, <code>%w</code>, <code>%W</code> and the backslash
     * escapes listed below receive no special treatment here; <code>%%</code>
     * is handled like any other code and backslashes are part of the literal
     * separator text. The conventional meanings of the codes are:</p>
     *
     * <pre>
     * %S host (the client hostname, or address of the computer making the request)
     * %s numerical IP address of client (if recorded in a separate field; used when %S is empty)
     * %r file requested
     * %q query string (part of filename after ?, if recorded in a separate field)
     * %B browser
     * %A browser with +'s instead of spaces
     * %f referrer
     * %u user (tip: a cookie or session id can usefully be defined as %u too)
     * %v virtual host (the server hostname, also called the virtual domain)
     * %d day of the month
     * %m month in digits
     * %M month, three letter English abbreviation
     * %y year, last two digits
     * %Y year, four digits
     * %Z year, two or four digits (less efficient)
     * %h hour of the day
     * %n minute of the hour
     * %a a or A for am, or p or P for pm, if %h is in the 12-hour clock. (So to match "am" you need %am and to match "AM" you need %aM)
     * %U "Unix time" (seconds since beginning of 1970, GMT). If it includes decimals, use %U.%j
     * %b number of bytes transferred
     * %t processing time in seconds
     * %T processing time in milliseconds
     * %D processing time in microseconds
     * %c HTTP status code
     * %C code words used instead of HTTP status code in some servers -- only used internally
     * %j junk: ignore this field (field can be empty too)
     * %w white space: spaces or tabs
     * %W optional white space
     * %% % sign
     * \n new line
     * \t tab stop
     * \\ single backslash
     * </pre>
     *
     * @param format the format string; must not be null
     * @throws NullPointerException if <code>format</code> is null (the
     *         previously configured format is left untouched in that case)
     */
    public void setFormat(String format)
    {
        if (format == null)
        {
            // Fail before touching any state so a bad call cannot leave the parser half configured.
            throw new NullPointerException("format must not be null");
        }

        if (log.isDebugEnabled())
        {
            log.debug("setFormat(String) - start format=" + format);
        }

        this.format = format;

        codeIndexList = new ArrayList();
        codeList = new ArrayList();
        codeSeparatorList = new ArrayList();

        int index = format.indexOf('%');
        while (index >= 0)
        {
            int codeEnd = index + CODE_LENGTH;
            if (codeEnd > format.length())
            {
                // A '%' as the last character has no code letter. Nothing can follow it,
                // so stop scanning; the three lists stay the same size.
                log.error("Invalid log format. Dangling '%' at index " + index + " in format: " + format);
                break;
            }

            codeList.add(format.substring(index, codeEnd));
            codeIndexList.add(Integer.valueOf(index));

            // Everything up to the next '%' (or the end of the format) is the literal
            // text that terminates this field in a log line.
            int nextIndex = format.indexOf('%', codeEnd);
            if (nextIndex >= 0)
            {
                codeSeparatorList.add(format.substring(codeEnd, nextIndex));
            }
            else
            {
                codeSeparatorList.add(format.substring(codeEnd));
            }
            index = nextIndex;
        }

        if (log.isDebugEnabled())
        {
            for (int i = 0; i < codeIndexList.size(); i++)
            {
                log.debug("setFormat(String) - i=" + i + ", codeIndexList.get(i)=" + codeIndexList.get(i));
            }
            for (int i = 0; i < codeList.size(); i++)
            {
                log.debug("setFormat(String) - i=" + i + ", codeList.get(i)=" + codeList.get(i));
            }
            for (int i = 0; i < codeSeparatorList.size(); i++)
            {
                log.debug("setFormat(String) - i=" + i + ", codeSeparatorList.get(i)='" + codeSeparatorList.get(i)
                        + "'");
            }
            log.debug("setFormat(String) - end");
        }
    }

    /**
     * Returns the format string last passed to {@link #setFormat(String)}.
     *
     * @return the current format, or null if none has been set
     */
    public String getFormat()
    {
        return format;
    }

    /**
     * Splits one log line into fields according to the current format.
     *
     * <p>Parsing starts at the offset the first field code has in the format
     * string; the text before that offset is skipped without being compared to
     * the format. Each field then extends up to the next occurrence of its
     * literal separator. A field whose separator is empty takes the whole
     * remainder of the line. Every value is stored in the returned entry under
     * its field code (for example "%h").</p>
     *
     * <p>If the line is shorter than the leading literal text, or a separator
     * cannot be found, parsing stops at that point and the entry holds only the
     * fields extracted so far.</p>
     *
     * @param logEntry the log line to parse; must not be null
     * @return a new entry holding the extracted fields; empty when the format
     *         contains no field codes or the line does not match at all
     * @throws IllegalStateException if {@link #setFormat(String)} has not been called
     * @throws NullPointerException if <code>logEntry</code> is null
     */
    public LogEntry parse(String logEntry)
    {
        if (log.isDebugEnabled())
        {
            log.debug("parse(String) -  : logEntry=" + logEntry);
        }

        if (codeList == null)
        {
            throw new IllegalStateException("No log format configured; call setFormat(String) before parse(String).");
        }

        LogEntry entry = new LogEntry();
        if (codeList.isEmpty())
        {
            // A format without any field code has nothing to extract.
            return entry;
        }

        int start = ((Integer) codeIndexList.get(0)).intValue();
        if (start > logEntry.length())
        {
            // The line ends before the first field would begin.
            if (log.isDebugEnabled())
            {
                log.debug("parse(String) -  : line is shorter than the leading literal text, start=" + start);
            }
            return entry;
        }

        for (int i = 0; i < codeList.size(); i++)
        {
            String code = (String) codeList.get(i);
            String separator = (String) codeSeparatorList.get(i);

            // For an empty separator indexOf returns start itself, so end is never negative then.
            int end = logEntry.indexOf(separator, start);
            if (log.isDebugEnabled())
            {
                log.debug("parse(String) -  : start=" + start + ", end=" + end);
            }

            if (end < 0)
            {
                // The line does not match the format from here on. Continuing would derive
                // the next start offset from -1 and cut later fields from the wrong place.
                if (log.isDebugEnabled())
                {
                    log.debug("parse(String) -  : separator '" + separator + "' not found after " + code);
                }
                break;
            }

            if (separator.length() == 0)
            {
                // Nothing delimits this field, so it takes the rest of the line.
                entry.put(code, logEntry.substring(start));
            }
            else
            {
                entry.put(code, logEntry.substring(start, end));
            }
            start = end + separator.length();
        }

        return entry;
    }
}
