/*
 * Decompiled with CFR 0.152.
 */
package jp.ac.dendai.cdl.mori.wikie.io;

import java.io.IOException;
import java.io.InputStream;
import java.util.regex.Pattern;
import jp.ac.dendai.cdl.mori.wikie.io.LineReader;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.RecordReader;

public class XMLRecordReader
implements RecordReader<LongWritable, Text> {
    private CompressionCodecFactory compressionCodecs = null;
    private long start;
    private long pos;
    private long end;
    private LineReader in;
    private String startTag;
    private String endTag;

    public XMLRecordReader(Configuration job, FileSplit split) throws IOException {
        this.start = split.getStart();
        this.end = this.start + split.getLength();
        this.startTag = job.get("wikie.io.startTag");
        this.endTag = job.get("wikie.io.endTag");
        Path file = split.getPath();
        this.compressionCodecs = new CompressionCodecFactory(job);
        CompressionCodec codec = this.compressionCodecs.getCodec(file);
        FileSystem fs = file.getFileSystem(job);
        FSDataInputStream fileIn = fs.open(split.getPath());
        boolean skipFirstLine = false;
        if (codec != null) {
            this.in = new LineReader((InputStream)codec.createInputStream((InputStream)fileIn), job);
            this.end = Long.MAX_VALUE;
        } else {
            if (this.start != 0L) {
                skipFirstLine = true;
                --this.start;
                fileIn.seek(this.start);
            }
            this.in = new LineReader((InputStream)fileIn, job);
        }
        if (skipFirstLine) {
            this.start += (long)this.in.readLine(new Text());
        }
        this.pos = this.start;
    }

    public synchronized boolean next(LongWritable key, Text value) throws IOException {
        int newSize;
        if (this.pos >= this.end) {
            return false;
        }
        key.set(this.pos);
        Text tmp = new Text();
        StringBuffer page = new StringBuffer();
        boolean withinTarget = false;
        while ((newSize = this.in.readLine(tmp)) > 0) {
            this.pos += (long)newSize;
            String line = tmp.toString().trim();
            if (withinTarget) {
                page.append(line);
                if (line.matches("</" + this.endTag + ">")) {
                    value.set(page.toString());
                    if (StringUtils.isBlank(value.toString())) {
                        System.out.println();
                    }
                    return true;
                }
            }
            if (!line.matches("<" + this.startTag + ">")) continue;
            page.append(line);
            withinTarget = true;
        }
        return false;
    }

    public LongWritable createKey() {
        return new LongWritable();
    }

    public Text createValue() {
        return new Text();
    }

    public void close() throws IOException {
        if (this.in != null) {
            this.in.close();
        }
    }

    public long getPos() throws IOException {
        return this.pos;
    }

    public float getProgress() throws IOException {
        if (this.start == this.end) {
            return 0.0f;
        }
        return Math.min(1.0f, (float)(this.pos - this.start) / (float)(this.end - this.start));
    }
}

