=pod
CSVをフィルタ

patterns
    正規表現を","で区切る
reverse
    0:  合致するものを出力
    1:  合致しないものを出力
html_text
    defined:
        判定対象列をHTMLと仮定し、HTML中のテキスト部分を検索対象とする
=cut

use strict;
use warnings;

use Getopt::Long;
use IO::File;
use Data::Dumper;
use utf8;
use Encode;

use Util;
use CSVAsHash;

# Console output is encoded as CP932, the same encoding main_proc() uses to
# decode the command-line arguments. Encode's "sjis" (plain Shift_JIS) lacks
# the Microsoft vendor mappings, so text decoded as cp932 (e.g. the fullwidth
# tilde or NEC/IBM extension characters) could not be written back through it
# and would be printed as \x{...} escapes.
binmode STDOUT => ":encoding(cp932)";
binmode STDERR => ":encoding(cp932)";

# Option defaults; GetOptions in main_proc() overwrites them with whatever is
# given on the command line.
my %opts=(header=>1,reverse=>0);
main_proc();

# Script entry point.
#
# Parses the command line into the file-level %opts hash, loads the input CSV
# through CSVAsHash, filters its rows with CSVAsHash::filter_aoh and writes
# the filtered rows to --out_file.
#
# Options:
#   --in_file    input CSV path; may contain wildcards (first match is used)
#   --out_file   output CSV path
#   --col_name   name of the column handed to the filter
#   --patterns   comma-separated list of regular expressions
#   --tab        use TAB instead of "," as the field separator
#   --header     passed to CSVAsHash->new as csv_header
#   --reverse    passed to filter_aoh as reverse
#   --html_text  when given, filter_aoh is called with html_text=>1
#
# String arguments are expected to arrive as CP932 bytes and are decoded
# before they are used as text.
#
# Dies when option parsing fails, when a required option is missing, or when
# --in_file matches no file.
sub main_proc{
    GetOptions(\%opts,'in_file=s','out_file=s','col_name=s','patterns=s','tab','header:i','reverse:i','html_text')
        or die "GetOptions";
    print decode("cp932",Dumper(\%opts));

    # Fail early with a clear message instead of letting an undefined value
    # leak into glob/decode/split and the CSVAsHash calls below.
    for my $required (qw(in_file out_file col_name patterns)) {
        defined($opts{$required})
            or die "--$required を指定してください\n";
    }

    my $sep_char = defined($opts{tab}) ? "\t" : ",";

    # in_file may contain wildcards; only the first matching file is processed.
    my @files = glob $opts{in_file};
    if (!@files) {
        die decode("cp932",$opts{in_file})."はありません";
    }
    my $in_file = $files[0];

    my $csvobj = CSVAsHash->new(csv_header=>$opts{header},csv_sep_char=>$sep_char);
    my $aoh_in = $csvobj->csv2aoh($in_file);
    my $org_cnt = scalar(@$aoh_in);
    printf "%s:%d件入力\n",decode("cp932",$in_file),$org_cnt;

    my $col_name = decode("cp932",$opts{col_name});
    my @patterns = split(/,/,decode("cp932",$opts{patterns}));

    # Build the trailing named arguments as an ordered list so that
    # filter_aoh receives them in a fixed order; html_text is only passed
    # when the flag was given on the command line.
    my @filter_args = (reverse=>$opts{reverse});
    push @filter_args, html_text=>1 if defined($opts{html_text});
    my $aoh_filter = CSVAsHash::filter_aoh($aoh_in,$col_name,\@patterns,@filter_args);

    $csvobj->aoh2csv($aoh_filter,$opts{out_file});
    printf "%s:%d件出力しました\n",decode("cp932",$opts{out_file}),scalar(@$aoh_filter);
}

