From owner-man-jp@jp.FreeBSD.org Mon Sep  8 13:16:58 2003
Received: (from daemon@localhost)
	by castle.jp.FreeBSD.org (8.11.6p2+3.4W/8.11.3) id h884GwM74378;
	Mon, 8 Sep 2003 13:16:58 +0900 (JST)
	(envelope-from owner-man-jp@jp.FreeBSD.org)
Received: from sccrmhc13.comcast.net (sccrmhc13.comcast.net [204.127.202.64])
	by castle.jp.FreeBSD.org (8.11.6p2+3.4W/8.11.3) with ESMTP/inet id h884Gvw74372
	for <man-jp@jp.freebsd.org>; Mon, 8 Sep 2003 13:16:57 +0900 (JST)
	(envelope-from horikawa@jp.freebsd.org)
Received: from localhost (12-252-35-167.client.attbi.com[12.252.35.167](untrusted sender))
          by comcast.net (sccrmhc13) with SMTP
          id <2003090804164901600g0re0e>; Mon, 8 Sep 2003 04:16:49 +0000
Message-Id: <20030907.221446.112627828.horikawa@attbi.com>
To: man-jp@jp.FreeBSD.org, koizumistr@minos.ocn.ne.jp
From: Kazuo Horikawa <horikawa@jp.FreeBSD.org>
In-Reply-To: <89451D70-DAF9-11D7-9845-00039368D9D2@minos.ocn.ne.jp>
References: <89451D70-DAF9-11D7-9845-00039368D9D2@minos.ocn.ne.jp>
X-Mailer: Mew version 3.2 on Emacs 21.3 / Mule 5.0 (SAKAKI)
Mime-Version: 1.0
Content-Type: Text/Plain; charset=iso-2022-jp
Content-Transfer-Encoding: 7bit
Reply-To: man-jp@jp.FreeBSD.org
Precedence: list
Date: Sun, 07 Sep 2003 22:14:46 -0600
X-Sequence: man-jp 4735
Subject: [man-jp 4735] Re: jman's bug?(3)
Sender: owner-man-jp@jp.FreeBSD.org
X-Originator: horikawa@jp.FreeBSD.org
X-Distribute: distribute version 2.1 (Alpha) patchlevel 24e+030902

$BKY@n$G$9!#(B

Koizumi Satoru <koizumistr@minos.ocn.ne.jp> wrote:
> $BFs!"(BFreeBSD $BF|K\8l%^%K%e%"%k8!:w(B(http://www.jp.freebsd.org/man-
> jp/search.html)$B$G(B
> atacontrol$B$rI=<($5$;$k$H!":n<T$N$H$3$m$,2<5-$N$h$&$K$J$C$F$7$^$$$^$9!#(B
> 
> ===
> $B:n<T(B
>       atacontrol $B%f!<%F%#%j%F%#$O!"(B S/ren Schmidt <sos@FreeBSD.org> $B$,=q(B
> $B$-$^$7$?!#(B
> 
>       $B$3$N%^%K%e%"%k%Z!<%8$O!"(B S/ren Schmidt <sos@FreeBSD.org> $B$,=q(B
> $B$-$^$7$?!#(B
> ===
> S/ren $B$HI=<($5$l$F$$$k$H$3$m$O!"(B $B<j85$N(B jman $B$G$O(B Soren $B$K$J$C$F$$$^$9!#(B

$B$3$l!"D>$7$?$D$b$j$G$9!#(B

+^Ho
+^H+^Ho^Ho
_^H+^H_^Ho

$B$NI=<($K$bLdBj$,$"$C$?$N$G!"$$$:$l$b(B &bull; $B$K$J$k$h$&$K$7$^$7$?!#(B

$B:#$N$H$3$m<!$N$h$&$J46$8$GJQ49$7$F$$$^$9!#(B

groff_char.7 $B$r8+$k$H$^$@>/$7(B HTML $BCf$K(B ^H $B$,4^$^$l$F$7$^$C$F$$$^$9$,!"(B
$B$3$i$i$bL5$/$J$k$h$&$K9M$($?$$$H;W$$$^$9!#(B

#! /usr/bin/perl

#
# Tables and subroutine to substitute meta characters:
# (1) First of all, we replace & with &amp;
# (2) Then, we substitute -mtty-char output, e.g., replace "\cHo with &ouml;
# (3) Lastly, we substitute <, >, and "
#


# Stage 1 table: the ampersand is escaped before anything else, because
# every later stage emits entities that themselves start with '&'.
%meta1 = ( q{&} => q{&amp;} );

# Stage 2 table for the long (four-character) overstrike sequences.
# Keys are regex source text; "\cH" is the backspace character.
# cnvt_meta applies this table before the shorter sequences in %meta2.
%meta2long = (
    q{\+\cH\+\cHo\cHo} => q{&bull;},	# "+^H+^Ho^Ho" -- overstrike bullet?
    q{_\cH\+\cH_\cHo}  => q{&bull;},	# "_^H+^H_^Ho" -- underline bullet?
);

# Stage 2 table: overstrike sequences found in -mtty-char output, mapped to
# HTML entities.  Every key is regex source text (cnvt_meta interpolates it
# into s///), so regex metacharacters are backslash-escaped and "\cH" stands
# for the backspace character.  Entity names that appear only as comments
# have no mapping in this table.
%meta2 = (
	 # BEGIN Latin-1 characters
	 # nbsp
	 ',\cHi'	=>	'&iexcl;',
	 '/\cHc'	=>	'&cent;',
	 '-\cHL'	=>	'&pound;',
	 'o\cHx'	=>	'&curren;',
	 '=\cHY'	=>	'&yen;',
	 # brvbar
	 # sect
	 # uml
	 # copy
	 # ordf
	 # laquo
	 # not
	 # shy
	 # reg
	 # macr
	 # deg
	 # plusmn
	 # sup2
	 # sup3
	 # acute
	 ',\cHu'	=>	'&micro;',
	 # para
	 # middot
	 # cedil
	 # sup1
	 # ordm
	 # raquo
	 # frac14
	 # frac12
	 # frac34
	 # NOTE(review): '^Hc reads like an accented c rather than an inverted
	 # question mark -- confirm this row against tty-char.tmac.
	 '\'\cHc'	=>	'&iquest;',
	 '`\cHA'	=>	'&Agrave;',
	 '\'\cHA'	=>	'&Aacute;',
	 '\^\cHA'	=>	'&Acirc;',
	 '~\cHA'	=>	'&Atilde;',
	 '"\cHA'	=>	'&Auml;',
	 'o\cHA'	=>	'&Aring;',
	 # AElig
	 ',\cHC'	=>	'&Ccedil;',
	 '`\cHE'	=>	'&Egrave;',
	 '\'\cHE'	=>	'&Eacute;',
	 '\^\cHE'	=>	'&Ecirc;',
	 '"\cHE'	=>	'&Euml;',
	 '`\cHI'	=>	'&Igrave;',
	 '\'\cHI'	=>	'&Iacute;',
	 '\^\cHI'	=>	'&Icirc;',
	 '"\cHI'	=>	'&Iuml;',
	 '-\cHD'	=>	'&ETH;',
	 '~\cHN'	=>	'&Ntilde;',
	 '`\cHO'	=>	'&Ograve;',
	 '\'\cHO'	=>	'&Oacute;',
	 '\^\cHO'	=>	'&Ocirc;',
	 '~\cHO'	=>	'&Otilde;',
	 '"\cHO'	=>	'&Ouml;',
	 # &times;
	 '/\cHO'	=>	'&Oslash;',
	 '`\cHU'	=>	'&Ugrave;',
	 '\'\cHU'	=>	'&Uacute;',
	 '\^\cHU'	=>	'&Ucirc;',
	 '"\cHU'	=>	'&Uuml;',
	 '\'\cHY'	=>	'&Yacute;',
	 'I\cHb'	=>	'&THORN;',
	 '`\cHa'	=>	'&agrave;',
	 '\'\cHa'	=>	'&aacute;',
	 '\^\cHa'	=>	'&acirc;',
	 '~\cHa'	=>	'&atilde;',
	 '"\cHa'	=>	'&auml;',
	 'o\cHa'	=>	'&aring;',
	 # &aelig;
	 ',\cHc'	=>	'&ccedil;',
	 '`\cHe'	=>	'&egrave;',
	 '\'\cHe'	=>	'&eacute;',
	 '\^\cHe'	=>	'&ecirc;',
	 '"\cHe'	=>	'&euml;',
	 '`\cHi'	=>	'&igrave;',
	 '\'\cHi'	=>	'&iacute;',
	 '\^\cHi'	=>	'&icirc;',
	 '"\cHi'	=>	'&iuml;',
	 # &eth;	`\cH'\cHo
	 '~\cHn'	=>	'&ntilde;',
	 '`\cHo'	=>	'&ograve;',
	 '\'\cHo'	=>	'&oacute;',
	 '\^\cHo'	=>	'&ocirc;',
	 '~\cHo'	=>	'&otilde;',
	 '"\cHo'	=>	'&ouml;',
	 # &divide;
	 '/\cHo'	=>	'&oslash;',
	 '`\cHu'	=>	'&ugrave;',
	 '\'\cHu'	=>	'&uacute;',
	 '\^\cHu'	=>	'&ucirc;',
	 '"\cHu'	=>	'&uuml;',
	 '\'\cHy'	=>	'&yacute;',
	 'p\cHb'	=>	'&thorn;',
	 '"\cHy'	=>	'&yuml;',
	 # END Latin-1 characters

	 # greeks ...

	 '\+\cHo'	=>	'&bull;',
	 '\|\cHv'	=>	'&darr;',
	 '\|\cH\^'	=>	'&uarr;',
	 '\=\cHv'	=>	'&dArr;',
	 '\=\cH\^'	=>	'&uArr;',
	 '-\cHV'	=>	'&forall;',
	 '\(=\cH_'	=>	'&sube;',
	 # The backspace must be spelled \cH: a bare \H is a regex class
	 # (any non-horizontal-whitespace character), not the backspace.
	 '=\cH_\)'	=>	'&supe;',
	 'O\cH\+'	=>	'&oplus;',
	 'O\cHx'	=>	'&otimes;',
	 # '_\cH\|'	=>	'&perp;',	

	 # Others
	 '\|\cH='	=>	'&Dagger;',
	 '\|\cH-'	=>	'&dagger;',
	 # glorin	,\cHf
	 # Lslash	/\cHL
	 # lslash	/\cHl
	 # hungarumlaut	"\cH_
	 # breve	'\cH`
	 # notelement	/\cHE
	 # angle	_\cH/
	 # integral	'\cH,\cHI\cH_
	 # radical	\\\cH_/
	  );

# Stage 3 table: the remaining HTML-special characters.  cnvt_meta applies
# it last, after the overstrike tables (whose keys may contain '"').
%meta3 = (
    q{<}  => q{&lt;},
    q{>}  => q{&gt;},
    q{\"} => q{&quot;},
);

#
# cnvt_meta(LINE) -- replace meta characters in LINE with HTML entities.
#
# The tables are applied strictly in stage order: %meta1 ('&'), then
# %meta2long, then %meta2 (overstrike sequences), and finally %meta3
# ('<', '>' and '"').  Every table key is used as a regex pattern and
# every value as its replacement text.  Returns the converted line.
#
sub cnvt_meta {
    my $myline = shift;

    foreach my $table (\%meta1, \%meta2long, \%meta2, \%meta3) {
	# Visit the keys last-to-first in the hash's own iteration order.
	foreach my $pattern (reverse keys %$table) {
	    my $entity = $table->{$pattern};
	    $myline =~ s/$pattern/$entity/g;
	}
    }

    return $myline;
}

#
# cnvt_em(MO, MC, LINE) -- turn nroff backspace sequences into HTML markup.
#
#   MO    HTML tag that opens emphasis (used for overstruck text)
#   MC    HTML tag that closes emphasis
#   LINE  one line of text; the caller passes it through cnvt_meta first
#
# Underlined text becomes <u>...</u>, overstruck text becomes MO...MC, a
# line made of one to three emphasized words is turned into a section
# heading anchor, and manual references such as name(1) become links.
#
# Returns the converted line, or "" for a blank line once more than four
# blank lines have been seen in a row.
#
# Globals updated: $repeated_nl, $num (next section index), @lbl (section
# titles).  Globals read: @pairs, %FORM, $linksections, $cgidir, $prog,
# $addopt.
#
sub cnvt_em {
    my ($mo,			# HTML tag to open emphasizing
	$mc,			# HTML tag to close emphasizing
	$text)			# a line to substitute
	= @_;
    local $_ = $text;		# the substitutions below all work on $_

    if ($_ eq "\n") {
	return "" if $repeated_nl > 3;	# discard the excessive newlines
	$repeated_nl++;
	return $_;
    }
    $repeated_nl = 0;

    # Collapse a run of "X^H" pairs into the last pair of the run
    s/([-_\w]\cH)+/$1/g;

    # Process underlining
    s/_\cH([\x80-\xff][\x80-\xff])/<u>$1<\/u>/g;	# pair of 8-bit bytes
    s/_\cH&(\w+);/<u>&$1;<\/u>/g;			# HTML entity
    s/_\cH(.)/<u>$1<\/u>/g;				# any other character
    s/<\/u><u>//go;			# concatenate repeated underlines

    # Process overstriking
    s/([\x80-\xff][\x80-\xff])\cH\1/$mo$1$mc/g;	# 8-bit pair struck twice
    s/(.)\cH\1/$mo$1$mc/g;		# a^Ha -> <b>a</b>
    s/(&.*;)\cH\1/$mo$1$mc/g;		# &^H& -> <b>&</b>
    s/$mc$mo//go;			# concatenate repeated emphasis

    # Section anchors are numbered from 0, matching the toc0/sect0 numbering
    # of the table of contents; without this the first heading would be
    # emitted as name="sect" href="#toc" because $num was still undefined.
    $num = 0 unless defined $num;

    # Process SH or Sh outputs
    /^$mo\S/ && do {
	if (s/^$mo(\S+)$mc$/<font size="+1">$mo<a name="sect$num" href="\#toc$num">$1<\/a>$mc<\/font>/) {
	    push(@lbl, $1); ++$num;
	} elsif (s/^$mo(\S+)$mc\s+$mo(\S+)$mc$/<font size="+1">$mo<a name="sect$num" href="\#toc$num">$1 $2<\/a>$mc<\/font>/) {
	    push(@lbl, "$1 $2"); ++$num;
	} elsif (s/^$mo(\S+)$mc\s+$mo(\S+)$mc\s+$mo(\S+)$mc$/<font size="+1">$mo<a name="sect$num" href="\#toc$num">$1 $2 $3<\/a>$mc<\/font>/) {
	    push(@lbl, "$1 $2 $3"); ++$num;
	}
    };

    # Process links
    if ($#pairs >= 1) {
	if ($FORM{'cmd'}) {
	    if (s/([\w\.\-\/]+)\b\(([$linksections])\)/<a href="$cgidir\/$prog?sect=$2&$addopt$1">$1($2)<\/a>/g) {
		s/man=S\/key/man=skey/;
	    }
	} else {
	    # A line that itself starts with name(N) is left unlinked
	    unless (/^\b\S+\b\([$linksections]\)/) {
		if (s/\b([\w\.\-\/]+)\b\(([$linksections])\)/<a href="$cgidir\/$prog?sect=$2&$addopt$1">$1($2)<\/a>/g) {
		    s/man=S\/key/man=skey/;
		}
	    }
	}
    } else {
	unless (/^\b\S+\b\([$linksections]\)/) {
	    s/\b([\w\.\-]+)\b\(([$linksections])\)/<a href="$cgidir\/$prog?$addopt$1.$2">$1($2)<\/a>/g;
	}
    }
    $_;
}

#
# MAIN
#

# Derive the banner used in the usage text from the RCS keyword, and the
# invocation name from the last path component of $0.
$rcs_id = '$Id: roff2html.pl,v 2.2 2000/09/26 19:27:54 horikawa Exp $';
($filter) = $0 =~ m{([^/]+)$};
if ($rcs_id =~ m{\s(\S+),v\s(\S+)\s(\d+/\d+/\d+)}) {
    ($pname, $revision, $rcsdate) = ("$1", $2, $3);
    $id = join(' ', $pname, 'Revision', $revision, $rcsdate,
	       'by Yoshishige ARAI', '<ryo2@on.rim.or.jp>');
}

# Extra library directories, searched before the standard @INC entries.
@incdir = (
    '/usr/local/www/cgi-bin/yoyaku/',
    '/proj/manjpadm/bin',
);
unshift @INC, @incdir;

## --- Settings
# default doctype (2.0, 3.2, i18n, msie)
$dtd = 'i18n';
# jis, euc or sjis
$jpcode = 'jis';
# emphasis  0:<B> 1:<I> 2:<EM> 3:<STRONG>
$etag = 0;
# japanese or english
$lang = 'japanese';
# sections that get links; the alternative list is "185234679ln"
$linksections = '15678';
# path relative to ServerRoot
$cgidir = '/cgi';
# name of the CGI that links point to
$prog = 'mroff.cgi';

## ---
require 'jcode.pl';
require 'getopts.pl';
#require 'menv.pl';

# Opening and closing emphasis tags, indexed by $etag.
@emo = qw(<b> <i> <em> <strong>);
@emc = qw(</b> </i> </em> </strong>);

# DOCTYPE declaration for each value accepted by -d.
%dtype = (
    '2.0'     => '<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0 Strict//EN">',
    '3.2'     => '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">',
    '4.0'     => '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Draft//EN">',
    'i18n'    => '<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML i18n//EN">',
    'msie'    => '<!DOCTYPE HTML PUBLIC "-//Microsoft//DTD Internet Explorer 3.0 Tables//EN">',
    'hotjava' => '<!DOCTYPE HTML PUBLIC "-//Sun Microsystems Corp.//DTD HotJava HTML//EN">',
);

# charset name written into the <meta> tag for each $jpcode value.
%charset = (
    'euc'  => 'EUC-JP',
    'jis'  => 'ISO-2022-JP',
    'sjis' => 'Shift_JIS',
);

@pairs = ('dummy', 1, 1);
# string that mroff.cgi replaces
$addopt = '%OPT%';

# Usage text, printed via die when option parsing fails or -h is given.
# The heredoc interpolates $id (banner) and $filter (invocation name), and
# each \t expands to a tab.
$help = <<EOF;
$id
usage: $filter [-o OUTFILE] [-l SECTIONS] [-a -d 2.0|3.2|4.0|i18n|msie -m MANNAME]

\t-o OUTFILE\toutput file. (stdout is default)
\t-l SECTIONS\tsections to create links on HTML.
\t-j\t\tISO-2022-JP (default).
\t-e\t\tJapanese EUC.
\t-s\t\tShift JIS.
\t-a\t\tadd headers as a test.
\t-d TYPE\tspecify doctype declaration.(use with -a)
\t-m MANNAME\tman page name. (use with -a)
example:
\tgroff -Tnippon -mandoc MANSOURCE | $filter -l '15678'> DESTFILE
EOF


# Parse the command line; a bad option or -h prints the usage text and exits.
die $help unless &Getopts('ejshad:l:m:o:');
die $help if $opt_h;

# -d selects the doctype and implies -a.
if (defined($opt_d)) {
    $dtd = $opt_d;
    $opt_a = 1;
}
die "$filter: no DOCTYPE for HTML$dtd as DTD.\n" unless defined($dtype{$dtd});

$linksections = $opt_l if defined($opt_l);

# When several encoding flags are given, the later test wins: -s, -j, -e.
$jpcode = 'euc'  if defined($opt_e);
$jpcode = 'jis'  if defined($opt_j);
$jpcode = 'sjis' if defined($opt_s);

# Read the formatter output line by line, convert each line to HTML and
# append it to $buf.  A word fragment cut off by a hyphen at the end of a
# line is held in $prep and glued onto the following line.
$prep = '';				# fragment carried over from the previous line
$repeated_nl = 0;				# number of repeated "\n"s
$num = 0;				# next section index used by cnvt_em; must
					# start at 0 so that the first heading is
					# sect0/toc0 like the table of contents
while(<>) {
    $post_replace_regexp = '';
    # The previous line ended with a hyphen.  Compare against '' so that a
    # fragment such as "0" is not mistaken for "nothing pending" and lost.
    if ($prep ne '') {
      # This line has whitespace directly followed by a reference XXX(D)
      if (s/(\s+)(\w+\(\d\))/$2/) {
	$post_replace_regexp = "^($1)(<[^>]+>)($prep)";
	$_ = "$1$prep" . $_;
      } else {
	$_ = "$prep\n$_";
      }
    }
    $prep = '';
    $line = $_;
    if (s/([\d\w\.]+)-$//) { # the line ends with a hyphenated fragment XX-
	$prep = $1; # keep the fragment XX in $prep and
	$line = $`; # carry it over to the next line
    }
    $line = &cnvt_meta($line);
    $line = &cnvt_em($emo[$etag], $emc[$etag], $line);
    if ($post_replace_regexp) {
      # put the hyphenation back, now inside the generated tag
      $line =~ s/$post_replace_regexp/$2${3}-\n$1/;
    }
    $lang =~ /eng/i || &jcode::convert(*line,$jpcode,'euc');
    $buf .= $line;
}

# Build the table of contents from the section titles collected in @lbl.
# $tmpbuf is printed before the page body and $endbuf after it.
if (@lbl) {
    # Wrap every title in an anchor that points at its section heading.
    for ($num = 0; $num <= $#lbl; $num++) {
	$lbl[$num] = qq{<a name="toc$num" href="#sect$num">$lbl[$num]</a>};
    }
    $tmpbuf = qq{<b><a name="toc">Table of Contents</a></b><br>\n}
	. join(' | ', @lbl);
    $endbuf = qq{<b><a href="#toc">Table of Contents</a></b><br>\n};
}
&jcode::convert(*tmpbuf, $jpcode, 'euc') unless $lang =~ /eng/i;
# Emit the page: to OUTFILE when -o was given, otherwise to stdout.
if ($opt_o) {
  # The file must be opened for writing; a bare filename opens it read-only
  # and every print to it would fail.  Two-argument form is kept so the
  # script still runs on old perls.
  open(F, "> $opt_o")
      || die "$filter: cannot open the file \"$opt_o\" for writing: $!\n";
  select(F);
}

if ($opt_a) {
  # "FreeBSD <manual search>" label, spelled as EUC-JP bytes so that no '$'
  # or '"' from a 7-bit encoding can be parsed by Perl inside the string,
  # then converted to the selected output code like the rest of the page.
  $search_label = "FreeBSD \xa5\xde\xa5\xcb\xa5\xe5\xa5\xa2\xa5\xeb\xb8\xa1\xba\xf7";
  &jcode::convert(*search_label, $jpcode, 'euc');
}

# -a adds a complete HTML header and footer around the converted text.
$opt_a && print "$dtype{$dtd}\n<html><head><meta http-equiv=\"Content-type\" content=\"text/html; charset=$charset{$jpcode}\"><title>On-line Manual of ".
    "$opt_m </title></head>\n<body bgcolor=\"#ffffff\">\n".
    "<br><h2>$search_label (jman/japropos/jwhatis)</h2>\n";
print "$tmpbuf<pre>$buf</pre><br>\n$endbuf";
$opt_a && print "<hr><a href=\"/man-jp/search.html\">$search_label".
    "</a></body></html>\n";

if ($opt_o) {
  # Buffered write errors only show up when the handle is closed.
  close(F) || die "$filter: cannot write the file \"$opt_o\": $!\n";
}
exit 0;
--
$BKY@nOBM:(B
