
| Current Path : /usr/share/latex2html/styles/ |
Linux ift1.ift-informatik.de 5.4.0-216-generic #236-Ubuntu SMP Fri Apr 11 19:53:21 UTC 2025 x86_64 |
| Current File : //usr/share/latex2html/styles/CJK.perl |
########################################################################
# $Id: CJK.perl,v 1.8 2002/04/26 16:06:52 RRM Exp $
# CJK.perl
# Jens Lippmann <lippmann@rbg.informatik.tu-darmstadt.de>,
# Boy Yang <yangboy@math.ntu.edu.tw>,
# Werner Lemberg <xlwy01@uxp1.hrz.uni-dortmund.de>
#
# Extension to LaTeX2HTML V 96.2 to supply support for the
# "CJK" LaTeX package.
#
########################################################################
# Change Log:
# ===========
# jcl = Jens Lippmann
#
# $Log: CJK.perl,v $
# Revision 1.8 2002/04/26 16:06:52 RRM
# -- JIS is EUC-JP, not ISO-2022-JP.
#
# Revision 1.7 2002/04/26 14:17:31 RRM
# -- fixed MIME names for the encodings; thanks to Jungshik Shin for
# the correct names
#
# Revision 1.6 2002/04/24 22:27:00 RRM
# -- automatic recognition of document charset, based upon the
# encoding in the first {CJK} or {CJK*} environment.
#
# Revision 1.5 1999/06/06 14:24:59 MRO
#
#
# -- many cleanups wrt. to TeXlive
# -- changed $* to /m as far as possible. $* is deprecated in perl5, all
# occurrences should be removed.
#
# Revision 1.4 1999/04/09 18:11:27 JCL
# changed my e-Mail address
#
# Revision 1.3 1998/02/19 22:24:26 latex2html
# th-darmstadt -> tu-darmstadt
#
# Revision 1.2 1996/12/17 17:11:41 JCL
# typo
#
# Revision 1.1 1996/12/17 17:07:32 JCL
# - introduced to CVS repository
# - adjusted technical notes according to Werner's proposal
# - added support for CJK* environment
#
# jcl 16-DEC-96 - Created
#
########################################################################
# Notes:
# You may view the results only with a browser configured for the
# specific language.
# To configure the browser, use e.g. the "document encoding" menu
# of Netscape.
#
# Technical Notes:
# We use the pre_process hook to change any text coming in to
# LaTeX2HTML such that we convert from the outer representation
# of double byte characters to an inner, LaTeX2HTML specific
# representation.
# The two outer representations recognized are described as follows:
# o standard CJK encodings (GB, KS, Big5, SJIS, etc.)
# Each symbol is formed by two characters, the first in the range
# [\201-\237\241-\376] (octal) or 0x81-0x9F, 0xA1-0xFE (hexadecimal),
# the second in the range
# [\100-\176\200-\376] (octal) or 0x40-0x7E, 0x80-0xFE (hexadecimal).
# o CJK internal encoding (to conveniently use CJK processed files)
# Each symbol is a sequence with a leading character in the range
# [\201-\237\241-\376] or 0x81-0x9F, 0xA1-0xFE,
# a sequence of digits forming the decimal representation of the
# second character from standard encoded form (eg. "65", "128"),
# and a trailing 0xFF.
# The internal LaTeX2HTML representation is the same as the CJK
# encoded form.
# Additionally, we handle TeX's normalized representation of special
# characters (eg. ^^e4), which is helpful when LaTeX2HTML processes
# the .aux file.
#
# The post_process hook will convert the LaTeX2HTML internal coding
# into standard Big5/SJIS encoding, which then remains in the
# HTML text.
#
# The revert_to_raw_tex hook will convert the internal encoding
# back to standard encoding to help with image creation.
#
########################################################################
package main;

# Table of the encoding names accepted by the CJK / CJK* environments
# (see do_env_CJK) and the charset name that each of them selects.
%CJK_charset = (
    'Bg5'    => 'Big5',
    'Bg5+'   => 'Big5Plus',
    'Bg5hk'  => 'Big5-HKSCS',
    'GB'     => 'gb2312',
    'GBt'    => 'gbt_12345',
    'GBK'    => 'GBK',
    # 'JIS' was formerly mapped to 'ISO-2022-JP'; it is EUC-JP.
    'JIS'    => 'EUC-JP',
    'SJIS'   => 'Shift_JIS',
    'KS'     => 'EUC-KR',
    'UTF8'   => 'UTF-8',
    'EUC-TW' => 'X-EUC-TW',
    'EUC-JP' => 'EUC-JP',
    'EUC-KR' => 'EUC-KR',
    'CP949'  => 'X-Windows-949',
);

# Default charset for both input and output is the one for 'Bg5'
# ('Big5'), unless $CJK_AUTO_CHARSET already carries a (true) value.
if (!defined $CJK_AUTO_CHARSET) {
    $CJK_AUTO_CHARSET = '';
}
$CHARSET = $CJK_AUTO_CHARSET || $CJK_charset{'Bg5'};
$charset = $CHARSET;
sub pre_pre_process {
    # Rewrites $_ in place; the value of the sub is the result of the
    # last substitution below.
    #
    # 1. TeX's normalized special characters of the form ^^X, where X is
    #    not a lowercase hex digit: X becomes the character with code
    #    (ord(X) + 64) modulo 128.
    #    LaTeX2HTML *might* do this itself, but we do not rely on it.
    s{\^\^([^0-9a-f])}{ chr((ord($1) + 64) & 127) }ge;

    # 2. TeX's ^^hh form (two lowercase hex digits): becomes the
    #    character with that hexadecimal code.
    s{\^\^([0-9a-f][0-9a-f])}{ chr(hex($1)) }ge;

    # 3. Standard CJK encoding -> l2h internal form: the lead byte is
    #    kept, the second byte is spelled out as its decimal code and
    #    the sequence is closed with \377.
    s{([\201-\237\241-\376])([\100-\176\200-\376])}{ $1 . ord($2) . "\377" }ge;
}
sub post_post_process {
    # l2h internal form -> standard CJK encoding, in place on $_:
    # a lead byte followed by decimal digits and a closing \377 is
    # turned back into the lead byte plus the character whose code
    # the digits spell out.
    s{([\201-\237\241-\376])(\d+)\377}{ $1 . chr($2) }ge;
}
sub revert_to_raw_tex_hook {
    # Works in place on $_.  Every l2h internal sequence (lead byte,
    # decimal digits, trailing \377) is put back into standard CJK
    # encoding: the lead byte followed by the character with the
    # given decimal code.
    s{([\201-\237\241-\376])(\d+)\377}{ join('', $1, chr($2)) }ge;
}
sub do_cmd_CJKchar {
    # Handler for \CJKchar.  Receives the text following the command as
    # its first argument and returns that text with the command's
    # arguments replaced by one symbol in l2h internal form.
    # NOTE(review): $next_pair_rx is defined by LaTeX2HTML, not here;
    # presumably its $2 is the content of the next brace pair -- confirm.
    local ($_) = @_;

    # The optional argument is consumed and its value discarded.
    &get_next_optional_argument;

    # First brace pair: a character code, replaced by that character.
    s{$next_pair_rx}{ chr($2) }eo;

    # Second brace pair: its content is kept as is and closed with \377.
    s{$next_pair_rx}{$2\377}o;

    return $_;
}
# Handler for the CJK environment.
# \CJKspace and \CJKnospace are not implemented yet.
#
sub do_env_CJK {
    # Receives the environment's text as first argument and returns it
    # with the environment's own arguments removed.  As a side effect
    # the document charset ($CHARSET, $charset, $CJK_AUTO_CHARSET) is
    # set from the first encoding seen, as long as $CJK_AUTO_CHARSET
    # holds no true value yet.
    local ($_) = @_;
    my $encoding;

    # The optional font encoding is consumed and ignored.
    &get_next_optional_argument;

    # The CJK encoding is taken from the next brace pair, trying
    # $next_pair_pr_rx first and $next_pair_rx second; if neither
    # matches, the value comes from missing_braces.
    my $consumed = s{$next_pair_pr_rx}{ $encoding = $2; '' }eo
        || s{$next_pair_rx}{ $encoding = $2; '' }eo;
    if (!$consumed) {
        $encoding = &missing_braces;
    }
    $encoding =~ s/^\s+|\s+$//g;

    if ($encoding) {
        my $requested = $CJK_charset{$encoding};
        if (!defined $requested) {
            &write_warning("unknown charset code: $encoding in CJK environment.");
        }
        elsif (!$CJK_AUTO_CHARSET) {
            # First known encoding in the document: it decides the charset.
            $CHARSET          = $requested;
            $charset          = $requested;
            $CJK_AUTO_CHARSET = $requested;
        }
        elsif ($CHARSET ne $requested) {
            # A later environment asks for another charset: keep the
            # current one and only warn.
            &write_warning("Only one charset allowed per document: $CHARSET");
            &write_warning("Ignoring request for " . $requested);
        }
        # otherwise the request agrees with $CHARSET; nothing to do.
    }

    # The CJK font family argument is dropped.
    s{$next_pair_rx}{}o;

    return $_;
}
# Handle CJK* environments.
# The usage of \CJKspace, \CJKnospace is not implemented yet.
# We won't catch single newlines following CJK symbols, because
# this would require to suppress the newlines in the HTML output,
# leading to overly long lines.
#
sub do_env_CJKstar {
    # Receives the environment's text as first argument and returns the
    # processed text.  do_env_CJK is called with the same @_ and does
    # the argument handling shared by both environments.
    local($_) = &do_env_CJK;
    # CJK symbols (l2h internal form: lead byte, decimal digits, \377)
    # eat the blanks and tabs that follow them.
    # The replacement uses $1; writing \1 on the right-hand side of s///
    # is a grandfathered form that perl warns about
    # ("\1 better written as $1").
    s/([\201-\237\241-\376]\d+\377)[ \t]+/$1/g;
    $_;
}
# Hand the CJK package commands listed below to ignore_commands (which is
# not defined in this file).  Most of the commands here need some action
# which is not implemented yet.
# The here-document holds one command name per line.  The "# {}" and
# "# []" fields after a name are part of the data passed along; presumably
# they describe the command's mandatory and optional arguments -- TODO
# confirm against ignore_commands.
&ignore_commands(<<_IGNORED_CMDS_);
CJKCJKchar
CJKboldshift
CJKcaption # {}
CJKenc # {}
CJKencfamily # [] # {} # {}
CJKfamily # {}
CJKfontenc # {} # {}
CJKglue
CJKhangul
CJKhangulchar
CJKhanja
CJKkern
CJKlatinchar
CJKnospace
CJKspace
CJKtilde
CJKtolerance
CJKuppercase
Unicode # {} # {}
nbs
standardtilde
_IGNORED_CMDS_
# \AtBeginDocument and \AtEndDocument are needed as well; both are handed
# to ignore_commands, each with a single "# {}" field.
&ignore_commands(<<_IGNORED_CMDS_);
AtBeginDocument # {}
AtEndDocument # {}
_IGNORED_CMDS_
# This must be the last line: the file ends with a true value
# (presumably so that loading it with require/do reports success).
1;