diff --git a/Makefile.PL b/Makefile.PL
index 59c5d86..a346b17 100755
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -19,14 +19,17 @@ extra_build:
--plugin=orphans --plugin=haiku --plugin=meta
./mdwn2man ikiwiki 1 doc/usage.mdwn > ikiwiki.man
./mdwn2man ikiwiki-mass-rebuild 8 doc/ikiwiki-mass-rebuild.mdwn > ikiwiki-mass-rebuild.man
+ cd l10n && $(MAKE) install
extra_clean:
rm -rf html doc/.ikiwiki
rm -f ikiwiki.man ikiwiki-mass-rebuild.man
+ cd l10n && $(MAKE) distclean
extra_install:
- install -d $(PREFIX)/share/ikiwiki/templates
- cp templates/* $(PREFIX)/share/ikiwiki/templates
+ install -d $(PREFIX)/share/ikiwiki/templates/en
+ cp templates/* $(PREFIX)/share/ikiwiki/templates/en
+ cp -a translations/* $(PREFIX)/share/ikiwiki/templates/ || true
install -d $(PREFIX)/share/ikiwiki/basewiki
cp -a basewiki/* $(PREFIX)/share/ikiwiki/basewiki
diff --git a/l10n/Makefile b/l10n/Makefile
new file mode 100644
index 0000000..ee1311b
--- /dev/null
+++ b/l10n/Makefile
@@ -0,0 +1,38 @@
+TMPL_PROCESS = ./tmpl_process3.pl
+
+SOURCE_DIR = ../templates
+TARGET_DIR = ../translations
+PO_DIR = ../po
+POT_FILE = $(PO_DIR)/templates.pot
+
+LANGUAGES = $(patsubst $(PO_DIR)/%.po, $(TARGET_DIR)/%, $(wildcard $(PO_DIR)/*.po))
+
+all: install
+
+pot: $(POT_FILE)
+
+update: pot $(wildcard $(PO_DIR)/*.po)
+
+install: $(LANGUAGES)
+
+clean:
+ rm -rf $(PO_DIR)/*~ $(LANGUAGES)
+
+distclean: clean
+ rmdir --ignore-fail-on-non-empty $(TARGET_DIR)
+
+%.pot: $(wildcard $(SOURCE_DIR)/*.tmpl)
+ mkdir -p $(dir $@)
+ rm -f $@
+ $(TMPL_PROCESS) create -s $@ -r -i $(SOURCE_DIR)
+
+%.po: $(wildcard $(SOURCE_DIR)/*.tmpl)
+ $(TMPL_PROCESS) update -s $@ -r -i $(SOURCE_DIR)
+ touch $@ # TMPL_PROCESS doesn't touch file if not needed
+
+$(TARGET_DIR)/%: $(PO_DIR)/%.po
+ mkdir -p $@
+ $(TMPL_PROCESS) install -s $< -o $@ -r -i $(SOURCE_DIR)
+ touch $@ # TMPL_PROCESS doesn't touch dir if not needed
+
+.PHONY: update pot
diff --git a/l10n/TmplToken.pm b/l10n/TmplToken.pm
new file mode 100644
index 0000000..0a54d0b
--- /dev/null
+++ b/l10n/TmplToken.pm
@@ -0,0 +1,157 @@
+package TmplToken;
+
+use strict;
+use TmplTokenType;
+require Exporter;
+
+use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
+
+###############################################################################
+
+=head1 NAME
+
+TmplToken.pm - Object representing a scanner token for .tmpl files
+
+=head1 DESCRIPTION
+
+This is a class representing a token scanned from an HTML::Template .tmpl file.
+
+=cut
+
+###############################################################################
+
+$VERSION = 0.01;
+
+@ISA = qw(Exporter);
+@EXPORT_OK = qw();
+
+###############################################################################
+
+sub new {
+ my $this = shift;
+ my $class = ref($this) || $this;
+ my $self = {};
+ bless $self, $class;
+ ($self->{'_string'}, $self->{'_type'}, $self->{'_lc'}, $self->{'_path'}) = @_;
+ return $self;
+}
+
+sub string {
+ my $this = shift;
+ return $this->{'_string'}
+}
+
+sub type {
+ my $this = shift;
+ return $this->{'_type'}
+}
+
+sub pathname {
+ my $this = shift;
+ return $this->{'_path'}
+}
+
+sub line_number {
+ my $this = shift;
+ return $this->{'_lc'}
+}
+
+sub attributes {
+ my $this = shift;
+ return $this->{'_attr'};
+}
+
+sub set_attributes {
+ my $this = shift;
+ $this->{'_attr'} = ref $_[0] eq 'HASH'? $_[0]: \@_;
+ return $this;
+}
+
+# only meaningful for TEXT_PARAMETRIZED tokens
+sub children {
+ my $this = shift;
+ return $this->{'_kids'};
+}
+
+# only meaningful for TEXT_PARAMETRIZED tokens
+sub set_children {
+ my $this = shift;
+ $this->{'_kids'} = ref $_[0] eq 'ARRAY'? $_[0]: \@_;
+ return $this;
+}
+
+# only meaningful for TEXT_PARAMETRIZED tokens
+# FIXME: DIRECTIVE is not necessarily TMPL_VAR !!
+sub parameters_and_fields {
+ my $this = shift;
+ return map { $_->type == TmplTokenType::DIRECTIVE? $_:
+ ($_->type == TmplTokenType::TAG
+ && $_->string =~ /^{'_kids'}};
+}
+
+# only meaningful for TEXT_PARAMETRIZED tokens
+sub anchors {
+ my $this = shift;
+ return map { $_->type == TmplTokenType::TAG && $_->string =~ /^{'_kids'}};
+}
+
+# only meaningful for TEXT_PARAMETRIZED tokens
+sub form {
+ my $this = shift;
+ return $this->{'_form'};
+}
+
+# only meaningful for TEXT_PARAMETRIZED tokens
+sub set_form {
+ my $this = shift;
+ $this->{'_form'} = $_[0];
+ return $this;
+}
+
+sub has_js_data {
+ my $this = shift;
+ return defined $this->{'_js_data'} && ref($this->{'_js_data'}) eq 'ARRAY';
+}
+
+sub js_data {
+ my $this = shift;
+ return $this->{'_js_data'};
+}
+
+sub set_js_data {
+ my $this = shift;
+ $this->{'_js_data'} = $_[0];
+ return $this;
+}
+
+# predefined tests
+
+sub tag_p {
+ my $this = shift;
+ return $this->type == TmplTokenType::TAG;
+}
+
+sub cdata_p {
+ my $this = shift;
+ return $this->type == TmplTokenType::CDATA;
+}
+
+sub text_p {
+ my $this = shift;
+ return $this->type == TmplTokenType::TEXT;
+}
+
+sub text_parametrized_p {
+ my $this = shift;
+ return $this->type == TmplTokenType::TEXT_PARAMETRIZED;
+}
+
+sub directive_p {
+ my $this = shift;
+ return $this->type == TmplTokenType::DIRECTIVE;
+}
+
+###############################################################################
+
+1;
diff --git a/l10n/TmplTokenType.pm b/l10n/TmplTokenType.pm
new file mode 100644
index 0000000..75d4f52
--- /dev/null
+++ b/l10n/TmplTokenType.pm
@@ -0,0 +1,128 @@
+package TmplTokenType;
+
+use strict;
+require Exporter;
+
+use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
+
+###############################################################################
+
+=head1 NAME
+
+TmplTokenType.pm - Types of TmplToken objects
+
+=head1 DESCRIPTION
+
+This is a Java-style "safe enum" singleton class for types of TmplToken objects.
+The predefined constants are
+
+=cut
+
+###############################################################################
+
+$VERSION = 0.01;
+
+@ISA = qw(Exporter);
+@EXPORT_OK = qw(
+ &TEXT
+ &TEXT_PARAMETRIZED
+ &CDATA
+ &TAG
+ &DECL
+ &PI
+ &DIRECTIVE
+ &COMMENT
+ &UNKNOWN
+);
+
+###############################################################################
+
+use vars qw( $_text $_text_parametrized $_cdata
+ $_tag $_decl $_pi $_directive $_comment $_null $_unknown );
+
+BEGIN {
+ my $new = sub {
+ my $this = 'TmplTokenType';#shift;
+ my $class = ref($this) || $this;
+ my $self = {};
+ bless $self, $class;
+ ($self->{'id'}, $self->{'name'}, $self->{'desc'}) = @_;
+ return $self;
+ };
+ $_text = &$new(0, 'TEXT');
+ $_text_parametrized = &$new(8, 'TEXT-PARAMETRIZED');
+ $_cdata = &$new(1, 'CDATA');
+ $_tag = &$new(2, 'TAG');
+ $_decl = &$new(3, 'DECL');
+ $_pi = &$new(4, 'PI');
+ $_directive = &$new(5, 'DIRECTIVE');
+ $_comment = &$new(6, 'COMMENT');
+ $_unknown = &$new(7, 'UNKNOWN');
+}
+
+sub to_string {
+ my $this = shift;
+ return $this->{'name'}
+}
+
+sub TEXT () { $_text }
+sub TEXT_PARAMETRIZED () { $_text_parametrized }
+sub CDATA () { $_cdata }
+sub TAG () { $_tag }
+sub DECL () { $_decl }
+sub PI () { $_pi }
+sub DIRECTIVE () { $_directive }
+sub COMMENT () { $_comment }
+sub UNKNOWN () { $_unknown }
+
+###############################################################################
+
+=over
+
+=item TEXT
+
+normal text (#text in the DTD)
+
+=item TEXT_PARAMETRIZED
+
+parametrized normal text
+(result of simple recognition of text interspersed with directives;
+this has to be explicitly enabled in the scanner)
+
+=item CDATA
+
+normal text (CDATA in the DTD)
+
+=item TAG
+
+something that has the form of an HTML tag
+
+=item DECL
+
+something that has the form of an SGML declaration
+
+=item PI
+
+something that has the form of an SGML processing instruction
+
+=item DIRECTIVE
+
+a HTML::Template directive (whether or not embedded in an SGML comment)
+
+=item COMMENT
+
+something that has the form of an HTML comment
+(and is not recognized as an HTML::Template directive)
+
+=item UNKNOWN
+
+something that is not recognized at all by the scanner
+
+=back
+
+Note that end of file is currently represented by undef,
+instead of a constant predefined by this module.
+
+=cut
+
+1;
diff --git a/l10n/TmplTokenizer.pm b/l10n/TmplTokenizer.pm
new file mode 100644
index 0000000..30d011d
--- /dev/null
+++ b/l10n/TmplTokenizer.pm
@@ -0,0 +1,1150 @@
+package TmplTokenizer;
+
+use strict;
+use TmplTokenType;
+use TmplToken;
+use VerboseWarnings qw( pedantic_p error_normal warn_normal warn_pedantic );
+require Exporter;
+
+use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
+
+###############################################################################
+
+=head1 NAME
+
+TmplTokenizer.pm - Simple-minded tokenizer class for HTML::Template .tmpl files
+
+=head1 DESCRIPTION
+
+Because .tmpl files contains HTML::Template directives
+that tend to confuse real parsers (e.g., HTML::Parse),
+it might be better to create a customized scanner
+to scan the template files for tokens.
+This module is a simple-minded attempt at such a scanner.
+
+=cut
+
+###############################################################################
+
+$VERSION = 0.02;
+
+@ISA = qw(Exporter);
+@EXPORT_OK = qw();
+
+use vars qw( $pedantic_attribute_error_in_nonpedantic_mode_p );
+use vars qw( $pedantic_tmpl_var_use_in_nonpedantic_mode_p );
+use vars qw( $pedantic_error_markup_in_pcdata_p );
+
+###############################################################################
+
+# Hideous stuff
+use vars qw( $re_directive $re_tmpl_var $re_tmpl_var_escaped $re_tmpl_include );
+use vars qw( $re_directive_control $re_tmpl_endif_endloop );
+BEGIN {
+ # $re_directive must not do any backreferences
+ $re_directive = q{<(?:(?i)(?:!--\s*)?\/?TMPL_(?:VAR|LOOP|INCLUDE|IF|ELSE|UNLESS)(?:\s+(?:[a-zA-Z][-a-zA-Z0-9]*=)?(?:'[^']*'|"[^"]*"|[^\s<>]+))*\s*(?:--)?)>};
+ # TMPL_VAR or TMPL_INCLUDE
+ $re_tmpl_var = q{<(?:(?i)(?:!--\s*)?TMPL_(?:VAR)(?:\s+(?:[a-zA-Z][-a-zA-Z0-9]*=)?(?:'[^']*'|"[^"]*"|[^\s<>]+))*\s*(?:--)?)>};
+ $re_tmpl_include = q{<(?:(?i)(?:!--\s*)?TMPL_(?:INCLUDE)(?:\s+(?:[a-zA-Z][-a-zA-Z0-9]*=)?(?:'[^']*'|"[^"]*"|[^\s<>]+))*\s*(?:--)?)>};
+ # TMPL_VAR ESCAPE=1/HTML/URL
+ $re_tmpl_var_escaped = q{<(?:(?i)(?:!--\s*)?TMPL_(?:VAR|INCLUDE)(?:\s+(?:[a-zA-Z][-a-zA-Z0-9]*=)?(?:'[^']*'|"[^"]*"|[^\s<>]+))\s+ESCAPE=(?:1|HTML|URL)(?:\s+(?:[a-zA-Z][-a-zA-Z0-9]*=)?(?:'[^']*'|"[^"]*"|[^\s<>]+))*\s*(?:--)?)>};
+ # Any control flow directive
+ $re_directive_control = q{<(?:(?i)(?:!--\s*)?\/?TMPL_(?:LOOP|IF|ELSE|UNLESS)(?:\s+(?:[a-zA-Z][-a-zA-Z0-9]*=)?(?:'[^']*'|"[^"]*"|[^\s<>]+))*\s*(?:--)?)>};
+ # /LOOP or /IF or /UNLESS
+ $re_tmpl_endif_endloop = q{<(?:(?i)(?:!--\s*)?\/TMPL_(?:LOOP|IF|UNLESS)(?:\s+(?:[a-zA-Z][-a-zA-Z0-9]*=)?(?:'[^']*'|"[^"]*"|[^\s<>]+))*\s*(?:--)?)>};
+}
+
+# Hideous stuff from subst.pl, slightly modified to use the above hideous stuff
+# Note: The $re_tag's set $1 (), and $3 (rest of string)
+use vars qw( $re_comment $re_entity_name $re_end_entity $re_etag );
+use vars qw( $re_tag_strict $re_tag_compat @re_tag );
+sub re_tag ($) {
+ my($compat) = @_;
+ my $etag = $compat? '>': '<>\/';
+ # This is no longer similar to the original regexp in subst.pl :-(
+ # Note that we don't want <> in compat mode; Mozilla knows about <
+ q{(<\/?(?:|(?:"(?:} . $re_directive . q{|[^"])*"|'(?:} . $re_directive . q{|[^'])*'|--(?:(?!--)(?:$re_directive)*.)*--|(?:}
+ . $re_directive
+ . q{|(?!--)[^"'<>} . $etag . q{]))+))([} . $etag . q{]|(?=<))(.*)};
+}
+BEGIN {
+ $re_comment = '(?:--(?:[^-]|-[^-])*--)';
+ $re_entity_name = '(?:[^&%#;<>\s]+)'; # NOTE: not really correct SGML
+ $re_end_entity = '(?:;|$|(?=\s))'; # semicolon or before-whitespace
+ $re_etag = q{(?:<\/?(?:"[^"]*"|'[^']*'|[^"'>\/])*[>\/])}; # end-tag
+ @re_tag = ($re_tag_strict, $re_tag_compat) = (re_tag(0), re_tag(1));
+}
+
+# End of the hideous stuff
+
+use vars qw( $serial );
+
+###############################################################################
+
+sub FATAL_P () {'fatal-p'}
+sub SYNTAXERROR_P () {'syntaxerror-p'}
+
+sub FILENAME () {'input'}
+sub HANDLE () {'handle'}
+
+sub READAHEAD () {'readahead'}
+sub LINENUM_START () {'lc_0'}
+sub LINENUM () {'lc'}
+sub CDATA_MODE_P () {'cdata-mode-p'}
+sub CDATA_CLOSE () {'cdata-close'}
+sub PCDATA_MODE_P () {'pcdata-mode-p'} # additional submode for CDATA
+sub JS_MODE_P () {'js-mode-p'} # cdata-mode-p must also be true
+
+sub ALLOW_CFORMAT_P () {'allow-cformat-p'}
+
+sub new {
+ my $this = shift;
+ my($input) = @_;
+ my $class = ref($this) || $this;
+ my $self = {};
+ bless $self, $class;
+
+ my $handle = sprintf('TMPLTOKENIZER%d', $serial);
+ $serial += 1;
+
+ no strict;
+ open($handle, "<$input") || die "$input: $!\n";
+ use strict;
+ $self->{+FILENAME} = $input;
+ $self->{+HANDLE} = $handle;
+ $self->{+READAHEAD} = [];
+ return $self;
+}
+
+###############################################################################
+
+# Simple getters
+
+sub filename {
+ my $this = shift;
+ return $this->{+FILENAME};
+}
+
+sub _handle {
+ my $this = shift;
+ return $this->{+HANDLE};
+}
+
+sub fatal_p {
+ my $this = shift;
+ return $this->{+FATAL_P};
+}
+
+sub syntaxerror_p {
+ my $this = shift;
+ return $this->{+SYNTAXERROR_P};
+}
+
+sub has_readahead_p {
+ my $this = shift;
+ return @{$this->{+READAHEAD}};
+}
+
+sub _peek_readahead {
+ my $this = shift;
+ return $this->{+READAHEAD}->[$#{$this->{+READAHEAD}}];
+}
+
+sub line_number_start {
+ my $this = shift;
+ return $this->{+LINENUM_START};
+}
+
+sub line_number {
+ my $this = shift;
+ return $this->{+LINENUM};
+}
+
+sub cdata_mode_p {
+ my $this = shift;
+ return $this->{+CDATA_MODE_P};
+}
+
+sub pcdata_mode_p {
+ my $this = shift;
+ return $this->{+PCDATA_MODE_P};
+}
+
+sub js_mode_p {
+ my $this = shift;
+ return $this->{+JS_MODE_P};
+}
+
+sub cdata_close {
+ my $this = shift;
+ return $this->{+CDATA_CLOSE};
+}
+
+sub allow_cformat_p {
+ my $this = shift;
+ return $this->{+ALLOW_CFORMAT_P};
+}
+
+# Simple setters
+
+sub _set_fatal {
+ my $this = shift;
+ $this->{+FATAL_P} = $_[0];
+ return $this;
+}
+
+sub _set_syntaxerror {
+ my $this = shift;
+ $this->{+SYNTAXERROR_P} = $_[0];
+ return $this;
+}
+
+sub _push_readahead {
+ my $this = shift;
+ push @{$this->{+READAHEAD}}, $_[0];
+ return $this;
+}
+
+sub _pop_readahead {
+ my $this = shift;
+ return pop @{$this->{+READAHEAD}};
+}
+
+sub _append_readahead {
+ my $this = shift;
+ $this->{+READAHEAD}->[$#{$this->{+READAHEAD}}] .= $_[0];
+ return $this;
+}
+
+sub _set_readahead {
+ my $this = shift;
+ $this->{+READAHEAD}->[$#{$this->{+READAHEAD}}] = $_[0];
+ return $this;
+}
+
+sub _increment_line_number {
+ my $this = shift;
+ $this->{+LINENUM} += 1;
+ return $this;
+}
+
+sub _set_line_number_start {
+ my $this = shift;
+ $this->{+LINENUM_START} = $_[0];
+ return $this;
+}
+
+sub _set_cdata_mode {
+ my $this = shift;
+ $this->{+CDATA_MODE_P} = $_[0];
+ return $this;
+}
+
+sub _set_pcdata_mode {
+ my $this = shift;
+ $this->{+PCDATA_MODE_P} = $_[0];
+ return $this;
+}
+
+sub _set_js_mode {
+ my $this = shift;
+ $this->{+JS_MODE_P} = $_[0];
+ return $this;
+}
+
+sub _set_cdata_close {
+ my $this = shift;
+ $this->{+CDATA_CLOSE} = $_[0];
+ return $this;
+}
+
+sub set_allow_cformat {
+ my $this = shift;
+ $this->{+ALLOW_CFORMAT_P} = $_[0];
+ return $this;
+}
+
+###############################################################################
+
+use vars qw( $js_EscapeSequence );
+BEGIN {
+ # Perl quoting is really screwed up, but this common subexp is way too long
+ $js_EscapeSequence = q{\\\\(?:['"\\\\bfnrt]|[^0-7xu]|[0-3]?[0-7]{1,2}|x[\da-fA-F]{2}|u[\da-fA-F]{4})};
+}
+sub parenleft () { '(' }
+sub parenright () { ')' }
+
+sub split_js ($) {
+ my ($s0) = @_;
+ my @it = ();
+ while (length $s0) {
+ if ($s0 =~ /^\s+/s) { # whitespace
+ push @it, $&;
+ $s0 = $';
+ } elsif ($s0 =~ /^\/\/[^\r\n]*(?:[\r\n]|$)/s) { # C++-style comment
+ push @it, $&;
+ $s0 = $';
+ } elsif ($s0 =~ /^\/\*(?:(?!\*\/).)*\*\//s) { # C-style comment
+ push @it, $&;
+ $s0 = $';
+ # Keyword or identifier, ECMA-262 p.13 (section 7.5)
+ } elsif ($s0 =~ /^[A-Z_\$][A-Z\d_\$]*/is) { # IdentifierName
+ push @it, $&;
+ $s0 = $';
+ # Punctuator, ECMA-262 p.13 (section 7.6)
+ } elsif ($s0 =~ /^(?:[\(\){}\[\];]|>>>=|<<=|>>=|[-\+\*\/\&\|\^\%]=|>>>|<<|>>|--|\+\+|\|\||\&\&|==|<=|>=|!=|[=><,!~\?:\.\-\+\*\/\&\|\^\%])/s) {
+ push @it, $&;
+ $s0 = $';
+ # DecimalLiteral, ECMA-262 p.14 (section 7.7.3); note: bug in the spec
+ } elsif ($s0 =~ /^(?:0|[1-9]\d+(?:\.\d*(?:[eE][-\+]?\d+)?)?)/s) {
+ push @it, $&;
+ $s0 = $';
+ # HexIntegerLiteral, ECMA-262 p.15 (section 7.7.3)
+ } elsif ($s0 =~ /^0[xX][\da-fA-F]+/s) {
+ push @it, $&;
+ $s0 = $';
+ # OctalIntegerLiteral, ECMA-262 p.15 (section 7.7.3)
+ } elsif ($s0 =~ /^0[\da-fA-F]+/s) {
+ push @it, $&;
+ $s0 = $';
+ # StringLiteral, ECMA-262 p.17 (section 7.7.4)
+ # XXX SourceCharacter doesn't seem to be defined (?)
+ } elsif ($s0 =~ /^(?:"(?:(?!["\\\r\n]).|$js_EscapeSequence)*"|'(?:(?!['\\\r\n]).|$js_EscapeSequence)*')/os) {
+ push @it, $&;
+ $s0 = $';
+ } elsif ($s0 =~ /^./) { # UNKNOWN TOKEN !!!
+ push @it, $&;
+ $s0 = $';
+ }
+ }
+ return @it;
+}
+
+sub STATE_UNDERSCORE () { 1 }
+sub STATE_PARENLEFT () { 2 }
+sub STATE_STRING_LITERAL () { 3 }
+
+# XXX This is a crazy hack. I don't want to write an ECMAScript parser.
+# XXX A scanner is one thing; a parser another thing.
+sub identify_js_translatables (@) {
+ my @input = @_;
+ my @output = ();
+ # We mark a JavaScript translatable string as in C, i.e., _("literal")
+ # For simplicity, we ONLY look for "_" "(" StringLiteral ")"
+ for (my $i = 0, my $state = 0, my($j, $q, $s); $i <= $#input; $i += 1) {
+ my $reset_state_p = 0;
+ push @output, [0, $input[$i]];
+ if ($input[$i] !~ /\S/s) {
+ ;
+ } elsif ($state == 0) {
+ $state = STATE_UNDERSCORE if $input[$i] eq '_';
+ } elsif ($state == STATE_UNDERSCORE) {
+ $state = $input[$i] eq parenleft ? STATE_PARENLEFT : 0;
+ } elsif ($state == STATE_PARENLEFT) {
+ if ($input[$i] =~ /^(['"])(.*)\1$/s) {
+ ($state, $j, $q, $s) = (STATE_STRING_LITERAL, $#output, $1, $2);
+ } else {
+ $state = 0;
+ }
+ } elsif ($state == STATE_STRING_LITERAL) {
+ if ($input[$i] eq parenright) {
+ $output[$j] = [1, $output[$j]->[1], $q, $s];
+ }
+ $state = 0;
+ } else {
+ die "identify_js_translatables internal error: Unknown state $state"
+ }
+ }
+ return \@output;
+}
+
+###############################################################################
+
+sub _extract_attributes ($;$) {
+ my $this = shift;
+ my($s, $lc) = @_;
+ my %attr;
+ $s = $1 if $s =~ /^<(?:(?!$re_directive_control)\S)+(.*)\/\S$/s # XML-style self-closing tags
+ || $s =~ /^<(?:(?!$re_directive_control)\S)+(.*)\S$/s; # SGML-style tags
+
+ for (my $i = 0; $s =~ /^(?:$re_directive_control)?\s+(?:$re_directive_control)?(?:([a-zA-Z][-a-zA-Z0-9]*)\s*=\s*)?('((?:$re_directive|[^'])*)'|"((?:$re_directive|[^"])*)"|((?:$re_directive|[^\s<>])+))/os;) {
+ my($key, $val, $val_orig, $rest)
+ = ($1, (defined $3? $3: defined $4? $4: $5), $2, $');
+ $i += 1;
+ $attr{+lc($key)} = [$key, $val, $val_orig, $i];
+ $s = $rest;
+ if ($val =~ /$re_tmpl_include/os) {
+ warn_normal "TMPL_INCLUDE in attribute: $val_orig\n", $lc;
+ } elsif ($val =~ /$re_tmpl_var/os && $val !~ /$re_tmpl_var_escaped/os) {
+ # XXX: we probably should not warn if key is "onclick" etc
+ # XXX: there's just no reasonable thing to suggest
+ my $suggest = ($key =~ /^(?:action|archive|background|cite|classid|codebase|data|datasrc|for|href|longdesc|profile|src|usemap)$/i? 'URL': 'HTML');
+ undef $suggest if $key =~ /^(?:onblur|onchange|onclick|ondblclick|onfocus|onkeydown|onkeypress|onkeyup|onload|onmousedown|onmousemove|onmouseout|onmouseover|onmouseup|onreset|onselect|onsubmit|onunload)$/i;
+ warn_pedantic
+ "Suggest ESCAPE=$suggest for TMPL_VAR in attribute \"$key\""
+ . ": $val_orig",
+ $lc, \$pedantic_tmpl_var_use_in_nonpedantic_mode_p
+ if defined $suggest && (pedantic_p || !$pedantic_tmpl_var_use_in_nonpedantic_mode_p);
+ } elsif ($val_orig !~ /^['"]/) {
+ my $t = $val; $t =~ s/$re_directive_control//os;
+ warn_pedantic
+ "Unquoted attribute contains character(s) that should be quoted"
+ . ": $val_orig",
+ $lc, \$pedantic_attribute_error_in_nonpedantic_mode_p
+ if $t =~ /[^-\.A-Za-z0-9]/s;
+ }
+ }
+ my $s2 = $s; $s2 =~ s/$re_tmpl_endif_endloop//g; # for the next check
+ if ($s2 =~ /\S/s) { # should never happen
+ if ($s =~ /^([^\n]*)\n/s) { # this is even worse
+ error_normal("Completely confused while extracting attributes: $1", $lc);
+ error_normal((scalar(split(/\n/, $s)) - 1) . " more line(s) not shown.", undef);
+ $this->_set_fatal( 1 );
+ } else {
+ # There's something wrong with the attribute syntax.
+ # We might be able to deduce a likely cause by looking more.
+ if ($s =~ /^[a-z0-9]/is && "" =~ /^$re_tag_compat$/s) {
+ warn_normal "Probably missing whitespace before or missing quotation mark near: $s\n", $lc;
+ } else {
+ warn_normal "Strange attribute syntax: $s\n", $lc;
+ }
+ }
+ }
+ return \%attr;
+}
+
+sub _next_token_internal {
+ my $this = shift;
+ my($h) = @_;
+ my($it, $kind);
+ my $eof_p = 0;
+ $this->_pop_readahead if $this->has_readahead_p
+ && !ref $this->_peek_readahead
+ && !length $this->_peek_readahead;
+ if (!$this->has_readahead_p) {
+ my $next = scalar <$h>;
+ $eof_p = !defined $next;
+ if (!$eof_p) {
+ $this->_increment_line_number;
+ $this->_push_readahead( $next );
+ }
+ }
+ $this->_set_line_number_start( $this->line_number ); # remember 1st line num
+ if ($this->has_readahead_p && ref $this->_peek_readahead) { # TmplToken obj.
+ ($it, $kind) = ($this->_pop_readahead, undef);
+ } elsif ($eof_p && !$this->has_readahead_p) { # nothing left to do
+ ;
+ } elsif ($this->_peek_readahead =~ /^\s+/s) { # whitespace
+ ($kind, $it) = (TmplTokenType::TEXT, $&);
+ $this->_set_readahead( $' );
+ # FIXME the following (the [<\s] part) is an unreliable HACK :-(
+ } elsif ($this->_peek_readahead =~ /^(?:[^<]|<[<\s])*(?:[^<\s])/s) { # non-space normal text
+ ($kind, $it) = (TmplTokenType::TEXT, $&);
+ $this->_set_readahead( $' );
+ warn_normal "Unescaped < in $it\n", $this->line_number_start
+ if !$this->cdata_mode_p && $it =~ /cdata_close;;) {
+ if ($this->cdata_mode_p) {
+ my $next = $this->_pop_readahead;
+ if ($next =~ /^$cdata_close/is) {
+ ($kind, $it) = (TmplTokenType::TAG, $&);
+ $this->_push_readahead( $' );
+ $ok_p = 1;
+ } elsif ($next =~ /^((?:(?!$cdata_close).)+)($cdata_close)/is) {
+ ($kind, $it) = (TmplTokenType::TEXT, $1);
+ $this->_push_readahead( "$2$'" );
+ $ok_p = 1;
+ } else {
+ ($kind, $it) = (TmplTokenType::TEXT, $next);
+ $ok_p = 1;
+ }
+ } elsif ($this->_peek_readahead =~ /^$re_tag_compat/os) {
+ # If we detect a "closed start tag" but we know that the
+ # following token looks like a TMPL_VAR, don't stop
+ my($head, $tail, $post) = ($1, $2, $3);
+ if ($tail eq '' && $post =~ $re_tmpl_var) {
+ # Don't bother to show the warning if we're too confused
+ # FIXME. There's no method for _closed_start_tag_warning
+ if (!defined $this->{'_closed_start_tag_warning'}
+ || ($this->{'_closed_start_tag_warning'}->[0] eq $head
+ && $this->{'_closed_start_tag_warning'}->[1] != $this->line_number - 1)) {
+ warn_normal "Possible SGML \"closed start tag\" notation: $head<\n", $this->line_number
+ if split(/\n/, $head) < 10;
+ }
+ $this->{'_closed_start_tag_warning'} = [$head, $this->line_number];
+ } else {
+ ($kind, $it) = (TmplTokenType::TAG, "$head>");
+ $this->_set_readahead( $post );
+ $ok_p = 1;
+ warn_normal "SGML \"closed start tag\" notation: $head<\n", $this->line_number if $tail eq '';
+ }
+ } elsif ($this->_peek_readahead =~ /^)$re_directive*.)*-->/os) {
+ ($kind, $it) = (TmplTokenType::COMMENT, $&);
+ $this->_set_readahead( $' );
+ $ok_p = 1;
+ $bad_comment_p = 1;
+ }
+ last if $ok_p;
+ my $next = scalar <$h>;
+ $eof_p = !defined $next;
+ last if $eof_p;
+ $this->_increment_line_number;
+ $this->_append_readahead( $next );
+ }
+ if ($kind ne TmplTokenType::TAG) {
+ ;
+ } elsif ($it =~ /^).)*-->/;
+ if ($kind == TmplTokenType::COMMENT && $it =~ /^