#!/usr/bin/perl -w
#
# <@LICENSE>
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to you under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# </@LICENSE>

use strict;
use warnings;

use FindBin;
use Getopt::Std;

# Lingua::Translate has a bunch of requirements (see bottom of its perldoc)
use Lingua::Translate;

# $opt_* are filled in by getopts() below, so they have to be package
# variables; %rules and %scores from tmp/rules.pl
our ($opt_c, $opt_e, $opt_h, $opt_n, $opt_r, %rules, %scores);

# usage()
#
# Print the help text and terminate.  The text is emitted with die(), so it
# goes to standard error and the script exits with a non-zero status.
sub usage {
  die "generate-translation language output_file

  -h       print this help
  -e STR   use STR as destination character set (using Lingua::Translate)
  -r STR   use STR as destination character set (using recode)
  -n N     translate first N rules (used for testing)
  -c DIR   use DIR as rules directory

  language should be a two letter language code from this list:

    zh: Chinese-simplified
    zt: Chinese-traditional
    nl: Dutch
    fr: French
    de: German
    el: Greek
    it: Italian
    ja: Japanese
    ko: Korean
    pt: Portuguese
    ru: Russian
    es: Spanish

  progress is displayed on standard error
";
}

# getopts() returns false when it meets a switch it does not know; show the
# help instead of silently carrying on with a partially parsed command line
getopts("hc:e:n:r:") or usage();
usage() if ($opt_h || @ARGV < 2);

# -n is compared numerically later on, so reject anything that is not a
# plain non-negative integer up front
usage() if (defined $opt_n && $opt_n !~ /^\d+$/);

# options
my $dest = shift @ARGV;                           # target language code
my $output = shift @ARGV;                         # file to write
my $cffile = $opt_c || "$FindBin::Bin/../rules";  # rules directory
my $enc = $opt_e || "utf8";                       # Lingua::Translate charset
my $recode = $opt_r || "UTF-8";                   # recode target charset

# rule => configuration hashes
my %english;
my %old;
my %translation;

# translation cache
my %lang_cache;

# do the work
read_rules($cffile);
generate_translation();
print_translation();

# read_rules($cffile)
#
# Run build/parse-rules-for-masses over the rules directory $cffile and then
# load the tmp/rules.pl file found next to this script.  Dies if the parser
# exits with a non-zero status or rules.pl cannot be loaded.
sub read_rules {
  my ($cffile) = @_;

  # list-form system() bypasses the shell, so a rules directory containing
  # quotes or other shell metacharacters is handed over untouched
  system("$FindBin::Bin/../build/parse-rules-for-masses", "-d", $cffile) == 0
    or die "unable to parse rules\n";
  require "$FindBin::Bin/tmp/rules.pl"
    or die "unable to read tmp/rules.pl\n";
}

# generate_translation()
#
# Translate the description of every rule in %rules from English into $dest.
# Fills %old (describe lines found in an existing 30_text_$dest.cf),
# %english (the original describe lines) and %translation (the translated
# lines).  Progress goes to standard error: "." for a translated rule, "x"
# for a failed one.  Stops after $opt_n rules when -n was given.  Dies if
# Lingua::Translate->new returns false.
sub generate_translation {
  my $fish = Lingua::Translate->new(src => "en",
                                    dest => $dest,
                                    dest_enc => $enc)
    or die "no translation server available for en -> $dest\n";

  # see if we had an old translation
  my $old_cf = "$FindBin::Bin/../rules/30_text_$dest.cf";
  if (-f $old_cf) {
    if (open(my $old_fh, '<', $old_cf)) {
      # The source read "while()", which never reads from the handle;
      # iterate over the lines of the old file instead.
      while (my $line = <$old_fh>) {
        if ($line =~ /^lang\s+\Q$dest\E\s+describe\s+(\S+)\s+(.*?)\s*$/) {
          $old{$1} = "lang $dest describe $1 $2\n";
        }
      }
      close($old_fh);
    }
    else {
      # the old translation is only informational, so keep going without it
      warn "unable to read $old_cf: $!\n";
    }
  }

  # try to generate new translation
  my $count = 0;
  for my $name (sort keys %rules) {
    next if ($name eq '_scoreset');
    next if ($rules{$name}->{lang});
    next unless defined $rules{$name}->{describe};

    my $describe = $rules{$name}->{describe};

    # Pattern that finds the (possibly translated) rule name inside the
    # translated description so it can be turned back into $name.
    my $name_re = qr/\Q$name\E/;

    # translate name if it appears in the description
    if ($describe =~ /\Q$name\E/) {
      my $lang_name = eval {
        $fish->translate($name);	# dies or croaks on error
      };
      if ($@ || !defined $lang_name || $lang_name eq '') {
        # no usable translation of the name: fall back to anything that
        # looks like a rule name
        $name_re = qr/[A-Z]+[A-Z0-9_]+[A-Z0-9]/;
      }
      else {
        # match the translated name literally; it may contain characters
        # that are special in a regular expression
        $name_re = qr/\Q$lang_name\E/;
      }
    }

    # English version
    $english{$name} = "describe $name\t$describe\n";

    # translate description
    my $lang_describe = '';
    eval {
      if (defined $lang_cache{$describe}) {
        $lang_describe = $lang_cache{$describe};
      }
      else {
        # dies or croaks on error
        $lang_describe = $fish->translate($describe);
        # an undefined result is a failure too, not an empty translation
        die "no translation returned\n" unless defined $lang_describe;
        $lang_describe =~ s/\s+/ /sg;
        $lang_describe =~ s/ $//g;
        $lang_cache{$describe} = $lang_describe;
      }
    };

    if ($@) {
      # didn't work
      print STDERR "x";
    }
    else {
      $lang_describe =~ s/$name_re/$name/;
      $translation{$name} = "lang $dest describe $name\t$lang_describe\n";
      print STDERR ".";
    }

    $count++;
    last if ($opt_n && $count == $opt_n);
  }
  print STDERR "\n" if $count > 0;
}

# print_translation()
#
# Write the result to $output: a fixed header, the report character set, the
# report template lines prefixed with "lang $dest", and then for every rule
# in %english the English, translated and old describe lines, each written
# as a comment.  When -r was given the finished file is converted from $enc
# to $recode with /usr/bin/recode.  Dies if $output cannot be written.
sub print_translation {
  my $charset = $opt_r ? $recode : $enc;
  $charset =~ s/utf-?8/utf-8/i;

  open(my $out_fh, '>', $output)
    or die "unable to write $output: $!\n";

  # NOTE(review): the "# </@LICENSE>" line below is restored; the source
  # showed a bare "#" there and the opening tag had no counterpart.  Confirm
  # against the pristine file.
  print $out_fh <<'EOF';
#
# Please don't modify this file as your changes will be overwritten with
# the next update. Use @@LOCAL_RULES_DIR@@/local.cf instead.
# See 'perldoc Mail::SpamAssassin::Conf' for details.
#
# <@LICENSE>
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to you under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# </@LICENSE>
#
###########################################################################
# character set used in the following texts
EOF

  # FIXME(review): the source between "print OUTPUT <" and ") {" was lost
  # (everything from "<EOF" up to the ">" of a "while(<HANDLE>)" appears to
  # have been stripped as markup).  The two statements below reconstruct it:
  # a line announcing $charset, then a loop over a rules file holding the
  # report templates.  The exact header text and the file name are
  # assumptions; restore them from version control before relying on this.
  print $out_fh "lang $dest report_charset $charset\n\n";

  my $template_cf = "$FindBin::Bin/../rules/10_default_prefs.cf";
  if (open(my $template_fh, '<', $template_cf)) {
    while (my $line = <$template_fh>) {
      if ($line =~ /^(?:clear_report_template|report|clear_unsafe_report_template|unsafe_report)\b/) {
        print $out_fh "lang $dest $line";
      }
    }
    close($template_fh);
  }
  else {
    warn "unable to read $template_cf: $!\n";
  }

  print $out_fh "\n\n";
  for my $name (sort keys %english) {
    print $out_fh "# $english{$name}";
    print $out_fh "# $translation{$name}" if $translation{$name};
    print $out_fh "# $old{$name}" if $old{$name};
    print $out_fh "\n";
  }

  # flush everything to disk before recode rewrites the file in place;
  # write errors on a buffered handle only surface here
  close($out_fh)
    or die "unable to write $output: $!\n";

  if ($opt_r) {
    # list form: no shell between us and recode
    system("/usr/bin/recode", "$enc..$recode", $output) == 0
      or warn "recode $enc..$recode failed for $output\n";
  }
}