#!/usr/local/ymir/perl/bin/perl use strict; use Unicode::Japanese; use Data::Dumper; my $re_char = qr/[\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3}|[\xf8-\xfb][\x80-\xbf]{4}|[\xfc-\xfd][\x80-\xbf]{5}/; opendir(DIR, ".") or die; my @files = sort grep(m/\.hmo_kana$/, readdir(DIR)); #my @files = grep(m/\M1.hmo_kana$/, readdir(DIR)); closedir(DIR); # ようやく最短キータイプを姫踊子草の定義ファイルから求めるプログラムが出来ました. my $teststr = 'ヨウヤクサイタンキータイプヲヒメオドリコソウノテイギファイルカラモトメルプログラムガデキマシタ'; print "keycheck\n"; print "\n"; print "\n"; foreach my $file (@files) { my ($conf, $table) = &loadkeytable($file); # print Data::Dumper->new([$conf, $table])->Dump; print STDERR "==== $file\n"; my $res = &parsekey($conf, $table, $teststr, \&keycount); print ""; print ""; print ""; print ""; print ""; print "\n"; } print "
定義ファイルキー数移動数タイプ内容
$file" . &keycount($res) . "" . &movecount($res) . "$res
\n"; print "\n"; sub keycount { my $key = shift; $key =~ s/\([^\)]+\)//g; $key =~ tr/\s//d; length($key); } sub movecount { my $key = shift; $key =~ s/\([^\)]+\)//g; $key =~ s/[^\s]+//g; length($key); } sub parsekey { my $conf = shift; my $table = shift; my $teststr = shift; my $scorefunc = shift; my @pat = ([$teststr, '']); my @respat; while(1) { my $maxlen; my $minscore; foreach my $pat (@pat) { my $len = length($pat->[0]); my $score = &$scorefunc($pat->[1]); if(!defined($maxlen) or $maxlen < $len) { $maxlen = $len; } if(!defined($minscore) or $minscore > $score) { $minscore = $score; } } my @thispat; my @nextpat; foreach my $pat (@pat) { if(length($pat->[0]) == $maxlen) { next if(&$scorefunc($pat->[1]) > $minscore); next if(scalar(@thispat)); push(@thispat, $pat); } else { push(@nextpat, $pat); } } # print STDERR "minscore: $minscore, maxlen: $maxlen, trypat: ".scalar(@thispat)."/" . scalar(@nextpat) . "\n"; my @newpat; my $check; foreach my $pat (@thispat) { my $str = $pat->[0]; my $strkana = Unicode::Japanese->new($str)->z2h->h2zKanaK->get; foreach my $key (keys %$table) { if($str =~ s/^\Q$table->{$key}\E//) { my $newpat = [$str, $pat->[1] . ' ' . $key . '(' . $table->{$key} . ')']; if($str eq '') { push(@respat, $newpat->[1]); $check = 1; } else { push(@newpat, $newpat); } # print "$pat->[0]:$pat->[1] -> $str: $key, $table->{$key}\n"; $str = $pat->[0]; } elsif($strkana =~ s/^\Q$table->{$key}\E//) { my $newpat = [$strkana, $pat->[1] . ' ' . $key . '(' . $table->{$key} . ')']; if($strkana eq '') { push(@respat, $newpat->[1]); $check = 1; } else { push(@newpat, $newpat); } # print "$pat->[0]:$pat->[1] -> $strkana: $key, $table->{$key}\n"; $strkana = Unicode::Japanese->new($str)->z2h->h2zKanaK->get; } } } # print STDERR Data::Dumper->new([\@respat, \@thispat, \@newpat, \@nextpat])->Dump; if(scalar(@newpat)) { @pat = (@newpat, @nextpat); } else { if(scalar(@nextpat)) { @pat = (@nextpat); } else { last; } } } my $minpat; my $minscore; foreach my $pat (@respat) { my $score = &$scorefunc($pat); if(!defined($minscore) or $score < $minscore) { $minscore = $score; $minpat = $pat; } } $minpat =~ s/^ //; $minpat; } sub loadkeytable { my $file = shift; open(KEYFILE, $file) or die "Can't open $file"; my $conf = {}; my $table = {}; my $prefix = ''; while() { next if(m/^'/); $_ = Unicode::Japanese->new($_, 'sjis')->get; $_ =~ tr/\r\n//d; if(m/^(\w+)=(\w+)/) { $conf->{$1} = $2; next; } if(m/^=([^\t]*)/) { $prefix = $1; next; } if(m/^(.+)\t(.+)/) { my $key = $1; my $str = $2; # warn "$key/$str\n"; while($key =~ s/^\{((?:$re_char)+?)\}|(.)//) { my $ch = $1 // $2; if($str =~ s/^((?:$re_char)+?)(?:\$|$)//) { my $ch2 = $1; $ch2 = Unicode::Japanese->new($ch2)->hira2kata->get; $table->{$prefix.$ch} = $ch2; # warn "$prefix.$ch: $1\n"; } } if($key ne '' or $str ne '') { warn "Parse error. ($file)[$_][$key/$str]\n"; } } } ($conf, $table); }