#!/usr/local/ymir/perl/bin/perl
#
# Convert the colon-separated dictionary text file into a JSON file.
#
# Input : pejv/pejvo.txt, read as cp932, one "headword:definition" per line.
# Output: dic2.json, written as UTF-8, of the form
#           { "dic": [ [search_key, headword, definition, type], ... ] }
#
# For every headword that ends in "/a", "/i" or "/o" an additional "root"
# entry (the headword without that ending) is emitted before the entry itself;
# if an entry for that root's search key already exists, the text is appended
# to the first such entry instead.
use strict;
use warnings;
use JSON::XS;
use utf8;
use Data::Dumper;

my $input_file  = "pejv/pejvo.txt";
my $output_file = "dic2.json";

# ASCII "letter + ^" notation => the corresponding accented letter.
my %accented_for = (
    'C' => 'Ĉ', 'G' => 'Ĝ', 'H' => 'Ĥ', 'J' => 'Ĵ', 'S' => 'Ŝ', 'U' => 'Ŭ',
    'c' => 'ĉ', 'g' => 'ĝ', 'h' => 'ĥ', 'j' => 'ĵ', 's' => 'ŝ', 'u' => 'ŭ',
);

# $dic->{dic}   : list of [search_key, headword, definition, type] entries.
# $dic->{index} : search_key => [positions in $dic->{dic}]; build-time only,
#                 removed again before the JSON is written.
my $dic;
my $idx = 0;    # position the next pushed entry will get in $dic->{dic}

open(my $in, "<:encoding(cp932)", $input_file)
    or die "Cannot open $input_file: $!";

while (my $line = <$in>) {
    $line =~ s/\r?\n//;
    next if $line eq '';    # a blank line carries no headword

    $line = unicode_word($line);
    my ($eo, $ja) = split(/:/, $line, 2);

    my $search_eo = _search_key($eo);
    my $type      = _affix_type($eo);

    # Headwords ending in /a, /i or /o also get an entry for the bare root.
    my $root = lc($eo);
    if ($root =~ s!/[aio]$!!) {
        my $search_root = _search_key($root);
        my $root_ja     = '<<' . $search_eo . '〔' . $ja . '〕';

        if ($dic->{index}{$search_root}) {
            # Root already known: extend the first entry recorded for it.
            my $old_idx = $dic->{index}{$search_root}[0];
            $dic->{dic}[$old_idx][2] .= "\n" . $root_ja;
        }
        else {
            $dic->{index}{$search_root} = [$idx];
            push(@{ $dic->{dic} }, [$search_root, $root, $root_ja, $type]);
            $idx++;
        }
    }

    # Record the position of this entry unless it is already the last one
    # recorded for this search key.
    my $known = $dic->{index}{$search_eo};
    if (!$known || $known->[-1] != $idx) {
        push(@{ $dic->{index}{$search_eo} }, $idx);
    }

    push(@{ $dic->{dic} }, [$search_eo, $eo, $ja, $type]);
    $idx++;
}

close($in) or die "Cannot close $input_file: $!";

# The index was only needed while building; keep it out of the output.
delete $dic->{index};

open(my $out, ">:encoding(UTF-8)", $output_file)
    or die "Cannot open $output_file: $!";
print {$out} JSON::XS->new->encode($dic)
    or die "Cannot write $output_file: $!";
# Buffered write errors only surface at close time, so check it.
close($out) or die "Cannot close $output_file: $!";

#system(q{gzip -c dic2.json > dic2.json.gz});

# Replace the ASCII notation "X^" (X one of C G H J S U, either case) with the
# corresponding accented letter. Returns the converted string.
sub unicode_word {
    my $str = shift;
    $str =~ s/([CGHJSUcghjsu])\^/$accented_for{$1}/g;
    return $str;
}

# Build the lookup key for a word: lower-cased, with every "-", "/", "!" and
# "?" removed.
sub _search_key {
    my ($word) = @_;
    my $key = lc($word);
    $key =~ tr/-\/!\?//d;
    return $key;
}

# Classify a headword by where it carries a hyphen:
#   1 = trailing hyphen only, 4 = leading hyphen only,
#   3 = hyphen at both ends,  2 = no hyphen at either end.
sub _affix_type {
    my ($eo) = @_;
    my $leading  = ($eo =~ /^-/) ? 1 : 0;
    my $trailing = ($eo =~ /-$/) ? 1 : 0;

    return 2 if !$leading && !$trailing;
    return 1 if !$leading;
    return 4 if !$trailing;
    return 3;
}