use strict;
use warnings;
use File::Basename;
use utf8;
use Encode;
use open IN  => ":encoding(utf8)";
use open OUT => ":encoding(utf8)";

# Copies heading ids from one file onto the matching headings of another.
#
# Usage: perl SCRIPT <in_file1> <in_file2> <out_file>
#
#   in_file1  file scanned for lines containing `<hN id="...">title</hN>`
#   in_file2  file scanned for Markdown-style `##` / `###` heading lines
#   out_file  copy of in_file2 in which every such heading line has
#             `{#id}` appended (the id of the in_file1 heading with the
#             same title), or `{#insert_plz}` when no title matches
#
# Titles are compared case-insensitively after removing whitespace and
# parentheses (and, on the in_file2 side, backticks).  When a title occurs
# several times, the n-th occurrence in in_file2 is paired with the n-th
# occurrence in in_file1.  Progress messages are printed to STDOUT encoded
# as MS932.

# Print a progress message to STDOUT as MS932-encoded bytes.
sub report {
    my ($message) = @_;
    print encode('MS932', $message);
    return;
}

# Reduce a title to its comparison form: no whitespace, no parentheses.
# NOTE(review): the original code ran each parenthesis substitution twice
# and listed an extra blank next to \s; this looks like full-width
# characters that were lost to normalisation, so both the ASCII and the
# full-width forms are stripped here -- confirm against the real inputs.
sub normalize_title {
    my ($text) = @_;
    $text =~ s/[\s\x{3000}]+//g;
    $text =~ s/[()\x{FF08}\x{FF09}]//g;
    return $text;
}

# Extract (id, title) from a line holding `<hN id="ID"...>TITLE</hN>`.
# `<span class="caps">` wrappers are removed from the title.
# Returns an empty list when the line holds no such heading.
sub parse_html_heading {
    my ($line) = @_;

    # [^"]+ keeps the id from swallowing markup up to the last quote
    # on the line.
    return unless $line =~ m{<h\d id="([^"]+)"[^>]*>(.+)</h\d>};
    my ($id, $title) = ($1, $2);

    $title =~ s/<span class="caps">//g;
    $title =~ s{</span>}{}g;
    return ($id, $title);
}

# Record a heading in %$original.  Values have the form "ID___COUNT".
# The first occurrence of a title is stored under the title itself; the
# n-th occurrence (n >= 2) is stored under "TITLE___n" and the counter of
# the first entry is bumped to n.
sub register_original {
    my ($original, $title, $id) = @_;

    if (exists $original->{$title}) {
        # \d+ (not \d): the counter may exceed 9.
        my ($first_id, $count) = $original->{$title} =~ /\A(.+)___(\d+)\z/;
        $count++;
        $original->{$title} = "${first_id}___$count";
        $original->{"${title}___$count"} = "${id}___1";
    }
    else {
        $original->{$title} = "${id}___1";
    }
    return;
}

# Build the list of [compiled pattern, id] pairs used to look up headings.
# Done once so titles are not re-normalised for every heading line; sorted
# by title so the result order is reproducible.
sub build_matchers {
    my ($original) = @_;

    my @matchers;
    for my $title (sort keys %{$original}) {
        my ($id) = $original->{$title} =~ /\A(.+)___\d+\z/;
        next unless defined $id;

        my $normalized = normalize_title($title);

        # \Q...\E: the title is literal text, not a pattern ("C++", "Why?").
        push @matchers, [ qr/\A\Q$normalized\E\z/i, $id ];
    }
    return \@matchers;
}

# Return the comparison key for a `##` / `###` heading line, or undef when
# the line is not such a heading.  %$seen counts keys already returned: the
# n-th repetition (n >= 2) of a key yields "KEY___n", mirroring the scheme
# used by register_original.
sub heading_key {
    my ($line, $seen) = @_;

    return unless $line =~ /^\s*#{2,3}([^#].+)/;
    my $key = normalize_title($1);
    $key =~ s/`//g;

    if ($seen->{$key}) {
        my $count = ++$seen->{$key};
        $key = "${key}___$count";
    }
    $seen->{$key} = 1;
    return $key;
}

# Return the ids of all matchers whose title equals $key (case-insensitive).
sub matching_ids {
    my ($key, $matchers) = @_;
    return map { $_->[1] } grep { $key =~ $_->[0] } @{$matchers};
}

# Script entry point; takes the command-line arguments.
sub main {
    my @args = @_;

    # Argument handling
    my $in_file1 = $args[0];
    die "input file missing" unless (defined $in_file1 && length $in_file1);
    my $in_file2 = $args[1];
    die "input file missing 2" unless (defined $in_file2 && length $in_file2);
    my $out_file = $args[2];
    die "output filename missing" unless (defined $out_file && length $out_file);

    # Open the files
    open my $file1, '<', $in_file1 or die qq{Can't open in_file1: "$in_file1":$!};
    open my $file2, '<', $in_file2 or die qq{Can't open in_file2: "$in_file2":$!};
    open my $outfile, '>', $out_file or die qq{Can't open out_file: "$out_file":$!};

    # Pass 1: collect title -> id from the first file.
    my %original;
    while (my $line = <$file1>) {
        chomp $line;
        my ($id, $title) = parse_html_heading($line);
        next unless defined $id;
        register_original(\%original, $title, $id);
    }
    close $file1;

    report("found original ids:\n");
    for my $title (sort keys %original) {
        report("$original{$title} <> $title\n");
    }

    # Pass 2: annotate the headings of the second file.
    my $matchers = build_matchers(\%original);
    my %new;
    report("\nfound new ids:\n");
    while (my $line = <$file2>) {
        chomp $line;

        my $key = heading_key($line, \%new);
        if (defined $key) {
            report("found: $line\n");

            my @ids = matching_ids($key, $matchers);
            for my $id (@ids) {
                $line .= "{#$id}";
                report("-> added: $line\n");
            }

            # No counterpart found: leave a marker for manual editing.
            $line .= '{#insert_plz}' unless @ids;
        }
        print {$outfile} "$line\n";
    }
    close $file2;

    # Buffered write errors only surface when the handle is closed.
    close $outfile or die qq{Can't close out_file: "$out_file":$!};
    return;
}

# Run only when executed as a script, so the helpers can be loaded alone.
main(@ARGV) unless caller;