| 1 | #!/usr/bin/perl |
| 2 | |
| 3 | # b2m.pl - Script to convert a Babyl file to an mbox file |
| 4 | |
| 5 | # This program is free software; you can redistribute it and/or modify |
| 6 | # it under the terms of the GNU General Public License as published by |
| 7 | # the Free Software Foundation; either version 2 of the License, or |
| 8 | # (at your option) any later version. |
| 9 | |
| 10 | # This program is distributed in the hope that it will be useful, but |
| 11 | # WITHOUT ANY WARRANTY; without even the implied warranty of |
| 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 13 | # General Public License for more details. |
| 14 | |
| 15 | # You should have received a copy of the GNU General Public License |
| 16 | # along with this program; if not, write to the Free Software |
| 17 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 |
| 18 | # USA. |
| 19 | |
| 20 | # Maintained by Jonathan Kamens <jik@kamens.brookline.ma.us>. |
| 21 | |
| 22 | # Requires CPAN modules: MailTools (for Mail::Address), TimeDate (for |
| 23 | # Date::Parse). |
| 24 | |
| 25 | use warnings; |
| 26 | use strict; |
| 27 | use File::Basename; |
| 28 | use Getopt::Long; |
| 29 | use Mail::Address; |
| 30 | use Date::Parse; |
| 31 | |
| 32 | my($whoami) = basename $0; |
| 33 | my($version) = '$Revision$'; |
| 34 | my($usage) = "Usage: $whoami [--help] [--version] [--[no]full-headers] [Babyl-file] |
| 35 | \tBy default, full headers are printed.\n"; |
| 36 | |
| 37 | my($opt_help, $opt_version); |
| 38 | my($opt_full_headers) = 1; |
| 39 | |
| 40 | die $usage if (! GetOptions( |
| 41 | 'help' => \$opt_help, |
| 42 | 'version' => \$opt_version, |
| 43 | 'full-headers!' => \$opt_full_headers, |
| 44 | )); |
| 45 | |
| 46 | if ($opt_help) { |
| 47 | print $usage; |
| 48 | exit; |
| 49 | } |
| 50 | elsif ($opt_version) { |
| 51 | print "$whoami version: $version\n"; |
| 52 | exit; |
| 53 | } |
| 54 | |
| 55 | die $usage if (@ARGV > 1); |
| 56 | |
| 57 | $/ = "\n\037"; |
| 58 | |
| 59 | if (<> !~ /^BABYL OPTIONS:/) { |
| 60 | die "$whoami: $ARGV is not a Babyl file\n$usage"; |
| 61 | } |
| 62 | |
| 63 | while (<>) { |
| 64 | my($msg_num) = $. - 1; |
| 65 | my($labels, $pruned, $full_header, $header); |
| 66 | my($from_line, $from_addr); |
| 67 | my($time); |
| 68 | |
| 69 | # This will strip the initial form feed, any whitespace that may |
| 70 | # be following it, and then a newline |
| 71 | s/^\s+//; |
| 72 | # This will strip the ^_ off of the end of the message |
| 73 | s/\037$//; |
| 74 | |
| 75 | if (! s/(.*)\n//) { |
| 76 | malformatted: |
| 77 | warn "$whoami: message $msg_num in $ARGV is malformatted\n"; |
| 78 | next; |
| 79 | } |
| 80 | $labels = $1; |
| 81 | |
| 82 | # Strip the integer indicating whether the header is pruned |
| 83 | $labels =~ s/^(\d+)[,\s]*//; |
| 84 | $pruned = $1; |
| 85 | |
| 86 | s/(?:((?:.+\n)+)\n*)?\*\*\* EOOH \*\*\*\n+// || goto malformatted; |
| 87 | $full_header = $1; |
| 88 | |
| 89 | if (s/((?:.+\n)+)\n+//) { |
| 90 | $header = $1; |
| 91 | } |
| 92 | else { |
| 93 | # Message has no body |
| 94 | $header = $_; |
| 95 | $_ = ''; |
| 96 | } |
| 97 | |
| 98 | # "$pruned eq '0'" is different from "! $pruned". We want to make |
| 99 | # sure that we found a valid label line which explicitly indicated |
| 100 | # that the header was not pruned. |
| 101 | if ((! $full_header) || ($pruned eq '0')) { |
| 102 | $full_header = $header; |
| 103 | } |
| 104 | |
| 105 | # End message with two newlines (some mbox parsers require a blank |
| 106 | # line before the next "From " line). |
| 107 | s/\s+$/\n\n/; |
| 108 | |
| 109 | # Quote "^From " |
| 110 | s/(^|\n)From /$1>From /g; |
| 111 | |
| 112 | # Strip extra commas and whitespace from the end |
| 113 | $labels =~ s/[,\s]+$//; |
| 114 | # Now collapse extra commas and whitespace in the remaining label string |
| 115 | $labels =~ s/[,\s]+/, /g; |
| 116 | |
| 117 | foreach my $rmail_header qw(summary-line x-coding-system) { |
| 118 | $full_header =~ s/(^|\n)$rmail_header:.*\n/$1/i; |
| 119 | } |
| 120 | |
| 121 | if ($full_header =~ s/(^|\n)mail-from:\s*(From .*)\n/$1/i) { |
| 122 | ($from_line = $2) =~ s/\s*$/\n/; |
| 123 | } |
| 124 | else { |
| 125 | foreach my $addr_header qw(return-path from really-from sender) { |
| 126 | if ($full_header =~ /(?:^|\n)$addr_header:\s*(.*\n(?:\B.*\n)*)/i) { |
| 127 | my($addr) = Mail::Address->parse($1); |
| 128 | $from_addr = $addr->address($addr); |
| 129 | last; |
| 130 | } |
| 131 | } |
| 132 | |
| 133 | if (! $from_addr) { |
| 134 | $from_addr = "Babyl_to_mail_by_$whoami\@localhost"; |
| 135 | } |
| 136 | |
| 137 | if ($full_header =~ /(?:^|\n)date:\s*(\S.*\S)/i) { |
| 138 | $time = str2time($1); |
| 139 | } |
| 140 | |
| 141 | if (! $time) { |
| 142 | # No Date header or we failed to parse it |
| 143 | $time = time; |
| 144 | } |
| 145 | |
| 146 | $from_line = "From " . $from_addr . " " . localtime($time) . "\n"; |
| 147 | } |
| 148 | |
| 149 | print($from_line, ($opt_full_headers ? $full_header : $header), |
| 150 | ($labels ? "X-Babyl-Labels: $labels\n" : ""), "\n", |
| 151 | $_) || die "$whoami: error writing to stdout: $!\n"; |
| 152 | } |
| 153 | |
| 154 | close(STDOUT) || die "$whoami: Error closing stdout: $!\n"; |
| 155 | |
| 156 | # arch-tag: 8c7c8ab0-721c-46d7-ba3e-139801240aa8 |