| 1 | #!/usr/bin/perl |
| 2 | |
| 3 | # b2m.pl - Script to convert a Babyl file to an mbox file |
| 4 | |
| 5 | # Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 |
| 6 | # Free Software Foundation, Inc. |
| 7 | |
| 8 | # Maintainer: Jonathan Kamens <jik@kamens.brookline.ma.us> |
| 9 | |
| 10 | # This program is free software: you can redistribute it and/or modify |
| 11 | # it under the terms of the GNU General Public License as published by |
| 12 | # the Free Software Foundation, either version 3 of the License, or |
| 13 | # (at your option) any later version. |
| 14 | |
| 15 | # This program is distributed in the hope that it will be useful, |
| 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 18 | # GNU General Public License for more details. |
| 19 | |
| 20 | # You should have received a copy of the GNU General Public License |
| 21 | # along with this program. If not, see <http://www.gnu.org/licenses/>. |
| 22 | |
| 23 | |
| 24 | # Requires CPAN modules: MailTools (for Mail::Address), TimeDate (for |
| 25 | # Date::Parse). |
| 26 | |
| 27 | use warnings; |
| 28 | use strict; |
| 29 | use File::Basename; |
| 30 | use Getopt::Long; |
| 31 | use Mail::Address; |
| 32 | use Date::Parse; |
| 33 | |
| 34 | my($whoami) = basename $0; |
| 35 | my($version) = '$Revision$'; |
| 36 | my($usage) = "Usage: $whoami [--help] [--version] [--[no]full-headers] [Babyl-file] |
| 37 | \tBy default, full headers are printed.\n"; |
| 38 | |
| 39 | my($opt_help, $opt_version); |
| 40 | my($opt_full_headers) = 1; |
| 41 | |
| 42 | die $usage if (! GetOptions( |
| 43 | 'help' => \$opt_help, |
| 44 | 'version' => \$opt_version, |
| 45 | 'full-headers!' => \$opt_full_headers, |
| 46 | )); |
| 47 | |
| 48 | if ($opt_help) { |
| 49 | print $usage; |
| 50 | exit; |
| 51 | } |
| 52 | elsif ($opt_version) { |
| 53 | print "$whoami version: $version\n"; |
| 54 | exit; |
| 55 | } |
| 56 | |
| 57 | die $usage if (@ARGV > 1); |
| 58 | |
| 59 | $/ = "\n\037"; |
| 60 | |
| 61 | if (<> !~ /^BABYL OPTIONS:/) { |
| 62 | die "$whoami: $ARGV is not a Babyl file\n$usage"; |
| 63 | } |
| 64 | |
| 65 | while (<>) { |
| 66 | my($msg_num) = $. - 1; |
| 67 | my($labels, $pruned, $full_header, $header); |
| 68 | my($from_line, $from_addr); |
| 69 | my($time); |
| 70 | |
| 71 | # This will strip the initial form feed, any whitespace that may |
| 72 | # be following it, and then a newline |
| 73 | s/^\s+//; |
| 74 | # This will strip the ^_ off of the end of the message |
| 75 | s/\037$//; |
| 76 | |
| 77 | if (! s/(.*)\n//) { |
| 78 | malformatted: |
| 79 | warn "$whoami: message $msg_num in $ARGV is malformatted\n"; |
| 80 | next; |
| 81 | } |
| 82 | $labels = $1; |
| 83 | |
| 84 | # Strip the integer indicating whether the header is pruned |
| 85 | $labels =~ s/^(\d+)[,\s]*//; |
| 86 | $pruned = $1; |
| 87 | |
| 88 | s/(?:((?:.+\n)+)\n*)?\*\*\* EOOH \*\*\*\n+// || goto malformatted; |
| 89 | $full_header = $1; |
| 90 | |
| 91 | if (s/((?:.+\n)+)\n+//) { |
| 92 | $header = $1; |
| 93 | } |
| 94 | else { |
| 95 | # Message has no body |
| 96 | $header = $_; |
| 97 | $_ = ''; |
| 98 | } |
| 99 | |
| 100 | # "$pruned eq '0'" is different from "! $pruned". We want to make |
| 101 | # sure that we found a valid label line which explicitly indicated |
| 102 | # that the header was not pruned. |
| 103 | if ((! $full_header) || ($pruned eq '0')) { |
| 104 | $full_header = $header; |
| 105 | } |
| 106 | |
| 107 | # End message with two newlines (some mbox parsers require a blank |
| 108 | # line before the next "From " line). |
| 109 | s/\s+$/\n\n/; |
| 110 | |
| 111 | # Quote "^From " |
| 112 | s/(^|\n)From /$1>From /g; |
| 113 | |
| 114 | # Strip extra commas and whitespace from the end |
| 115 | $labels =~ s/[,\s]+$//; |
| 116 | # Now collapse extra commas and whitespace in the remaining label string |
| 117 | $labels =~ s/[,\s]+/, /g; |
| 118 | |
| 119 | foreach my $rmail_header qw(summary-line x-coding-system) { |
| 120 | $full_header =~ s/(^|\n)$rmail_header:.*\n/$1/i; |
| 121 | } |
| 122 | |
| 123 | if ($full_header =~ s/(^|\n)mail-from:\s*(From .*)\n/$1/i) { |
| 124 | ($from_line = $2) =~ s/\s*$/\n/; |
| 125 | } |
| 126 | else { |
| 127 | foreach my $addr_header qw(return-path from really-from sender) { |
| 128 | if ($full_header =~ /(?:^|\n)$addr_header:\s*(.*\n(?:\B.*\n)*)/i) { |
| 129 | my($addr) = Mail::Address->parse($1); |
| 130 | $from_addr = $addr->address($addr); |
| 131 | last; |
| 132 | } |
| 133 | } |
| 134 | |
| 135 | if (! $from_addr) { |
| 136 | $from_addr = "Babyl_to_mail_by_$whoami\@localhost"; |
| 137 | } |
| 138 | |
| 139 | if ($full_header =~ /(?:^|\n)date:\s*(\S.*\S)/i) { |
| 140 | $time = str2time($1); |
| 141 | } |
| 142 | |
| 143 | if (! $time) { |
| 144 | # No Date header or we failed to parse it |
| 145 | $time = time; |
| 146 | } |
| 147 | |
| 148 | $from_line = "From " . $from_addr . " " . localtime($time) . "\n"; |
| 149 | } |
| 150 | |
| 151 | print($from_line, ($opt_full_headers ? $full_header : $header), |
| 152 | ($labels ? "X-Babyl-Labels: $labels\n" : ""), "\n", |
| 153 | $_) || die "$whoami: error writing to stdout: $!\n"; |
| 154 | } |
| 155 | |
| 156 | close(STDOUT) || die "$whoami: Error closing stdout: $!\n"; |
| 157 | |
| 158 | # arch-tag: 8c7c8ab0-721c-46d7-ba3e-139801240aa8 |