petrichor on the breeze: perl: extracting email addresses from a thunderbird message list

Thursday, March 27, 2008

perl: extracting email addresses from a thunderbird message list

The input file is one of Thunderbird's local mail folder files; it resides in the Thunderbird profile directory, at a path such as: "C:\Documents and Settings\USERNAME\Application Data\Thunderbird\Profiles\sbdq6f9n.default\Mail\Local Folders\Inbox.sbd\guitar.sbd"

use strict;
use warnings;
use Tie::File;
use Fcntl;


# Sort comparator (for use as `sort alphabetically LIST`): orders strings
# case-insensitively.  Strings that differ only by case are ordered by a plain
# case-sensitive comparison, so the result does not depend on the order in
# which the elements were supplied.
# Reads the package variables $a and $b set by sort; returns -1, 0 or 1.
sub alphabetically {
    return (lc($a) cmp lc($b)) || ($a cmp $b);
}


# Main script: read the file named by the first command-line argument, collect
# every address of the form first.last@calsoftinc.com found in it (skipping
# any address containing "ambar"), and print the unique addresses one per
# line, sorted with the `alphabetically` comparator.

my $inputfile = shift;

# Test for a missing argument explicitly, so a file literally named "0" is
# still accepted.
unless (defined $inputfile && length $inputfile)
{
    print "\nUSAGE:\n$0 INPUTFILE \n\n\n\n";
    exit 0;
}

# Letters, a dot, letters, then the fixed domain; the whole address is
# captured in group 1.
my $address_re = qr/([a-zA-Z]+\.[a-zA-Z]+\@calsoftinc\.com)/;

# Stream the file line by line instead of tying it to an array: the script
# only needs a single sequential pass over the contents.
open(my $in, '<', $inputfile) or die "Can't open $inputfile: $!\n\n\n\n";

my %seen;    # address => number of occurrences; the keys form the unique list

while (defined(my $line = readline($in))) {
    # The /g match in a while loop visits every address on the line, not just
    # the first one (header lines can carry several recipients).
    while ($line =~ /$address_re/g) {
        my $address = $1;    # copy before another match can clobber $1
        next if $address =~ m/ambar/;
        $seen{$address}++;
    }
}

close($in);

print map { "$_\n" } sort alphabetically keys %seen;