#!/usr/bin/perl -w
#by Torben Menke 2008-02-29
#http://www.entorb.net
# Search multiple text files for email addresses.
# Format of the text files: "Name <email>" or just "email"
# (one per line)
use warnings;
use strict;
use Data::Dumper;
# Candidate address files to search.  The -f test drops every path that is
# not an existing plain file, so later code only ever sees files that are
# actually present on this machine.
my @filelist = grep { -f $_ } (
    '/home/torben/adressen/addressbook-email-export.txt',
    '/home/torben/adressen/haufen.txt',
    '/home/torben/adressen/collected.txt',
    '/home/torben/adressen/collected2008.txt',
    '/home/torben/adressen/haufen2.txt',
);
# Search term: all command-line arguments joined by single spaces, or, when
# no arguments were given, one line read interactively from STDIN.
my $keyword;
# Lines collected from the address files; filled and refined further below.
my @result;
if (@ARGV) {
    $keyword = join ' ', @ARGV;
} else {
    print "Search for: ";
    $keyword = <STDIN>;
    # <STDIN> yields undef at end of input (Ctrl-D, closed pipe).  Fall back
    # to an empty term instead of triggering "uninitialized value" warnings
    # in chomp and in the later pattern match.
    $keyword = '' unless defined $keyword;
    chomp $keyword;
}
# Compile the search term into a case-insensitive pattern.
#   $term   - search term as typed by the user (undef is treated as '')
#   returns - a qr// object
# The term is interpreted as a regular expression, exactly as before.  A term
# that is not a valid regex (e.g. "(work") used to abort the script with a
# regex compile error; it is now matched literally instead.
sub _keyword_regex {
    my ($term) = @_;
    $term = '' unless defined $term;
    my $regex = eval { qr/$term/i };
    return defined $regex ? $regex : qr/\Q$term\E/i;
}

# Collect every line that contains an "@" and matches the search term.
# The pattern is compiled once, outside the loop.
my $keyword_re = _keyword_regex($keyword);
foreach my $file (@filelist) {
    # Three-argument open with a lexical handle: the path is never parsed for
    # mode characters, and the handle is not a package-wide bareword.
    open(my $fh, '<', $file) or die "Cannot open '$file': $!";
    # Read line by line; there is no need to hold the whole file in memory.
    while (my $line = <$fh>) {
        next unless $line =~ /\@/;
        next unless $line =~ $keyword_re;
        push @result, $line;
    }
    close $fh;
}
# Strip leading and trailing whitespace (including the line break) from every
# collected line.  The loop variable aliases the array elements, so the
# substitutions change @result in place.
foreach my $entry (@result) {
    $entry =~ s/^\s+//;
    $entry =~ s/\s+$//;
}
# Drop exact duplicates, keeping the first occurrence of each line.
my %seen = ();
my @unique;
foreach my $entry (@result) {
    next if $seen{$entry}++;
    push @unique, $entry;
}
@result = @unique;
# Remove bare addresses that are already covered by a "Name <address>" entry.
# Example input:
#   1. Max Musterman <max.mustermann@gmx.de>
#   2. max.mustermann@gmx.de
#   3. M. Mustermann <max.mustermann@gmx.de>
# Entry 2 is dropped because entries 1 and 3 carry the same address.
#
#   @_      - trimmed entries (no leading/trailing whitespace)
#   returns - all named entries in their original order, followed by the
#             bare addresses that no named entry contains
#
# An entry counts as "named" when a word character is followed by
# whitespace; since the ends were trimmed earlier, a bare address has none.
sub _drop_redundant_bare_addresses {
    my @entries = @_;
    my @with_name = grep { m/\w\s/ } @entries;
    my @bare      = grep { not m/\w\s/ } @entries;
    my @unique_bare;
    foreach my $email (@bare) {
        # \Q...\E: the address is data, not a pattern.  Unquoted, "." would
        # match any character (dropping distinct addresses) and "+" would act
        # as a quantifier (so real duplicates were never detected).
        next if grep { m/<\Q$email\E>/i } @with_name;
        push @unique_bare, $email;
    }
    return (@with_name, @unique_bare);
}
@result = _drop_redundant_bare_addresses(@result);
# Order the entries without regard to case (compared by their upper-cased
# form) and print them one per line.  The trailing newline is always printed,
# so an empty result still produces a single empty line.
@result = sort { uc($a) cmp uc($b) } @result;
print join("\n", @result) . "\n";
# Hope you found what you were looking for. Feel free to drop me a line
# Torben