#! /usr/bin/perl -w
# vim:syntax=perl

use strict;
use DB_File;

# Name of this script with any leading directories stripped; used as the
# prefix of every diagnostic.
(my $program = $0) =~ s%.*/%%;

# First command line argument: common prefix of the database files.  Each
# database lives in "<stem>.<name>".  Checked for defined/non-empty rather
# than for truth, so that a stem of "0" is accepted.
my $stem = shift;
defined $stem && length $stem
    or die "$program: give dumpfilestem as arg\n";

my $debug = 1;

my %h; # database name => reference to the hash tied to "$stem.<name>"

# Names of the databases to open; one file per name.
my @dbs = ('inaddr', 'maildomain', 'email', 'ip', 'domain');

for my $name (@dbs) {
    my $dbfile = "$stem.$name";
    -r $dbfile or die "$program: no readable dbfile $dbfile found\n";
    # DB_File's default mode is O_CREAT|O_RDWR.  We only ever look values
    # up, and the file was just verified to exist, so open it strictly
    # read-only: no O_CREAT, which would ask for a database to be created
    # if the file vanished between the -r test and the tie.
    tie %{ $h{$name} }, "DB_File", $dbfile, O_RDONLY, 0666, $DB_HASH
        or die "$program: cannot tie to $dbfile\n";
}

# Token rules, tried in this order at the current position of a line.
#   db    - key into %h: the tied database used for the lookup
#   label - how the token type is named in the error message
#   re    - anchored pattern; capture group 1 is the token to replace
#   dbs   - spelling of "dbs" in that rule's error message (kept verbatim
#           so the existing error output does not change)
# Order matters: the more specific patterns (email, maildomain) must be
# tried before the more general one (domain).
my @rules = (
    { db => 'inaddr',     label => 'inaddr',     dbs => "db's",
      re => qr/^(\d{1,3}\.\d{1,3}\.\d{1,3}\.10\.in-addr\.arpa)/ },
    { db => 'email',      label => 'email',      dbs => "dbs",
      re => qr/^(john\.doe\.\d+\@\d+\.mail\.example\.com)/ },
    # An ip address must not be followed by another digit.
    { db => 'ip',         label => 'ip',         dbs => "db's",
      re => qr/^(10\.\d{1,3}\.\d{1,3}\.\d{1,3})(?!\d)/ },
    { db => 'maildomain', label => 'maildomain', dbs => "dbs",
      re => qr/^(\d+\.mail\.example\.com)/ },
    { db => 'domain',     label => 'fqdn',       dbs => "dbs",
      re => qr/^(\d+\.example\.com)/ },
);

# Read the input line by line.  $_ holds the not-yet-processed rest of the
# current line and is consumed from the left; $r collects the output.
while (<>) {
    chomp;
    my $r = '';
  TOKEN:
    while (length) {
        for my $rule (@rules) {
            next unless /$rule->{re}/;
            my $token = $1;

            # Every recognized token must be present in its database;
            # anything else means the wrong dump is being used.
            my $replacement = $h{ $rule->{db} }{$token};
            defined $replacement
                or die "$program: cannot match $rule->{label} '$token' in line '$_' to " .
                    "anything in '$stem' $rule->{dbs}\n";

            $r .= $replacement;
            # Drop the token by length instead of re-using it as a regex,
            # so its characters are never interpreted as metacharacters.
            substr($_, 0, length $token) = '';
            next TOKEN;
        }

        # No rule matched here: copy one character through unchanged.
        # 4-arg substr always removes exactly one character, so the loop
        # is guaranteed to make progress.
        $r .= substr($_, 0, 1, '');
        # warn "$program: debug: length is " . length . "\n" if $debug;
    }
    print "$r\n" or die "$program: cannot write to stdout: $!\n";
}

# All input has been processed: release the tie on every database hash,
# aborting if any of them cannot be untied.
foreach my $name (@dbs) {
    my $dbfile = join '.', $stem, $name;
    unless (untie %{ $h{$name} }) {
        die "$program: cannot untie hash key $name from file $dbfile\n";
    }
}


__END__

=pod

=head1 NAME

lr_deanonymize - restore anonymized data, using a dump as produced by lr_anonymize(1)

=head1 SYNOPSIS

B<lr_deanonymize> I<dumpfilestem>

=head1 DESCRIPTION

B<lr_deanonymize> is typically used when receiving anonymized reports from a
responder.  See the section on "Processing The Responder's Results" in the
chapter on "Using A Responder" in the Lire User Manual for usage examples.

B<lr_deanonymize> reads a file containing anonymized email addresses, IP numbers,
and hostnames (typically a report, generated from a logfile from an internet
service) from stdin, and prints a "deanonymized" version of this file
to stdout. The information needed to do this is read from a set
of Berkeley DB files, whose names are derived from
I<dumpfilestem>, as produced by lr_anonymize(1).

=head1 EXAMPLE

A 'logfile' like e.g.


 blaat fkrf 1.2.3.4.in-addr.arpa] pietje@bigcompany.com bla 1 2 3 lj;agas;gag
 blaat 1.2.3.4 fkrf 3.2.3.4.in-addr.arpa] bla 1 www.hotsex.com 2 3 lj;agas;gag 
 jan@blaat.frut.com agagag
 blaat fkrf 4.2.3.4.in-addr.arpa] bla pietje@bigcompany.com www.hotsex.com 
 234.34.2.0 jan@blaat.frut.com 4.2.3.4.in-addr.arpa1 2 3 lj;agas;gag
 blaat fkrf tweede 3.2.3.4.in-addr.arpa] bla 1.2.3.4 1 blablabla.com 
 2 mdcc.cx 
 3 lj;agas;gag

will get anonymized to

 blaat fkrf 1.0.0.10.in-addr.arpa] john.doe.1@example.com bla 1 2 3 lj;agas;gag
 blaat 10.0.0.1 fkrf 2.0.0.10.in-addr.arpa] bla 1 1.example.com 2 3 lj;agas;gag 
 john.doe.2@example.com agagag
 blaat fkrf 3.0.0.10.in-addr.arpa] bla john.doe.1@example.com 1.example.com 
 10.0.0.2 john.doe.2@example.com 3.0.0.10.in-addr.arpa1 2 3 lj;agas;gag
 blaat fkrf tweede 2.0.0.10.in-addr.arpa] bla 10.0.0.1 1 2.example.com 
 2 3.example.com 
 3 lj;agas;gag

The dump will look like

 ip 234.34.2.0 10.0.0.2
 ip 1.2.3.4 10.0.0.1
 inaddr 3.2.3.4.in-addr.arpa 2.0.0.10.in-addr.arpa
 inaddr 1.2.3.4.in-addr.arpa 1.0.0.10.in-addr.arpa
 inaddr 4.2.3.4.in-addr.arpa 3.0.0.10.in-addr.arpa
 domain mdcc.cx 3.example.com
 domain blablabla.com 2.example.com
 domain www.hotsex.com 1.example.com
 email jan@blaat.frut.com john.doe.2@example.com
 email pietje@bigcompany.com john.doe.1@example.com


=head1 SEE ALSO

lr_anonymize(1)

=head1 VERSION

$Id: lr_deanonymize.in,v 1.4 2006/07/23 13:16:32 vanbaal Exp $

=head1 COPYRIGHT

Copyright (C) 2000-2001 Stichting LogReport Foundation LogReport@LogReport.org

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program (see COPYING); if not, check with
http://www.gnu.org/copyleft/gpl.html. 

=head1 AUTHOR

Joost van Baal <joostvb@logreport.org>

=cut

# Local Variables:
# mode: cperl
# End:
