#!/usr/bin/perl -w
use strict;
use Getopt::Long qw(VersionMessage HelpMessage);
use Pod::Usage;
use POSIX qw(strftime);
use Digest::MD5 qw(md5);


# ==============================================================================
# Purpose: extract process information from Windows memory dump
# and generates a DOT file

# Proof-of-Concept for Microsoft Windows 2003 (version 5.2.3790.0)
#
# This is NOT meant to be an example of good programming style!
# ==============================================================================

my $MESSAGE1="PTfinder v0.2.03-W2003 (2006-08-10)\nCopyright (C) 2006 by Andreas Schuster\n";
my $MESSAGE2=$MESSAGE1."Issue $0 --help for further information.\n";


# Start of _DISPATCHER_HEADER for various object types.
# The type code is constant over all versions of Microsoft Windows from Win 2000
# to Windows Vista/Longhorn. Only the size (2nd hex value in the patterns below)
# varies.
my $DISP_SYNCHRONIZATION_EVENT	= "\x01.\x04.";
my $DISP_PROCESS		= "\x03.\x1b.";
my $DISP_SEMAPHORE		= "\x05.\x05.";
my $DISP_THREAD			= "\x06.\x72.";
my $DISP_NOTIFICATION_TIMER	= "\x08.\x0a.";

# object sizes
my $SIZEOF_PROC = 0x278;
my $SIZEOF_THRD = 0x260;

# Kernel virtual address space starts here. 
# This should be safe for /3GB systems.
my $BORDER_normal = 0x80000000; 
my $BORDER_3GB    = 0xc0000000; 

# Some macros to control the appearance of resulting graphs.
my $DOT_PROC_SHAPE = "shape = \"record\"";
my $DOT_PROC_COLOR = "color = \"green\"";
my $DOT_THRD_SHAPE = "shape = \"record\"";
my $DOT_THRD_COLOR = "color = \"blue\"";
my $DOT_TERMINATED = "style = \"filled\" fillcolor = \"lightgray\"";  

# ==============================================================================

# Parse command line options
my $opt_3GB = 0;
my $opt_align = 8; # stepwidth of the scanner
my $opt_color = 1;
my $opt_dot = '';
my $opt_help = 0;
my $opt_lst = 1;
my $opt_log = '';
my $opt_proc = 1;
my $opt_skip = 0;
my $opt_thrd = 1;
my $opt_unique = 1;
my $opt_usage = 0;
my $opt_version = 0;

GetOptions(
	'3GB!' => $opt_3GB,
	'align=i' => \$opt_align,
	'color!' => \$opt_color,
	'dotfile=s' => \$opt_dot,
	'help' => \$opt_help,
	'listing!' => \$opt_lst,
	'logfile=s' => \$opt_log,
	'procs!' => \$opt_proc,
	'skip=i' => \$opt_skip,
	'threads!' => \$opt_thrd,
	'unique!' => \$opt_unique,
	'usage|?|' => \$opt_usage,
	'version' => \$opt_version,
	) or pod2usage({
		-message => $MESSAGE1,
		-verbose => 1,
		-exitval => 2 });

if ($opt_version) {
	print "$MESSAGE1";
	exit(0);
};

if ($opt_usage) {
	pod2usage({
		-message => $MESSAGE1,
		-verbose => 1,
		-exitval => 0 });
};

if ($opt_help) {
	pod2usage({
		-message => $MESSAGE1,
		-exitval => 0,
		-verbose => 2 });
};


my $INFILE = shift;
pod2usage({
	-message => $MESSAGE2,
	-exitval => 2 }) unless ($INFILE);


my $BORDER = ($opt_3GB) ? $BORDER_3GB : $BORDER_normal;

# Other variables
my $currentpos = 0;
my $hits = 0;
my $data;
my %prochash;
my %thrdhash;

# ==============================================================================

sub Win2Unix() {
	# Convert windows FILETIME to UNIX format.

	# Windows epoch is 1601-01-01 00:00:00, resolution 100ns
	# UNIX epoch is 1970-01-01 00:00:00, resolution 1s

	my $Lo = shift;
	my $Hi = shift;
	my $Time;

	if (($Lo == 0) and ($Hi == 0)) {
		$Time = 0;
	} else {
		$Lo -= 0xd53e8000;
		$Hi -= 0x019db1de;
		$Time = int($Hi*429.4967296 + $Lo/1e7);
	};
	$Time = 0 if ($Time < 0);
	return $Time;
};


sub FormatTimeISO() {
	# Format UNIX time.
	
	my $type = shift;
	my $Time = shift;
	my $Result;

	my %fmt = (
		'DOT' => "%Y-%m-%d\\n%H:%M:%S",
		'LST' => "%Y-%m-%d %H:%M:%S",
	);

	my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday) = gmtime($Time);
	
	if ($Time == 0) {
		$Result = '';
	} else {
		$Result = strftime($fmt{$type}, 
			$sec, $min, $hour, $mday, $mon, $year, -1, -1, -1);
	};
	return $Result;
};


# ==============================================================================
# DOT output routines

sub dot_open() {

	return unless ($opt_dot);
	open(DOTFILE, ">", $opt_dot) or die "$0: unable to open dotfile $opt_dot: $!";

	print DOTFILE << '__DOT_HEADER__';
digraph processtree {
graph [rankdir = "TB"];
__DOT_HEADER__
};


sub dot_close() {

	return unless($opt_dot);

	print DOTFILE << '__DOT_FOOTER__';
}
__DOT_FOOTER__
	close(DOTFILE) 
};


# ==============================================================================
# LOG output routines

sub log_open() {
	return unless ($opt_log);
	open(LOGFILE, ">", $opt_log) or die "$0: unable to open logfile $opt_log: $!";
};

sub log_print() {
	return unless($opt_log);

	my ($pos, $class, $msg) = @_;

	if($pos >= 0) {
		printf(LOGFILE "0x%08x\t", $pos);
	} else {
		print LOGFILE ' ' x 10 . "\t";
	};

	printf(LOGFILE "%s\t%s\n",
		$class,
		$msg);
};


sub log_close() {
	return unless ($opt_log);
	close(LOGFILE);
};


# ==============================================================================
# LST output routines

sub lst_open() {
	return unless ($opt_lst);
	# output actually goes to stdout
	print
"No.  Type PID    TID    Time created        Time exited         Offset     PDB        Remarks\n".
"---- ---- ------ ------ ------------------- ------------------- ---------- ---------- ----------------\n";
#123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890
#         1         2         3         4         5         6         7         8         9
};


sub lst_print() {
	return unless ($opt_lst);

	my ($type, $PID, $TID, $created, $exited, $PDB, $remarks) = @_;
	
	$hits++;
	if ($type eq 'P') {
		printf("%4d %4s %6d %6s %19s %19s 0x%0.8x 0x%0.8x %-16s\n",
			$hits,	
			'Proc',
			$PID, '',
			($created > 0) ? &FormatTimeISO('LST', $created) : '', 
			($exited > 0) ? &FormatTimeISO('LST', $exited) : '',
			$currentpos,
			$PDB,
			$remarks,
		);
	} elsif ($type eq 'T') {
		printf("%4d %4s %6d %6d %19s %19s 0x%0.8x %10s %-16s\n",
			$hits,
			'Thrd',
			$PID, $TID,
			'',
			($exited > 0) ? &FormatTimeISO('LST', $exited) : '',
			$currentpos,
			'',
			'',
		);
	};
};


sub lst_close() {
	return unless ($opt_lst);
};


# ==============================================================================
# Process routines

sub ParseProcess {
	&log_print($currentpos, 'scan', 'Found process candidate.');

	sysread(INFILE, $data, $SIZEOF_PROC-$opt_align, $opt_align);

	my $PageDirectoryBase	= unpack('L', substr($data, 0x018, 4));
	my $ThreadListHeadFlink	= unpack('L', substr($data, 0x050, 4));
	my $ThreadListHeadBlink	= unpack('L', substr($data, 0x054, 4));
	my $ExitStatus		= unpack('L', substr($data, 0x24c, 4));
	my ($CreateTimeLo, 
		$CreateTimeHi) 	= unpack('Ll', substr($data, 0x070, 8));
	my $CreateTime		= &Win2Unix($CreateTimeLo, $CreateTimeHi);
	my ($ExitTimeLo, 
		$ExitTimeHi)	= unpack('Ll', substr($data, 0x078, 8));
	my $ExitTime		= &Win2Unix($ExitTimeLo, $ExitTimeHi);
	my $PID			= unpack('L', substr($data, 0x084, 4));
	my $PPID		= unpack('L', substr($data, 0x128, 4));
	my $ImageFileName	= unpack('Z16', substr($data, 0x154, 16));


	my $Check = sub {
		my $result = 1;

		# The Page Directory is used to convert virtual into physical addresses. 
		# It must exist, so the pointer must not be null.
		if ($PageDirectoryBase == 0) {
			$result = 0;
			&log_print(-1, 'check', 'PageDirectoryBase is NULL.');
		};

		# The PDB must be page-aligned, pagesize is assumed to be 0x1000.
		if ($PageDirectoryBase % 0x1000 != 0) {
			$result = 0;
			&log_print(-1, 'check', 'PageDirectoryBase not aligned on page boundary.');
		};

		# Thread control structures are kept in kernel space.
		if ($ThreadListHeadFlink < $BORDER) {
			$result = 0;
			&log_print(-1, 'check', 'ThreadListHead.Flink does not point into kernel space.');
		};
		if ($ThreadListHeadBlink < $BORDER) {
			$result = 0;
			&log_print(-1, 'check', 'ThreadListHead.Blink does not point into kernel space.');
		};

		# Some _DISPATCHER_HEADER inside the object.
		if (substr($data, 0x0dc, 4) !~ /$DISP_SYNCHRONIZATION_EVENT/) {
			$result = 0;
			&log_print(-1, 'check', 'No SYNCHRONIZATION_EVENT at 0x0d8.');
		};
		if (substr($data, 0x224, 4) !~ /$DISP_SYNCHRONIZATION_EVENT/) {
			$result = 0;
			&log_print(-1, 'check', 'No SYNCHRONIZATION_EVENT at 0x0fc.');
		};

		return $result;
	};


	# run some checks on the process candidate
	if (! &$Check) {
		# check failed, bail out
		&log_print(-1, 'scan', 'Rejected process structure.');
		sysseek(INFILE, $currentpos+$opt_align, 0); 
		return;
	};

	if ($opt_unique) {
		my $digest = md5($data);
		$prochash{$digest}++;
		if ($prochash{$digest} > 1) {
			&log_print(-1,'check', 'Duplicate structure.');
			return;
		};
	};

	&log_print(-1, 'scan', 'Accepted process structure.');
	&lst_print('P', $PID, 0, $CreateTime, $ExitTime, $PageDirectoryBase, $ImageFileName);

	if ($opt_dot) {
		# process node description
		if ($ExitTime > 0) {
			printf(DOTFILE "pid%u [label = \"{%u | file ofs\\n0x%x | %s%s | exited\\n%s\\n code %d}\" %s %s %s]\;\n",
				$PID,
				$PID,
				$currentpos,
				$ImageFileName,
				($CreateTime > 0) ? sprintf(" | started\\n%s", &FormatTimeISO('DOT', $CreateTime)) : '',
				&FormatTimeISO('DOT', $ExitTime),
				$ExitStatus,
				$DOT_PROC_SHAPE,
				($opt_color) ? $DOT_PROC_COLOR : '',
				$DOT_TERMINATED,
			);
		} else {
			printf(DOTFILE "pid%u [label = \"{%u | file ofs\\n0x%x | %s%s | running}\" %s %s]\;\n", 
				$PID,
				$PID,
				$currentpos,
				$ImageFileName,
				($CreateTime > 0) ? sprintf(" | started\\n%s", &FormatTimeISO('DOT', $CreateTime)) : '',
				$DOT_PROC_SHAPE,
				($opt_color) ? $DOT_PROC_COLOR : '',
			);
		}
		# process edge description
		printf(DOTFILE "pid%u -> pid%u [%s]\n",
			$PPID,
			$PID,
			($opt_color) ? $DOT_PROC_COLOR : '',
		);
	};
};


# ==============================================================================
# Thread routines

sub ParseThread() {
	&log_print($currentpos, 'scan', 'Found thread candidate.');

	sysread(INFILE, $data, $SIZEOF_THRD-$opt_align, $opt_align);

	my ($CreateTimeLo, 
		$CreateTimeHi) 	= unpack('Ll', substr($data, 0x1c8, 8));
	my $CreateTime		= &Win2Unix($CreateTimeLo, $CreateTimeHi);
	my ($ExitTimeLo,
		$ExitTimeHi)	= unpack('Ll', substr($data, 0x1d0, 8));
	my $ExitTime		= &Win2Unix($ExitTimeLo, $ExitTimeHi);
	my $ExitStatus		= unpack('L', substr($data, 0x1d8, 4));
	my ($PID, $TID)		= unpack('LL', substr($data, 0x1f4, 8));
	my $Terminated	= (unpack('L', substr($data, 0x250, 4)) && 0x00000001);
	my $ThreadsProcess	= unpack('L', substr($data, 0x228, 4));
	my ($StartAddress, $Win32StartAddress)
				= unpack('LL', substr($data, 0x22c, 8));

	my $Check = sub {
		my $result = 1;

		if (($ThreadsProcess < $BORDER) && ($PID != 0)) {
			$result = 0;
			&log_print(-1, 'check', 'ThreadsProcess not in kernel space.');
		};
		if (($StartAddress == 0) && ($PID != 0)) {
			$result = 0;
			&log_print(-1, 'check', 'StartAddress is NULL.');
		};

		# Some _DISPATCHER_HEADER inside the object.
		if (substr($data, 0x078, 4) !~ /$DISP_NOTIFICATION_TIMER/) {
			$result = 0;
			&log_print(-1, 'check', 'No NOTIFICATION_TIMER at 0x0f0.');
		};
		if (substr($data, 0x190, 4) !~ /$DISP_SEMAPHORE/) {
			$result = 0;
			&log_print(-1, 'check', 'No SEMAPHORE at 0x19c.');
		};
		if ((substr($data, 0x1fc, 4) !~ /$DISP_SEMAPHORE/) && ($PID != 0)) {
			$result = 0;
			&log_print(-1, 'check', 'No SEMAPHORE at 0x1e8.');
		};

		return $result;
	};

	# run some checks on the thread candidate
	if (! &$Check) {
		# check failed, bail out
		&log_print(-1, 'scan', 'Rejected thread structure.');
		sysseek(INFILE, $currentpos+$opt_align, 0); 
		return;
	};
	
	if ($opt_unique) {
		my $digest = &md5($data);
		$thrdhash{$digest}++;
		if ($thrdhash{$digest} > 1) {
			&log_print(-1,'check', 'Duplicate structure.');
			return;
		};
	};

	&log_print(-1, 'scan', 'Accepted thread structure.');
	&lst_print('T', $PID, $TID, $CreateTime, $ExitTime);

	if ($opt_dot) {
		# thread node description
		printf(DOTFILE "thrd%u_%u_%x [label = \"{%u.%u | file ofs\\n0x%x | Win32 addr\\n0x%x%s%s}\" %s %s %s]\;\n",
				$PID, 
				$TID,
				$currentpos,
				$PID, 
				$TID,
				$currentpos,
				$Win32StartAddress,
				($CreateTime > 0 && $CreateTime <= 0x7fffffff ) ? sprintf(" | started\\n%s", &FormatTimeISO('DOT', $CreateTime)) : '',
				($Terminated && ($ExitTime > 0)) ? 
								sprintf(" | exited\\n%s\\ncode %d",
									&FormatTimeISO('DOT', $ExitTime),
									$ExitStatus)
								: '',
				$DOT_THRD_SHAPE,
				($opt_color) ? $DOT_THRD_COLOR : '',
				($Terminated) ? $DOT_TERMINATED : '',
			);
	
		# thread edge description
		printf(DOTFILE "pid%u -> thrd%u_%u_%x [%s]\n",
			$PID,
			$PID,
			$TID,
			$currentpos,
			($opt_color) ? $DOT_THRD_COLOR : '',
		);
	};
};




# ==============================================================================
# Main routine
# ==============================================================================

	
open(INFILE, "<", $INFILE) or die "$0: unable to open $INFILE: $!";
binmode(INFILE);
sysseek(INFILE, $opt_skip, 0);

&dot_open();
&log_open();
&lst_open();

while (1) {
	my $data;
	$currentpos = sysseek(INFILE, 0, 1);
	my $lpos = $currentpos - $opt_skip;

	last if (sysread(INFILE, $data, $opt_align) != $opt_align);

	if (substr($data, 0, 4) =~ /$DISP_PROCESS/) {
		&ParseProcess if ($opt_proc);
	} elsif (substr($data, 0, 4) =~ /$DISP_THREAD/) {
		&ParseThread if ($opt_thrd);
	};
}

close(INFILE);
&log_close();
&lst_close();
&dot_close();


__END__

=head1 NAME

PTfinder - find processes and threads in a Microsoft Windows memory dump.

=head1 SYNOPSIS

ptfinder [options] F<file>

=head1 ARGUMENTS

=over 8

=item F<file>

Memory dump file to analyze.

=item B<--3GB> / B<--no3GB>

Turns on/off support for systems booted with the /3GB switch. Default: OFF.

=item B<--align I<n>>

Enforce an alignment of I<n> bytes. Default: 8.

This parameter also controls the stepwidth of the scanner,
so it has a great impact on performance.

=item B<--color> / B<--nocolor>

Turns on/off coloring of dot(1) graphs. Default: ON.

=item B<--dotfile F<file>>

PTfinder will generate a graph description in a format sufficient for dot(1).

=item B<--help>

Prints the full help text and exists.

=item B<--listing> / B<--nolisting>

When active, PTfinder will print a listing of processes and threads as it works
its way through a memory dump.

=item B<--logfile F<file>>

A file where PTfinder will document all process and thread candidates it finds, 
the checks performed and the final decission made. This file is mainly used for 
debugging purposes.

=item B<--procs> / B<--noprocs>

Includes/excludes processes in the generated output. Turning 
this option off may lead to cluttered graphs. Default: ON.

=item B<--skip I<n>>

Skips over the first I<n> bytes of the dump file. This option can be used to adopt to 
really strange dump formats.

=item B<--threads> / B<--nothreads>

Includes/excludes threads in the generated output. Including threads may lead to large graphs.
Default: ON.

=item B<--unique> / B<--nounique>

Suppresses/accepts duplicate processes and threads based on the object's MD5 hash. Default: ON (suppress duplicates).

=item B<--usage>

Prints a brief help message and exits.

=item B<--version>

Display version and exit.

=back

=head1 DESCRIPTION

PTfinder searches a memory dump of a system running Microsoft Windows for traces
of processes and threads. At this it uses signatures based on the
_DISPATCHER_HEADER structure declared in the Windows DDK (Ntddk.h and wdm.h).
Some functional checks are also applied.

The dump file may have been created in several ways:

=over 8

=item *

in a traditional way with B<dd>: C<dd bs=4096 if=\Device\PhysicalMemory of=dumpfile>,

=item *

in-vivo using Sysinternal's B<LiveKd> and a debugger,

=item *

post-mortem as described in Microsoft Knowledge Base article no. 244139,

=item

by suspending a VMware session.

=back


=head2 Visualization

PTfinder can output its findings in a format sufficiently for dot(1).
Dot calculates graphs, whereas processes and threads are the nodes and edges 
indicate a "created-by" relationship.

=head2 Futher information

The lastest version of this program, information on the underlying principles
as well as usage examples are available at:

        http://computer.forensikblog.de/en/topics/windows/memory_analysis/

If your German is better than the author's English please consider visiting
the main site where you'll find lots of additional information:

        http://computer.forensikblog.de/

Look for the section entitled "Speicheranalyse".

=head1 EXAMPLES

=over 8

=item Display processes and threads:

C<ptfinder.pl mymem.dmp>

=item Display only processes:

C<ptfinder.pl --nothreads mymem.dmp>

=item Generate an input file for dot(1):

C<ptfinder.pl --nothreads --dotfile mymem.dot mymem.dmp>

=item Turn the annoying listing off, please!

C<ptfinder.pl --nothreads --nolisting --dotfile mymem.dot mymem.dmp>

=item Prepare the graph for black/white printing:

C<ptfinder.pl --nothreads --nolisting --dotfile mymem.dot mymem.dmp>

=item And now ignore that large header:

C<ptfinder.pl --nothreads --nolisting --skip 1048576 --dotfile mymem.dot mymem.dmp>

=back


=head1 BUGS

This version will work only on dumps of systems running Microsoft Windows
2003 (build 5.2.3790.0).

Also, the code needs some cleanup and restructuring badly.

Beside this, no bugs are known yet.

Please send bug reports and suggestions to E<lt>bugs-ptfinder@forensikblog.deE<gt>.


=head1 AUTHOR

Written by Andreas Schuster E<lt>a.schuster@yendor.netE<gt>


=head1 COPYRIGHT

Copyright (c) 2006 by Andreas Schuster.

PTfinder may be distributed under the GNU General Public License.


=head1 SEE ALSO

dd(1), dot(1), zgrviewer(1)

=cut
