#!/usr/bin/perl -w

eval 'exec /usr/bin/perl -w -S $0 ${1+"$@"}'
    if 0; # not running under some shell
# Copyright 2002 Ed Avis.  See the file COPYING.

=head1 NAME

pip - wrap programs to use them as filters

=head1 SYNOPSIS

C<pip [-I] [-O] [-i|-o|-b]... PROG ARGUMENTS>

where for every C<-i> (input), C<-o> (output), or C<-b> (both) switch,
there is one argument of the form '-', or '-.foo' for some string
'foo'.

=head1 DESCRIPTION

The '-' or '-.foo' arguments are placeholders and are matched up
left-to-right with the C<-i>, C<-o> and C<-b> switches.  Each placeholder is
replaced by the name of a temporary file.  For a C<-i> switch this is an
input file, which is created by pip before the command runs and
contains data read from pipE<39>s standard input.  For C<-o> it is an
output file, whose contents are printed after the command runs.  For
C<-b> the temporary file is both an input file and an output file.

If a placeholder argument has the form '-.foo' then the temporary
filename will end in '.foo'.  This is useful for programs which change
behaviour based on filename, such as the C compiler.  But note that an
argument of '-foo' would not be changed.

The C<-I> switch is used with programs that can read from standard
input, but require it to be seekable.  Pip buffers standard input into
a file if necessary.  (If both C<-I> and input placeholders are used,
the programE<39>s standard input will be buffered but unchanged - not
empty as it would be without C<-I>.)  Similarly C<-O> causes the
programE<39>s standard output to be buffered to a file and written
after the program has terminated.

If there are multiple input files then each input file gets the same
data.  If there are multiple output files then each output file is
printed in turn.  If there are no input files then standard input is
passed through unchanged to the program.  If the program prints
anything to its standard output then this appears before the content
of any output files (whether or not the C<-O> flag was used).

=head1 EXAMPLES

=over

=item C<pip -i mozilla ->

will read a file from standard input, and give it to mozilla to
display.  The final commandline might be C<mozilla /tmp/pip123.tmp>.

=item C<pip -io cc -.c -o ->

will pipe a C program through the compiler, giving an executable on
standard output.  The temporary input filename given to cc will end in
'.c'.

=item C<pip -b emacs ->

will read data, give the user the chance to modify it in emacs, and
then print it.

=item C<pip -I dvips -f>

runs dvips in 'filter' mode, which can read from stdin but needs it to
be seekable.

=item C<pip -O pnmtotiff>

turns pnmtotiff, which requires that its stdout be seekable, into a
general filter program.

=back

=head1 COMPARISON WITH REAL PIPES

When using pip you must wait for all input to be consumed before the
command is run, and for the command to exit before seeing any of the
output.  You donE<39>t get partial output as you would with pipes.
But pip works with programs that seek backwards and forwards in their
input, and which may write their output in a strange order too.  This
makes it work in places where the shellE<39>s process substitution
would not.


=head1 BUGS

Because pip unlinks temporary files before exiting, and exits as soon
as running the program returns, any command which puts itself
into the background before opening its input files will not work.

Not related to the CP/M command of the same name.

=head1 SEE ALSO

L<pip_tex(1)>.

=head1 AUTHOR

Ed Avis, ed@membled.com

=cut

use strict;
use IO::Handle;
use File::Temp qw(tempdir);
sub tmpnam();

# Release number, shown by the --version switch.
my $VERSION = '1.2';

# When true, temporary files and the temporary directory are deleted
# once they are no longer needed.  Set to 0 to keep them around, which
# can help when debugging.
#
my $CLEANUP = 1;

# usage()
#
# Write the command-line synopsis and a short explanation of every
# switch to standard error.  It does not exit; the caller chooses the
# exit status.
#
sub usage() {
    my $help = <<END;
usage: $0 [-I] [-O] [-i|-o|-b]... PROG ARGS...
where some ARGS are - or -.foo, if lowercase switches have been used.
-i replaces the placeholder with a file containing standard input,
-o replaces it with a file whose contents are output after running,
-b does both.
Use the placeholder -.foo to make a temporary file ending '.foo'.

-I means buffer standard input to make it seekable, but does not change
the arguments passed to PROG.  -O does the same for standard output.

Report bugs to <ed\@membled.com>.
END
    print STDERR $help;
}
# Nothing can be done without at least the name of a program to run.
if (@ARGV < 1) {
    usage();
    exit(1);
}

# Split the arguments into flags, and the rest.
my $flags = '';
my @rest = ();

# Get the flags into a big lump.  Leading switch arguments are consumed
# until the first argument that does not start with '-'; that argument
# is the program name and is pushed back onto @ARGV.
#
# The loop tests definedness rather than truth so that an argument of
# '0' or '' is not mistaken for the end of the argument list (and then
# silently dropped).
#
while (defined(my $arg = shift @ARGV)) {
    # --help and --version may be abbreviated, but at least one letter
    # must follow the '--'.  Without this guard a bare '-' or '--' (both
    # prefixes of '--help') would print the usage text instead of being
    # diagnosed below.
    my $is_long = length($arg) > 2;

    if ($is_long and index('--help', $arg) == 0) {
        usage();
        exit(0);
    }
    elsif ($is_long and index('--version', $arg) == 0) {
        print STDERR <<END;
pip $VERSION
Written by Ed Avis.

Copyright (C) 2002 Ed Avis.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
END
        exit(0);
    }
    elsif ($arg =~ /^--/) {
        print STDERR "unknown option $arg\n";
        usage();
        exit(1);
    }
    elsif ($arg =~ /^-(.*)/) {
        # A cluster of single-letter switches such as '-io'.
        my $switches = $1;

        if ($switches eq '') {
            # A placeholder seen before any program name.
            die "argument '-' must come after name of program to run";
        }
        elsif ($switches =~ tr/iobIO//c) {
            # tr///c counts the characters that are NOT valid switches.
            die "bad switch $switches, expected one of: -i -o -b -I -O";
        }
        else {
            $flags .= $switches;
        }
    }
    else {
        # Not a flag, push it back and finish.
        unshift @ARGV, $arg;
        last;
    }
}

@rest = @ARGV;
die "no program specified, usage: $0 [-i|-o]... PROG ARGS..."
    if @rest == 0;

# The program to run and its (still unsubstituted) arguments.
my ($prog, @args) = @rest;

# Temporary files to fill from stdin before the run, files to print
# after the run, and a lookup of which names are output files.
my @infiles;
my @outfiles;
my %is_outfile;

# Files buffering the program's own stdin / stdout (-I and -O).
my ($stdin_file, $stdout_file);

# find_placeholder(\@list)
#
# Scan the referenced array from the left and return the index of the
# first element that is a placeholder: either exactly '-', or '-.'
# followed by at least one more character.  Returns undef when no
# element qualifies.
#
sub find_placeholder( $ ) {
    my ($list) = @_;
    for my $idx (0 .. $#$list) {
        return $idx if $list->[$idx] =~ /^-(\..+|)$/;
    }
    return undef;
}

# Go through all the flags, substituting filenames for '-'
# arguments.
#
# Each lowercase flag (i, o or b) takes the leftmost placeholder still
# present in @args and replaces it with a new temporary filename that
# keeps whatever followed the '-' as its suffix.  -I and -O leave @args
# alone and only reserve a file for buffering stdin or stdout.
#
foreach my $flag (split(//, $flags)) {
    if ($flag eq 'i' or $flag eq 'o' or $flag eq 'b') {
        my $is_in  = ($flag eq 'i' or $flag eq 'b');
        my $is_out = ($flag eq 'o' or $flag eq 'b');

        my $n = find_placeholder(\@args);
        # Report the switch that is actually missing its placeholder.
        die "no '-' argument found for -$flag\n" if not defined $n;

        # Everything after the leading '-' (possibly nothing) becomes
        # the suffix of the temporary filename.
        $args[$n] =~ /^-(.*)/ or die;
        my $suffix = $1;

        my $file = tmpnam() . $suffix;
        $args[$n] = $file;
        push @infiles, $file if $is_in;
        push @outfiles, $file if $is_out;
        $is_outfile{$file} = $is_out;
    }
    elsif ($flag eq 'I') {
        warn '-I seen twice' if defined $stdin_file;
        $stdin_file = tmpnam();
        # The stdin buffer is filled exactly like any other input file.
        push @infiles, $stdin_file;
    }
    elsif ($flag eq 'O') {
        warn '-O seen twice' if defined $stdout_file;
        $stdout_file = tmpnam();
        $is_outfile{$stdout_file} = 1;
    }
    else {
        die; # shouldn't happen, we filtered earlier
    }
}
# The file buffering the program's stdout is printed first.
unshift @outfiles, $stdout_file if defined $stdout_file;

# Get stdin if necessary.  Each input file gets the same data.
#
# Standard input is read once, line by line, and every line is copied
# to each file in @infiles.  Any failure to open, write or close one of
# the files is fatal, since the program would otherwise be run on
# truncated input.
#
if (@infiles) {
    # Open every file up front; keep the name next to the handle so
    # that later error messages can say which file failed.
    my @targets;
    foreach my $file (@infiles) {
        # Three-argument open: the name (which may carry a suffix taken
        # from the command line) is never interpreted as a mode.
        open(my $fh, '>', $file) or die "can't write to $file: $!";
        push @targets, [ $file, $fh ];
    }

    while (defined(my $line = <STDIN>)) {
        foreach my $target (@targets) {
            my ($file, $fh) = @$target;
            print {$fh} $line or die "can't write to $file: $!";
        }
    }

    # Errors on buffered writes may only show up when the handle is
    # closed, so the result of close is checked too.
    foreach my $target (@targets) {
        my ($file, $fh) = @$target;
        close $fh or die "can't close $file: $!";
    }
}

# Run the program, with stdin and stdout redirected if appropriate.
#
# With -I, pip's own STDIN is reopened on the buffered copy so the child
# inherits a seekable stdin.  With -O, STDOUT is pointed at a temporary
# file for the duration of the run and then restored from a saved
# duplicate, so that pip can print the collected output afterwards.
#
my $saved_stdout;
if (defined $stdin_file) {
    # Won't need to restore stdin afterwards.
    open(STDIN, '<', $stdin_file) or die "cannot reopen $stdin_file: $!";
}
if (defined $stdout_file) {
    # Keep a duplicate of the real stdout before redirecting it.
    open($saved_stdout, '>&', \*STDOUT) or die "cannot dup stdout: $!";
    open(STDOUT, '>', $stdout_file)
      or die "cannot reopen $stdout_file for writing: $!";
}
# List form of system: the arguments are passed without going through
# a shell.
system($prog, @args);
if (defined $stdout_file) {
    open(STDOUT, '>&', $saved_stdout)
      or die "cannot dup stdout back again: $!";
    close $saved_stdout;
}

# Delete the temporary files that were used for input only.  Files that
# also serve as output are left alone here; they are removed after their
# contents have been printed.
if ($CLEANUP) {
    foreach my $file (@infiles) {
        next if $is_outfile{$file};
        # The program may already have deleted its input itself.
        next unless -e $file;
        unlink($file) or die "cannot unlink $file: $!";
    }
}

# Print output if necessary, and remove files.
#
# Every file in @outfiles is copied to standard output in order and
# then (when $CLEANUP is set) deleted.  If an output file without an
# extension is missing, the temporary directory is searched for a
# single file with the same name plus an extension, and that is used
# instead, with a warning.
#
foreach my $outfile (@outfiles) {
    my $out_fh;
    unless (open($out_fh, '<', $outfile)) {
        # Record the failure straight away, before any later call can
        # change $!.  The errno code is tested rather than the message
        # text, because the text varies with platform and locale.
        my $missing  = $!{ENOENT};
        my $open_err = "$!";

        if ($missing and $outfile !~ m!\.[^/]*$!) {
            # Sometimes DOSish programs add an extension to the output
            # filename without being asked.  Sniff around and see if
            # we can find any evidence of this.
            #
            if (-e $outfile) {
                die "open() said $outfile doesn't exist, but it does";
            }

            # List the directory by hand instead of globbing: a glob
            # pattern is split on whitespace and treats characters such
            # as '[' or '{' in the path specially.
            my ($dir, $base) = $outfile =~ m!\A(.*)/([^/]*)\z!s
              or die "cannot split $outfile into directory and name";
            opendir(my $dh, $dir) or die "cannot read directory $dir: $!";
            my @poss = map { "$dir/$_" }
                       sort
                       grep { index($_, "$base.") == 0 }
                       readdir $dh;
            closedir $dh;

            if (@poss == 0) {
                # Nope, nothing.
                die "$prog didn't create $outfile or any $outfile.*\n";
            }
            elsif (@poss == 1) {
                # It looks like the program has indeed created an
                # output file with a silly name.
                #
                my $o = $poss[0];
                # $o is $outfile plus the extension, so the extension
                # is simply what follows the common prefix.
                my $ext = substr($o, length $outfile);
                warn <<END;
$prog has created the file '$o' instead of '$outfile'.  Perhaps you
should have given the output placeholder as '-$ext' instead of '-'?
END
                open($out_fh, '<', $o) or die "cannot open $o: $!";
                # Remember the real name so the right file is removed.
                $outfile = $o;
            }
            else {
                my $s = "$prog did not create $outfile, "
                  . "but it did create: " . join(', ', @poss) . "\n"
                    . "I can't handle this sort of thing, giving up";
                die $s;
            }
        }
        else {
            die "cannot open $outfile: $open_err";
        }
    }

    while (defined(my $line = <$out_fh>)) {
        print $line;
    }
    close $out_fh;
    if ($CLEANUP) {
        unlink $outfile or die "cannot unlink $outfile: $!";
    }
}


# tmpnam()
#
# Return a fresh name for a temporary file.  The first call creates a
# private temporary directory; every call then hands out the next
# number (0, 1, 2, ...) as a filename inside it.  The original author
# believed this to be secure because the directory is private, and
# preferred it to POSIX's tmpnam() in order to get 8.3 filenames.
#
# $tempdir and $num hold the state between calls.  They are declared
# without initial values on purpose: these statements are only reached
# after the main program above has already called tmpnam(), so the sub
# sets the variables up itself on first use.
#
my $tempdir;
my $num;
sub tmpnam() {
    die 'usage: tmpnam()' if @_;

    unless (defined $tempdir) {
        $tempdir = tempdir('XXXXXXXX', CLEANUP => $CLEANUP, TMPDIR => 1);
        die 'cannot make temporary directory' if not defined $tempdir;
        die "failed to make directory $tempdir" if not -d $tempdir;
    }

    # The directory was created by us, so any name in it is unused and
    # there is no need to inspect its contents.
    #
    $num = 0 unless defined $num;
    my $name = "$tempdir/$num";
    $num++;
    return $name;
}
