#!/usr/bin/perl
#============================================================
# Correct an input tier list according to the PSI2 original
# tier information.
#
# Both the input list (-i) and the PSI2 list (-p) use the same
# layout: every line starting with '#' opens the next tier
# (first '#' line = tier1, second = tier2, third = tier3), and
# every other non-blank line contributes one ID (its first
# whitespace-delimited column) to the tier currently open.
#
# Each ID of the input list is written under the tier that the
# PSI2 list assigns to it; IDs unknown to the PSI2 list are
# reported on STDERR and dropped.
#============================================================
use strict;
use warnings;

use Getopt::Long;

# Tier names, in the order their headers appear in a list file.
my @TIERS = qw(tier1 tier2 tier3);

# PSI2 list used when -p is not given.
my $DEFAULT_PSI_FILE = 'psi.list';

# Script name without its directory part, for messages.
( my $SCRIPT_NAME = $0 ) =~ s/.*\///g;

# read_input($file)
#
# Parse a tier list file.
#
# Returns a list of three array references (tier1, tier2, tier3), each
# holding the IDs found under the corresponding header, in file order.
# A tier without IDs yields an empty array reference.
# Dies if the file cannot be opened.
sub read_input {
    my ($file) = @_;

    my %ids_of     = map { $_ => [] } @TIERS;
    my $tier_count = 0;

    # Three-argument open: the file name is never interpreted as a mode
    # or a pipe, whatever characters it contains.
    open my $fh, '<', $file or die "cannot open $file:$!";

    LINE:
    while ( my $line = <$fh> ) {

        # Trim both ends only.  Inner whitespace must survive so that the
        # first column can be separated from any trailing columns.
        $line =~ s/^\s+|\s+$//g;
        next LINE if $line eq '';

        if ( $line =~ /^#/ ) {

            # Every '#' line opens the next tier; a header beyond the
            # last known tier ends parsing.
            $tier_count++;
            last LINE if $tier_count > @TIERS;
            next LINE;
        }

        # IDs that appear before the first header belong to no tier.
        next LINE if $tier_count == 0;

        # Keep only the first column as the ID.
        my ($id) = split ' ', $line;
        next LINE if $id !~ /\w/;

        push @{ $ids_of{"tier$tier_count"} }, $id;
    }
    close $fh;

    return @ids_of{@TIERS};
}

# assign_tiers(\%psi_ids_of, \%in_ids_of)
#
# Both arguments map a tier name to an array reference of IDs.
# Returns a hash reference with the same shape in which every ID of
# %in_ids_of is filed under the tier given by %psi_ids_of.
#
# Within an output tier, IDs keep the order in which they were met while
# walking the input tiers tier1 -> tier2 -> tier3.
# A message is printed to STDERR for every ID that is moved to another
# tier and for every ID that has no PSI2 tier (such IDs are dropped).
sub assign_tiers {
    my ( $psi_ids_of, $in_ids_of ) = @_;

    # If an ID is listed under several PSI2 tiers, the last tier wins.
    my %tier_of;
    for my $tier (@TIERS) {
        for my $id ( @{ $psi_ids_of->{$tier} || [] } ) {
            $tier_of{$id} = $tier;
        }
    }

    my %out_ids_of = map { $_ => [] } @TIERS;
    for my $tier_in (@TIERS) {
        for my $id ( @{ $in_ids_of->{$tier_in} || [] } ) {
            my $tier_out = $tier_of{$id};
            if ( !defined $tier_out ) {
                print STDERR "*** tier info for $id not found, skip\n";
                next;
            }
            if ( $tier_out ne $tier_in ) {
                print STDERR "$id belongs to $tier_out instead of $tier_in\n";
            }
            push @{ $out_ids_of{$tier_out} }, $id;
        }
    }

    return \%out_ids_of;
}

# usage()
#
# Print the command-line help to STDERR.
sub usage {
    print STDERR
      "$SCRIPT_NAME: correct input list according to PSI2 original tier information \n",
      "Usage: $SCRIPT_NAME [options] -i in_file \n",
      "  Opt:  -h         print this help\n",
      "        -i         input file (REQUIRED)\n",
      "        -p         input PSI2 list file (REQUIRED, default=$DEFAULT_PSI_FILE)\n",
      "        -o         output file (default STDOUT)\n",
      "        --(no)verbose print header comment lines in the output(default=noverbose)\n",
      "        --(no)debug print debug info(default=nodebug)\n";
    return;
}

# main()
#
# Parse the options in @ARGV, read both lists, and write the corrected
# list.  Returns the process exit status (0 on success, 1 on a usage
# error or when help was requested).  Dies on I/O failures.
sub main {
    my $opt_help    = '';
    my $opt_debug   = 0;    # accepted for compatibility; not used yet
    my $opt_verbose = 0;
    my $file_psi    = $DEFAULT_PSI_FILE;
    my ( $file_in, $file_out );

    my $options_ok = GetOptions(
        'debug!'   => \$opt_debug,
        'i=s'      => \$file_in,
        'p=s'      => \$file_psi,
        'o=s'      => \$file_out,
        'help'     => \$opt_help,
        'verbose!' => \$opt_verbose,
    );

    if ( !$options_ok ) {
        print STDERR "*** ERROR: Invalid arguments found, -h for help\n";
        usage();
        return 1;
    }
    if ($opt_help) {
        usage();
        return 1;
    }
    if ( !$file_in ) {
        print STDERR "*** ERROR: input file not specified\n";
        usage();
        return 1;
    }

    # -s is also false for an existing but empty file.
    if ( !-s $file_in ) {
        print STDERR "*** ERROR: input file '$file_in' not found, exiting..\n";
        usage();
        return 1;
    }

    # Open into a fresh lexical: calling open() on a scalar that already
    # holds \*STDOUT would re-open STDOUT itself.
    my $out_fh = \*STDOUT;
    if ($file_out) {
        open my $file_fh, '>', $file_out
          or die "cannot write to $file_out:$!";
        $out_fh = $file_fh;
    }

    if ($opt_verbose) {

        # NOTE: read_input() counts every '#' line as a tier header, so
        # output produced with --verbose does not parse back as a tier
        # list with the same tier assignment.
        print {$out_fh} "# input: $file_in\n",
          "# generated by: $SCRIPT_NAME\n";
    }

    my ( %psi_ids_of, %in_ids_of );
    @psi_ids_of{@TIERS} = read_input($file_psi);
    @in_ids_of{@TIERS}  = read_input($file_in);

    my $out_ids_of = assign_tiers( \%psi_ids_of, \%in_ids_of );

    for my $tier (@TIERS) {
        print {$out_fh} "# $tier\n";
        for my $id ( @{ $out_ids_of->{$tier} } ) {
            print {$out_fh} $id, "\n";
        }
    }

    # Buffered write errors only surface when the handle is closed.
    close $out_fh or die "cannot close output:$!";

    return 0;
}

# Run only when executed as a script, so the subs above can be loaded
# (e.g. by a test) without triggering option parsing.
exit( main() ) unless caller;

1;