#!/usr/bin/perl
#
# Usage: processanda infile [outfile a b]
#
# infile holds an N x K (K >= 2) whitespace-separated matrix, one edge per
# line. The a-th and b-th columns hold node names (a and b default to 0 and
# 1; indexing starts at 0). All other columns are numbers and are ignored.
#
# If outfile is not given, it defaults to infile with every '.' replaced
# by '_'.
#
# It creates the following files:
#   outfile.label  a file whose n-th line (n >= 1) has the label of
#                  the n-th distinct node found in infile
#   outfile.ncol   a two-column matrix of node numbers representing an
#                  undirected graph with no repeated edges or loops.
#                  It's suitable to be used as an .ncol file for
#                  submission to http://orion.icmb.utexas.edu/cgi-bin/lgl/web_lgl.pl
#
# Dinoj Surendran (dinojATATATATATcs.uchicago.edu)

use strict;
use warnings;

my $usage = "Usage: $0 infile [outfile a b]\n";
die $usage unless @ARGV >= 1;

my $infile = $ARGV[0];

# Default output prefix: the input name with dots turned into underscores,
# so "graph.txt" yields "graph_txt.label" and "graph_txt.ncol".
my $outfile = $infile;
$outfile =~ s/\./_/g;
$outfile = $ARGV[1] if @ARGV >= 2;

# 0-based indices of the two node-name columns.
my $col1 = @ARGV >= 3 ? $ARGV[2] : 0;
my $col2 = @ARGV >= 4 ? $ARGV[3] : 1;
for my $col ($col1, $col2) {
    die "Column index '$col' is not an integer\n$usage"
        unless $col =~ /\A-?\d+\z/;
}

# Three-argument open with lexical handles: the filename can never be
# misread as an open mode, and every failure is reported instead of
# silently producing empty output files.
open my $in, '<', $infile
    or die "Cannot open '$infile' for reading: $!\n";
open my $label_fh, '>', "$outfile.label"
    or die "Cannot open '$outfile.label' for writing: $!\n";
open my $ncol_fh, '>', "$outfile.ncol"
    or die "Cannot open '$outfile.ncol' for writing: $!\n";

my %node_id_of;    # node name      -> 1-based id, in order of first appearance
my %seen_edge;     # "minid_maxid"  -> 1 once that undirected edge is written
my $node_count = 0;

# A node's id is fixed the first time its name is seen, so labels and
# edges can be produced in a single pass over the input.
LINE:
while (my $line = <$in>) {
    my @fields = split /[\n\t\r\ ]+/, $line;    # important to remove \r too!

    my ($name1, $name2) = @fields[$col1, $col2];
    if (   !defined $name1 || !length $name1
        || !defined $name2 || !length $name2)
    {
        # Blank lines are skipped quietly; anything else that lacks the
        # requested columns is reported so bad input does not go unnoticed.
        warn "$infile line $.: missing node name in column $col1 or $col2, skipped\n"
            if $line =~ /\S/;
        next LINE;
    }

    for my $name ($name1, $name2) {
        next if exists $node_id_of{$name};
        $node_id_of{$name} = ++$node_count;
        print {$label_fh} "$name \n";
    }

    my ($id1, $id2) = @node_id_of{$name1, $name2};

    # The .ncol graph must not contain loops (an edge from a node to itself).
    next LINE if $id1 == $id2;

    # Undirected graph: a-b and b-a are the same edge, so key on the
    # numerically ordered pair of ids.
    my $edge_key = join '_', sort { $a <=> $b } $id1, $id2;
    next LINE if $seen_edge{$edge_key}++;

    print {$ncol_fh} "$id1 $id2 \n";
}

close $in;
# Buffered write errors (e.g. disk full) only surface at close time.
close $label_fh or die "Error writing '$outfile.label': $!\n";
close $ncol_fh  or die "Error writing '$outfile.ncol': $!\n";