#!/usr/local/bin/perl ###################################################################### # # Greedy MDL-minimization algorithm # # # Copyright 1996 Jaak Vilo # ###################################################################### $PRATT = "./pratt"; $REALOUTPUT = "MDL.result" ; $TMP_DIR = "mdl_tmp" ; $C0=10; $C1=10; $C2=3; $C3=10; system( "mv MDL.result MDL.result.backup.$$" ); # Old results... mkdir ( $TMP_DIR, 755 ) || die unless -d $TMP_DIR ; system( "rm -rf $TMP_DIR/*"); #clean up old versions open( REALOUTPUT , ">$REALOUTPUT" ) || die; $| = 1 ; # Force the flush ###################################################################### # Read in the data file # $name=''; $count= 0; while( <> ) { chop; $_ =~ s/\s+$//g; # Remove empty line endings... if ( /^\>/ ) { $name = substr($_,1) ; $NAMES[++$count-1]=$name; next; } $LINES{$name} .= "$_\n"; } ###################################################################### $iteration=0; while( $count > 0 ) # While some sequences not covered yet... { ++$iteration; open( DATA , ">$TMP_DIR/mdl_data.$count" ); for( $i=0; $i < $count ; $i++ ) { print DATA ">$NAMES[$i]\n$LINES{$NAMES[$i]}"; } close(DATA); # Run pratt, greedy choice @RESULT = (); # Empty result... # Cycle for different K values for( $K = $count ; $K >= 4 ; $K = $K - ( int($K/5) ? int($K/5) : 1 ) ) { print "$K => " , $K - ( int($K/5) ? int($K/5) : 1 ) , "\n" ; sleep( 2 ); open( S , ">parameters" ); # What parameters should be used for Pratt print S < ) { push( @RESULT, $_ ) ; } close( PRATTOUTPUT ); } open( MDLSPEC, ">$TMP_DIR/mdl.special.$count.$iteration" ); print MDLSPEC @RESULT ; close(MDLSPEC); print "SORT\n" if $#RESUL => 0; sleep( 3 ); sub sortorder { @all=split(/\s+/,$a); $vala = ( $all[0] eq '' ) ? $all[1] : $all[0] ; @all=split(/\s+/,$b); $valb = ( $all[0] eq '' ) ? $all[1] : $all[0] ; # print "$vala <=> $valb\n"; $valb <=> $vala ; # Reverse sort - pick the largest value! } @RESULT = sort sortorder @RESULT ; print @RESULT ; $greedychoice = $RESULT[0] ; chop( $greedychoice ); @all=split(/\s+/,$greedychoice ); splice( @all,0,1 ) if $all[0] eq '' ; $mdlcost = $all[0]; $prattcost = $all[1]; $pattern = $all[2]; @sequences = splice( @all, 3 ); # Output the answer of greedy choice for $i ( @sequences ){ $answer{$NAMES[$i]}=$iteration; # Mark removed print REALOUTPUT "$NAMES[$i] "; } print REALOUTPUT "\n$mdlcost $prattcost\n$pattern\n\n"; for $i ( @NAMES ) { # Remove covered sequences from @NAMES if ( $answer{$i} == $iteration ) { for $j (0..$#NAMES){ if( $NAMES[$j] eq $i ) { splice( @NAMES,$j,1 ) ; last ; } } } } $count = $#NAMES + 1; # Finish if the RESULT was left empty... if( ! ( $#RESULT >= 0 ) ) { for $i (0..$count-1) { print REALOUTPUT "$NAMES[$i]\n"; } goto FINISH ; } } FINISH: close( REALOUTPUT ); print<){ print $_ };