#!/bin/csh setenv _POSIX2_VERSION 199209 # Copyright (C) 2005 Andrew E Firth, University of Otago, Dunedin, # New Zealand, aef(at)sanger.otago.ac.nz # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License (version 2) as # published by the Free Software Foundation. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. #Runs mcsim programmes, runs mlogd on sims, calculates statistics and makes # plots. limit stacksize unlimited @ argc = `echo $argv | awk '{print NF}'` if ($argc != 6) then echo "Usage: domcsims newdir nsim1 nsim2 count seed newurl
" exit 1 endif set newdir = $1 cd $newdir source mlogd.param if (! $download) then setenv PATH ${PATH}:/home/aef/bin set formdir = "/aef/MLOGD/FORM" else setenv PATH ${PATH}:${HOME}/bin set formdir = "../../FORM" endif @ maxnsim1 = 200 @ maxnsim2 = 200 @ minnsim1 = 10 @ minnsim2 = 5 set tttmax = 5. set lllmax = 0.5 @ maxruntime = 400 set starttime = `date +%s` mkdir MCSIMS @ nsim1 = $2 @ nsim2 = $3 set count1 = $4 @ seed = `echo $5 | awk '{print int(300*$1+1)}'` set newurl = $6 echo Started mcsims `date` >> mclog.txt #------------------------------------------------------------------------------ #Check nsim1 and nsim2 are not too small. if ($nsim1 < $minnsim1) then echo "Warning: Minimum number of simulations for general distribution is $minnsim1.
" echo "You entered $nsim1. Adjusting to $minnsim1.
" @ nsim1 = $minnsim1 endif if ($nsim2 < $minnsim2) then echo "Warning: Minimum number of simulations for error bars is $minnsim2.
" echo "You entered $nsim2. Adjusting to $minnsim2.
" @ nsim2 = $minnsim2 endif #------------------------------------------------------------------------------ #Estimated run-time @ maxvalue = 2000000 @ extn = 20 @ qlength = $qlength set chkextn = 0 set refseq2 = `awk '{if ($1=="'"$refseq"'") print $2}' seqs.key` set npairs1 = `wc -l pairs.ref.dat | awk '{print $1}'` set npairs2 = `wc -l pairs.tree.dat | awk '{print $1}'` set length2 = `head -1 ref.info | awk '{print 1+$3-$2}'` if ($circular) then @ extn1 = `echo $range1 $extn | awk '{print $1-$2}'` @ extn2 = `echo $range2 $extn | awk '{print $1+$2}'` if ($extn1 < 1 || $extn2 > $qlength) then set length2 = `head -1 ref.info | awk '{print $1}'` set chkextn = 1 endif endif @ sumvalue = `echo $length2 $nsim1 $npairs1 $npairs2 $nsim2 | awk '{print int($1*2*($2+($3+2*$4)*$5))}'` if ($sumvalue > $maxvalue) then echo "Error: (# query-region nt) x (# of simulations) = $sumvalue exceeds maximum $maxvalue.
" echo "You must reduce the requested number of simulations.
" if ($chkextn) then echo "Note you had a query ORF or nt range crossing, or within $extn nt of, the 'boundary' in a circular genome, which resulted in the query region being extended to the whole sequence, i.e. $length2 nt.
" endif exit 1 endif set runtime = `echo $sumvalue | awk '{print int(4+$1*0.00015)}'` echo "Running Monte Carlo simulations...
" echo "Estimated runtime: $runtime seconds.
" echo "Reference sequence = $refseq.
" #------------------------------------------------------------------------------ cd MCSIMS foreach file (aa2codon.dat aa.dat codon.dat nuc.dat orfs.1 orfs.2 setup.mlogd ref.dat tree.dat ref.names tree.info) cp ../$file . end touch ref.dat #------------------------------------------------------------------------------ #Check nsim1 and nsim2 are not too large. if ($nsim1 > $maxnsim1) then echo "Error: Maximum number of simulations for general distribution is $maxnsim1.
" echo "You entered $nsim1.
" exit 1 endif if ($nsim2 > $maxnsim2) then echo "Error: Maximum number of simulations for error bars is $maxnsim2.
" echo "You entered $nsim2.
" exit 1 endif #------------------------------------------------------------------------------ #MC sims for general distributions foreach model (0 1) set currenttime = `date +%s` @ totaltime = `echo $starttime $currenttime | awk '{print $2-$1}'` if ($totaltime > $maxruntime) then echo "
Warning: Exceeded maximum allowed run-time. You will have to try fewer simulations. Note that sometimes a run-time overflow may be the result of server overload, in which case you could try running your job at a different time." exit 1 endif echo $refseq2.fasta " " > setup.mcsim.$model echo $refseq2.fasta " " >> setup.mcsim.$model echo $refseq2.fasta " " >> setup.mcsim.$model echo $wholeseq $range1 $range2 " " >> setup.mcsim.$model echo $model " " >> setup.mcsim.$model echo 0.01 0.50 " " >> setup.mcsim.$model echo 1 " " >> setup.mcsim.$model echo 1 " " >> setup.mcsim.$model echo $nsim1 " " >> setup.mcsim.$model echo 343 " " >> setup.mcsim.$model echo $circular " " >> setup.mcsim.$model cp ../$refseq2.fasta . mcsim setup.mcsim.$model >>& ../mclog.txt || exit 1 ls mcseq.[0-9]* > temp1 echo $refseq2.fasta > seqs.dat rm -f pairs.dat touch pairs.dat @ count = 2 foreach i (`cat temp1`) set id = `echo $count | awk '{printf "%03i\n",$1}'` mv $i seq.$id.fasta echo seq.$id.fasta >> seqs.dat echo 001 $id >> pairs.dat @ count += 1 end mlogd setup.mlogd >>& ../mclog.txt || exit 1 foreach i (gaplog gappos glitches info MLElog stoplog stoppos1 stoppos2 startpos1 startpos2 treesum) rm -f mlogd_out.$i end foreach i (temp1 temp2 pairs.dat seqs.dat seq.???.fasta temp.fghjys) rm -f $i end mv mlogd_out.dat ref.mc.$model.dat touch ref.mc.$model.dat end #------------------------------------------------------------------------------ #MC sims for errorbars awk '{print $2}' ../pairs.ref.dat > temp1 paste temp1 ref.dat | \ awk '{if ((0.+$2)<(0.+"'"$tttmax"'")&&(0.+$3)<(0.+"'"$tttmax"'")&&(0.+$13)>0.&&($14/$13)<(0.+"'"$lllmax"'")) {printf "%s:%s\n",$1,$14} else {printf "%s:%s\n",$1,-1}}' > temp0 rm -f ref.mc.probs ebm.0 ebl.0 ebu.0 ebm.1 ebl.1 ebu.1 touch ref.mc.probs ebm.0 ebl.0 ebu.0 ebm.1 ebl.1 ebu.1 rm -f temp1 @ run = 0 foreach i (`cat temp0`) set currenttime = `date +%s` @ totaltime = `echo $starttime $currenttime | awk '{print $2-$1}'` if ($totaltime > $maxruntime) then echo "
Warning: Exceeded maximum allowed run-time. You will have to try fewer simulations. Note that sometimes a run-time overflow may be the result of server overload, in which case you could try running your job at a different time." exit 1 endif rm -f calcprob.obs.dat calcprob.mcs.dat calcprob.mcd.dat @ seed += 2 @ run += 1 set seq2 = `echo $i | awk -F: '{printf "seq.%s\n",$1}'` @ ttt = `echo $i | awk -F: '{print $2}'` head -$run ref.dat | tail -1 | \ awk '{print $4-$3,$10,$11,$15,$16,$17}' > calcprob.obs.dat if ($ttt > 0) then foreach model (0 1) echo $refseq2.fasta " " > setup.mcsim.$model echo $refseq2.fasta " " >> setup.mcsim.$model echo $seq2.fasta " " >> setup.mcsim.$model echo $wholeseq $range1 $range2 " " >> setup.mcsim.$model echo $model " " >> setup.mcsim.$model echo $ttt $ttt " " >> setup.mcsim.$model echo 0 " " >> setup.mcsim.$model echo 1 " " >> setup.mcsim.$model echo $nsim2 " " >> setup.mcsim.$model echo $seed " " >> setup.mcsim.$model echo $circular " " >> setup.mcsim.$model cp ../$refseq2.fasta . #This is not written over. cp ../$seq2.fasta . #NB - this is written over by one of the sims. mcsim setup.mcsim.$model >>& ../mclog.txt || exit 1 ls mcseq.[0-9]* > temp1 echo $refseq2.fasta > seqs.dat rm -f pairs.dat touch pairs.dat @ count = 2 foreach i (`cat temp1`) set id = `echo $count | awk '{printf "%03i\n",$1}'` mv $i seq.$id.fasta echo seq.$id.fasta >> seqs.dat echo 001 $id >> pairs.dat @ count += 1 end mlogd setup.mlogd >>& ../mclog.txt || exit 1 foreach i (gaplog gappos glitches info MLElog stoplog stoppos1 stoppos2 startpos1 startpos2 treesum) rm -f mlogd_out.$i end foreach i (temp1 temp2 pairs.dat seqs.dat seq.???.fasta temp.fghjys) rm -f $i end set nsims = `wc mlogd_out.dat | awk '{print $1}'` @ am = `echo $nsims | awk '{print int(0.5+0.50*$1)}'` @ al = `echo $nsims | awk '{print int(0.5+0.16*$1)}'` if ($al < 1) then @ al = 1 endif @ au = `echo $nsims | awk '{print $1-"'"$al"'"+1}'` set st1l = `awk '{if (0.+$12>0.) {print $13/$12} else {print "0"}}' mlogd_out.dat | sort -n | head -$al | tail -1` set st2l = `awk '{print $4-$3}' mlogd_out.dat | sort -n | head -$al | tail -1` set st3l = `awk '{print $10}' mlogd_out.dat | sort -n | head -$al | tail -1` set st4l = `awk '{print $11}' mlogd_out.dat | sort -n | head -$al | tail -1` set st5l = `awk '{print $15}' mlogd_out.dat | sort -n | head -$al | tail -1` set st6l = `awk '{print $16}' mlogd_out.dat | sort -n | head -$al | tail -1` set st7l = `awk '{print $17}' mlogd_out.dat | sort -n | head -$al | tail -1` set st1m = `awk '{if (0.+$12>0.) {print $13/$12} else {print "0"}}' mlogd_out.dat | sort -n | head -$am | tail -1` set st2m = `awk '{print $4-$3}' mlogd_out.dat | sort -n | head -$am | tail -1` set st3m = `awk '{print $10}' mlogd_out.dat | sort -n | head -$am | tail -1` set st4m = `awk '{print $11}' mlogd_out.dat | sort -n | head -$am | tail -1` set st5m = `awk '{print $15}' mlogd_out.dat | sort -n | head -$am | tail -1` set st6m = `awk '{print $16}' mlogd_out.dat | sort -n | head -$am | tail -1` set st7m = `awk '{print $17}' mlogd_out.dat | sort -n | head -$am | tail -1` set st1u = `awk '{if (0.+$12>0.) {print $13/$12} else {print "0"}}' mlogd_out.dat | sort -n | head -$au | tail -1` set st2u = `awk '{print $4-$3}' mlogd_out.dat | sort -n | head -$au | tail -1` set st3u = `awk '{print $10}' mlogd_out.dat | sort -n | head -$au | tail -1` set st4u = `awk '{print $11}' mlogd_out.dat | sort -n | head -$au | tail -1` set st5u = `awk '{print $15}' mlogd_out.dat | sort -n | head -$au | tail -1` set st6u = `awk '{print $16}' mlogd_out.dat | sort -n | head -$au | tail -1` set st7u = `awk '{print $17}' mlogd_out.dat | sort -n | head -$au | tail -1` echo $st1m $st2m $st3m $st4m $st5m $st6m $st7m >> ebm.$model echo $st1l $st2l $st3l $st4l $st5l $st6l $st7l >> ebl.$model echo $st1u $st2u $st3u $st4u $st5u $st6u $st7u >> ebu.$model awk '{print $4-$3,$10,$11,$15,$16,$17}' mlogd_out.dat \ > calcprob.mc.$model.dat rm -f mlogd_out.dat end calcprob cat calcprob.out.dat >> ref.mc.probs else foreach ebfile (ebm.0 ebl.0 ebu.0 ebm.1 ebl.1 ebu.1) echo 0 0 0 0 0 0 0 >> $ebfile end echo 0 0 0 0 0 0 >> ref.mc.probs endif end rm -f temp0 #------------------------------------------------------------------------------ #MC sims for treesum errorbars awk '{printf "%s:%s\n",$1,$2}' ../pairs.tree.dat > temp1 awk '{printf "%s:%s\n",$2,$1}' ../pairs.tree.dat >> temp1 paste temp1 tree.dat | \ awk '{if ((0.+$2)<(0.+"'"$tttmax"'")&&(0.+$3)<(0.+"'"$tttmax"'")&&(0.+$13)>0.&&($14/$13)<(0.+"'"$lllmax"'")) {printf "%s:%s\n",$1,$14} else {printf "%s:%s\n",$1,-1}}' > temp0 rm -f tree.mc.probs tebm.0 tebl.0 tebu.0 tebm.1 tebl.1 tebu.1 touch tree.mc.probs tebm.0 tebl.0 tebu.0 tebm.1 tebl.1 tebu.1 rm -f temp1 @ run = 0 foreach i (`cat temp0`) set currenttime = `date +%s` @ totaltime = `echo $starttime $currenttime | awk '{print $2-$1}'` if ($totaltime > $maxruntime) then echo "
Warning: Exceeded maximum allowed run-time. You will have to try fewer simulations. Note that sometimes a run-time overflow may be the result of server overload, in which case you could try running your job at a different time." exit 1 endif rm -f calcprob.obs.dat calcprob.mcs.dat calcprob.mcd.dat @ seed += 2 @ run += 1 set seq2 = `echo $i | awk -F: '{printf "seq.%s\n",$1}'` set seq3 = `echo $i | awk -F: '{printf "seq.%s\n",$2}'` @ ttt = `echo $i | awk -F: '{print $3}'` head -$run tree.dat | tail -1 | \ awk '{print $4-$3,$10,$11,$15,$16,$17}' > calcprob.obs.dat if ($ttt > 0) then foreach model (0 1) echo $refseq2.fasta " " > setup.mcsim.$model echo $seq2.fasta " " >> setup.mcsim.$model echo $seq3.fasta " " >> setup.mcsim.$model echo $wholeseq $range1 $range2 " " >> setup.mcsim.$model echo $model " " >> setup.mcsim.$model echo $ttt $ttt " " >> setup.mcsim.$model echo 0 " " >> setup.mcsim.$model echo 1 " " >> setup.mcsim.$model echo $nsim2 " " >> setup.mcsim.$model echo $seed " " >> setup.mcsim.$model echo $circular " " >> setup.mcsim.$model cp ../$refseq2.fasta . #This is not written over. cp ../$seq2.fasta . #NB - this is written over by one of the sims. cp ../$seq3.fasta . #NB - this is written over by one of the sims. mcsim setup.mcsim.$model >>& ../mclog.txt || exit 1 ls mcseq.[0-9]* > temp1 echo $refseq2.fasta > seqs.dat rm -f pairs.dat touch pairs.dat @ count = 2 foreach i (`cat temp1`) set id = `echo $count | awk '{printf "%03i\n",$1}'` mv $i seq.$id.fasta echo seq.$id.fasta >> seqs.dat echo 001 $id >> pairs.dat @ count += 1 end mlogd setup.mlogd >>& ../mclog.txt || exit 1 foreach i (gaplog gappos glitches info MLElog stoplog stoppos1 stoppos2 startpos1 startpos2 treesum) rm -f mlogd_out.$i end foreach i (temp1 temp2 pairs.dat seqs.dat seq.???.fasta temp.fghjys) rm -f $i end awk '{print $4-$3,$10,$11,$15,$16,$17}' mlogd_out.dat \ > calcprob.mc.$model.dat cp calcprob.mc.$model.dat treedat.$model.$run rm -f mlogd_out.dat end calcprob cat calcprob.out.dat >> tree.mc.probs else echo 0 0 0 0 0 0 >> tree.mc.probs endif end rm -f temp0 foreach model (0 1) rm -f treesum.$model ls treedat.$model.* > treedatfiles.$model @ simcount = 0 while ($simcount < $nsim2) @ simcount += 1 rm -f temp1 foreach file (`cat treedatfiles.$model`) head -$simcount $file | tail -1 >> temp1 end awk '{s1+=$1}{s2+=$2}{s3+=$3}{s4+=$4}{s5+=$5}{s6+=$6}END{print s1*0.25,s2*0.25,s3*0.25,s4*0.25,s5*0.25,s6*0.25}' temp1 >> treesum.$model end end rm -f temp1 set lambdasum = `head -2 tree.info | tail -1 | awk '{printf "%.3f\n",$1*0.25}'` set ntmlesum = `awk '{s+=($4-$3)}END{printf "%.3f\n",s*0.25}' tree.dat` awk '{s1+=($4-$3)}{s2+=$10}{s3+=$11}{s4+=$15}{s5+=$16}{s6+=$17}END{print "'"$lambdasum"'",s1*0.25,s2*0.25,s3*0.25,s4*0.25,s5*0.25,s6*0.25}' tree.dat > treesum.dat touch treesum.dat foreach model (0 1) @ nsims = `wc -l treesum.$model | awk '{print $1}'` if ($nsims > 0) then @ am = `echo $nsims | awk '{print int(0.5+0.50*$1)}'` @ al = `echo $nsims | awk '{print int(0.5+0.16*$1)}'` if ($al < 1) then @ al = 1 endif @ au = `echo $nsims | awk '{print $1-"'"$al"'"+1}'` set st2l = `awk '{print $1}' treesum.$model | sort -n | head -$al | tail -1` set st3l = `awk '{print $2}' treesum.$model | sort -n | head -$al | tail -1` set st4l = `awk '{print $3}' treesum.$model | sort -n | head -$al | tail -1` set st5l = `awk '{print $4}' treesum.$model | sort -n | head -$al | tail -1` set st6l = `awk '{print $5}' treesum.$model | sort -n | head -$al | tail -1` set st7l = `awk '{print $6}' treesum.$model | sort -n | head -$al | tail -1` set st2m = `awk '{print $1}' treesum.$model | sort -n | head -$am | tail -1` set st3m = `awk '{print $2}' treesum.$model | sort -n | head -$am | tail -1` set st4m = `awk '{print $3}' treesum.$model | sort -n | head -$am | tail -1` set st5m = `awk '{print $4}' treesum.$model | sort -n | head -$am | tail -1` set st6m = `awk '{print $5}' treesum.$model | sort -n | head -$am | tail -1` set st7m = `awk '{print $6}' treesum.$model | sort -n | head -$am | tail -1` set st2u = `awk '{print $1}' treesum.$model | sort -n | head -$au | tail -1` set st3u = `awk '{print $2}' treesum.$model | sort -n | head -$au | tail -1` set st4u = `awk '{print $3}' treesum.$model | sort -n | head -$au | tail -1` set st5u = `awk '{print $4}' treesum.$model | sort -n | head -$au | tail -1` set st6u = `awk '{print $5}' treesum.$model | sort -n | head -$au | tail -1` set st7u = `awk '{print $6}' treesum.$model | sort -n | head -$au | tail -1` echo $lambdasum $st2m $st3m $st4m $st5m $st6m $st7m >> tebm.$model echo $lambdasum $st2l $st3l $st4l $st5l $st6l $st7l >> tebl.$model echo $lambdasum $st2u $st3u $st4u $st5u $st6u $st7u >> tebu.$model else foreach tebfile (tebm.$model tebl.$model tebu.$model) echo 0 0 0 0 0 0 0 >> $tebfile end endif end #------------------------------------------------------------------------------ echo "...Done

" echo -n "Making plots...
" #Process mcsim.R and plot. set date = `date +%y-%m-%d` @ w = `wc -l ref.dat | awk '{print $1}'` if ($w == 0) then set ccc1 = "#" echo "Error: can't find 'reference v. non-reference pairs' plot data.
" else set ccc1 = "" endif @ w = `wc -l ref.mc.0.dat | awk '{print $1}'` if ($w == 0) then set ccc2 = "#" echo "Error: can't find null model GD simulations plot data.
" else set ccc2 = "" endif @ w = `wc -l ref.mc.1.dat | awk '{print $1}'` if ($w == 0) then set ccc3 = "#" echo "Error: can't find alternate model GD simulations plot data.
" else set ccc3 = "" endif @ w = `wc -l treesum.dat | awk '{print $1}'` if ($w == 0) then set ccc4 = "#" echo "Error: can't find 'sum over tree' plot data.
" else set ccc4 = "" endif @ w1 = `wc -l ebm.0 | awk '{print $1}'` @ w2 = `wc -l ebl.0 | awk '{print $1}'` @ w3 = `wc -l ebu.0 | awk '{print $1}'` @ w4 = `wc -l ebm.1 | awk '{print $1}'` @ w5 = `wc -l ebl.1 | awk '{print $1}'` @ w6 = `wc -l ebu.1 | awk '{print $1}'` if ($w1 == 0 || $w2 == 0 || $w3 == 0 || $w4 == 0 || $w5 == 0 || $w6 == 0) then set ccc5 = "#" echo "Error: can't find 'reference v. non-reference pairs' EB plot data.
" else set ccc5 = "" endif @ w1 = `wc -l tebm.0 | awk '{print $1}'` @ w2 = `wc -l tebl.0 | awk '{print $1}'` @ w3 = `wc -l tebu.0 | awk '{print $1}'` @ w4 = `wc -l tebm.1 | awk '{print $1}'` @ w5 = `wc -l tebl.1 | awk '{print $1}'` @ w6 = `wc -l tebu.1 | awk '{print $1}'` if ($w1 == 0 || $w2 == 0 || $w3 == 0 || $w4 == 0 || $w5 == 0 || $w6 == 0) then set ccc6 = "#" echo "Error: can't find 'sum over tree' EB plot data.
" else set ccc6 = "" endif cat ../../../SCRIPTS/mcsim.R | \ sed 's/ddd/'$date'/g' | sed 's/lll/'$lambdasum'/g' | \ sed 's/#ccc1/'$ccc1'/g' | sed 's/#ccc2/'$ccc2'/g' | \ sed 's/#ccc3/'$ccc3'/g' | sed 's/#ccc4/'$ccc4'/g' | \ sed 's/#ccc5/'$ccc5'/g' | sed 's/#ccc6/'$ccc6'/g' | \ sed 's/%sss%/'$refseq'/g' | sed 's/%ggg%/'$title'/g' > plot2.R R --no-save --slave < plot2.R >>& ../mclog.txt epstopdf mcsim.eps mv mcsim.png ../mcsim.$count1.png mv mcsim.eps ../mcsim.$count1.eps mv mcsim.pdf ../mcsim.$count1.pdf sed 's/ccc/'$count1'/g' ../../../SCRIPTS/mcsim.plotdata.html \ > ../mcsim.plotdata.$count1.html echo "Error bar data." > ../eb.$count1.txt echo " " >> ../eb.$count1.txt echo "non-reference divergence MLOGD frac frac frac frac frac" >> ../eb.$count1.txt echo "sequence name (mean muts syn nonsyn N1 N2 N3" >> ../eb.$count1.txt echo " per nt) muts muts muts muts muts" >> ../eb.$count1.txt echo " " >> ../eb.$count1.txt echo "Null model, medians." >> ../eb.$count1.txt paste ref.names ebm.0 | awk '{printf "%20s %8.3f%8.3f%8.3f%8.3f%8.3f%8.3f%8.3f\n",$1,$2,$3,$4,$5,$6,$7,$8}' >> ../eb.$count1.txt awk '{printf " Sum over tree: %8.3f%8.3f%8.3f%8.3f%8.3f%8.3f%8.3f\n",$1,$2,$3,$4,$5,$6,$7}' tebm.0 >> ../eb.$count1.txt echo " " >> ../eb.$count1.txt echo "Null model, 16th percentile." >> ../eb.$count1.txt paste ref.names ebl.0 | awk '{printf "%20s %8.3f%8.3f%8.3f%8.3f%8.3f%8.3f%8.3f\n",$1,$2,$3,$4,$5,$6,$7,$8}' >> ../eb.$count1.txt awk '{printf " Sum over tree: %8.3f%8.3f%8.3f%8.3f%8.3f%8.3f%8.3f\n",$1,$2,$3,$4,$5,$6,$7}' tebl.0 >> ../eb.$count1.txt echo " " >> ../eb.$count1.txt echo "Null model, 84th percentile." >> ../eb.$count1.txt paste ref.names ebu.0 | awk '{printf "%20s %8.3f%8.3f%8.3f%8.3f%8.3f%8.3f%8.3f\n",$1,$2,$3,$4,$5,$6,$7,$8}' >> ../eb.$count1.txt awk '{printf " Sum over tree: %8.3f%8.3f%8.3f%8.3f%8.3f%8.3f%8.3f\n",$1,$2,$3,$4,$5,$6,$7}' tebu.0 >> ../eb.$count1.txt echo " " >> ../eb.$count1.txt echo "Alternate model, medians." >> ../eb.$count1.txt paste ref.names ebm.1 | awk '{printf "%20s %8.3f%8.3f%8.3f%8.3f%8.3f%8.3f%8.3f\n",$1,$2,$3,$4,$5,$6,$7,$8}' >> ../eb.$count1.txt awk '{printf " Sum over tree: %8.3f%8.3f%8.3f%8.3f%8.3f%8.3f%8.3f\n",$1,$2,$3,$4,$5,$6,$7}' tebm.1 >> ../eb.$count1.txt echo " " >> ../eb.$count1.txt echo "Alternate model, 16th percentile." >> ../eb.$count1.txt paste ref.names ebl.1 | awk '{printf "%20s %8.3f%8.3f%8.3f%8.3f%8.3f%8.3f%8.3f\n",$1,$2,$3,$4,$5,$6,$7,$8}' >> ../eb.$count1.txt awk '{printf " Sum over tree: %8.3f%8.3f%8.3f%8.3f%8.3f%8.3f%8.3f\n",$1,$2,$3,$4,$5,$6,$7}' tebl.1 >> ../eb.$count1.txt echo " " >> ../eb.$count1.txt echo "Alternate model, 84th percentile." >> ../eb.$count1.txt paste ref.names ebu.1 | awk '{printf "%20s %8.3f%8.3f%8.3f%8.3f%8.3f%8.3f%8.3f\n",$1,$2,$3,$4,$5,$6,$7,$8}' >> ../eb.$count1.txt awk '{printf " Sum over tree: %8.3f%8.3f%8.3f%8.3f%8.3f%8.3f%8.3f\n",$1,$2,$3,$4,$5,$6,$7}' tebu.1 >> ../eb.$count1.txt echo " " >> ../eb.$count1.txt echo "Original reference - non-reference sequence data." > ../ref.dat.$count1.txt echo " " >> ../ref.dat.$count1.txt echo "non-reference divergence MLOGD frac frac frac frac frac" >> ../ref.dat.$count1.txt echo "sequence name (mean muts syn nonsyn N1 N2 N3" >> ../ref.dat.$count1.txt echo " per nt) muts muts muts muts muts" >> ../ref.dat.$count1.txt echo " " >> ../ref.dat.$count1.txt paste ref.names ref.dat | awk '{if (0.+$13>0.) printf "%20s %8.3f%8.3f%8.3f%8.3f%8.3f%8.3f%8.3f\n",$1,$14/$13,$5-$4,$11,$12,$16,$17,$18}' >> ../ref.dat.$count1.txt cat treesum.dat | awk '{printf " Sum over tree: %8.3f%8.3f%8.3f%8.3f%8.3f%8.3f%8.3f\n",$1,$2,$3,$4,$5,$6,$7}' >> ../ref.dat.$count1.txt echo "General distribution of statistics for null model simulations." > ../ref.mc0.$count1.txt echo " " >> ../ref.mc0.$count1.txt echo "divergence MLOGD frac frac frac frac frac" >> ../ref.mc0.$count1.txt echo "(mean muts syn nonsyn N1 N2 N3" >> ../ref.mc0.$count1.txt echo " per nt) muts muts muts muts muts" >> ../ref.mc0.$count1.txt echo " " >> ../ref.mc0.$count1.txt awk '{if (0.+$12>0.) printf "%10.3f%8.3f%8.3f%8.3f%8.3f%8.3f%8.3f\n",$13/$12,$4-$3,$10,$11,$15,$16,$17}' ref.mc.0.dat | sort -n \ >> ../ref.mc0.$count1.txt echo "General distribution of statistics for alternate model simulations." > ../ref.mc1.$count1.txt echo " " >> ../ref.mc1.$count1.txt echo "divergence MLOGD frac frac frac frac frac" >> ../ref.mc1.$count1.txt echo "(mean muts syn nonsyn N1 N2 N3" >> ../ref.mc1.$count1.txt echo " per nt) muts muts muts muts muts" >> ../ref.mc1.$count1.txt echo " " >> ../ref.mc1.$count1.txt awk '{if (0.+$12>0.) printf "%10.3f%8.3f%8.3f%8.3f%8.3f%8.3f%8.3f\n",$13/$12,$4-$3,$10,$11,$15,$16,$17}' ref.mc.1.dat | sort -n \ >> ../ref.mc1.$count1.txt echo "...Done

" #------------------------------------------------------------------------------ #Do table of mcsim probs cat ../../../SCRIPTS/mctableheader.txt > ../mcsim.table paste ref.names ref.dat ref.mc.probs | awk '{if (0.+$13>0.) printf "%s%6i%8.3f%8.3f%7.2f%7.2f%7.2f%7.2f%7.2f%9.1f%9.1f%9.1f%9.1f%9.1f%9.1f\n",$1,$13,$14/$13,$5-$4,$11,$12,$16,$17,$18,$21,$22,$23,$24,$25,$26}' >> ../mcsim.table echo "Summed over tree (i.e input pairs)$lambdasum$ntmlesum" >> ../mcsim.table awk '{s1+=$1}{s2+=$2}{s3+=$3}{s4+=$4}{s5+=$5}{s6+=$6}END{printf "%9.1f%9.1f%9.1f%9.1f%9.1f%9.1f\n",s1*0.25,s2*0.25,s3*0.25,s4*0.25,s5*0.25,s6*0.25}' tree.mc.probs >> ../mcsim.table echo '' >> ../mcsim.table #------------------------------------------------------------------------------ cd .. rm -rf MCSIMS chmod a+rw ./* #Write rest of webpage. set id9 = `echo $newurl | sed 's/.*\///'` echo "

Plot of data versus simulations:

Download " echo "png, " echo "eps, " echo "pdf, " echo "description, " echo "raw plot data.
" if (! $download) then echo "Redo Monte Carlo " echo "simulations with more/fewer simulations or new random seed." echo "
(Don't just use the back button in your browser, or the " echo "plot may not refresh.)
" echo "Return to MLOGD server." endif echo "

" echo "Table of log likelihood ratios:

" echo "The log likelihood ratios (ln(LR)) are estimated by comparing the " echo "observed values with the simulated 'Error bar' (EB) distributions " echo "(see Firth & Brown, 2005, Bioinformatics, 21, 282-92 " echo "for details). Positive values favour the alternate model while " echo "negative values favour the null model. The more EB simulations you do, " echo "the more accurate these will be (you can try a couple of different " echo "initial random seeds to see how stable the results are). However " echo "they will still only be approximations " echo "(details) " echo "and you should treat them more as a yes/no classifier for the null " echo "versus the alternate model, rather than as precise probabilities." echo "

" cat mcsim.table echo "
Description of columns." echo "

" chmod a+rw ./* echo Finished mcsims `date` >> mclog.txt