#!/bin/csh
setenv _POSIX2_VERSION 199209
# Copyright (C) 2005 Andrew E Firth, University of Otago, Dunedin,
# New Zealand, aef(at)sanger.otago.ac.nz
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License (version 2) as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
#Runs mcsim programmes, runs mlogd on sims, calculates statistics and makes
# plots.
limit stacksize unlimited
@ argc = `echo $argv | awk '{print NF}'`
if ($argc != 6) then
echo "Usage: domcsims newdir nsim1 nsim2 count seed newurl
"
exit 1
endif
set newdir = $1
cd $newdir
source mlogd.param
if (! $download) then
setenv PATH ${PATH}:/home/aef/bin
set formdir = "/aef/MLOGD/FORM"
else
setenv PATH ${PATH}:${HOME}/bin
set formdir = "../../FORM"
endif
@ maxnsim1 = 200
@ maxnsim2 = 200
@ minnsim1 = 10
@ minnsim2 = 5
set tttmax = 5.
set lllmax = 0.5
@ maxruntime = 400
set starttime = `date +%s`
mkdir MCSIMS
@ nsim1 = $2
@ nsim2 = $3
set count1 = $4
@ seed = `echo $5 | awk '{print int(300*$1+1)}'`
set newurl = $6
echo Started mcsims `date` >> mclog.txt
#------------------------------------------------------------------------------
#Check nsim1 and nsim2 are not too small.
if ($nsim1 < $minnsim1) then
echo "Warning: Minimum number of simulations for general distribution is $minnsim1.
"
echo "You entered $nsim1. Adjusting to $minnsim1.
"
@ nsim1 = $minnsim1
endif
if ($nsim2 < $minnsim2) then
echo "Warning: Minimum number of simulations for error bars is $minnsim2.
"
echo "You entered $nsim2. Adjusting to $minnsim2.
"
@ nsim2 = $minnsim2
endif
#------------------------------------------------------------------------------
#Estimated run-time
@ maxvalue = 2000000
@ extn = 20
@ qlength = $qlength
set chkextn = 0
set refseq2 = `awk '{if ($1=="'"$refseq"'") print $2}' seqs.key`
set npairs1 = `wc -l pairs.ref.dat | awk '{print $1}'`
set npairs2 = `wc -l pairs.tree.dat | awk '{print $1}'`
set length2 = `head -1 ref.info | awk '{print 1+$3-$2}'`
if ($circular) then
@ extn1 = `echo $range1 $extn | awk '{print $1-$2}'`
@ extn2 = `echo $range2 $extn | awk '{print $1+$2}'`
if ($extn1 < 1 || $extn2 > $qlength) then
set length2 = `head -1 ref.info | awk '{print $1}'`
set chkextn = 1
endif
endif
@ sumvalue = `echo $length2 $nsim1 $npairs1 $npairs2 $nsim2 | awk '{print int($1*2*($2+($3+2*$4)*$5))}'`
if ($sumvalue > $maxvalue) then
echo "Error: (# query-region nt) x (# of simulations) = $sumvalue exceeds maximum $maxvalue.
"
echo "You must reduce the requested number of simulations.
"
if ($chkextn) then
echo "Note you had a query ORF or nt range crossing, or within $extn nt of, the 'boundary' in a circular genome, which resulted in the query region being extended to the whole sequence, i.e. $length2 nt.
"
endif
exit 1
endif
set runtime = `echo $sumvalue | awk '{print int(4+$1*0.00015)}'`
echo "Running Monte Carlo simulations...
"
echo "Estimated runtime: $runtime seconds.
"
echo "Reference sequence = $refseq.
"
#------------------------------------------------------------------------------
cd MCSIMS
foreach file (aa2codon.dat aa.dat codon.dat nuc.dat orfs.1 orfs.2 setup.mlogd ref.dat tree.dat ref.names tree.info)
cp ../$file .
end
touch ref.dat
#------------------------------------------------------------------------------
#Check nsim1 and nsim2 are not too large.
if ($nsim1 > $maxnsim1) then
echo "Error: Maximum number of simulations for general distribution is $maxnsim1.
"
echo "You entered $nsim1.
"
exit 1
endif
if ($nsim2 > $maxnsim2) then
echo "Error: Maximum number of simulations for error bars is $maxnsim2.
"
echo "You entered $nsim2.
"
exit 1
endif
#------------------------------------------------------------------------------
#MC sims for general distributions
foreach model (0 1)
set currenttime = `date +%s`
@ totaltime = `echo $starttime $currenttime | awk '{print $2-$1}'`
if ($totaltime > $maxruntime) then
echo "
Warning: Exceeded maximum allowed run-time. You will have to try fewer simulations. Note that sometimes a run-time overflow may be the result of server overload, in which case you could try running your job at a different time."
exit 1
endif
echo $refseq2.fasta " " > setup.mcsim.$model
echo $refseq2.fasta " " >> setup.mcsim.$model
echo $refseq2.fasta " " >> setup.mcsim.$model
echo $wholeseq $range1 $range2 " " >> setup.mcsim.$model
echo $model " " >> setup.mcsim.$model
echo 0.01 0.50 " " >> setup.mcsim.$model
echo 1 " " >> setup.mcsim.$model
echo 1 " " >> setup.mcsim.$model
echo $nsim1 " " >> setup.mcsim.$model
echo 343 " " >> setup.mcsim.$model
echo $circular " " >> setup.mcsim.$model
cp ../$refseq2.fasta .
mcsim setup.mcsim.$model >>& ../mclog.txt || exit 1
ls mcseq.[0-9]* > temp1
echo $refseq2.fasta > seqs.dat
rm -f pairs.dat
touch pairs.dat
@ count = 2
foreach i (`cat temp1`)
set id = `echo $count | awk '{printf "%03i\n",$1}'`
mv $i seq.$id.fasta
echo seq.$id.fasta >> seqs.dat
echo 001 $id >> pairs.dat
@ count += 1
end
mlogd setup.mlogd >>& ../mclog.txt || exit 1
foreach i (gaplog gappos glitches info MLElog stoplog stoppos1 stoppos2 startpos1 startpos2 treesum)
rm -f mlogd_out.$i
end
foreach i (temp1 temp2 pairs.dat seqs.dat seq.???.fasta temp.fghjys)
rm -f $i
end
mv mlogd_out.dat ref.mc.$model.dat
touch ref.mc.$model.dat
end
#------------------------------------------------------------------------------
#MC sims for errorbars
awk '{print $2}' ../pairs.ref.dat > temp1
paste temp1 ref.dat | \
awk '{if ((0.+$2)<(0.+"'"$tttmax"'")&&(0.+$3)<(0.+"'"$tttmax"'")&&(0.+$13)>0.&&($14/$13)<(0.+"'"$lllmax"'")) {printf "%s:%s\n",$1,$14} else {printf "%s:%s\n",$1,-1}}' > temp0
rm -f ref.mc.probs ebm.0 ebl.0 ebu.0 ebm.1 ebl.1 ebu.1
touch ref.mc.probs ebm.0 ebl.0 ebu.0 ebm.1 ebl.1 ebu.1
rm -f temp1
@ run = 0
foreach i (`cat temp0`)
set currenttime = `date +%s`
@ totaltime = `echo $starttime $currenttime | awk '{print $2-$1}'`
if ($totaltime > $maxruntime) then
echo "
Warning: Exceeded maximum allowed run-time. You will have to try fewer simulations. Note that sometimes a run-time overflow may be the result of server overload, in which case you could try running your job at a different time."
exit 1
endif
rm -f calcprob.obs.dat calcprob.mcs.dat calcprob.mcd.dat
@ seed += 2
@ run += 1
set seq2 = `echo $i | awk -F: '{printf "seq.%s\n",$1}'`
@ ttt = `echo $i | awk -F: '{print $2}'`
head -$run ref.dat | tail -1 | \
awk '{print $4-$3,$10,$11,$15,$16,$17}' > calcprob.obs.dat
if ($ttt > 0) then
foreach model (0 1)
echo $refseq2.fasta " " > setup.mcsim.$model
echo $refseq2.fasta " " >> setup.mcsim.$model
echo $seq2.fasta " " >> setup.mcsim.$model
echo $wholeseq $range1 $range2 " " >> setup.mcsim.$model
echo $model " " >> setup.mcsim.$model
echo $ttt $ttt " " >> setup.mcsim.$model
echo 0 " " >> setup.mcsim.$model
echo 1 " " >> setup.mcsim.$model
echo $nsim2 " " >> setup.mcsim.$model
echo $seed " " >> setup.mcsim.$model
echo $circular " " >> setup.mcsim.$model
cp ../$refseq2.fasta . #This is not written over.
cp ../$seq2.fasta . #NB - this is written over by one of the sims.
mcsim setup.mcsim.$model >>& ../mclog.txt || exit 1
ls mcseq.[0-9]* > temp1
echo $refseq2.fasta > seqs.dat
rm -f pairs.dat
touch pairs.dat
@ count = 2
foreach i (`cat temp1`)
set id = `echo $count | awk '{printf "%03i\n",$1}'`
mv $i seq.$id.fasta
echo seq.$id.fasta >> seqs.dat
echo 001 $id >> pairs.dat
@ count += 1
end
mlogd setup.mlogd >>& ../mclog.txt || exit 1
foreach i (gaplog gappos glitches info MLElog stoplog stoppos1 stoppos2 startpos1 startpos2 treesum)
rm -f mlogd_out.$i
end
foreach i (temp1 temp2 pairs.dat seqs.dat seq.???.fasta temp.fghjys)
rm -f $i
end
set nsims = `wc mlogd_out.dat | awk '{print $1}'`
@ am = `echo $nsims | awk '{print int(0.5+0.50*$1)}'`
@ al = `echo $nsims | awk '{print int(0.5+0.16*$1)}'`
if ($al < 1) then
@ al = 1
endif
@ au = `echo $nsims | awk '{print $1-"'"$al"'"+1}'`
set st1l = `awk '{if (0.+$12>0.) {print $13/$12} else {print "0"}}' mlogd_out.dat | sort -n | head -$al | tail -1`
set st2l = `awk '{print $4-$3}' mlogd_out.dat | sort -n | head -$al | tail -1`
set st3l = `awk '{print $10}' mlogd_out.dat | sort -n | head -$al | tail -1`
set st4l = `awk '{print $11}' mlogd_out.dat | sort -n | head -$al | tail -1`
set st5l = `awk '{print $15}' mlogd_out.dat | sort -n | head -$al | tail -1`
set st6l = `awk '{print $16}' mlogd_out.dat | sort -n | head -$al | tail -1`
set st7l = `awk '{print $17}' mlogd_out.dat | sort -n | head -$al | tail -1`
set st1m = `awk '{if (0.+$12>0.) {print $13/$12} else {print "0"}}' mlogd_out.dat | sort -n | head -$am | tail -1`
set st2m = `awk '{print $4-$3}' mlogd_out.dat | sort -n | head -$am | tail -1`
set st3m = `awk '{print $10}' mlogd_out.dat | sort -n | head -$am | tail -1`
set st4m = `awk '{print $11}' mlogd_out.dat | sort -n | head -$am | tail -1`
set st5m = `awk '{print $15}' mlogd_out.dat | sort -n | head -$am | tail -1`
set st6m = `awk '{print $16}' mlogd_out.dat | sort -n | head -$am | tail -1`
set st7m = `awk '{print $17}' mlogd_out.dat | sort -n | head -$am | tail -1`
set st1u = `awk '{if (0.+$12>0.) {print $13/$12} else {print "0"}}' mlogd_out.dat | sort -n | head -$au | tail -1`
set st2u = `awk '{print $4-$3}' mlogd_out.dat | sort -n | head -$au | tail -1`
set st3u = `awk '{print $10}' mlogd_out.dat | sort -n | head -$au | tail -1`
set st4u = `awk '{print $11}' mlogd_out.dat | sort -n | head -$au | tail -1`
set st5u = `awk '{print $15}' mlogd_out.dat | sort -n | head -$au | tail -1`
set st6u = `awk '{print $16}' mlogd_out.dat | sort -n | head -$au | tail -1`
set st7u = `awk '{print $17}' mlogd_out.dat | sort -n | head -$au | tail -1`
echo $st1m $st2m $st3m $st4m $st5m $st6m $st7m >> ebm.$model
echo $st1l $st2l $st3l $st4l $st5l $st6l $st7l >> ebl.$model
echo $st1u $st2u $st3u $st4u $st5u $st6u $st7u >> ebu.$model
awk '{print $4-$3,$10,$11,$15,$16,$17}' mlogd_out.dat \
> calcprob.mc.$model.dat
rm -f mlogd_out.dat
end
calcprob
cat calcprob.out.dat >> ref.mc.probs
else
foreach ebfile (ebm.0 ebl.0 ebu.0 ebm.1 ebl.1 ebu.1)
echo 0 0 0 0 0 0 0 >> $ebfile
end
echo 0 0 0 0 0 0 >> ref.mc.probs
endif
end
rm -f temp0
#------------------------------------------------------------------------------
#MC sims for treesum errorbars
awk '{printf "%s:%s\n",$1,$2}' ../pairs.tree.dat > temp1
awk '{printf "%s:%s\n",$2,$1}' ../pairs.tree.dat >> temp1
paste temp1 tree.dat | \
awk '{if ((0.+$2)<(0.+"'"$tttmax"'")&&(0.+$3)<(0.+"'"$tttmax"'")&&(0.+$13)>0.&&($14/$13)<(0.+"'"$lllmax"'")) {printf "%s:%s\n",$1,$14} else {printf "%s:%s\n",$1,-1}}' > temp0
rm -f tree.mc.probs tebm.0 tebl.0 tebu.0 tebm.1 tebl.1 tebu.1
touch tree.mc.probs tebm.0 tebl.0 tebu.0 tebm.1 tebl.1 tebu.1
rm -f temp1
@ run = 0
foreach i (`cat temp0`)
set currenttime = `date +%s`
@ totaltime = `echo $starttime $currenttime | awk '{print $2-$1}'`
if ($totaltime > $maxruntime) then
echo "
Warning: Exceeded maximum allowed run-time. You will have to try fewer simulations. Note that sometimes a run-time overflow may be the result of server overload, in which case you could try running your job at a different time."
exit 1
endif
rm -f calcprob.obs.dat calcprob.mcs.dat calcprob.mcd.dat
@ seed += 2
@ run += 1
set seq2 = `echo $i | awk -F: '{printf "seq.%s\n",$1}'`
set seq3 = `echo $i | awk -F: '{printf "seq.%s\n",$2}'`
@ ttt = `echo $i | awk -F: '{print $3}'`
head -$run tree.dat | tail -1 | \
awk '{print $4-$3,$10,$11,$15,$16,$17}' > calcprob.obs.dat
if ($ttt > 0) then
foreach model (0 1)
echo $refseq2.fasta " " > setup.mcsim.$model
echo $seq2.fasta " " >> setup.mcsim.$model
echo $seq3.fasta " " >> setup.mcsim.$model
echo $wholeseq $range1 $range2 " " >> setup.mcsim.$model
echo $model " " >> setup.mcsim.$model
echo $ttt $ttt " " >> setup.mcsim.$model
echo 0 " " >> setup.mcsim.$model
echo 1 " " >> setup.mcsim.$model
echo $nsim2 " " >> setup.mcsim.$model
echo $seed " " >> setup.mcsim.$model
echo $circular " " >> setup.mcsim.$model
cp ../$refseq2.fasta . #This is not written over.
cp ../$seq2.fasta . #NB - this is written over by one of the sims.
cp ../$seq3.fasta . #NB - this is written over by one of the sims.
mcsim setup.mcsim.$model >>& ../mclog.txt || exit 1
ls mcseq.[0-9]* > temp1
echo $refseq2.fasta > seqs.dat
rm -f pairs.dat
touch pairs.dat
@ count = 2
foreach i (`cat temp1`)
set id = `echo $count | awk '{printf "%03i\n",$1}'`
mv $i seq.$id.fasta
echo seq.$id.fasta >> seqs.dat
echo 001 $id >> pairs.dat
@ count += 1
end
mlogd setup.mlogd >>& ../mclog.txt || exit 1
foreach i (gaplog gappos glitches info MLElog stoplog stoppos1 stoppos2 startpos1 startpos2 treesum)
rm -f mlogd_out.$i
end
foreach i (temp1 temp2 pairs.dat seqs.dat seq.???.fasta temp.fghjys)
rm -f $i
end
awk '{print $4-$3,$10,$11,$15,$16,$17}' mlogd_out.dat \
> calcprob.mc.$model.dat
cp calcprob.mc.$model.dat treedat.$model.$run
rm -f mlogd_out.dat
end
calcprob
cat calcprob.out.dat >> tree.mc.probs
else
echo 0 0 0 0 0 0 >> tree.mc.probs
endif
end
rm -f temp0
foreach model (0 1)
rm -f treesum.$model
ls treedat.$model.* > treedatfiles.$model
@ simcount = 0
while ($simcount < $nsim2)
@ simcount += 1
rm -f temp1
foreach file (`cat treedatfiles.$model`)
head -$simcount $file | tail -1 >> temp1
end
awk '{s1+=$1}{s2+=$2}{s3+=$3}{s4+=$4}{s5+=$5}{s6+=$6}END{print s1*0.25,s2*0.25,s3*0.25,s4*0.25,s5*0.25,s6*0.25}' temp1 >> treesum.$model
end
end
rm -f temp1
set lambdasum = `head -2 tree.info | tail -1 | awk '{printf "%.3f\n",$1*0.25}'`
set ntmlesum = `awk '{s+=($4-$3)}END{printf "%.3f\n",s*0.25}' tree.dat`
awk '{s1+=($4-$3)}{s2+=$10}{s3+=$11}{s4+=$15}{s5+=$16}{s6+=$17}END{print "'"$lambdasum"'",s1*0.25,s2*0.25,s3*0.25,s4*0.25,s5*0.25,s6*0.25}' tree.dat > treesum.dat
touch treesum.dat
foreach model (0 1)
@ nsims = `wc -l treesum.$model | awk '{print $1}'`
if ($nsims > 0) then
@ am = `echo $nsims | awk '{print int(0.5+0.50*$1)}'`
@ al = `echo $nsims | awk '{print int(0.5+0.16*$1)}'`
if ($al < 1) then
@ al = 1
endif
@ au = `echo $nsims | awk '{print $1-"'"$al"'"+1}'`
set st2l = `awk '{print $1}' treesum.$model | sort -n | head -$al | tail -1`
set st3l = `awk '{print $2}' treesum.$model | sort -n | head -$al | tail -1`
set st4l = `awk '{print $3}' treesum.$model | sort -n | head -$al | tail -1`
set st5l = `awk '{print $4}' treesum.$model | sort -n | head -$al | tail -1`
set st6l = `awk '{print $5}' treesum.$model | sort -n | head -$al | tail -1`
set st7l = `awk '{print $6}' treesum.$model | sort -n | head -$al | tail -1`
set st2m = `awk '{print $1}' treesum.$model | sort -n | head -$am | tail -1`
set st3m = `awk '{print $2}' treesum.$model | sort -n | head -$am | tail -1`
set st4m = `awk '{print $3}' treesum.$model | sort -n | head -$am | tail -1`
set st5m = `awk '{print $4}' treesum.$model | sort -n | head -$am | tail -1`
set st6m = `awk '{print $5}' treesum.$model | sort -n | head -$am | tail -1`
set st7m = `awk '{print $6}' treesum.$model | sort -n | head -$am | tail -1`
set st2u = `awk '{print $1}' treesum.$model | sort -n | head -$au | tail -1`
set st3u = `awk '{print $2}' treesum.$model | sort -n | head -$au | tail -1`
set st4u = `awk '{print $3}' treesum.$model | sort -n | head -$au | tail -1`
set st5u = `awk '{print $4}' treesum.$model | sort -n | head -$au | tail -1`
set st6u = `awk '{print $5}' treesum.$model | sort -n | head -$au | tail -1`
set st7u = `awk '{print $6}' treesum.$model | sort -n | head -$au | tail -1`
echo $lambdasum $st2m $st3m $st4m $st5m $st6m $st7m >> tebm.$model
echo $lambdasum $st2l $st3l $st4l $st5l $st6l $st7l >> tebl.$model
echo $lambdasum $st2u $st3u $st4u $st5u $st6u $st7u >> tebu.$model
else
foreach tebfile (tebm.$model tebl.$model tebu.$model)
echo 0 0 0 0 0 0 0 >> $tebfile
end
endif
end
#------------------------------------------------------------------------------
echo "...Done
"
echo -n "Making plots...
"
#Process mcsim.R and plot.
set date = `date +%y-%m-%d`
@ w = `wc -l ref.dat | awk '{print $1}'`
if ($w == 0) then
set ccc1 = "#"
echo "Error: can't find 'reference v. non-reference pairs' plot data.
"
else
set ccc1 = ""
endif
@ w = `wc -l ref.mc.0.dat | awk '{print $1}'`
if ($w == 0) then
set ccc2 = "#"
echo "Error: can't find null model GD simulations plot data.
"
else
set ccc2 = ""
endif
@ w = `wc -l ref.mc.1.dat | awk '{print $1}'`
if ($w == 0) then
set ccc3 = "#"
echo "Error: can't find alternate model GD simulations plot data.
"
else
set ccc3 = ""
endif
@ w = `wc -l treesum.dat | awk '{print $1}'`
if ($w == 0) then
set ccc4 = "#"
echo "Error: can't find 'sum over tree' plot data.
"
else
set ccc4 = ""
endif
@ w1 = `wc -l ebm.0 | awk '{print $1}'`
@ w2 = `wc -l ebl.0 | awk '{print $1}'`
@ w3 = `wc -l ebu.0 | awk '{print $1}'`
@ w4 = `wc -l ebm.1 | awk '{print $1}'`
@ w5 = `wc -l ebl.1 | awk '{print $1}'`
@ w6 = `wc -l ebu.1 | awk '{print $1}'`
if ($w1 == 0 || $w2 == 0 || $w3 == 0 || $w4 == 0 || $w5 == 0 || $w6 == 0) then
set ccc5 = "#"
echo "Error: can't find 'reference v. non-reference pairs' EB plot data.
"
else
set ccc5 = ""
endif
@ w1 = `wc -l tebm.0 | awk '{print $1}'`
@ w2 = `wc -l tebl.0 | awk '{print $1}'`
@ w3 = `wc -l tebu.0 | awk '{print $1}'`
@ w4 = `wc -l tebm.1 | awk '{print $1}'`
@ w5 = `wc -l tebl.1 | awk '{print $1}'`
@ w6 = `wc -l tebu.1 | awk '{print $1}'`
if ($w1 == 0 || $w2 == 0 || $w3 == 0 || $w4 == 0 || $w5 == 0 || $w6 == 0) then
set ccc6 = "#"
echo "Error: can't find 'sum over tree' EB plot data.
"
else
set ccc6 = ""
endif
cat ../../../SCRIPTS/mcsim.R | \
sed 's/ddd/'$date'/g' | sed 's/lll/'$lambdasum'/g' | \
sed 's/#ccc1/'$ccc1'/g' | sed 's/#ccc2/'$ccc2'/g' | \
sed 's/#ccc3/'$ccc3'/g' | sed 's/#ccc4/'$ccc4'/g' | \
sed 's/#ccc5/'$ccc5'/g' | sed 's/#ccc6/'$ccc6'/g' | \
sed 's/%sss%/'$refseq'/g' | sed 's/%ggg%/'$title'/g' > plot2.R
R --no-save --slave < plot2.R >>& ../mclog.txt
epstopdf mcsim.eps
mv mcsim.png ../mcsim.$count1.png
mv mcsim.eps ../mcsim.$count1.eps
mv mcsim.pdf ../mcsim.$count1.pdf
sed 's/ccc/'$count1'/g' ../../../SCRIPTS/mcsim.plotdata.html \
> ../mcsim.plotdata.$count1.html
echo "Error bar data." > ../eb.$count1.txt
echo " " >> ../eb.$count1.txt
echo "non-reference divergence MLOGD frac frac frac frac frac" >> ../eb.$count1.txt
echo "sequence name (mean muts syn nonsyn N1 N2 N3" >> ../eb.$count1.txt
echo " per nt) muts muts muts muts muts" >> ../eb.$count1.txt
echo " " >> ../eb.$count1.txt
echo "Null model, medians." >> ../eb.$count1.txt
paste ref.names ebm.0 | awk '{printf "%20s %8.3f%8.3f%8.3f%8.3f%8.3f%8.3f%8.3f\n",$1,$2,$3,$4,$5,$6,$7,$8}' >> ../eb.$count1.txt
awk '{printf " Sum over tree: %8.3f%8.3f%8.3f%8.3f%8.3f%8.3f%8.3f\n",$1,$2,$3,$4,$5,$6,$7}' tebm.0 >> ../eb.$count1.txt
echo " " >> ../eb.$count1.txt
echo "Null model, 16th percentile." >> ../eb.$count1.txt
paste ref.names ebl.0 | awk '{printf "%20s %8.3f%8.3f%8.3f%8.3f%8.3f%8.3f%8.3f\n",$1,$2,$3,$4,$5,$6,$7,$8}' >> ../eb.$count1.txt
awk '{printf " Sum over tree: %8.3f%8.3f%8.3f%8.3f%8.3f%8.3f%8.3f\n",$1,$2,$3,$4,$5,$6,$7}' tebl.0 >> ../eb.$count1.txt
echo " " >> ../eb.$count1.txt
echo "Null model, 84th percentile." >> ../eb.$count1.txt
paste ref.names ebu.0 | awk '{printf "%20s %8.3f%8.3f%8.3f%8.3f%8.3f%8.3f%8.3f\n",$1,$2,$3,$4,$5,$6,$7,$8}' >> ../eb.$count1.txt
awk '{printf " Sum over tree: %8.3f%8.3f%8.3f%8.3f%8.3f%8.3f%8.3f\n",$1,$2,$3,$4,$5,$6,$7}' tebu.0 >> ../eb.$count1.txt
echo " " >> ../eb.$count1.txt
echo "Alternate model, medians." >> ../eb.$count1.txt
paste ref.names ebm.1 | awk '{printf "%20s %8.3f%8.3f%8.3f%8.3f%8.3f%8.3f%8.3f\n",$1,$2,$3,$4,$5,$6,$7,$8}' >> ../eb.$count1.txt
awk '{printf " Sum over tree: %8.3f%8.3f%8.3f%8.3f%8.3f%8.3f%8.3f\n",$1,$2,$3,$4,$5,$6,$7}' tebm.1 >> ../eb.$count1.txt
echo " " >> ../eb.$count1.txt
echo "Alternate model, 16th percentile." >> ../eb.$count1.txt
paste ref.names ebl.1 | awk '{printf "%20s %8.3f%8.3f%8.3f%8.3f%8.3f%8.3f%8.3f\n",$1,$2,$3,$4,$5,$6,$7,$8}' >> ../eb.$count1.txt
awk '{printf " Sum over tree: %8.3f%8.3f%8.3f%8.3f%8.3f%8.3f%8.3f\n",$1,$2,$3,$4,$5,$6,$7}' tebl.1 >> ../eb.$count1.txt
echo " " >> ../eb.$count1.txt
echo "Alternate model, 84th percentile." >> ../eb.$count1.txt
paste ref.names ebu.1 | awk '{printf "%20s %8.3f%8.3f%8.3f%8.3f%8.3f%8.3f%8.3f\n",$1,$2,$3,$4,$5,$6,$7,$8}' >> ../eb.$count1.txt
awk '{printf " Sum over tree: %8.3f%8.3f%8.3f%8.3f%8.3f%8.3f%8.3f\n",$1,$2,$3,$4,$5,$6,$7}' tebu.1 >> ../eb.$count1.txt
echo " " >> ../eb.$count1.txt
echo "Original reference - non-reference sequence data." > ../ref.dat.$count1.txt
echo " " >> ../ref.dat.$count1.txt
echo "non-reference divergence MLOGD frac frac frac frac frac" >> ../ref.dat.$count1.txt
echo "sequence name (mean muts syn nonsyn N1 N2 N3" >> ../ref.dat.$count1.txt
echo " per nt) muts muts muts muts muts" >> ../ref.dat.$count1.txt
echo " " >> ../ref.dat.$count1.txt
paste ref.names ref.dat | awk '{if (0.+$13>0.) printf "%20s %8.3f%8.3f%8.3f%8.3f%8.3f%8.3f%8.3f\n",$1,$14/$13,$5-$4,$11,$12,$16,$17,$18}' >> ../ref.dat.$count1.txt
cat treesum.dat | awk '{printf " Sum over tree: %8.3f%8.3f%8.3f%8.3f%8.3f%8.3f%8.3f\n",$1,$2,$3,$4,$5,$6,$7}' >> ../ref.dat.$count1.txt
echo "General distribution of statistics for null model simulations." > ../ref.mc0.$count1.txt
echo " " >> ../ref.mc0.$count1.txt
echo "divergence MLOGD frac frac frac frac frac" >> ../ref.mc0.$count1.txt
echo "(mean muts syn nonsyn N1 N2 N3" >> ../ref.mc0.$count1.txt
echo " per nt) muts muts muts muts muts" >> ../ref.mc0.$count1.txt
echo " " >> ../ref.mc0.$count1.txt
awk '{if (0.+$12>0.) printf "%10.3f%8.3f%8.3f%8.3f%8.3f%8.3f%8.3f\n",$13/$12,$4-$3,$10,$11,$15,$16,$17}' ref.mc.0.dat | sort -n \
>> ../ref.mc0.$count1.txt
echo "General distribution of statistics for alternate model simulations." > ../ref.mc1.$count1.txt
echo " " >> ../ref.mc1.$count1.txt
echo "divergence MLOGD frac frac frac frac frac" >> ../ref.mc1.$count1.txt
echo "(mean muts syn nonsyn N1 N2 N3" >> ../ref.mc1.$count1.txt
echo " per nt) muts muts muts muts muts" >> ../ref.mc1.$count1.txt
echo " " >> ../ref.mc1.$count1.txt
awk '{if (0.+$12>0.) printf "%10.3f%8.3f%8.3f%8.3f%8.3f%8.3f%8.3f\n",$13/$12,$4-$3,$10,$11,$15,$16,$17}' ref.mc.1.dat | sort -n \
>> ../ref.mc1.$count1.txt
echo "...Done
"
#------------------------------------------------------------------------------
#Do table of mcsim probs
cat ../../../SCRIPTS/mctableheader.txt > ../mcsim.table
paste ref.names ref.dat ref.mc.probs | awk '{if (0.+$13>0.) printf "