#!/bin/csh
setenv _POSIX2_VERSION 199209
# Copyright (C) 2005 Andrew E Firth, University of Otago, Dunedin,
# New Zealand, aef(at)sanger.otago.ac.nz
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License (version 2) as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
#Runs mlogd programme and makes plots.
limit stacksize unlimited
@ argc = `echo $argv | awk '{print NF}'`
if ($argc != 4) then
echo "Usage: domlogd newdir newurl mode number
"
exit 1
endif
set newdir = $1
set newurl = $2
cd $newdir
source mlogd.param
if (! $download) then
setenv PATH ${PATH}:/home/aef/bin
set formdir = "/aef/MLOGD/FORM"
else
setenv PATH ${PATH}:${HOME}/bin
set formdir = "../../FORM"
endif
set mode = $3
set number = $4
set halfwindow = 10
cp ../../SCRIPTS/mlenuc.plotdata.html .
if ($mode == "0") then
echo -n "Running MLOGD...
"
endif
#Estimated run-time
@ maxruntime = 200
set qseqs = `wc -l pairs.ref.dat | awk '{print $1+1}'`
set qpairs = `wc -l pairs.tree.dat | awk '{print $1}'`
set qquery = `head -1 setup.mlogd | awk '{print $3-$2+1}'`
@ runtime = `echo $qseqs $qpairs $qlength $qquery | awk '{print int(5+(0.000084*$2*$4)+(0.000007*$1*$3)+(0.00048*$1*$4))}'`
echo "Estimated run-time: $runtime seconds.
"
if ($runtime > $maxruntime) then
echo "
Estimated run-time is too long - your submission won't be processed.
"
echo "You could try using fewer input sequences (e.g. first construct a phylogenetic tree, and pick a subset of 'representative' sequences), fewer sequence pairs, a smaller nucleotide range (e.g. use the 'Only use nt within query CDS(s)' option), or spliting your query up into subsections.
"
echo "Maximum run-time is 200 seconds. Run-time estimated by 5 + (0.000084 x #pairs x nt-range) + (0.000007 x #sequences x sequence-length) + (0.00048 x #sequences x nt-range) seconds.
"
exit 1
endif
#Make refseq to align coords conversion file.
tail +2 seq.001.fasta | sed 's/./&\n/g' | grep "." | \
awk '{if ($1!="-") s+=1}{if ($1!="-") {printf "%6s %6s %6s\n",NR,s,$1} else \
{printf "%6s %6s %6s\n",NR,"-",$1}}' > coords.txt
#Run mlogd for ref-nonref pairs.
cp pairs.ref.dat pairs.dat
mlogd setup.mlogd >>& errorlog.txt || exit 1
foreach suffix (dat info MLElog treesum stoplog gaplog gappos stoppos1 stoppos2 startpos1 startpos2 glitches)
mv mlogd_out.$suffix ref.$suffix
touch ref.$suffix
end
set length = `head -1 ref.info | awk '{print $1}'`
set qbase1 = `head -1 ref.info | awk '{print $2}'`
set qbase2 = `head -1 ref.info | awk '{print $3}'`
set base1 = 1
set base2 = $length
awk '{printf "0 %4i %6i\n",$1,$2}' ref.stoppos1 > stops.data.txt
awk '{printf "1 %4i %6i\n",$1,$2}' ref.stoppos2 >> stops.data.txt
awk '{printf "0 %4i %6i\n",$1,$2}' ref.startpos1 > starts.data.txt
awk '{printf "1 %4i %6i\n",$1,$2}' ref.startpos2 >> starts.data.txt
awk '{printf "%4i %6i\n",$1,$2}' ref.gappos > gaps.data.txt
#Make running mean tracks for ref-nonref pairs.
# Note MLElog only written if not gapped in either sequence so runmean
# jumps gapped regions.
awk '{print $2}' ref.MLElog | sort | uniq > temp1
rm -f ref.runmean
touch ref.runmean
foreach nrseq (`cat temp1`)
awk '{if ($2=="'"$nrseq"'") print $0}' ref.MLElog > temp2
runmean2 temp2 $halfwindow $circular >> ref.runmean || exit 1
end
touch ref.runmean
rm -f temp[12]
#Run mlogd for tree pairs (both forward and backward pairs).
cp pairs.tree.dat pairs.dat
awk '{print $2,$1}' pairs.tree.dat >> pairs.dat
mlogd setup.mlogd >>& errorlog.txt || exit 1
foreach suffix (dat info MLElog treesum stoplog gaplog gappos stoppos1 stoppos2 startpos1 startpos2 glitches)
mv mlogd_out.$suffix tree.$suffix
touch tree.$suffix
end
set lambdasum = `head -2 tree.info | tail -1 | awk '{printf "%.2f\n",$1*0.25}'`
set ntmlesum = `awk '{s+=($4-$3)}END{printf "%.2f\n",s*0.25}' tree.dat`
#Scale treesum
awk '{print $1,$2,$3*0.25,$4*0.25,$5*0.25,$6}' tree.treesum > temp1
mv temp1 tree.treesum
#Make running mean tracks for ungapped regions of treesum
set npairs = `wc -l pairs.dat | awk '{print $1}'`
awk '{if ($2=="'"$npairs"'") print $1,$2,0,$4,$5}' tree.treesum \
> temp1
runmean2 temp1 $halfwindow $circular > tree.runmean || exit 1
touch tree.runmean
rm -f temp[12]
if ($mode == "0") then
echo "...Done
"
echo -n "Making plots...
"
endif
set refseq2 = `awk '{if ($1=="'"$refseq"'") print $2}' seqs.key`
#Null model orfs. Change from refseq to alignment coords.
@ norfs1 = `wc -l orfs.1 | awk '{print $1}'`
if ($norfs1 > 0) then
cut -d " " -f 2- orfs.1 | sed 's/[^0-9]/\n/g' | grep "[0-9]" \
> temp
rm -f temp1; touch temp1
foreach i (`cat temp`)
echo `ntadjust $refseq2.fasta $i` >> temp1
end
set orf1x1 = `awk '{if (NR%2==1) printf "%s,",$0}' temp1 | sed 's/,$//g'`
set orf1x2 = `awk '{if (NR%2==0) printf "%s,",$0}' temp1 | sed 's/,$//g'`
else
set orf1x1 = "0"
set orf1x2 = "0"
endif
rm -f temp
#Alternate model orfs. Change from refseq to alignment coords.
@ norfs2 = `wc -l orfs.2 | awk '{print $1}'`
if ($norfs2 > 0) then
cut -d " " -f 2- orfs.2 | sed 's/[^0-9]/\n/g' | grep "[0-9]" \
> temp
rm -f temp1; touch temp1
foreach i (`cat temp`)
echo `ntadjust $refseq2.fasta $i` >> temp1
end
set orf2x1 = `awk '{if (NR%2==1) printf "%s,",$0}' temp1 | sed 's/,$//g'`
set orf2x2 = `awk '{if (NR%2==0) printf "%s,",$0}' temp1 | sed 's/,$//g'`
else
set orf2x1 = "0"
set orf2x2 = "0"
endif
rm -f temp
#Process mlogd.R and plot.
set nseqs = `wc -l pairs.ref.dat | awk '{print $1+1}'`
set date = `date +%y-%m-%d`
set window = `echo $halfwindow | awk '{print 1+$1*2}'`
awk '{printf "%14.6f\n",sqrt(($5-$4)*($5-$4))}' ref.runmean > temp1
set yyyscale = `minmax temp1 | awk '{if (0.+$2>0.) {print 0.5/$2} else {print "0"}}'`
rm -f temp1
awk '{printf "%14.6f\n",$5-$4}' tree.runmean > temp1
echo 0 >> temp1
set y2min = `minmax temp1 | awk '{print $1}'`
set y2max = `minmax temp1 | awk '{print $2}'`
rm -f temp1
awk '{print $2}' pairs.ref.dat > temp1
rm -f temp2
touch temp2
foreach i (`cat temp1`)
set id = seq.$i
set name = `awk '{if ($2=="'"$id"'") print $1}' seqs.key`
echo $name >> temp2
end
set sss1 = `awk '{printf "%s,",(NR+1-0.5)/"'"$nseqs"'"}' temp2 | sed 's/,$//'`
set sss2 = `awk '{printf "@%s@,",$1}' temp2 | sed 's/,$//'`
set sss3 = `awk '{if (0.+$12>0.) {printf "%.2f,",$13/$12} else {printf "%.2f,","0"}}' ref.dat | sed 's/,$//'`
awk '{print $1}' temp2 > ref.names
rm -f temp[12]
#Check for empty files to edit out of R script to stop R from barfing.
@ w = `wc -l ref.dat | awk '{print $1}'`
if ($w == 0) then
set vvv1 = "#"
else
set vvv1 = ""
endif
@ w = `wc -l coords.txt | awk '{print $1}'`
if ($w == 0) then
set kkk0 = "#"
else
set kkk0 = ""
endif
@ w = `wc -l ref.MLElog | awk '{print $1}'`
if ($w == 0) then
set kkk1 = "#"
else
set kkk1 = ""
endif
@ w = `wc -l ref.stoppos1 | awk '{print $1}'`
if ($w == 0) then
set kkk2 = "#"
else
set kkk2 = ""
endif
@ w = `wc -l ref.stoppos2 | awk '{print $1}'`
if ($w == 0) then
set kkk3 = "#"
else
set kkk3 = ""
endif
@ w = `wc -l ref.gappos | awk '{print $1}'`
if ($w == 0) then
set kkk4 = "#"
else
set kkk4 = ""
endif
@ w = `wc -l tree.treesum | awk '{print $1}'`
if ($w == 0) then
set kkk5 = "#"
else
set kkk5 = ""
endif
@ w = `wc -l ref.runmean | awk '{print $1}'`
if ($w == 0) then
set kkk6 = "#"
else
set kkk6 = ""
endif
@ w = `wc -l tree.runmean | awk '{print $1}'`
if ($w == 0) then
set kkk7 = "#"
else
set kkk7 = ""
endif
if ($mode == "0") then
set kkk8 = "#"
set kkk9 = ""
else
set kkk8 = ""
set kkk9 = "#"
endif
cat ../../SCRIPTS/mlogd.R | \
sed 's/nnn/'$nseqs'/g' | sed 's/ddd/'$date'/g' | \
sed 's/bbb1/'$base1'/g' | sed 's/bbb2/'$base2'/g' | \
sed 's/bbbq1/'$qbase1'/g' | sed 's/bbbq2/'$qbase2'/g' | \
sed 's/www/'$window'/g' | sed 's/lll/'$lambdasum'/g' | \
sed 's/orf1x1/'$orf1x1'/g' | sed 's/orf1x2/'$orf1x2'/g' | \
sed 's/orf2x1/'$orf2x1'/g' | sed 's/orf2x2/'$orf2x2'/g' | \
sed 's/yyyscale/'$yyyscale'/g' | sed 's/fff/'$ntmlesum'/g' | \
sed 's/#vvv1/'$vvv1'/g' | sed 's/#kkk0/'$kkk0'/g' | \
sed 's/#kkk1/'$kkk1'/g' | sed 's/#kkk2/'$kkk2'/g' | \
sed 's/#kkk3/'$kkk3'/g' | sed 's/#kkk4/'$kkk4'/g' | \
sed 's/#kkk5/'$kkk5'/g' | sed 's/#kkk6/'$kkk6'/g' | \
sed 's/#kkk7/'$kkk7'/g' | \
sed 's/#kkk8/'$kkk8'/g' | sed 's/#kkk9/'$kkk9'/g' | \
sed 's/yyy2max/'$y2max'/g' | sed 's/yyy2min/'$y2min'/g' | \
sed 's/sss3/'$sss3'/g' | sed 's/sss1/'$sss1'/g' | \
sed 's/sss2/'$sss2'/g' | sed 's/@/"/g' | \
sed 's/%sss%/'$refseq'/g' | sed 's/%ggg%/'$title'/g' > plot1.R
R --no-save --slave < plot1.R >>& errorlog.txt
epstopdf MLEnuc.eps
epstopdf MLEscatter.eps
if ($mode == "0") then
echo "...Done