/* # Copyright (C) 2005 Andrew E Firth, University of Otago, Dunedin, # New Zealand, aef(at)sanger.otago.ac.nz # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License (version 2) as # published by the Free Software Foundation. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. */ // Programming for making running means of columns 4 and 5 in input logfile. // Command line input: file name and halfwindow size n (window is 2n + 1). // Leaves columns 1, 2 and 3 unchanged. // Assumes input file is in nucleotide order. // To skip gaps (etc) just miss those nucleotides out of the input file. #include #include #include #include #include #include using namespace std; #define maxlength 50000 int main(int argc, char* argv[]) { if (4 != argc) { cerr << "Aborting; use '" << argv[0] << " logfile window_half_size circular'.\n"; exit(EXIT_FAILURE); } int verbose = 0; int j, k, nlines, col1[maxlength], col2[maxlength], col3[maxlength]; int n, hfw, circular; double col4[maxlength], col5[maxlength], sum1, sum2; hfw = atoi(argv[2]); circular = atoi(argv[3]); // Initialize arrays for (k = 0; k < maxlength; ++k) { col1[k] = col2[k] = col3[k] = 0; col4[k] = col5[k] = 0.; } // Open log file. ifstream logfile(argv[1]); if (!logfile) { cerr << "Aborting: can't find log file '" << argv[1] << "'.\n"; exit(EXIT_FAILURE); } // Number of lines in log file nlines = -1; while (logfile.ignore(1000, '\n')) { ++nlines; } logfile.clear(); logfile.seekg(0); if (verbose) { cout << "There are " << nlines << " data lines.\n\n"; } // Read in logfile for (k = 0; k < nlines; ++k) { logfile >> col1[k]; logfile >> col2[k]; logfile >> col3[k]; logfile >> col4[k]; logfile >> col5[k]; logfile.ignore(1000, '\n'); } // Running mean for (k = 0; k < nlines; ++k) { sum1 = sum2 = 0.; n = 0; for (j = k - hfw; j <= k + hfw; ++j) { if (!circular && j >= 0 && j < nlines || circular) { sum1 += col4[(j+nlines)%nlines]; sum2 += col5[(j+nlines)%nlines]; n += 1; } } if (0 != n) { cout << col1[k] << " " << col2[k] << " " << col3[k] << " " << sum1/float(n) << " " << sum2/float(n) << "\n"; } else { cout << col1[k] << " " << col2[k] << " " << col3[k] << " " << 0 << " " << 0 << "\n"; } } }