/* # Copyright (C) 2005 Andrew E Firth, University of Otago, Dunedin, # New Zealand, aef(at)sanger.otago.ac.nz # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License (version 2) as # published by the Free Software Foundation. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. */ #include #include #include #include #include #include using namespace std; #define maxlength 50000 int main(int argc, char* argv[]) { int i, nlines, seq1g[maxlength], nseq1; char name[1000], inputseq1[maxlength], seqline[1000]; int orf1g = atoi(argv[2]); if (3 != argc) { cerr << "Usage '" << argv[0] << " gapped_sequence coordinate'.\n"; exit(EXIT_FAILURE); } // Initialize arrays. for (i = 0; i < maxlength; ++i) { seq1g[i] = 9; } // Read initial sequence files. ('U' or 'T' = 0, 'C' = 1, 'A' = 2, 'G' = 3, // unfilled part of array = 9, gaps ('-' and '.') = 8. utcga also accepted. // Also accepts standard ambiguous nt codes. Aborts if any other characters. ifstream sequence1(argv[1]); if (!sequence1) { cerr << "Aborting: can't find file '" << argv[1] << "'.\n"; exit(EXIT_FAILURE); } nlines = 0; while (sequence1.ignore(1000, '\n')) { ++nlines; } sequence1.clear(); sequence1.seekg(0); sequence1.getline(name, 1000, '\n'); for (i = 0; i < nlines - 1; ++i) { sequence1.getline(seqline, 1000, '\n'); if (0 == i) { strcpy(inputseq1, seqline); } else { strcat(inputseq1, seqline); } } sequence1.close(); for (i = 0; ; ++i) { if (inputseq1[i] == 'U' || inputseq1[i] == 'u') { seq1g[i] = 0; } else if (inputseq1[i] == 'T' || inputseq1[i] == 't') { seq1g[i] = 0; } else if (inputseq1[i] == 'C' || inputseq1[i] == 'c') { seq1g[i] = 1; } else if (inputseq1[i] == 'A' || inputseq1[i] == 'a') { seq1g[i] = 2; } else if (inputseq1[i] == 'G' || inputseq1[i] == 'g') { seq1g[i] = 3; } else if (inputseq1[i] == '.' || inputseq1[i] == '-') { seq1g[i] = 8; } else if (inputseq1[i] == 'M' || inputseq1[i] == 'm' || inputseq1[i] == 'R' || inputseq1[i] == 'r' || inputseq1[i] == 'W' || inputseq1[i] == 'w' || inputseq1[i] == 'V' || inputseq1[i] == 'v' || inputseq1[i] == 'H' || inputseq1[i] == 'h' || inputseq1[i] == 'D' || inputseq1[i] == 'd' || inputseq1[i] == 'B' || inputseq1[i] == 'b' || inputseq1[i] == 'S' || inputseq1[i] == 's' || inputseq1[i] == 'Y' || inputseq1[i] == 'y' || inputseq1[i] == 'K' || inputseq1[i] == 'k' || inputseq1[i] == 'N' || inputseq1[i] == 'n' || inputseq1[i] == 'X' || inputseq1[i] == 'x') { seq1g[i] = 5; } else if (inputseq1[i] == '\0') { nseq1 = i; break; } else { cerr << "\nAborting: unknown nucleotide '" << inputseq1[i] << "' at position " << i << " in original sequence.\n"; exit(EXIT_FAILURE); } } //Adjust orf1g for introduced gaps. orf1g -= 1; //Since C++ starts arrays from 0 for (i = 0; i <= orf1g; ++i) { if (seq1g[i] > 6) { orf1g++; } } orf1g += 1; //Since C++ starts arrays from 0 cout << orf1g << "\n"; }