// This software is Copyright (C) 2005 by Andrew Firth, University of Otago, // Dunedin, New Zealand. All rights reserved. // Whereas ntadjust.cxx adjusts coords in the ungapped sequence to coords in // the gapped sequence, this programme adjusts coords in the gapped sequence // to coords in the ungapped sequence. #include #include #include #include #include #include using namespace std; #define maxlength 50000 int main(int argc, char* argv[]) { int i, nlines, seq1g[maxlength], nseq1, count; char name[1000], inputseq1[maxlength], seqline[1000]; int orf1g = atoi(argv[2]); if (3 != argc) { cerr << "Usage '" << argv[0] << " gapped_sequence coordinate'.\n"; exit(EXIT_FAILURE); } // Initialize arrays. for (i = 0; i < maxlength; ++i) { seq1g[i] = 9; } // Read initial sequence files. ('U' or 'T' = 0, 'C' = 1, 'A' = 2, 'G' = 3, // unfilled part of array = 9, gaps ('-' and '.') = 8. utcga also accepted. // Also accepts standard ambiguous nt codes. Aborts if any other characters. ifstream sequence1(argv[1]); if (!sequence1) { cerr << "Aborting: can't find file '" << argv[1] << "'.\n"; exit(EXIT_FAILURE); } nlines = 0; while (sequence1.ignore(1000, '\n')) { ++nlines; } sequence1.clear(); sequence1.seekg(0); sequence1.getline(name, 1000, '\n'); for (i = 0; i < nlines - 1; ++i) { sequence1.getline(seqline, 1000, '\n'); if (0 == i) { strcpy(inputseq1, seqline); } else { strcat(inputseq1, seqline); } } sequence1.close(); for (i = 0; ; ++i) { if (inputseq1[i] == 'U' || inputseq1[i] == 'u') { seq1g[i] = 0; } else if (inputseq1[i] == 'T' || inputseq1[i] == 't') { seq1g[i] = 0; } else if (inputseq1[i] == 'C' || inputseq1[i] == 'c') { seq1g[i] = 1; } else if (inputseq1[i] == 'A' || inputseq1[i] == 'a') { seq1g[i] = 2; } else if (inputseq1[i] == 'G' || inputseq1[i] == 'g') { seq1g[i] = 3; } else if (inputseq1[i] == '.' || inputseq1[i] == '-') { seq1g[i] = 8; } else if (inputseq1[i] == 'M' || inputseq1[i] == 'm' || inputseq1[i] == 'R' || inputseq1[i] == 'r' || inputseq1[i] == 'W' || inputseq1[i] == 'w' || inputseq1[i] == 'V' || inputseq1[i] == 'v' || inputseq1[i] == 'H' || inputseq1[i] == 'h' || inputseq1[i] == 'D' || inputseq1[i] == 'd' || inputseq1[i] == 'B' || inputseq1[i] == 'b' || inputseq1[i] == 'S' || inputseq1[i] == 's' || inputseq1[i] == 'Y' || inputseq1[i] == 'y' || inputseq1[i] == 'K' || inputseq1[i] == 'k' || inputseq1[i] == 'N' || inputseq1[i] == 'n' || inputseq1[i] == 'X' || inputseq1[i] == 'x') { seq1g[i] = 5; } else if (inputseq1[i] == '\0') { nseq1 = i; break; } else { cerr << "\nAborting: unknown nucleotide '" << inputseq1[i] << "' at position " << i << " in original sequence.\n"; exit(EXIT_FAILURE); } } //Adjust orf1g for introduced gaps. count = 0; //Counts number of gaps up to orf1g orf1g -= 1; //Since C++ starts arrays from 0 for (i = 0; i <= orf1g; ++i) { if (seq1g[i] > 6) { count++; } } orf1g += 1; //Since C++ starts arrays from 0 orf1g -= count; cout << orf1g << "\n"; }