require "rnahybrid_module.pm";
require "rnafold_module.pm";
require "iupac_module.pm";
use List::MoreUtils qw(first_index);

sub run_struct_calc_dir {
	my ($dir, $mfe_cutoff, $h_constr, $hcs, $hce, $tt_skip, $tail_cut, $session) = @_;
	
	my $lookup = "$dir/" . "rh_lookup.csv";
	
	open(CSV, $lookup);
	my @csv_rows = <CSV>;
	close(CSV);
	
	open(CSVR, ">>$dir/genome_tracr_summary.csv");
	
	print CSVR "ORGANISM,CONTIG_ID,RR_CONTIG,RR_ID,CD_SCORE,REPEAT_SEQ,RR_START,RR_END,RR_ORI,CAS9_INFO," .
	            "BLAST_HSP_START,BLAST_HSP_END,BLAST_EVALUE,RC_OF_HSP_EXTRACT,". 
	            "HSP_EXTRACT_ID,RH_MFE,HYBRID_SEQS,HYBRID_STRUCT,TAIL_SEQ,TAIL_STRUCT,REPEAT_TRACR,RH_RF_STRUCT,RCF_STRUCT," .
	            "TRACR_RNA_SEQ,TRACR_START,TRACR_END,TRACR_ORI\n";
	
	my $count = 0;
	for (my $i = 0; $i < scalar(@csv_rows); $i++) {
		my $lookup_line = @csv_rows[$i];
		$lookup_line =~ s/\n//g;
		
		my @lookup_toks = split(/\,/, $lookup_line);
		my $num_lookup_toks = scalar(@lookup_toks);
		
		my $tpath = @lookup_toks[$num_lookup_toks - 1];
		my $qpath = @lookup_toks[$num_lookup_toks - 2];
		my $rc = @lookup_toks[$num_lookup_toks - 3];
		my ($ext_start) = ($tpath =~ /.*_GE_(\d+)_\d+_.*/);
		
		my $l_tpath = $tpath;
		$tpath =~ s/\.fa$/_loc\.fa/g;
		
		my @rna_struct_out = run_struct_calc($dir, $qpath, $tpath, $ext_start, $rc, $mfe_cutoff, $h_constr, $hcs, $hce, $l_tpath, $tt_skip, $tail_cut);
		
		my $str = '';
		for (my $j = 0; $j < scalar(@lookup_toks) - 2; $j++) {
			$str .= @lookup_toks[$j];
			$str .= ",";
		}
		
		for (my $j = 0; $j < scalar(@rna_struct_out); $j++) {
			my $rsline = @rna_struct_out[$j];
			my @r_parts = split(/\,/, $rsline);
		
			if (@r_parts[5] !~ /[\(\)]/) {
				next;
			}
			
			print CSVR $str . $rsline . "\n";
			
			#my $contig = @lookup_toks[1];
			#my $start = @r_parts[10];
			#my $end = @r_parts[11];
		
			#my $contig_fa =  "$dir/" . $contig . ".fa"; 
			#open (CTG, $contig_fa);
			#my @ctg_contents = <CTG>;
			#close(CTG);
		
			#my $seq = @ctg_contents[1];
			#$seq =~ s/\n//g;
		
			#if ($end < $start) {
			#	my $temp = $start;
			#	$start = $end;
			#	$end = $temp;
			#}
		
			#my $ctg_start = $start - 200;
			#if ($ctg_start < 0) {
			#	$ctg_start = 0;
			#}
		
			#my $ctg_end = $end + 200;
			#if ($ctg_end > (length($seq) - 1)) {
			#	$ctg_end = length($seq) - 1;
			#}
		
			#$count++;
			#my $id = "TRACR_" . $count . "_" . $start . "_" . $end;
		
			#my $seq_ext = substr($seq, $ctg_start, $ctg_end - $ctg_start + 1);
		
			#open(FAFLNK, ">>$dir/". $id . "_flank.fa");
			#print FAFLNK ">" . $id . "_flank\n";
			#print FAFLNK $seq_ext . "\n";
			#close(FAFLNK);
		
			#my $flag = crispr_call_rerun($dir, $id, $session);
		}	
	}
	close(CSVR);
	return 1;
}

sub run_struct_calc {
	my ($dir, $query, $target, $ext_start, $rc, $mfe_cutoff, $h_constr, $hcs, $hce, $long_target, $tt_skip, $tail_cut) = @_;
	
	open(seqs, $target) or die "can't open txt file";
	open(reps, $query) or die "can't open txt file";
	open(lseqs, $long_target) or die "can't open txt file";
	
	my @seq_contents = <seqs>;
	my $seq_h = @seq_contents[0];
	my $seq = @seq_contents[1];
	$seq =~ s/\n//g;
	
	my @l_seq_contents = <lseqs>;
	my $l_seq_h = @l_seq_contents[0];
	my $l_seq = @l_seq_contents[1];
	$l_seq =~ s/\n//g;

	if ($rc > 0) {
		$seq =~ tr/ATCG/TAGC/;
		$seq = reverse($seq);
		
		$l_seq =~ tr/ATCG/TAGC/;
		$l_seq = reverse($l_seq);
	}

	my @seq_contents_rep = <reps>;
	my $rep_h = @seq_contents_rep[0];
	my $rep = @seq_contents_rep[1];
	
	my $tpath = "$dir/" . "temp_target.fa";
	my $qpath = "$dir/" . "temp_query.fa";
	
	open(out1, ">>$tpath") or die "can't open fa file";
	print out1 @seq_contents[0];
	print out1 $seq . "\n";

	open(out2, ">>$qpath") or die "can't open fa file";
	print out2 @seq_contents_rep[0];
	print out2 $rep;
	
	my $mq = length($rep) + 5;
	my $mt = length($seq) + 5;
	
	my $outpath = "$dir/" . "temp_out.txt";
	my $cmd = "RNAhybrid -t $tpath -q $qpath -m $mt -n $mq -d xi,theta -b 1 -c"; 
	
	$cmd .= " -e $mfe_cutoff";
	
	if ($h_constr == 1) {
		$cmd .= " -f $hcs,$hce";
	}
	
	$cmd .= " > $outpath";
	
	my $flag = system($cmd);
	my @out_table = parse_rnahybrid_output($outpath);
	
	my @outdata = ();
	for ($i = 0; $i < scalar(@out_table); $i++) {
		my $hybrid = $out_table[$i][7];
		$hybrid =~ s/N//g;
	
		$out_table[$i][5] = get_exact_start($l_seq, $hybrid, $out_table[$i][5] + index($l_seq, $seq));
		my $tail_seq = get_sl_seq($l_seq, $hybrid, $out_table[$i][5] + index($l_seq, $seq), $tt_skip, $tail_cut);
	
		if (length($tail_seq) >= 2) {
			open(out3, ">>$dir/seqin.fa") or die "can't create fa file";
			open(out4, ">>$dir/rcfin.fa") or die "can't create fa file";
			print out3 @seq_contents[$ind_header];
			print out3 $tail_seq;
			print out4 $out_table[$i][6] . "&" . $hybrid . $tail_seq;
		
			my $cmd2 = "RNAfold -p -d2 --noLP -P andronescu_2007.par < $dir/seqin.fa > ".
		        "$dir/" . "temp_rf_out.txt";
			my $flag2 = system($cmd2);
		
			my $cmd3 = "RNAcofold -a -d2 --noLP -P andronescu_2007.par < $dir/rcfin.fa > ".
		        "$dir/" . "temp_rcf_out.txt";
			my $flag3 = system($cmd3);
		
			my @rf_out = parse_rnafold_output("$dir/" . "temp_rf_out.txt");
			my @rcf_out = parse_rnacofold_output("$dir/" . "temp_rcf_out.txt");
		
			my $outstr =  $out_table[$i][0] . "," . $out_table[$i][3] . "," . 
		      	          $out_table[$i][6] . "&" . $hybrid . "," . 
		                  $out_table[$i][8] . "&" . $out_table[$i][9] . "," .
		                  $tail_seq . "," . @rf_out[0] . ",".
		                  $out_table[$i][6] . "&" . $hybrid . $tail_seq . "," .
		                  $out_table[$i][8] . "&" . $out_table[$i][9] . @rf_out[0] . "," .
		                  @rcf_out[0] . "," . $hybrid . $tail_seq;
		    
		    my $true_pos = $ext_start + $out_table[$i][5] + 1;
			if ($rc > 0) {
				#$true_pos = $ext_start + ($out_table[$i][1] - $out_table[$i][5]) + 1;
				$true_pos = $ext_start + (length($l_seq) - $out_table[$i][5]) + 1;
			}
			
			my $hyb_len = length($hybrid);
			my $tail_len = length($tail_seq);
			my $tracr_start = $true_pos;
			my $tracr_end =  $true_pos + $hyb_len + $tail_len;
			if ($rc > 0) {
				$tracr_start = $true_pos - $hyb_len - $tail_len;
				$tracr_end =  $true_pos;
			}
			
			$outstr .= ",";
			$outstr .= "$tracr_start";
			$outstr .= ",";
			$outstr .= "$tracr_end";
			$outstr .= ",";
			$outstr .= "$rc";
			
			print "RNAhybrid finds an anti-repeat: " . $out_table[$i][6] . "&" . $hybrid . 
		          " MFE: " . $out_table[$i][3] . "\n";
			push(@outdata, $outstr);
			
			unlink("$dir/seqin.fa");
			unlink("$dir/rcfin.fa");
			unlink("$dir/" . "temp_rf_out.txt");
			unlink("$dir/" . "temp_rcf_out.txt");
		
			my @dir = glob("*.ps");
			for ($j = 0; $j < scalar(@dir); $j++) {
				if (-e @dir[$j]) {
					unlink(@dir[$j]);
				}
			}
		
			close(out3);
			close(out4);
		} else {
			push(@outdata, ",,,,,,,,,,,,");
		}
	}

	unlink($outpath);
	unlink($tpath);
	unlink($qpath);
	close(seqs);
	close(reps);
	close(out1);
	close(out2);
	
	return @outdata;
}

sub patscan_tail {
	my($dir, $hybrid, $tail, $tail_pat_file) = @_;
	
	my $hyb_last = substr($hybrid, length($hybrid) - 1, 1);
	my $seq_test = $hyb_last . $tail;
	#my $seq_test = $tail;
	
	open(SFM_IN, ">>$dir/seq_sfm.fa") or die "can't create FA file";
	print SFM_IN '>test' . "\n";
	print SFM_IN $seq_test;
	close(SFM_IN);
	
	my $cmd = "scan_for_matches $tail_pat_file < $dir/seq_sfm.fa > $dir/sfm_out.txt";
	my $flag = system($cmd);
	
	open(SFM_OUT, "$dir/sfm_out.txt") or die "can't open TXT file";
	my @sfm_content = <SFM_OUT>;
	close(SFM_OUT);
	
	my $rfm_out_line = '';
	my $rfm_first_line = '';
	
	for (my $i = 0; $i < scalar(@sfm_content); $i++) {
		my $line = @sfm_content[$i];
		if ($i == 0) {
			$rfm_first_line = $line;
		}
		if ($line =~ /\:\[\d+\,\d+\]/) {
			if ($i + 1 < scalar(@sfm_content)) {
				my $next_line = @sfm_content[$i + 1];
				$next_line =~ s/^\s+//;
				$rfm_out_line = $next_line;
				last;
			} else {
				next;
			}
		} else {
			next;
		}
	}
	
	unlink("$dir/sfm_out.txt");
	unlink("$dir/seq_sfm.fa");
	
	return $rfm_out_line;
}

sub get_sfm_pat {
	my ($path) = @_;
	open(PAT, $path) or die "can't open TXT file";
	my @pat_content = <PAT>;
	close(PAT);
	
	my $result = '';
	
	for (my $i = 0; $i < scalar(@pat_content); $i++) {
		my $line = @pat_content[$i];
		$line =~ s/\n//g;
		$result .= ($line . ";");
	}
	
	return $result;
}

sub search_tail_pattern {
	my ($dir, $tail_pat_file) = @_;
	
	open(RAWDAT, "$dir/genome_tracr_summary_classified.csv");
	my @contents = <RAWDAT>;
	close(RAWDAT);
	
	open(DAT, "lookup_pat.dat");
	my @pat_contents =<DAT>;
	close(DAT);
	
	open(GFFR, ">>$dir/genome_tracr_summary_classified_2.csv");
	for(my $i = 0; $i < scalar(@contents); $i++) {
		my $line = @contents[$i];
		$line =~ s/\n//g;
		
		if ($i == 0) {
			$line .= ",";
			for (my $j = 0; $j < scalar(@pat_contents); $j++) {
				$line .= "TAIL_SIG_$j,PAT_" . $j . "_MATCHED,"
			}
			$line .= "TAIL_SIG_USER,PAT_USER_MATCHED,KPAT_FOUND\n";
			print GFFR $line;
			next;
		}
		
		my @row_toks = split(/\,/, $line);
		my $hybrid =  @row_toks[16];
		my $tail =  @row_toks[18];
		
		my $count = 0;
		$line .= ",";
		for (my $j = 0; $j < scalar(@pat_contents); $j++) {
			my $datline = @pat_contents[$j];
			$datline =~ s/\n//g;
			
			my @ptoks = split('#', $datline);
			my $pp = patscan_tail($dir, $hybrid, $tail, @ptoks[1]);
			my $bb = 0;
			if (length($pp) > 0) {
				$bb = 1;
				$count++;
			} else {
				$pp = "<none>";
			}
			$line .= "$bb,$pp,";
		}
		
		my $kpatb = 0;
		if ($count > 0) {
			$kpatb = 1; 
		}
		
		my $pat = patscan_tail($dir, $hybrid, $tail, $tail_pat_file);
		$pat =~ s/\n//g;
		
		my $b = 0;
		if (length($pat) > 0) {
			$b = 1;
		}
		
		if ($b == 0) {
			$pat = "<none>";
		}
		
		$line .= "$b,$pat,$kpatb";
		$line =~ s/\n//g;
		print GFFR $line . "\n";
	}
	close(GFFR);
	
	unlink("$dir/genome_tracr_summary_classified.csv");
	my $flag = system("mv $dir/genome_tracr_summary_classified_2.csv $dir/genome_tracr_summary_classified.csv");
	return 1;
}

sub parse_tail_fold {
	my ($dir) = @_;
	
	open(RAWDAT, "$dir/genome_tracr_summary.csv");
	my @contents = <RAWDAT>;
	close(RAWDAT);
	
	open(GFFR, ">>$dir/genome_tracr_summary_2.csv");
	for(my $i = 0; $i < scalar(@contents); $i++) {
		my $line = @contents[$i];
		$line =~ s/\n//g;
		my @toks = split(/\,/, $line);
		
		if ($i == 0) {
			$line .= ",TAIL_FOLD_SUMMARY,NUM_H,MIN_H,MAX_H,FIRST_H," . 
			         "NUM_C,MIN_C,MAX_C,FIRST_C,NUM_L,MIN_L,MAX_L,FIRST_L," . 
			         "TETRA_T_END,XRAY_FOLD\n";
			print GFFR $line;
			next;
		}
		
		my $tail = @toks[19];
		my $tail_seq = @toks[18];
		my $tail_fold_str = parse_dbstr($tail);
		my $b_tetra = 0;
		
		my @tail_fold_str_toks = split(/\,/, $tail_fold_str);
		my $nh = @tail_fold_str_toks[1];
		my $nc = @tail_fold_str_toks[5];
		my $minh = @tail_fold_str_toks[2];
		my $maxh = @tail_fold_str_toks[3];
		
		$tail_seq =~ s/\n//g;
		if ($tail_seq =~ /[ATCGN]+TTTT$/g) {
			$tail_fold_str .= ",1";
			$b_tetra = 1;
		} else {
			$tail_fold_str .= ",0";
		}
		
		if (($minh >= 8) && ($maxh <= 40) &&
		    ($nh == 2 || $nh == 3) && ($nc == 0) && 
		    ($b_tetra > 0)) {
			$tail_fold_str .= ",1\n";
		} else {
			$tail_fold_str .= ",0\n";
		}
		
		$line .= ",";
		$line .= $tail_fold_str;
		print GFFR $line;
	}
	
	unlink("$dir/genome_tracr_summary.csv");
	my $flag = system("mv $dir/genome_tracr_summary_2.csv $dir/genome_tracr_summary.csv");
	return 1;
}

sub generate_tracr_gff {
	my ($dir, $p_cutoff) = @_;
	
	open(RAWDAT, "$dir/genome_tracr_summary_classified.csv");
	my @contents = <RAWDAT>;
	close(RAWDAT);
	
	open(GFFR, ">>$dir/genome_tracr_gff.gff");
	
	my @gfflines = ();
	my @pps = ();
	for(my $i = 0; $i < scalar(@contents); $i++) {
		if ($i == 0) {
			next;
		}
	
		my $line = @contents[$i];
		$line =~ s/\n//g;
		
		my @row_toks = split(/\,/, $line);
		my $seqname = @row_toks[1];
		my $source = "TracrApp";
		my $feature = "transcript";
		my $start = @row_toks[24];
		my $end = @row_toks[25];
		my $score = @row_toks[54] * 100.0;
		
		my $strand = "+";
		if (@row_toks[26] == 1) {
			$strand = "-";
		}
		
		my $gene_gff_path = "$dir/" . $seqname . "_gene.gff";
		my $cmd = "grep $pid $gene_gff_path";
		
		my $frame = ".";
		my $id = "TRACR_" . $i . "_" . $start . "_" . $end;
		my $name = "TRACR_RNA_" . $i;
		my $note = "CDREF " . @row_toks[3] . " HYBRID " . @row_toks[16] . " MFE " . @row_toks[15]; 		
		my $attr = "ID=" . $id . ";Name=" . $name . ";Note=" . $note;
		
		my $gff_line = "$seqname\t$source\t$feature\t$start\t$end\t$score\t$strand\t$frame\t$attr";
		push(@gfflines, $gff_line);
		push(@pps, @row_toks[54]);
		
		my $line_next = "####,####\n";
		if ($i < scalar(@contents) - 1) {
			$line_next = @contents[$i + 1];
		}
		
		my @row_toks_next = split(/\,/, $line_next);
		my $seqname_next = @row_toks_next[1];
		if ($seqname_next ne $seqname) {
			my $cmd = 'grep source ' . $dir . '/' . $seqname . '.gff';
			my @grep_result = `$cmd`;
			my $line0 = @grep_result[0];
			$line0 =~ s/\n//g;
			
			my $org_name = 'Unknown species';
			if ($line0 =~ /source.*\;organism=.*/) {
				my ($org_name_) = ($line0 =~ /source.*\;organism=(.*)\s*/);
				$org_name = $org_name_;
				$org_name =~ s/\,//g;
			}
			
			my @gtoks = split(/[\t]/, $line0);
			
			my $g_gff_line = "";
			for (my $j = 0; $j < 8; $j++) {
				$g_gff_line .= @gtoks[$j];
				$g_gff_line .= "\t";			
			}
			
			my $g_attr = "ID=" . $seqname . ";Name=Contig_" . $seqname . ";Organism=" . $org_name;
			$g_gff_line .= $g_attr;
			print GFFR $g_gff_line . "\n";
			
			for (my $j = 0; $j < scalar(@gfflines); $j++) {
				if (@pps[$j] < $p_cutoff) {
					next;
				}
				my $gline = @gfflines[$j];
				$gline .= ";Parent=";
				$gline .= $seqname;
				print GFFR $gline . "\n";
			}
			
			open(RR_GFF, $dir . '/' . $seqname . '_repeat.gff');
			my @rr_contents = <RR_GFF>;
			close(RR_GFF);
			
			for (my $j = 0; $j < scalar(@rr_contents); $j++) {
				my $rr_line = @rr_contents[$j];
				$rr_line =~ s/\n//g;
				
				if ($rr_line =~ /repeat_region/) {
					$rr_line .= ";Parent=";
					$rr_line .= $seqname;
					print GFFR $rr_line . "\n";
				} else {
					print GFFR $rr_line . "\n";
				}
			}		
			@gfflines = ();
			@pps = ();
		}
	}
	
	return 1;
}

sub generate_tracr_txt {
	my ($dir, $p_cutoff, $tail_pat_file, $session) = @_;
	
	open(RAWDAT, "$dir/genome_tracr_summary_classified.csv");
	my @contents = <RAWDAT>;
	close(RAWDAT);
	
	open(DAT, "lookup_pat.dat");
	my @pat_contents =<DAT>;
	close(DAT);
	
	my ($gcf) = ($dir =~ /.*(GCF_\d+_\d+)_.*/);
	$gcf =~ s/_/\./g;
	
	open(TXTOUT, ">>$dir/genome_tracr_txt.txt");
	open(MFA, ">>$dir/genome_tracr_seqs.fa");
	for(my $i = 0; $i < scalar(@contents); $i++) {
		if ($i == 0) {
			next;
		}
		
		my $line = @contents[$i];
		$line =~ s/\n//g;
		
		my @row_toks = split(/\,/, $line);
		my $organism = @row_toks[0];
		my $contig = @row_toks[1];
		my $start = @row_toks[24];
		my $end = @row_toks[25];
		my $prob = @row_toks[54];
		my $mfe = @row_toks[15];
		my $id = "TRACR_" . $i . "_" . $start . "_" . $end;
		
		my $hybrid =  @row_toks[16];
		my $hybrid_struct =  @row_toks[17];
		my $tail =  @row_toks[18];
		my $tail_struct =  @row_toks[19];
		
		my $rr_id = @row_toks[3];
		my $rr_rc = "-";
		if ($rr_id =~ /\d+_RC/) {
			$rr_rc = "+";
		}
		
		my $rc = "+";
		if (@row_toks[26] == 0) {
			$rc = "-";
		}
		
		my $rep = @row_toks[5];
		my $rcstr = $rr_rc . "/" . $rc;
		
		my @pats = ();
		for (my $j = 0; $j < scalar(@pat_contents); $j++) {
			my $datline = @pat_contents[$j];
			$datline =~ s/\n//g;
			
			my @ptoks = split('#', $datline);
		
			my $found = @row_toks[(2 * $j) + 56];
			if ($found == 1) {
				my $match_pat = get_sfm_pat(@ptoks[1]);
				my $desc = @ptoks[2];
				my $str = $desc .": " . $match_pat;
				push(@pats, $str);
			}
		}
		
		my $sp_pat = @row_toks[scalar(@row_toks) - 3];
		my $sp_pat_found = "No";
		my $sp_pat_match = "";
		if ($sp_pat == 1) {
			my $sp_pat_match = get_sfm_pat($tail_pat_file);
			my $str = "User pattern" .": " . $sp_pat_match;
			push(@pats, $str);
		}
		
		if (scalar(@pats) > 0) {
			$sp_pat_found = "Yes";
			for (my $j = 0; $j < scalar(@pats); $j++) {
				$sp_pat_match .= @pats[$j];
				$sp_pat_match .= "\n";
			}
		} else {
			$sp_pat_match = "NA\n";
		}
		
		if (@row_toks[54] < $p_cutoff) {
			next;
		}
		
		my $b_xray = @row_toks[53];
		my $xray = "No";
		if ($b_xray == 1) {
			$xray = "Yes";
		}
		
		my $strfold = @row_toks[39];
		
		my $overlap_str = "No";
		my $overlap = @row_toks[71];
		my $overlap_gene = @row_toks[72];
		if ($overlap > 0) {
			$overlap_str = "Yes ($overlap_gene)";
		}
		
		print TXTOUT "          TracrRNA prediction $i, ID: $id, Predicted by TracrRNAFinder 0.00\n" .
		      "====================================================================================================\n" .
		      "Organism:                                           $organism\n" . 
		      "Contig ID:                                          $contig\n". 
		      "Position:                                           [$start, $end]\n" .
		      "Repeat sequence (predicted by CRISPRDetect 2.1):    $rep\n" .
		      "Hybrid free energy:                                 $mfe\n" .
		      "True-positive probability (hybrid SVM):             $prob\n" .
		      "Reverse complement used (repeat/anti-repeat):       $rcstr\n" . 
		      "Known conserved motifs in the tail (by patscan):    $sp_pat_found\n" .
		      "Has 2-3 8-40 nt hairpins in the tail:               $xray\n" .
		      "Tail folding:                                       $strfold\n" .
		      "Overlapping with an annotated gene:                 $overlap_str\n".
		      "====================================================================================================\n" .
		      "\n" .
		      "Hybrid structure (repeat & anti-repeat): \n".
		      "$hybrid\n" .
		      "$hybrid_struct\n" .
		      "\n" .
		      "Tail structure: \n" .
		      "$tail\n" .
		      "$tail_struct\n" .
		      "\n" . 
		      "Known conserved motif matched: \n" .
		      "$sp_pat_match" .
		      "\n" .
		      "//\n\n"; 
		
		my @strs = split("&", $hybrid);
		my $ar = @strs[1];
		
		print MFA ">$id\n";
		print MFA $ar . $tail . "\n";    
	}
	close(TXTOUT);
	close(MFA);
	return 1;
}

sub add_Cas9_data {
	my ($dir) = @_;
	
	open(RAWDAT, "$dir/genome_tracr_summary.csv");
	my @contents = <RAWDAT>;
	close(RAWDAT);
	
	open(GFFR, ">>$dir/genome_tracr_summary_2.csv");
	for(my $i = 0; $i < scalar(@contents); $i++) {
		my $line = @contents[$i];
		$line =~ s/\n//g;
		
		if ($i == 0) {
			$line .= ",N_CONTIG,CONTIG_LEN,CAS9_START,CAS9_END,CAS9_ID,CAS9_ORI,CAS9_PRED_TYPE\n";
			print GFFR $line;
			next;
		}
		
		
		my @row_toks = split(/\,/, $line);
		my $contig = @row_toks[1];
		
		my $cmd = "grep $contig $dir/cas9_genome_summary.csv";
		my @results = `$cmd`;
		
		if (scalar(@results) == 0) {
			$line .= ",,,,,,,\n";
			print GFFR $line;
			next;		
		}
		
		my $cas9_line = @results[0];
		
		$cas9_line =~ s/\n//g;
		
		my @toks = split(/\,/, $cas9_line);
		
		my $str = "";
		for (my $j = 1; $j < (scalar(@toks) - 1); $j++) {
			$str .= ",";
			$str .= @toks[$j];
		}
		$str .= "\n";
		$str =~ s/NA//g;
		$line .= $str;
		print GFFR $line;
	}
	close(GFFR);
	
	unlink("$dir/genome_tracr_summary.csv");
	my $flag = system("mv $dir/genome_tracr_summary_2.csv $dir/genome_tracr_summary.csv");
	return 1;
}

sub add_cas_classification {
	my ($dir) = @_;
	
	open(RAWDAT, "$dir/genome_tracr_summary_classified.csv");
	my @contents = <RAWDAT>;
	close(RAWDAT);
	
	open(GFFR, ">>$dir/genome_tracr_summary_classified_2.csv");
	
	my $n = 0;
	for(my $i = 0; $i < scalar(@contents); $i++) {
		my $line = @contents[$i];
		$line =~ s/\n//g;
		
		my @row_toks = split(/\,/, $line);
		
		if ($i == 0) {
			$line .= ",CAS_CLASS\n";
			print GFFR $line;
			$n = scalar(@row_toks);
			next;
		}
		
		my @row_toks = split(/\,/, $line);
		my $contig = @row_toks[1];
		
		my $cmd = "grep $contig $dir/cas9_genome_summary.csv";
		my @results = `$cmd`;
		
		if (scalar(@results) == 0) {
			$line .= ",unknown\n";
			print GFFR $line;
			next;		
		}
		
		my $cas9_line = @results[0];
		
		$cas9_line =~ s/\n//g;
		
		my @toks = split(/\,/, $cas9_line);
		my $n = scalar(@toks);
		
		$line .= "," . @toks[$n - 1] . "\n";
		print GFFR $line;
	}
	close(GFFR);
	
	unlink("$dir/genome_tracr_summary_classified.csv");
	my $flag = system("mv $dir/genome_tracr_summary_classified_2.csv $dir/genome_tracr_summary_classified.csv");
	return 1;
}

sub add_distance_data {
	my ($dir) = @_;
	
	open(RAWDAT, "$dir/genome_tracr_summary.csv");
	my @contents = <RAWDAT>;
	close(RAWDAT);
	
	open(GFFR, ">>$dir/genome_tracr_summary_2.csv");
	
	my $n = 0;
	for(my $i = 0; $i < scalar(@contents); $i++) {
		my $line = @contents[$i];
		$line =~ s/\n//g;
		
		my @row_toks = split(/\,/, $line);
		
		if ($i == 0) {
			$line .= ",DIST_TRACR_CAS,DIST_TRACR_RR,DIST_RR_CAS\n";
			print GFFR $line;
			$n = scalar(@row_toks);
			next;
		}
		
		my $contig_tracr = @row_toks[1];
		my $contig_rr = @row_toks[2];
		my $n_contig = @row_toks[27];
		my $contig_size = @row_toks[28];
		my $cas_start = @row_toks[29];
		my $cas_end = @row_toks[30];
		my $tracr_start = @row_toks[24];
		my $tracr_end = @row_toks[25];
		my $rr_start = @row_toks[6];
		my $rr_end = @row_toks[7];
		
		if ( $cas_start eq '') {
	   		$cas_start = 9**9**9; 
		}
	
		if ( $cas_end eq '') {
	   		$cas_end = 9**9**9; 
		}
	
		if ( $tracr_start eq '') {
	   		$tracr_start = 9**9**9; 
		}
	
		if ( $tracr_end eq '') {
	   		$tracr_end = 9**9**9; 
		}
	
		if ( $rr_start eq '') {
	   		$rr_start = 9**9**9; 
		}
	
		if ( $rr_end eq '') {
	   		$rr_end = 9**9**9; 
		}
		
		my $str = getDistances($cas_start, $cas_end, $tracr_start, $tracr_end, $rr_start, $rr_end, $contig_tracr, $contig_rr, $n_contig, $contig_size);
		$line .= ",";
		$line .= $str;
		$line .= "\n";
		print GFFR $line;
	}
	close(GFFR);
	
	unlink("$dir/genome_tracr_summary.csv");
	my $flag = system("mv $dir/genome_tracr_summary_2.csv $dir/genome_tracr_summary.csv");
	return 1;
}

sub add_gene_overlapping_flag {
	my ($dir) = @_;
	
	open(RAWDAT, "$dir/genome_tracr_summary_classified.csv");
	my @contents = <RAWDAT>;
	close(RAWDAT);
	
	open(GFFR, ">>$dir/genome_tracr_summary_classified_2.csv");
	
	my $n = 0;
	for(my $i = 0; $i < scalar(@contents); $i++) {
		my $line = @contents[$i];
		$line =~ s/\n//g;
		
		my @row_toks = split(/\,/, $line);
		
		if ($i == 0) {
			$line .= ",OVERLAPPING,OL_GENE\n";
			print GFFR $line;
			$n = scalar(@row_toks);
			next;
		}
		
		my $contig_tracr = @row_toks[1];
		my $tracr_start = @row_toks[24];
		my $tracr_end = @row_toks[25];
		my $gff_path = $dir . "/" . $contig_tracr . "_gene.gff";
		my @rs = find_overlapping_gene($gff_path, $tracr_start, $tracr_end);
		
		$line .= "," . @rs[0] . "," . @rs[1] . "\n";
		print GFFR $line;
	}
	close(GFFR);
	
	unlink("$dir/genome_tracr_summary_classified.csv");
	my $flag = system("mv $dir/genome_tracr_summary_classified_2.csv $dir/genome_tracr_summary_classified.csv");
	return 1;
}

sub add_additional_data {
	my ($dir, $rep_start, $junct) = @_;
	
	open(RAWDAT, "$dir/genome_tracr_summary.csv");
	my @contents = <RAWDAT>;
	close(RAWDAT);
	
	open(GFFR, ">>$dir/genome_tracr_summary_2.csv");
	
	my $n = 0;
	for(my $i = 0; $i < scalar(@contents); $i++) {
		my $line = @contents[$i];
		$line =~ s/\n//g;
		
		my @row_toks = split(/\,/, $line);
		
		if ($i == 0) {
			$line .= ",DIR_SCORE,TS_SCORE\n";
			print GFFR $line;
			$n = scalar(@row_toks);
			next;
		}
		
		my $rep = @row_toks[5];
		my $hyb = @row_toks[16];
		my $rr_id = @row_toks[3];
		my $tail = @row_toks[18];
		#my $dist = @row_toks[66];
		
		my $rc = 0;
		if ($rr_id =~ /\d+_RC/) {
			$rc = 1;
		}
		
		my $b = check_repeat($rep_start, $hyb); #direction_rank($rep_start, $rep, $hyb, $rc);
		my $tb = check_junction($junct, $tail);
		#my $dsb = 0;
		#if ($dist <= 3000) {
		#	$dsb = 1;
		#}
		
		$line .= ",";
		$line .= $b . "," . $tb;
		$line .= "\n";
		print GFFR $line;
	}
	close(GFFR);
	
	unlink("$dir/genome_tracr_summary.csv");
	my $flag = system("mv $dir/genome_tracr_summary_2.csv $dir/genome_tracr_summary.csv");
	return 1;
}

sub crispr_call_rerun{

	my ($dir, $id, $session) = @_;
	
	if (! (-d "CRISPRDetect_2.3/work" . $session)) {
		my $flag = system("mkdir CRISPRDetect_2.3/work" . $session)
	}

	my $cmd1 = 'cp ' .$dir . '/' . $id . '_flank.fa CRISPRDetect_2.3/work' . $session; 
	my $cmd2 = "CRISPRDetect_2.3";
	my $cmd3 = 'perl CRISPRDetect.pl -f work' . $session . '/' . $id . '_flank.fa -o ' . $id . '_repeat_redo.txt' . ' -array_quality_score_cutoff 0.0' . 
	           ' -minimum_word_repeatation 2 -minimum_no_of_repeats 2 -word_length 7' . ' -ea_allowed_percent_similarity 50 -sa_allowed_percent_similarity 50' .
	           ' -minimum_repeat_length 15 > /dev/null';
	my $cmd4 = "..";
	my $cmd5 = 'rm -f CRISPRDetect_2.3/work' . $session . '/' . $id . '_flank.fa';
	my $cmd6 = 'mv CRISPRDetect_2.3/' . $id . '_repeat_redo.txt ' . $dir . '/' . $id . '_repeat_redo.txt';
	my $cmd7 = 'mv CRISPRDetect_2.3/' . $id . '_repeat_redo.txt.fp ' . $dir . '/' . $id . '_repeat_redo.fp';
	my $cmd8 = 'mv CRISPRDetect_2.3/' . $id . '_repeat_redo.txt.gff ' . $dir . '/' . $id . '_repeat_redo.gff';
	my $cmd9 = 'rm -rf CRISPRDetect_2.3/work' . $session;

	my $flag = system($cmd1);
	my $flag = chdir($cmd2);
	my $flag = system($cmd3);
	my $flag = chdir($cmd4);
	my $flag = system($cmd5);
	my $flag = system($cmd6);
	my $flag = system($cmd7);
	my $flag = system($cmd8);
	my $flag = system($cmd9);
	
	#my $output_file_size = -s $dir . '/' . $id . '_repeat.txt';
	#if(! (defined $output_file_size and $output_file_size > 5)) {
	#	unlink($dir . '/' . $id . '_repeat.txt');
	#	unlink($dir . '/' . $id . '_repeat.gff');
	#	unlink($dir . '/' . $id . '_repeat.fp');
	#}
	
	return $flag;
}

sub disc_fcn {
	my @nums = @_;
	
	if (scalar(@nums) <= 1) {
		return 0;
	}
	
	if (scalar(@nums) == 2) {
		return 1; 
	}
	
	my $c = 0;
	for (my $i = 2; $i < scalar(@nums); $i++) {
		my $a2 = @nums[$i];
		my $a1 = @nums[$i - 1];
		my $a0 = @nums[$i - 2];
		
		if ($a1 > $a2 && $a1 > $a0) {
			$c++;
		}
	}
	return $c;
}

sub max_min {
	my @nums = @_;
	
	my $maxsofar = 0;
	my $minsofar = 0;
	
	if (scalar(@nums) > 0) {
		$maxsofar = @nums[0];
		$minsofar = @nums[0];
	}
	
	for (my $i = 0; $i < scalar(@nums); $i++) {
		if (@nums[$i] >= $maxsofar) {
			$maxsofar = @nums[$i];
		}
		
		if (@nums[$i] <= $minsofar) {
			$minsofar = @nums[$i];
		}
	} 
	
	my @out = ($maxsofar, $minsofar);
	return @out;
}

sub orderCSV {
	my ($dir) = @_;
	
	open(RAWDAT, "$dir/genome_tracr_summary_classified.csv");
	my @contents = <RAWDAT>;
	close(RAWDAT);
	
	my $head = @contents[0];
	$head =~ s/\n//g;
	
	my @headtoks = split(/\,/, $head);
	
	#my $ind_xray = 54;
	#my $ind_kpat = 44;
	#my $ind_q = 70;
	
	open (TEMP, ">>$dir/temp.csv");
	for (my $j = 1; $j < scalar(@contents); $j++) {
		my $line = @contents[$j];
		$line =~ s/\n//g;
		print TEMP $line . "\n";

	}
	close(TEMP);
	
	open(SDAT, ">>$dir/genome_tracr_summary_classified_2.csv");
	print SDAT $head . "\n";
	my $flag = system("cat $dir/temp.csv \| sort -t $\',\' -u -k17,17" . 
	                  " \| sort -t $\',\' -k72n,72 -k38nr,38 -k54nr,54 " .
	                  "-k39nr,39 -k35n,35 >> " . 
	                  "$dir/genome_tracr_summary_classified_2.csv");
	unlink("$dir/temp.csv");
	unlink("$dir/genome_tracr_summary_classified.csv");
	$flag = system("mv $dir/genome_tracr_summary_classified_2.csv $dir/genome_tracr_summary_classified.csv");
	return 1;
}

sub filterCSV {
	my ($dir, $rep_start, $tail_start, $min_mismatch, $max_mismatch, $helix_size) = @_;
	
	open(RAWDAT, "$dir/genome_tracr_summary.csv");
	my @contents = <RAWDAT>;
	close(RAWDAT);
	
	open(GFFR, ">>$dir/genome_tracr_summary_2.csv");
	
	for(my $i = 0; $i < scalar(@contents); $i++) {
		my $line = @contents[$i];
		$line =~ s/\n//g;
		
		if ($i == 0) {
			print GFFR $line . "\n";
			next;
		}
		
		my @row_toks = split(/\,/, $line);
	
		my $rep = @row_toks[5];
		my $hyb = @row_toks[16];
		my $hyb_struct = @row_toks[17];
		my $tail = @row_toks[18];
		my $rr_id = @row_toks[3];
	
		my $rc = 0;
		if (index($rr_id, "_RC") >= 0) {
			$rc = 1;
		}
	
		my $b = 1;
	
		if ((length($rep_start) > 0) && (length($tail_start) > 0)) {
			$b *= direction_rank_2($rep_start, $rep, $rc, $tail_start, $tail);
			#$b *= direction_rank($rep_start, $rep, $hyb, $rc, $tail_start, $tail);
			#$b *= direction_rank_hyb($rep_start, $rep, $rc);
		}
	
		my $ll = max_hybrid_helix_length($hyb_struct);
		my $nm = num_mismatches_in_hybrid($hyb_struct);
		my $hyb_start = index($hyb_struct, "(");
		
		if ($hyb_start <= 2) {
			$b *= 1;
		} else {
			$b *= 0;
		}
	
		if (($min_mismatch > $max_mismatch) && ($max_mismatch >= 0) && ($min_mismatch >= 0)) {
			my $temp = $max_mismatch;
			$max_mismatch = $min_mismatch;
			$min_mismatch = $temp;
		}
	
		if (($min_mismatch < 0) && ($max_mismatch >= 0)) {
			if ($nm <= $max_mismatch) {
				$b *= 1;
			} else {
				$b *= 0;
			}
		}
	
		if (($min_mismatch >= 0) && ($max_mismatch < 0)) {
			if ($nm >= $min_mismatch) {
				$b *= 1;
			} else {
				$b *= 0;
			}
		}
	
		if (($max_mismatch >= 0) && ($min_mismatch >= 0)) {
			if (($nm <= $max_mismatch) && ($nm >= $min_mismatch)) {
				$b *= 1;
			} else {
				$b *= 0;
			}
		}
	
		if ($helix_size > 0) {
			if ($ll >= $helix_size) {	
				$b *= 1;
			} else {
				$b *= 0;
			}
		}
		
		if ($b > 0) {
			print GFFR $line . "\n";
		}
	}
	close(GFFR);
	
	unlink("$dir/genome_tracr_summary.csv");
	my $flag = system("mv $dir/genome_tracr_summary_2.csv $dir/genome_tracr_summary.csv");
	return 1;
}

sub getDistances {
	my ($cas_start, $cas_end, $tracr_start, $tracr_end, $rr_start, $rr_end, $contig_tracr, $contig_rr, $n_contig, $contig_size) = @_;
	
	my @coords = ($cas_start, $cas_end, $tracr_start, $tracr_end, $rr_start, $rr_end);
	
	my @starts = (@coords[0], @coords[2], @coords[4]);
	my @inds = ("Cas9", "Tracr", "RR");
	my @poss = (0,2,4);
	
	for (my $i = 0; $i < scalar(@starts); $i++) {
		my $min = @starts[$i];
		for (my $j = $i + 1; $j < scalar(@starts); $j++) {
			if (@starts[$j] < $min) {
				$min = @starts[$j];
		
				my $temp = @starts[$i];
				@starts[$i] = @starts[$j];
				@starts[$j] = $temp; 
			
				$temp = @inds[$i];
				@inds[$i] = @inds[$j];
				@inds[$j] = $temp;
			
				$temp = @poss[$i];
				@poss[$i] = @poss[$j];
				@poss[$j] = $temp;
			}
		}
	}
	
	@coords = (@coords[@poss[0]], @coords[@poss[0] + 1], 
               @coords[@poss[1]], @coords[@poss[1] + 1], 
           	   @coords[@poss[2]], @coords[@poss[2] + 1]);

	my $ind0 = first_index { $_ eq "Tracr" } @inds;
	my $ind1 = first_index { $_ eq "Cas9" } @inds;

	my $dtc = abs(@coords[(2 * $ind0) + 1] - @coords[(2 * $ind1)]);
	if ($ind1 < $ind0) {
		$dtc = abs(@coords[(2 * $ind1) + 1] - @coords[(2 * $ind0)]);
	}

	$ind0 = first_index { $_ eq "Tracr" } @inds;
	$ind1 = first_index { $_ eq "RR" } @inds;

	my $dtr = abs(@coords[(2 * $ind0) + 1] - @coords[(2 * $ind1)]);
	if ($ind1 < $ind0) {
		$dtr = abs(@coords[(2 * $ind1) + 1] - @coords[(2 * $ind0)]);
	}

	$ind0 = first_index { $_ eq "Cas9" } @inds;
	$ind1 = first_index { $_ eq "RR" } @inds;

	my $dcr = abs(@coords[(2 * $ind0) + 1] - @coords[(2 * $ind1)]);
	if ($ind1 < $ind0) {
		$dcr = abs(@coords[(2 * $ind1) + 1] - @coords[(2 * $ind0)]);
	}
	
	if ($n_contig == 1) {
		if (abs($contig_size - $dtc) < $dtc) {
			$dtc = abs($contig_size - $dtc);
		}
		
		if (abs($contig_size - $dtr) < $dtr) {
			$dtr = abs($contig_size - $dtr);
		}
		
		if (abs($contig_size - $dcr) < $dcr) {
			$dcr = abs($contig_size - $dcr);
		}
	}
	
	if ($contig_tracr ne $contig_rr) {
		$dtr = "inf";
		$dcr = "inf";
	}

	$str =  $dtc . "," . $dtr . "," . $dcr;
	$str =~ s/inf/10000000/g;
	
	return $str;
}

sub direction_rank {
	my ($iupac, $rep, $hyb, $rc, $tail_start, $tail) = @_;
	
	my $b = 0;
	
	if ((iupac_match_seq_start($iupac, $hyb) == 1) && (iupac_match_seq_start($iupac, $rep) == 0)) {
		$b = 1;
	} elsif ((iupac_match_seq_start($iupac, $hyb) == 1) && (iupac_match_seq_start($iupac, $rep) == 1) && ($rc == 0)) {
		$b = 1;
	} elsif ((iupac_match_seq_start($iupac, $hyb) == 1) && (iupac_match_seq_start($iupac, $rep) == 1) && ($rc == 1)) {
		if ((iupac_match_seq_start($tail_start, $tail) == 1)) {
			$b = 1;
		} else {
			$b = 0;
		}
	} elsif ((iupac_match_seq_start($iupac, $hyb) == 0) && (iupac_match_seq_start($iupac, $rep) == 0) && ($rc == 0)) {
		$b = 1;
	} elsif ((iupac_match_seq_start($iupac, $hyb) == 0) && (iupac_match_seq_start($iupac, $rep) == 0) && ($rc == 1)) {
		if ((iupac_match_seq_start($tail_start, $tail) == 1)) {
			$b = 1;
		} else {
			$b = 0;
		}
	} else {
		$b = 0;
	}
	
	return $b;
}

sub direction_rank_2 {
	my ($iupac, $rep, $rc, $tail_start, $tail) = @_;
	
	my $b = 0;
	
	my $rcrep = $rep;
	$rcrep =~ tr/ATCG/TAGC/;
	$rcrep = reverse($rcrep);
	
	if ((iupac_match_seq_start($iupac, $rep) == 0) && (iupac_match_seq_start($iupac, $rcrep) == 1) && ($rc == 1)) {
		$b = 1;
	} elsif ((iupac_match_seq_start($iupac, $rep) == 0) && (iupac_match_seq_start($iupac, $rcrep) == 1) && ($rc == 0)) {
		$b = 0;
	} elsif ((iupac_match_seq_start($iupac, $rep) == 1) && (iupac_match_seq_start($iupac, $rcrep) == 0) && ($rc == 1)) {
		$b = 0;
	} elsif ((iupac_match_seq_start($iupac, $rep) == 1) && (iupac_match_seq_start($iupac, $rcrep) == 0) && ($rc == 0)) {
		$b = 1;
	} else {
		if ($rc == 0) {
			$b = 1;
		} else {
			if (iupac_match_seq_start($tail_start, $tail) == 1) {
				$b = 1;
			} else {
				$b = 0;
			}
		}
	}
	
	return $b;
}

sub direction_rank_hyb {
	my ($iupac, $rep, $rc) = @_;
	
	my $b = 0;
	
	my $rcrep = $rep;
	$rcrep =~ tr/ATCG/TAGC/;
	$rcrep = reverse($rcrep);
	
	if ((iupac_match_seq_start($iupac, $rep) == 0) && (iupac_match_seq_start($iupac, $rcrep) == 1) && ($rc == 1)) {
		$b = 1;
	} elsif ((iupac_match_seq_start($iupac, $rep) == 0) && (iupac_match_seq_start($iupac, $rcrep) == 1) && ($rc == 0)) {
		$b = 0;
	} elsif ((iupac_match_seq_start($iupac, $rep) == 1) && (iupac_match_seq_start($iupac, $rcrep) == 0) && ($rc == 1)) {
		$b = 0;
	} elsif ((iupac_match_seq_start($iupac, $rep) == 1) && (iupac_match_seq_start($iupac, $rcrep) == 0) && ($rc == 0)) {
		$b = 1;
	} else {
		if ($rc == 0) {
			$b = 1;
		} else {
			$b = 0;
		}
	}
	
	return $b;
}

sub check_repeat {
	my ($iupac, $seq) = @_;
	my $b = iupac_match_seq_start($iupac, $seq);
	return $b;
}

sub check_junction {
	my ($iupac, $seq) = @_;
	my $b = iupac_match_seq_start($iupac, $seq);
	return $b;
}

sub find_overlapping_gene {
	my ($gffpath, $start, $end) = @_;
	
	open(GFF, $gffpath);
	my @contents = <GFF>;
	close(GFF);
	
	my $indmax = scalar(@contents) - 1;
	my @indxy_0 = (1, $indmax);
	
	my @inds = b_search(\@contents, \@indxy_0, $start);
	
	my $n_overlap = 0;
	my $gene_info = "";
	for (my $i = 0; $i < scalar(@inds); $i++) {
		my $gffline = @contents[@inds[$i]];
		my @gxy = get_gene_span($gffline);
		
		$n_overlap = overlap_nt($start, $end, @gxy[0], @gxy[1]);
		$gene_info = get_gene_info($gffline);
		if ($n_overlap > 0) {
			last;
		}
	}
	
	if ($n_overlap == 0) {
		$gene_info = "<none>";
	}
	
	return ($n_overlap, $gene_info);
}

sub b_search {
	my ($gffcontents, $indxy, $start) = @_;
	my @contents = @{$gffcontents};
	my @ixy = @{$indxy};
	
	if (abs(@ixy[1] - @ixy[0]) == 1) {
		return @ixy;
	}
	
	my $mid = int((@ixy[1] + @ixy[0]) / 2);
	
	my @gxy0 = get_gene_span(@contents[@ixy[0]]);
	my @gxy1 = get_gene_span(@contents[$mid]);
	my @gxy2 = get_gene_span(@contents[@ixy[1]]);
	
	if (($start < @gxy1[0]) && ($start >= @gxy0[0])) {
		my @new_ixy = (@ixy[0], $mid);
		return b_search($gffcontents, \@new_ixy, $start);
	} elsif (($start <= @gxy2[0]) && ($start > @gxy1[0])) {
		my @new_ixy = ($mid, @ixy[1]);
		return b_search($gffcontents, \@new_ixy, $start);
	} else {
		my @new_ixy = ($mid, $mid);
		return @new_ixy;
	}
}

sub get_gene_span {
	my ($line) = @_;
	$line =~ s/\n//g;
	my @toks = split(/[\t]/, $line);
	my $start = @toks[3];
	my $end = @toks[4];
	
	if ($end < $start) {
		my $temp = $start;
		$start = $end;
		$end = $temp;
	}
	
	return ($start, $end);
}

sub get_gene_info {
	my ($line) = @_;
	$line =~ s/\n//g;
	my @toks = split(/[\t]/, $line);
	my $str = @toks[8];
	
	my @strtoks = split(/\;/, $str);
	
	return @strtoks[0];
}

sub overlap_nt {
	my ($start_1, $end_1, $start_2, $end_2) = @_;
	
	if ($start_1 > $end_1) {
		my $temp = $start_1;
		$start_1 = $end_1;
		$end_1 = $temp;
	}
	
	if ($start_2 > $end_2) {
		my $temp = $start_2;
		$start_2 = $end_2;
		$end_2 = $temp;
	}
	
	my $b1 = (($start_1 >= $start_2) && ($start_1 <= $end_2) && 
	         ($end_2 >= $start_1) && ($end_2 <= $end_1));
	my $b2 = (($start_2 >= $start_1) && ($start_2 <= $end_1) && 
	         ($end_1 >= $start_2) && ($end_1 <= $end_2));
	my $b3 = ($start_1 >= $start_2) && ($end_1 <= $end_2);
	my $b4 = ($start_1 <= $start_2) && ($end_1 >= $end_2);
	
	my $out = 0;
	
	if ($b1 > 0) {
		$out = abs($start_1 - $end_2);
	} elsif ($b2 > 0) {
		$out = abs($start_2 - $end_1);
	} elsif ($b3 > 0) {
		$out = abs($start_1 - $end_1);
	} elsif ($b4 > 0) {
		$out = abs($start_2 - $end_2);
	} else {
		$out = 0;
	}
	
	return $out;
}

return 1;

#sub direction_rank {
#	my ($rep, $hyb, $rc) = @_;
#	
#	my $b = 0;
#	
#	if ((check_GYY($hyb) == 1) && (check_GYY($rep) == 0)) {
#		$b = 1;
#	} elsif ((check_GYY($hyb) == 1) && (check_GYY($rep) == 1) && ($rc == 0)) {
#		$b = 1;
#	} elsif ((check_GYY($hyb) == 0) && (check_GYY($rep) == 0)) {
#		$b = 0;
#	} else {
#		$b = 0;
#	}
#	
#	return $b;
#}

#sub check_GYY {
#	my ($hyb) = @_;
#	my @chars = split('', $hyb);
#	
#	my $c1 = @chars[0];
#	my $c2 = @chars[1];
#	my $c3 = @chars[2];
#	
#	my $b1 = ($c1 eq "G");
#	my $b2 = (($c2 eq "T") || ($c2 eq "C"));
#	my $b3 = (($c3 eq "C") || ($c3 eq "T"));
#	
#	my $b = 0;
#	
#	if ($b1 && $b2 && $b3) {
#		$b = 1;
#	}
#	
#	return $b;
#}

#sub check_APP {
#	my ($seq) = @_;
#	my @chars = split('', $seq);
#	
#	my $c1 = @chars[0];
#	my $c2 = @chars[1];
#	my $c3 = @chars[2];
#	
#	my $b1 = ($c1 eq "A");
#	my $b2 = (($c2 eq "A") || ($c2 eq "G"));
#	my $b3 = (($c3 eq "G") || ($c3 eq "A"));
#	
#	my $b = 0;
#	
#	if ($b1 && $b2) {
#		$b = 1;
#	}
#	
#	return $b;
#}

#sub parse_dbstr {
#	my ($str) = @_;
#	
#	my @chars = split('', $str);
#	
#	my $br_count = 0;
#	my $d_count = 0;
#	my $hc_count = 0;
#	my @br_count_list = ();
#	my @str_list = ();
#	my @h_list = ();
#	my @c_list = ();
#	my @l_list = ();
#	for (my $i = 0; $i < scalar(@chars); $i++) {
#		my $ch = @chars[$i];
#		if ($ch =~ /\./) {
#			if ($i == 0 || $br_count == 0) {
#				$d_count++;
#			}
#		}
#		
#		if ($ch =~ /\(/) {
#			#$br_count = $br_count + 1;
#			push(@br_count_list, $br_count++);
#			if ($d_count > 0) {
#				push(@str_list, 'L' . $d_count);
#				push(@l_list, $d_count);
#				$d_count = 0
#			}
#		}
#		
#		if ($ch =~ /\)/) {
#			#$br_count = $br_count - 1;
#			push(@br_count_list, $br_count--);
#		}
#		
#		if ($br_count == 0) {
#			if (scalar(@br_count_list) > 0) {
#				$hc_count++;
#				my $c = disc_fcn(@br_count_list);
#				if ($c == 1) {
#					push(@str_list, 'H' . $hc_count);
#					push(@h_list, $hc_count);
#				} else {
#					push(@str_list, 'C' . $hc_count);
#					push(@c_list, $hc_count);
#				}
#				@br_count_list = ();
#				$hc_count = 0;
#			}
#		} else {
#			$hc_count++;
#		}
#		
#		if ($i == scalar(@chars) - 1) {
#			if ($d_count > 0) {
#				push(@str_list, 'L' . $d_count);
#				push(@l_list, $d_count);
#				$d_count = 0
#			}
#		}
#	}
#	
#	my $out = '';
#	for (my $i = 0; $i < scalar(@str_list); $i++) {
#		if ($i == scalar(@str_list) - 1) {
#			$out .= @str_list[$i];
#		} else {
#			$out .= (@str_list[$i] . '-');
#		}
#	}
#	
#	my @max_min_h = max_min(@h_list);
#	my @max_min_c = max_min(@c_list);
#	my @max_min_l = max_min(@l_list);
#	
#	my $nh = scalar(@h_list);
#	my $nc = scalar(@c_list);
#	my $nl = scalar(@l_list);
#	
#	my $first_h = 0;
#	my $first_c = 0;
#	my $first_l = 0;
#	
#	if ($nh > 0) {
#		$first_h = @h_list[0];
#	}
#	
#	if ($nc > 0) {
#		$first_c = @c_list[0];
#	}
#	
#	if ($nl > 0) {
#		$first_l = @l_list[0];
#	}
#
#	$out .= "\," . $nh . "\," . @max_min_h[1] . "\," . @max_min_h[0] . "\," . $first_h .
#	        "\," . $nc . "\," . @max_min_c[1] . "\," . @max_min_c[0] . "\," . $first_c .
#	        "\," . $nl . "\," . @max_min_l[1] . "\," . @max_min_l[0] . "\," . $first_l;
#	
#	return $out;
#}