sub uniq {
    my %seen;
    grep(!$seen{$_}++, @_);
}

sub parse_rnahybrid_output {
	($str) = @_;
	my $path = $str;
	my @out_table = ();
	
	open(rnaout, $path) or die "can't open txt file";
	
	while(<rnaout>) {
		my $line = $_;
		$line =~ s/\n//g;
	
		my @content = split(/\:/, $line);
		
		if (scalar(@content) == 11) {
			my $id_target = @content[0];
			my $len_target = @content[1];
		    my $id_query = @content[2];
			my $len_query = @content[3];
		
			my $min_fe = @content[4];
			my $p_value = @content[5];
			my $position = @content[6];
	
			my $part1 = @content[7];
			my $part2 = @content[8];
			my $part3 = @content[9];
			my $part4 = @content[10];
		
			if (! has_hybrid(@content)) {
				next;
			}
		
			my @dbstr = convert_to_dot_bracket($part1, $part2, $part3, $part4);
		
			my $seq_t = @dbstr[0];
			my $seq_q = @dbstr[1];
			my $dbs_t = @dbstr[2];
			my $dbs_q = @dbstr[3];
		
			$seq_t =~ s/U/T/g;
			$seq_q =~ s/U/T/g;
			$seq_q =~ tr/ATCG/TAGC/;
		
			if (! test_exact_match($seq_t, $seq_q)) { 
				$seq_q =~ tr/ATCG/TAGC/;
				$seq_q = reverse($seq_q);
				$dbs_q = reverse($dbs_q);
				
				my @data_row = ($id_target, $len_target, $len_query,
			             	 	$min_fe, $p_value, $position, 
			             	 	$seq_q, $seq_t, $dbs_q, $dbs_t);
				push(@out_table, [@data_row]);
			}
		} else {
			next;
		}	
	}
	close(rnaout);
	
	return @out_table;
}

sub parse_rh_seq_string {
	($str1, $str2) = @_;
	my $part1 = $str1;
	my $part2 = $str2;
	
	my $seq = '';
	my @char_1 = split(//, $part1);
	my @char_2 = split(//, $part2);
	my $arr_size = scalar(@char_1);
	
	if (scalar(@char_1) > scalar(@char_2)) {
		$arr_size = scalar(@char_2);
	}
		
	for (my $i = 0; $i < $arr_size; $i++) {
	 	if (@char_1[$i] !~ /\s|N/) {
			$seq .= @char_1[$i];
		}
		if (@char_2[$i] !~ /\s|N/) {
			$seq .= @char_2[$i];
		}
	}
	
	return $seq;
}

sub convert_to_dot_bracket {
	($str1, $str2, $str3, $str4) = @_;
	my $part1 = $str1;
	my $part2 = $str2;
	my $part3 = $str3;
	my $part4 = $str4;
	
	my $seq1 = '';
	my $seq2 = '';
	my $dbseq1 = '';
	my $dbseq2 = '';
	
	my @char_1 = split(//, $part1);
	my @char_2 = split(//, $part2);
	my @char_3 = split(//, $part3);
	my @char_4 = split(//, $part4);
	
	my $arr_size = scalar(@char_1);
	
	for (my $i = 0; $i < $arr_size; $i++) {
		if ((@char_2[$i] !~ /\s|N/) && (@char_3[$i] !~ /\s|N/)) {
			$dbseq1 .= ')';
			$dbseq2 .= '(';
			$seq1 .= @char_2[$i];
			$seq2 .= @char_3[$i];
		}
		
		if (@char_1[$i] !~ /\s|N/) {
			$dbseq1 .= '.';
			$seq1 .= @char_1[$i];
		} else {
			$dbseq1 .= ' ';
			$seq1 .= ' ';
		}
		
		if (@char_4[$i] !~ /\s|N/) {
			$dbseq2 .= '.';
			$seq2 .= @char_4[$i];
		} else {
			$dbseq2 .= ' ';
			$seq2 .= ' ';
		}	
	}
	
	$seq1 =~ s/\s//g;
	$seq2 =~ s/\s//g;
	$dbseq1 =~ s/\s//g;
	$dbseq2 =~ s/\s//g;
	
	return ($seq1, $seq2, $dbseq1, $dbseq2);
}

sub test_exact_match {
	($str1, $str2) = @_;
	my $seq1 = $str1;
	my $seq2 = $str2;
	
	my $ind = -1;
	if (length($seq1) > length($seq2)) {
		$ind = index($seq1, $seq2);
	} else {
		$ind = index($seq2, $seq1);
	}
	
	return ($ind >= 0);
}

sub has_hybrid {
	my @row_dat = @_;
	my $sum1 = length(@row_dat[7]) + length(@row_dat[8]);
	my $sum2 = length(@row_dat[9]) + length(@row_dat[10]);
	return !($sum1 == 0 || $sum2 == 0);
}

sub get_sl_seq {
	($str1, $str2, $n, $sk, $cut) = @_;
	my $seq = $str1;
	my $match_seq = $str2;
	my $start = $n;
	$seq =~ s/\n//g;
	$match_seq =~ s/\n//g;
	
	my $pos_best = get_exact_start($seq, $match_seq, $start);

	my $tail_start = $pos_best + length($match_seq);
	my $utseq = 'TTTT';
	my $skip = $sk;
	
	$offset = $tail_start + $skip;
	my $indt = index($seq, $utseq, $offset);

	if ($indt < 0) {
		$indt = $tail_start + $cut;	
	}
	
	my $tail_seq = substr($seq, $tail_start, $indt - $tail_start + 4);
	
	if (length($tail_seq) > $cut) {
		$tail_seq = substr($tail_seq, 0, $cut);
	}
	
	if (length($tail_seq) == 0) {
		$tail_seq = substr($tail_seq, 0, $cut);
	}
	
	return $tail_seq;
}

sub get_exact_start {
	($str1, $str2, $n) = @_;
	my $seq = $str1;
	my $match_seq = $str2;
	my $start = $n;
	
	my @pos = ();
    my $offset = 0;
	my $ind = index($seq, $match_seq, $offset);
	push(@pos, ($ind));

	while ($ind != -1) {
		$offset = $ind + length($match_seq);
		$ind = index($seq, $match_seq, $offset);
		if ($ind != -1) {
			push(@pos, ($ind));
		}
	}

	my $pos_best = @pos[0];
	my $diff_min = abs($pos_best - $start);

	foreach my $p (@pos) {
		my $diff = abs($p - $start);
		if ($diff < $diff_min) {
			$pos_best = $p;
			$diff_min = $diff;
		}	
	}

	if ($pos_best < 0) {
		$pos_best = $n;
	}
	
	return $pos_best;
}

sub num_mismatches_in_hybrid {
	my ($hyb) = @_;
	
	my @hyb_parts = split("&", $hyb);

	my $rep = @hyb_parts[0];
	$rep =~ s/^\.+//g;
	$rep =~ s/\.+$//g;

	my $ar = @hyb_parts[1];
	$ar =~ s/^\.+//g;
	$ar =~ s/\.+$//g;

	my $hyb_trimmed = $rep . $ar;
	
	my $count = 0;
	while ($hyb_trimmed =~ /\./g) { $count++; }
	
	return $count;
}

sub max_hybrid_helix_length {
	my ($hyb) = @_;
	
	my @hyb_parts = split("&", $hyb);

	my $rep = @hyb_parts[0];
	$rep =~ s/^\.+//g;
	$rep =~ s/\.+$//g;

	my $ar = @hyb_parts[1];
	$ar =~ s/^\.+//g;
	$ar =~ s/\.+$//g;

	my $hyb_trimmed = $rep . $ar;
	
	my @yval = ();
	my @xval = ();

	my @chars = split('', $hyb_trimmed);

	my $count = 0;
	my $yval_max = 0;
	push(@yval, $count);
	for (my $i = 0; $i < scalar(@chars); $i++)  {
		push(@xval, $i);
	
		if (@chars[$i] eq '(') {
			$count += 1;
		} elsif (@chars[$i] eq ')') {
			$count -= 1;
		} else {
			$count += 0;
		}

		push(@yval, $count);
	
		if ($count > $yval_max) {
			$yval_max = $count;
		}
	}
	
	my @tps = ();
	push(@tps, 0);
	push(@tps, $yval_max);
	for (my $i = 1; $i < (scalar(@yval) - 1); $i++) {
		my $a = @yval[$i - 1];
		my $b = @yval[$i];
		my $c = @yval[$i + 1];
		if (($a != $b) && ($b == $c)) {
			push(@tps, $b);
		}
	}
	
	my @tps_uniq = uniq(@tps);
	my @tps_sorted = sort {$a <=> $b} @tps_uniq;

	my $len_max = 0;
	for (my $i = 1; $i < scalar(@tps_sorted); $i++) {
		my $len = abs(@tps_sorted[$i] - @tps_sorted[$i - 1]);
		if ($len > $len_max) {
			$len_max = $len;
		}
	}
	
	return $len_max;
} 

sub ar_basepair_stats {
	my ($dbstr, $part) = @_;
		
	my @toks = split('&', $dbstr);
	my $ar = @toks[$part];
	
	my $count_junct = 0;
	my $bp_count_junct = 0;
	my @ar_chars = split('', $ar);
	
	if ($part == 1) {
		for (my $i = (scalar(@ar_chars) - 1); $i >= 0; $i--) {
			if ($count_junct >= 25) {
				last;
			} 
			if (@ar_chars[$i] eq ')') {
				$bp_count_junct++;
			}
			$count_junct++;
		}
	} else {
		for (my $i = 0; $i < scalar(@ar_chars); $i++) {
			if ($count_junct >= 25) {
				last;
			} 
			if (@ar_chars[$i] eq '(') {
				$bp_count_junct++;
			}
			$count_junct++;
		}
	}
	
	my $r_bp_junct_ar = ($bp_count_junct / $count_junct);
	
	my $bp_count_all = 0;
	for (my $i = 0; $i < scalar(@ar_chars); $i++) { 
		if ($part == 1 && @ar_chars[$i] eq ')') {
			$bp_count_all++;
		} elsif ($part == 0 && @ar_chars[$i] eq '(') {
			$bp_count_all++;
		}
	}
	my $n = scalar(@ar_chars);
	my $r_bp_all_ar = ($bp_count_all / $n);
	
	return ($r_bp_junct_ar, $r_bp_all_ar);
}

return 1;