% Written by Erik Cheever % Modified by Allison Barlow and Alex Benn % Last Modified 2008-02-08 %----------------------------------------- % Open file and read sequence. f1='hiv_hum.txt'; f=fopen(f1); seq_hum=fread(f,'*char')'; % Find beginning of DNA sequence and read to end-of-line (cr='13') x=findstr(seq_hum,'ORIGIN'); seq_hum=seq_hum(x:end); x=findstr(seq_hum,13); seq_hum=seq_hum(x:end); seq_hum=seq_hum(isletter(seq_hum)); % just take letters (drop numbers and spaces). seq_hum_a=double((seq_hum=='a')); % find all of the letter 'a' and replace with 1. seq_hum_c=double((seq_hum=='c')); % find all of the letter 'c' and replace with 1. seq_hum_g=double((seq_hum=='g')); % find all of the letter 'g' and replace with 1. seq_hum_t=double((seq_hum=='t')); % find all of the letter 't' and replace with 1. fclose(f); f2='hiv_seq1.txt'; f=fopen(f2); seq_unk1=fread(f,'*char')'; % Find beginning of DNA sequence and read to end-of-line (cr='13') x=findstr(seq_unk1,'ORIGIN'); seq_unk1=seq_unk1(x:end); x=findstr(seq_unk1,13); seq_unk1=seq_unk1(x:end); seq_unk1=seq_unk1(isletter(seq_unk1)); % just take letters (drop numbers and spaces). seq_unk1_a=double((seq_unk1=='a')); % find all of the letter 'a' and replace with 1. seq_unk1_c=double((seq_unk1=='c')); % find all of the letter 'c' and replace with 1 seq_unk1_g=double((seq_unk1=='g')); % find all of the letter 'g' and replace with 1 seq_unk1_t=double((seq_unk1=='t')); % find all of the letter 't' and replace with 1 fclose(f); len_hum = length(seq_hum_a); len_unk1= length(seq_unk1_a); if (len_hum>len_unk1) l_zeros = zeros(1, floor((len_hum - len_unk1)/2)); r_zeros = zeros(1, ceil((len_hum - len_unk1)/2)); seq_unk1_a = horzcat(l_zeros, seq_unk1_a, r_zeros); seq_unk1_c = horzcat(l_zeros, seq_unk1_c, r_zeros); seq_unk1_g = horzcat(l_zeros, seq_unk1_g, r_zeros); seq_unk1_t = horzcat(l_zeros, seq_unk1_t, r_zeros); else l_zeros = zeros(1, floor((len_unk1 - len_hum)/2)); r_zeros = zeros(1, ceil((len_unk1 - len_hum)/2)); seq_hum_a = horzcat(l_zeros, seq_hum_a, r_zeros); seq_hum_c = horzcat(l_zeros, seq_hum_c, r_zeros); seq_hum_g = horzcat(l_zeros, seq_hum_g, r_zeros); seq_hum_t = horzcat(l_zeros, seq_hum_t, r_zeros); end; x_hum_unk1_a = xcorr(seq_hum_a, seq_unk1_a, 'unbiased'); x_hum_unk1_c = xcorr(seq_hum_c, seq_unk1_c, 'unbiased'); x_hum_unk1_g = xcorr(seq_hum_g, seq_unk1_g, 'unbiased'); x_hum_unk1_t = xcorr(seq_hum_t, seq_unk1_t, 'unbiased'); x_hum_unk1 = x_hum_unk1_a+x_hum_unk1_c+x_hum_unk1_g+x_hum_unk1_t; f3='hiv_seq2.txt'; f=fopen(f3); seq_unk2=fread(f,'*char')'; % Find beginning of DNA sequence and read to end-of-line (cr='13') x=findstr(seq_unk2,'ORIGIN'); seq_unk2=seq_unk2(x:end); x=findstr(seq_unk2,13); seq_unk2=seq_unk2(x:end); seq_unk2=seq_unk2(isletter(seq_unk2)); % just take letters (drop numbers and spaces). seq_unk2_a=double((seq_unk2=='a')); % find all of the letter 'a' and replace with 1. seq_unk2_c=double((seq_unk2=='c')); % find all of the letter 'c' and replace with 1 seq_unk2_g=double((seq_unk2=='g')); % find all of the letter 'g' and replace with 1 seq_unk2_t=double((seq_unk2=='t')); % find all of the letter 't' and replace with 1 fclose(f); len_unk2= length(seq_unk2_a); if (len_hum>len_unk2) l_zeros = zeros(1, floor((len_hum - len_unk2)/2)); r_zeros = zeros(1, ceil((len_hum - len_unk2)/2)); seq_unk2_a = horzcat(l_zeros, seq_unk2_a, r_zeros); seq_unk2_c = horzcat(l_zeros, seq_unk2_c, r_zeros); seq_unk2_g = horzcat(l_zeros, seq_unk2_g, r_zeros); seq_unk2_t = horzcat(l_zeros, seq_unk2_t, r_zeros); else l_zeros = zeros(1, floor((len_unk2 - len_hum)/2)); r_zeros = zeros(1, ceil((len_unk2 - len_hum)/2)); seq_hum_a = horzcat(l_zeros, seq_hum_a, r_zeros); seq_hum_c = horzcat(l_zeros, seq_hum_c, r_zeros); seq_hum_g = horzcat(l_zeros, seq_hum_g, r_zeros); seq_hum_t = horzcat(l_zeros, seq_hum_t, r_zeros); end; x_hum_unk2_a = xcorr(seq_hum_a, seq_unk2_a, 'unbiased'); x_hum_unk2_c = xcorr(seq_hum_c, seq_unk2_c, 'unbiased'); x_hum_unk2_g = xcorr(seq_hum_g, seq_unk2_g, 'unbiased'); x_hum_unk2_t = xcorr(seq_hum_t, seq_unk2_t, 'unbiased'); x_hum_unk2 = x_hum_unk2_a+x_hum_unk2_c+x_hum_unk2_g+x_hum_unk2_t; % subplot(4,1,1); % hold on; % stem(x_hum_unk1_a, 'b'); % stem(x_hum_unk2_a, 'r'); % subplot(4,1,2); % hold on; % stem(x_hum_unk1_c, 'b'); % stem(x_hum_unk2_c, 'r'); % subplot(4,1,3); % hold on; % plot(x_hum_unk1_g, 'b'); % plot(x_hum_unk2_g, 'r'); % subplot(4,1,4); % hold on; % plot(x_hum_unk1_t, 'b'); % plot(x_hum_unk2_t, 'r'); clf; hold on; % stem(x_hum_unk1(floor(length(x_hum_unk1)/2):end), 'b') % stem(x_hum_unk2(floor(length(x_hum_unk2)/2):end), 'r') stem(x_hum_unk1, 'ob') stem(x_hum_unk2, 'xr') hold off;