% I got the following script from Dr. Joseph Fass:

% script to parse the results of "maq pileup"

% --- Setup: warnings, file handles, library name, output accumulator ---

% Several ratios below are 0/0 at positions with no usable reads; silence the
% divide-by-zero warning in both the newer and the older Octave style.
warning ("off", "Octave:divide-by-zero"); % new way
warn_divide_by_zero = 0; % old way

% Open the input and output files, failing early with a clear message rather
% than letting a later feof()/fgetl() call fail on an invalid handle.
[fid_in, open_msg] = fopen("pileup.out","rt");
if (fid_in < 0)
  error("could not open pileup.out for reading: %s", open_msg);
endif
[fid_out, open_msg] = fopen("parsed_pileup.txt","wt");
if (fid_out < 0)
  fclose(fid_in); % do not leak the input handle when the output cannot be opened
  error("could not open parsed_pileup.txt for writing: %s", open_msg);
endif

% Base the library name on the current directory name (mkdir mindfully!).
% fileparts() is used instead of the legacy split() function, whose
% blank-padded char-matrix result could carry trailing spaces into the
% library column.  Name and extension are re-joined so that a directory
% called e.g. "lib.v2" is kept whole.
[parent_dir, dir_name, dir_ext] = fileparts(pwd);
library = [dir_name, dir_ext];

output = cell(0); % initialize output as a cell array; elements are strings with unequal lengths
% --- Main parsing loop ---
% Reads the pileup file line by line and appends one tab-separated summary
% record per position to the cell array `output`.
%
% Relies on variables prepared earlier in the script:
%   fid_in  - handle of the pileup file being read
%   library - text placed in the third column of every record
%   output  - cell array that collects the record strings
%
% Input fields as consumed below (tab separated): reference sequence name,
% position, reference base, depth, read identities, read qualities and,
% optionally, a seventh field (assembly qualities).  The first character of the
% read-identity and read-quality fields is skipped (the original author notes
% that these fields begin with '@').  The depth field is not used: the depth
% reported in the record is the number of high-quality reads counted here.
% NOTE(review): the qualities field presumably requires maq pileup's verbose
% output - confirm against the maq documentation.
%
% Record columns:
%   refseq position library refbase A a T t C c G g , . depth FrAa FrTt FrCc
%   FrGg SkewAmod SkewTmod SkewCmod SkewGmod MQ(Aa) MQ(Tt) MQ(Cc) MQ(Gg) MQ(,.)
%   MQ(all) deltaQ-A deltaQ-T deltaQ-C deltaQ-G Aa_HQ_and_LQ Tt_HQ_and_LQ
%   Cc_HQ_and_LQ Gg_HQ_and_LQ ref_HQ_and_LQ FrHQ_for_Aa FrHQ_for_Tt FrHQ_for_Cc
%   FrHQ_for_Gg FrHQ_for_ref quality_cutoff
% where A is the count of high-quality forward reads with base changed to A
%   ... a is the count of high-quality reverse reads with base changed to A
%   ... , and . are the counts of high-quality forward and reverse reads agreeing with the refbase
%   ... MQ(Aa) is the mean quality of all high-quality bases changed to A or a

% Read symbols that are counted, in output-column order.  Odd entries are the
% forward-strand symbol and even entries the reverse-strand symbol of each
% pair: (A,a) (T,t) (C,c) (G,g) and (',', '.') for reads agreeing with the
% reference base.
symbols = "AaTtCcGg,.";

while ~feof(fid_in)
  txt = fgetl(fid_in);
  % fgetl returns the number -1 at end of file; blank lines carry no record.
  if (~ischar(txt) || isempty(txt))
    continue;
  endif

  tab_pos = find(txt == "\t"); % positions of the tabs separating the fields
  if (numel(tab_pos) < 5)
    error("pileup parser: expected at least 6 tab-separated fields but found %d in line: %s", numel(tab_pos) + 1, txt);
  endif

  refseq = txt(1:tab_pos(1)-1);
  % str2double is used instead of str2num because str2num evaluates its
  % argument as code, which is unsafe on text read from a file.  A non-numeric
  % position is reported as NaN in the record.
  position = str2double(txt(tab_pos(1)+1:tab_pos(2)-1));
  refbase = txt(tab_pos(2)+1:tab_pos(3)-1);
  changes = txt(tab_pos(4)+2:tab_pos(5)-1); % start after '@' symbol
  if (numel(tab_pos) >= 6) % a field follows the qualities ("assembly qualities")
    qualities = txt(tab_pos(5)+2:tab_pos(6)-1);
  else % the qualities field is the last one on the line
    qualities = txt(tab_pos(5)+2:end);
  endif

  % Numeric quality of every read: character code minus 33.
  quals = double(qualities) - 33;

  % Indices and counts of the reads showing each symbol.
  hit_idx = cell(1, 10);
  n_all = zeros(1, 10);
  for k = 1:10
    hit_idx{k} = find(changes == symbols(k));
    n_all(k) = numel(hit_idx{k});
  endfor

  % Compute the high vs low quality cutoff from the reads that agree with the
  % reference: mean minus one standard deviation, rounded up.
  % note: assumes that quality scores are in integer form
  agree_qual = quals([hit_idx{9}, hit_idx{10}]);
  if (isempty(agree_qual))
    quality_cutoff = 20; % meaningless in this case, but shouldn't be zero
  elseif (numel(agree_qual) == 1)
    quality_cutoff = agree_qual(1); % variance of a single value is not useful
  else
    quality_cutoff = ceil(mean(agree_qual) - sqrt(var(agree_qual)));
  endif

  % For each symbol keep only the reads whose quality reaches the cutoff.
  hq_idx = cell(1, 10);
  n_hq = zeros(1, 10);
  for k = 1:10
    idx = hit_idx{k};
    hq_idx{k} = idx(quals(idx) >= quality_cutoff);
    n_hq(k) = numel(hq_idx{k});
  endfor
  depth_HQ = sum(n_hq); % total # of high-quality reads

  % Per-pair totals in the order Aa, Tt, Cc, Gg, reference-agreeing.
  pair_all = n_all(1:2:end) + n_all(2:2:end); % high- and low-quality reads
  pair_hq = n_hq(1:2:end) + n_hq(2:2:end);    % high-quality reads only

  % Sum of the high-quality scores of each pair.
  pair_qsum = zeros(1, 5);
  for p = 1:5
    pair_qsum(p) = sum(quals([hq_idx{2*p-1}, hq_idx{2*p}]));
  endfor

  % Mean quality per pair and over all high-quality reads (NaN when a pair has
  % no high-quality reads, because this is 0/0).
  mean_pair_qual = pair_qsum ./ pair_hq;
  mean_all_qual = sum(pair_qsum) / depth_HQ;

  % Mean quality of each base change relative to the reference-agreeing reads.
  delta_mean_qual = - (mean_pair_qual(5) - mean_pair_qual(1:4));

  % Frequency of base changes from reference to different nucleotides.
  change_freq = pair_hq(1:4) / depth_HQ;

  % Compare [A:a] and [forward:reverse] ratios; the +1 terms keep the ratios
  % finite when a count is zero.
  strand_ratio = (n_hq(10) + 1) / (n_hq(9) + 1);
  skew_mod = ((n_hq(1:2:7) + 1) ./ (n_hq(2:2:8) + 1)) * strand_ratio;

  % Fraction of the reads of each pair that are high quality.
  hq_fraction = pair_hq ./ pair_all;

  % Assemble the numeric columns in record order and format each value with
  % num2str, every value preceded by a tab.
  fields = [n_hq, depth_HQ, change_freq, skew_mod, mean_pair_qual, mean_all_qual, delta_mean_qual, pair_all, hq_fraction, quality_cutoff];
  numeric_text = "";
  for v = fields
    numeric_text = [numeric_text, "\t", num2str(v)];
  endfor

  % Save the output line for this position.
  output{end+1} = [refseq, "\t", num2str(position), "\t", library, "\t", refbase, numeric_text];
endwhile
% Write the accumulated records to the output file, one per line, in the
% order in which they were collected, then release both file handles.
n_records = numel(output);
for rec = 1:n_records
  fdisp(fid_out, output{rec});
endfor
fclose(fid_out);
fclose(fid_in);
