%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% SRAP analysis and simulations for eisosomes data
% Written by Mike Lacy for Berro lab at Yale University. 
% Last updated May 2017.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% This code cannot be executed as-is. Requires some input data/files:
% lengths[] - array of measured eisosome lengths from various images
% files{} - one file for each image containing the following data, generated from ImageJ macro:
%  f[] - filament measure results from ImageJ macro giving filament end 
%    line traces
%  c[] - spot data, results from ImageJ macro measuring circles for each 
%    SRAP spot
%  smlm[] - output from GDSC SMLM PeakFit plugin, containing columns for 
%    frame, X, Y, Precision, Signal Intensity, SNR
% Note, all X,Y measurements from ImageJ are given in Pixel units, and
%   the origin (0,0) is the *top* left, so any distance calculations 
%   between measured and calculated coordinates need to invert Y.
% User can make edits at locations marked with XX in comments
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Section switches: set a flag to true to execute the matching section below.
doFilQuant          = false;  % import ImageJ results and fit filament ends
doSimulatedDatasets = true;   % generate simulated 'distance from end' datasets
doFitSimulations    = false;  % test accuracy of the error-function end fit
showPlots           = false;  % true: show plots and pause for each filament fit; false: skip (faster)

% Camera calibration, determined for our microscope (nm per pixel).
nmPerPixel = 70;

%% Import and fit filament ends from intensity profile line traces
% Run each filament_data file to load the arrays containing filament
% profiles and spot data, taken from the ImageJ macro results.

% XX user will edit here: names of the data scripts to process (1-by-N cell row).
files = { ...
    'BP_filament_data3', ...
    'BO_filament_data3', ...
    'BM_filament_data3', ...
    'BL_filament_data3', ...
    'BK_filament_data3'  ...
    };
%could be one, or a list of files with the Results data below:
% Each file in the list should have arrays f[], c[], and smlm[].
%   Other data table formats could be used for alternate analyses but would
%   need to adapt the code written as below.
%
%f[] has filament line "measure" result followed by pixel intensity profile.
% filament line entries where length f(:,2) is only 1-2 pixels means the 
% spot was off a filament and I measured a "null" entry.
%columns in f[] = 1:n 2:Area 3:Mean 4:X(line start) 5:Y(line start) 
% 6:ANGLE(deg) 7:slice 8:LENGTH 9:Pixel(profile) 10:Value 11:SpotID
%
%c[] has spot measure results for all the SRAP events measured in ImageJ
% macro, in the same order as the filament line measures.
%columns in c[] = 1:n 2:Area 3:Mean 4:X 5:Y 6:XM 7:YM 8:slice
% XM,YM are the brightness-weighted center of mass
%
%smlm[] has results of GDSC SMLM PeakFit localizations on frames 50-200 of recorded movies.
% The results table has been cleaned in Excel so that only the following columns are used:
% columns in smlm[] = 1:frame, 2: fitX, 3: fitY, 4:Precision, 5:Signal, 6:SNR 

if(doFilQuant)

% Array of data structures, one element per entry of files{}, that stores
% the fits, distance calculations, etc. for that data file.
allData = struct('datafile', files, 'fits', cell(1), 'dists', [], 'distsnm', []);

% Custom fit model for eisosome end intensity. This function is the result
%    of the Heaviside step function convolved with a Gaussian with sigma
%    set to 1.85 (in pixels=135nm) for diffraction-limited PSF in our
%    microscope. Coefficients: a = plateau height, m = position of the step.
eisfittype = fittype('a*0.5*(1-erf((x-m)/(sqrt(2)*1.85)))',...
    'dependent',{'y'}, 'independent',{'x'},'coefficients',{'a','m'});

% Maximum |dX| and |dY| offset, in pixels, allowed between an SMLM hit and a
% spot centroid. Note the test below is applied per axis, so the accepted
% region is a square box of half-width smlmcutoff, not a circle.
smlmcutoff = 1;
allSmlm = [];   % all SMLM localizations pooled over every data file

for b = 1:numel(files)
    % Executes the data script, which is expected to define f, c and smlm.
    % NOTE(review): imageSizeY is used below and is presumably also defined
    % by each data script -- confirm.
    run(files{b});

    % Rows of true line traces. If a line trace was only 1 or 2 pixels, it
    % was a 'null' entry, a spot that wasn't near a filament.
    ind = f(:,8)>3;
    lines = f(ind,:);

    allSmlm = [allSmlm; smlm];

    % Count ROWS explicitly: length() of an N-by-11 matrix returns
    % max(N,11) and would index past the last row whenever N < 11.
    nLines = size(lines,1);

    for i = 1:nLines
        spotID = lines(i,11);
        profile = f(f(:,11)==spotID,9:10);  %columns: pixel position, intensity
        profile(:,2) = profile(:,2)-min(profile(:,2));  %subtract baseline

        adjust = 0;
        if size(profile,1)<8    %fit() doesn't work if too few points in trace
            adjust = 1; %set flag to subtract the extra points later.
            %append data points: add 2 initial points at max height
            profile = [0, max(profile(:,2)); 1, max(profile(:,2)); ...
                        profile(:,1)+2, profile(:,2)];
        end
        % Pad end of the intensity trace with zeros. Sometimes it was hard
        % to get much of the line past the end, especially when the images
        % were crowded. This improves fitting with error function
        profile = [profile; profile(end,1)+1, 0; profile(end,1)+2, 0];

        %% Fitting:
        % Start from the maximum intensity for a and mid-trace for m.
        eisfit = fit(profile(:,1), profile(:,2), eisfittype, 'StartPoint', [max(profile(:,2)), 0.5*max(profile(:,1))]);
        allData(b).fits{i} = eisfit;
        % confint() row 1 holds the lower confidence bounds; store the
        % distance from each estimate down to its lower bound.
        ci = confint(eisfit);
        allData(b).errIntA(i) = eisfit.a - ci(1,1);
        allData(b).errIntM(i) = eisfit.m - ci(1,2);

        if(showPlots)
            figure(1), clf, hold on;
            plot(profile(:,1), profile(:,2), '.', 'MarkerSize', 16);
            plot(allData(b).fits{i});
            title(sprintf('filament #%d, i = %d', lines(i,11),i));
            legend('data', 'fit to eisfittype model');
            plot([eisfit.m eisfit.m], [0 eisfit.a*0.5], '-b');

            hold off;
        end


        %% Calculate XY coordinate position of end, using line geometry
        theta = lines(i,6);
        lineLength = lines(i,8);

        %ImageJ "Measure" result gives the *midpoint* of the line - find Start
        lineStartX = lines(i,4) - (lineLength/2)*cosd(theta);
        %ImageJ uses flipped Y coordinate, so the origin (0,0) is the top left.
        lineStartY = imageSizeY - lines(i,5) - (lineLength/2)*sind(theta);

        spotX = c(spotID,6);    %center of mass of spot measure
        spotY = imageSizeY - c(spotID,7);

        h = eisfit.m;  %position of the end step, on line profile
        if(adjust)	% added dummy values to profile[] to help fitting
            h = h - 2;
            profile = profile(3:end,:);
            % Undo the +2 shift applied to the x positions as well, so the
            % stored profile is in the same coordinates as the corrected h.
            profile(:,1) = profile(:,1) - 2;
        end

        xEnd = lineStartX + h*cosd(theta);
        yEnd = lineStartY + h*sind(theta);

        allData(b).profiles{i} = profile;
        allData(b).h(i) = h;
        allData(b).theta(i) = theta;
        allData(b).lineLength(i) = lineLength;
        allData(b).spotX(i) = spotX;
        allData(b).spotY(i) = spotY;
        allData(b).lineStartX(i) = lineStartX;
        allData(b).lineStartY(i) = lineStartY;
        allData(b).xEnd(i) = xEnd;
        allData(b).yEnd(i) = yEnd;

        %from the single-molecule localizations, find all the locs whose X and
        % Y are each within smlmcutoff pixels of the spot center (ImageJ coordinates)
        smlmHits = find(abs(smlm(:,2)-c(spotID,6))<smlmcutoff & abs(smlm(:,3)-c(spotID,7))<smlmcutoff);

        allData(b).smlm{i} = smlm(smlmHits,:);
        allData(b).smlm{i}(:,3) = imageSizeY - allData(b).smlm{i}(:,3);   %flip Y coordinate
        allData(b).Nsmlm(i) = length(smlmHits);
        if allData(b).Nsmlm(i)>1
            errs = sqrt((allData(b).smlm{i}(:,2)-mean(allData(b).smlm{i}(:,2))).^2 ...
                +(allData(b).smlm{i}(:,3)-mean(allData(b).smlm{i}(:,3))).^2);
            allData(b).smlmRMSE(i) = sqrt(mean(errs.^2));
            %root mean square error of smlm localizations, in pixel distance
        else
            allData(b).smlmRMSE(i) = NaN;
        end

        % First frame in which a localization appears for this spot.
        if allData(b).Nsmlm(i)>0
            allData(b).tAppear(i) = min(allData(b).smlm{i}(:,1));
        else
            allData(b).tAppear(i) = NaN;
        end

        %Euclidean distance to the end point, in pixels and nm
        d = sqrt((spotX-xEnd)^2 + (spotY-yEnd)^2);
        allData(b).dists(i) = d;
        allData(b).distsnm(i) = d*nmPerPixel;

        %calculate spot's distance along eisosome profile line,
        % given angle from (line start), (end X,Y), to (spot X,Y)
        vec1 = [lineStartX-xEnd, lineStartY-yEnd, 0];   % end -> line start
        vec2 = [spotX-xEnd, spotY-yEnd, 0];             % end -> spot
        phi = atan2(norm(cross(vec1,vec2)),dot(vec1,vec2));
        allData(b).pe(i) = d*cos(phi);          % component along the line
        allData(b).distToLine(i) = d*sin(phi);  % perpendicular component
        allData(b).phi(i) = rad2deg(phi);

        if(allData(b).Nsmlm(i)>1)
            % Same decomposition for every individual localization; vec1
            % from above is reused since it does not depend on s.
            locs = allData(b).smlm{i};
            for s = 1:size(locs,1)
                allData(b).superd{i}(s) = sqrt((locs(s,2)-xEnd)^2 + (locs(s,3)-yEnd)^2);
                vec2 = [locs(s,2)-xEnd, locs(s,3)-yEnd, 0];
                suphi = atan2(norm(cross(vec1,vec2)),dot(vec1,vec2));
                allData(b).superpe{i}(s) = allData(b).superd{i}(s)*cos(suphi);
                allData(b).superdistToLine{i}(s) = allData(b).superd{i}(s)*sin(suphi);
            end
        else
            allData(b).superpe{i} = allData(b).pe(i);   %just take the pe value based on the spot if no quality super-res loc's
        end

        allData(b).superpeAvg(i) = mean(allData(b).superpe{i});

        if(showPlots)
            figure(2), clf, hold on;
            xPend = lineStartX + lineLength*cosd(theta);   %x,y of profile end
            yPend = lineStartY + lineLength*sind(theta);
            plot([lineStartX, xEnd, xPend], [lineStartY, yEnd, yPend], '*-');
            plot(lineStartX, lineStartY, '*r', 'MarkerSize', 16); %filament line start
            plot(spotX, spotY, 'o');
            plot(allData(b).smlm{i}(:,2), allData(b).smlm{i}(:,3), 'r.','MarkerSize',10)
            axis equal;
            title(sprintf('Filament and spot #%d, distance = %.4g pix', spotID, d));
            pause(3);
        end
    end
end

% Pool results over all data files: superpe has one entry per localization,
% superpeAvg has one entry per spot.
superpe = [];
superpeAvg = [];
for i=1:numel(allData)
    superpe = [superpe, [allData(i).superpe{:}]];
    superpeAvg = [superpeAvg, [allData(i).superpeAvg]];
end

% can set some thresholds to eliminate bad fits or outliers:
dtl=[allData(:).distToLine];
errIntMs = [allData(:).errIntM];
errIntAs = [allData(:).errIntA];
cutoffDtl = mean(dtl) + 2*std(dtl);
cutoffM = mean(errIntMs) + 2*std(errIntMs);
cutoffA = mean(errIntAs) + 2*std(errIntAs);
eliminate = (dtl>cutoffDtl) | (errIntMs>cutoffM) | (errIntAs>cutoffA);
% there is some overlap among these (e.g. a bad fit gives outlier in both M and A). typically flags only 5-10% of spots

% 'eliminate' has one flag per spot, so it lines up with superpeAvg only.
% superpe pools every individual localization and generally has a different
% length, so it cannot be indexed with 'eliminate' directly.
% superpeAvg(eliminate) = [];	%remove data if they are flagged as bad fits or outliers

measureData = superpeAvg .* nmPerPixel;   % per-spot distance from filament end, nm

end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Eisosome filaments distance calculation and simulations

% Load the measured eisosome lengths by running eisLengths.m.
% XX alternatively paste the measurements or load them from a data file: the
% result must be an array called lengths(), as measured in ImageJ, in pixels.
run('eisLengths.m');

% Convert filament lengths to nm and summarize them.
eis = nmPerPixel .* lengths;
meanLength = mean(eis);
stdLength = std(eis);

%% Simulations to generate datasets of 'distance from end' distributions of
% recovery event positions for each filament length.
if(doSimulatedDatasets)

nSim = 10000;   % simulated events per filament

% All values in nm, XX determined from experimental data or simulations XX:
spotNoise = 30;   % mean precision of PeakFit single-molecule localizations
endNoise  = 60;   % standard deviation of simulated error-function fits
endError  = -35;  % systematic error from fitting noisy intensity profiles with the erf model
skewNoise = 55;   % standard deviation for simulated fits with "dynamic end" artifact
skewEnd   = -35;  % mean end error for simulated fit errors with "dynamic end" artifact
endZone   = 200;  % hybrid model, length of 'dynamic zone' in nm. Could try any value.

% One row per filament, one column per simulated event.
nEis = length(eis);
uniformModel = zeros(nEis,nSim);
endModel     = zeros(nEis,nSim);
endModelskew = zeros(nEis,nSim);
hybridModel  = zeros(nEis,nSim);

% Each simulated value is (spot position + spot localization error) minus
% (end position, taken as zero, + end localization error).
% The random draws are made in a fixed order so results are reproducible
% for a given random-number seed.
for i = 1:nEis
    % uniform model: position uniform on 0 to half the filament length.
    truePos = rand(nSim,1).*(0.5*eis(i));
    spotErr = spotNoise.*randn(nSim,1);
    endLoc  = endError + endNoise.*randn(nSim,1);
    uniformModel(i,:) = truePos + spotErr - endLoc;

    % end model: spot sits at the end (position zero).
    % (can also add endError here to account for bias of erf() model fit.)
    spotErr = spotNoise.*randn(nSim,1);
    endLoc  = endNoise.*randn(nSim,1);
    endModel(i,:) = spotErr - (endLoc);

    % end model with skew: end localization shifted by additional skew value.
    spotErr = spotNoise.*randn(nSim,1);
    endLoc  = endError + skewEnd + skewNoise.*randn(nSim,1);
    endModelskew(i,:) = spotErr - endLoc;

    % hybrid model: position uniform on 0 to endZone.
    truePos = rand(nSim,1).*(endZone);
    spotErr = spotNoise.*randn(nSim,1);
    endLoc  = endError + endNoise.*randn(nSim,1);
    hybridModel(i,:) = truePos + spotErr - endLoc;
end
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Plotting
% make histograms of analyzed data and models

% Overlay normalized histograms of the simulated models and the measured
% data. Each series is drawn only if its variable exists: the model arrays
% are created only when doSimulatedDatasets is set, and measureData only
% when doFilQuant is set, so plotting them unconditionally fails with an
% undefined-variable error when either section is switched off.
figure, hold on;
bins = -500:50:1200;    % histogram bin edges, nm
leg = {};               % legend entries, in plotting order

if exist('uniformModel','var')
    histogram(uniformModel(:), bins,'Normalization', 'probability', 'DisplayStyle', 'stairs');
    leg{end+1}='sim: uniform model';
end
if exist('endModel','var')
    histogram(endModel(:), bins,'Normalization', 'probability', 'DisplayStyle', 'stairs');
    leg{end+1}='sim: end model';
end
if exist('endModelskew','var')
    histogram(endModelskew(:), bins,'Normalization', 'probability', 'DisplayStyle', 'stairs');
    leg{end+1}='sim: end model with dynamic skew noise';
end
if exist('measureData','var')
    histogram(measureData(:), bins,'Normalization', 'probability', 'DisplayStyle', 'stairs', 'EdgeColor', [0 0 0]);
    leg{end+1}='filament traces/super-res locs Avg to end';
end

if isempty(leg)
    warning('Nothing to plot: enable doSimulatedDatasets and/or doFilQuant.');
else
    legend(leg);
end
xlabel('position from eisosome filament end, nm');
ylabel('probability');

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Fit simulations test
% A test of the accuracy of the error function model for fitting the eisosome end intensity profile.
% Simulate a set of emitter positions, create an intensity profile as a sum of Gaussians,
% then do fit and compare to true position.
%
% Because of the heavy load of 100-1000x repeated fitting function, this 
% section may take several minutes to run.
if(doFitSimulations)

showPlots = 0;	% set to 1 to show plots of traces and fits, very slow.
doDiscrete = 0;   % set to 1 to run simulations for discrete n emitters
doFullDistro = 1; % set to 1 to run simulations for full distribution of emitter numbers

x = 0:10;     % range to use for x positions
doUpstream = 1; % set to 1 to include emitters on the eisosome upstream of simulated x
filamentPixelEnd = 5;   % x position of true filament end
Ntests = 1000;
maxNCenters = 20; % max # for discrete n emitters

%XX Variables determined empirically or supplied by user estimates:
sig = 1.85;   % diffraction-limited PSF radius (pixels), use as stdev for Gaussian emitter profile
nmPerPixel = 70;    % camera pixel size
labelFrac = 0.03; % fraction of labeled sites in eisosome, for calculating numbers of emitters
pil1PerNm = 2.8;    % density of pil1 molecules in eisosomes, molecules/nm
Ynoise = 1; % relative value of noise to add to Y intensity values. set to 0 if no noise to add
%^this noise term is somewhat arbitrary, as fluor profile is in AU. Try a
%few values and compare visually to the profiles extracted from data images

% "dynamic end artifact": adds N extra fluors at the end position,
% i.e. how many new fluorophores will be added during the initial frames used to make measurement.
% XX set to 0 to disable, to 1, or to a more heavily weighted "skew" to
% correspond to faster dynamics.
% NOTE(review): this was previously assigned from an undefined name ('ez'),
% which stopped the script here; 0 (artifact off) is used as the default.
endSkew = 0;

eisfittype = fittype('a*0.5*(1-erf((x-m)/(sqrt(2)*1.85)))',...
    'dependent',{'y'}, 'independent',{'x'},'coefficients',{'a','m'});
% same fit model equation as used for data fitting

%% discrete simulations of specified number of emitters
if(doDiscrete)

% One row per test, one column per emitter count.
metricE = zeros(Ntests,maxNCenters);    % fitted end minus last emitter position
metricEdb = zeros(Ntests,maxNCenters);  % fitted end minus true filament end

for nC = 1:maxNCenters
    fprintf('Running fit test for nC = %d emitters...\n',nC);
    for i=1:Ntests
        centers = rand(nC,1)*filamentPixelEnd; %uniformly random in range from 0 to filamentPixelEnd
        comby = zeros(1,numel(x));  % summed intensity profile
        for k=1:nC
            % each emitter contributes a Gaussian profile centered on it
            pdist = makedist('Normal','mu',centers(k),'sigma',sig);
            comby = comby + pdf(pdist,x);
            if(doUpstream)
                %also put an emitter mirrored about 0 (upstream range) and get its tail contribution to the profile on x
                pdist2 = makedist('Normal','mu',-centers(k),'sigma',sig);
                comby = comby + pdf(pdist2,x);
            end
        end

        if(endSkew>0) %put an extra fluorophore on the filament end
            pdist = makedist('Normal','mu',filamentPixelEnd,'sigma',sig);
            comby = comby + pdf(pdist,x).*endSkew; %can set to 1 or use a more heavy weighted "skew"
        end

        if(Ynoise>0)  % add noise to the sum intensity profile.
            comby = comby + Ynoise*randn(1,numel(comby));
        end

        comby = comby - min(comby); % adjust so baseline =0

        % do fits, with appropriate bounds
        ferf = fit(x',comby',eisfittype,'StartPoint',[max(comby),0.5*max(x)],...
            'Lower',[0, min(x)], 'Upper',[max(comby), max(x)]);

        if(showPlots)
            figure(1);
            plot(x, comby, 'o');
            hold on;
            plot(ferf);
            legend(sprintf('data for %d emitters',nC),'ERF fit');

            if(endSkew>0)
                text(filamentPixelEnd,0,'endSkew ON');
            end
            hold off;
            pause(4);
        end

        %Calculate the difference from the fit result to the true position:
        % metricE: distance from the last emitter position
        % metricEdb: distance from the true end of the filament, which may or may not have a fluor there.
        metricE(i,nC) = ferf.m - max(centers);
        metricEdb(i,nC) = ferf.m - filamentPixelEnd;
    end
end
end

%% full distribution of emitters, not each discrete N
if(doFullDistro)
fprintf('Running fit tests for full distribution of emitters with %g labeling...\n',labelFrac);
%generate list of nCenters according to binomial distribution for some labeling fraction
% and for estimated dimensions of eisosome, using #trials = # Pil1 in the pixel length of the profile.
binoC = random('Binomial',round(filamentPixelEnd*nmPerPixel*pil1PerNm),labelFrac,Ntests,1);
binoC(binoC==0) = [];   %remove any cases of zero fluors.
% Be careful in case there is a sizeable # 0's at very low labelfrac?

metricEfullD = zeros(numel(binoC),1);   % fitted end minus true filament end, pixels
for i=1:length(binoC)
    %simulate centers
    centers = rand(binoC(i),1)*filamentPixelEnd; %uniformly random in range from 0 to filamentPixelEnd
    comby = zeros(1,numel(x));  % summed intensity profile
    for k=1:binoC(i)
        % each emitter contributes a Gaussian profile centered on it
        pdist = makedist('Normal','mu',centers(k),'sigma',sig);
        comby = comby + pdf(pdist,x);
        if(doUpstream)
            %also put an emitter mirrored about 0 (upstream range) and get its contribution to the profile on x
            pdist2 = makedist('Normal','mu',-centers(k),'sigma',sig);
            comby = comby + pdf(pdist2,x);
        end
    end
    if(endSkew>0) %put an extra fluorophore on the filament end
        pdist = makedist('Normal','mu',filamentPixelEnd,'sigma',sig);
        comby = comby + pdf(pdist,x).*endSkew; %can set to 1 or use a more heavy weighted "skew"
    end

    if(Ynoise>0)  % add noise to the sum intensity profile.
        comby = comby + Ynoise*randn(1,numel(comby));
    end

    comby = comby - min(comby); % adjust so baseline =0

    % fits with appropriate bounds
    ferf = fit(x',comby',eisfittype,'StartPoint',[max(comby),0.5*max(x)],...
        'Lower',[0, min(x)], 'Upper',[max(comby), max(x)]);

    if(showPlots)
        figure(1);
        plot(x, comby, 'o');
        hold on;
        plot(ferf);
        legend(sprintf('data for %d emitters',binoC(i)),'ERF fit');
        if(endSkew>0)
            text(filamentPixelEnd,0.1,'endSkew ON');
        end
        hold off;
        pause(4);
    end
    metricEfullD(i) = ferf.m - filamentPixelEnd;
end
end

%% plot histograms for fit error simulations
if(doFullDistro)
% histogram of errors for full simulated distribution of fluor numbers
    figure, hold on;
    leg1 = {};

    % errors are converted from pixels to nm before binning
    histogram(metricEfullD.*nmPerPixel, -350:25:350,'Normalization','probability');
    leg1{end+1} = 'ERF model fitting net error';

    xlabel('end error vs. true end, nm');   % axis is in nm, matching the conversion above
    ylabel('probability');
    if(endSkew>0)
        title(sprintf('Sum error vs. true end position, N=%d\nwith simulated distribution of fluor #s at %g labeled,\nusing pixel zone end at %d as true end position\n!Skewed by %d extra fluors at true end!',numel(binoC),labelFrac,filamentPixelEnd,endSkew));
    else
        title(sprintf('Sum error vs. true end position, N=%d\nwith simulated distribution of fluor #s at %g labeled,\nusing pixel zone end at %d as true end position', numel(binoC), labelFrac, filamentPixelEnd));
    end
    legend(leg1);
end
end