%% 1: differential expression
% For every row of the expression matrix, compare the samples labelled 1
% against the samples labelled 0 with a two-sample t-test and a Wilcoxon
% rank-sum test, then write both p-value columns to an Excel sheet.
clear;
data = csvread('gene_expression_254.csv');   % alternative input: 'miRNA_expression_250.csv'
group = data(1,:);           % first row: group label (1 or 0) of each sample column
exprData = data(2:end,:);    % remaining rows: one feature per row (was named "exp", which shadowed the built-in)
ngenes = size(exprData, 1);

% The group masks do not depend on the row, so build them once.
inGroup1 = (group == 1);
inGroup0 = (group == 0);

% Preallocate as row vectors (same orientation the growing arrays had).
h  = zeros(1, ngenes);
p  = zeros(1, ngenes);
p2 = zeros(1, ngenes);
h2 = false(1, ngenes);
for r = 1:ngenes
    [h(r), p(r)] = ttest2(exprData(r,inGroup1), exprData(r,inGroup0));
    % The rank-sum test is equivalent to a Mann-Whitney U-test.
    [p2(r), h2(r)] = ranksum(exprData(r,inGroup1), exprData(r,inGroup0));
end
% Column 1: t-test p-values, column 2: rank-sum p-values.
xlswrite('pvalue_gene.xlsx', cat(2, p', p2'), 1);   % alternative output: 'pvalue_miRNA.xlsx'

%% 2: linear regression
% Regress every row of Y on every row of miRData (one simple regression with
% intercept per pair) using olsvectory, which handles all rows of Y at once.
clear;
fidw = fopen('miRNA_gene_regression_247.csv', 'w');   % output file name by part
fmt_record = '%s,%s,%e,%e,%e\n';
[miRData, miRName] = xlsread('miRNA_cc_247_human.xlsx');
[nmiRs, nsamples] = size(miRData);

% Input expression data and gene name
[Y, GeneName] = xlsread('gene_cc_247.xlsx');
nGenes = size(Y, 1);

% Calculation and output of regression results
CVALUE = 1E-4;
for i = 1:nmiRs
    % Design matrix: intercept column plus the i-th row of miRData.
    X = cat(2, ones(nsamples,1), miRData(i,:)');
    results = olsvectory(Y', X);
    for j = 1:nGenes
        clear pv record;
        pv = results.pvalue(2,j);   % p-value of the slope for row j of Y
        % NOTE(review): SOURCE TEXT LOST HERE. The original continues with
        %     if pv
        % and then breaks off; the condition, the body of the branch (the
        % only place fmt_record, CVALUE, miRName and GeneName could be used)
        % and everything up to the orphaned fragment below are missing.
        % Restore the branch from the original file -- it is deliberately
        % not reconstructed here. Until then nothing is written to fidw.
    end
end
% NOTE(review): the loop closers above and this fclose were supplied during
% review so that the file parses and the handle is released; drop the fclose
% if the restored code already closes fidw.
fclose(fidw);

%% Orphaned fragment: tail of a row-filtering routine
% NOTE(review): the header and the beginning of this routine were lost
% together with the text flagged above. The surviving text is kept verbatim
% (commented out so the file parses). It clears Itmp(i) when a condition
% ending in ">= n_X/2" holds or when the std of the non-NaN entries of tmp
% is below 1e-10, then keeps the rows of X whose flag is still 1.
%
%       ... >=n_X/2 || std(tmp(~isnan(tmp)))<1e-10
%           Itmp(i)=0;
%       end
%   end
%   I=find(Itmp==1);
%   I=I';
%   W=X(I,:);

%% standardization for X according to rows
function W = standardization(X)
% STANDARDIZATION  Centre and scale every row of X.
%   X : m-by-n matrix
%   W : m-by-n matrix; row i is (X(i,:) - mean(X(i,:))) / std(X(i,:)),
%       with std normalised by n-1 (std(X,0,2)).
%   A row with zero standard deviation yields NaN entries (0/0).
n  = size(X, 2);
mu = mean(X, 2);
S  = std(X, 0, 2);
% Element-wise scaling; the former diag(1./S)*X built a dense m-by-m matrix.
W = (X - repmat(mu,1,n)) ./ repmat(S,1,n);
end

function results = olsvectory(y, x)
% PURPOSE: least-squares regression of every column of y on the same x
%---------------------------------------------------
% (The original note on this line was not ASCII and was lost to an encoding
%  error. From the code: y may hold m dependent variables, one per column,
%  and all of them are regressed on the same design matrix x in one call.)
%
% USAGE: results = olsvectory(y,x)
% where: y = dependent variables   (nobs x m)
%        x = independent variables (nobs x nvar)
%---------------------------------------------------
% RETURNS: a structure
%        results.meth   = 'ols'
%        results.beta   = bhat                     (nvar x m)
%        results.tstat  = t-stats                  (nvar x m)
%        results.pvalue = two-sided p-values       (nvar x m)
%        results.bstd   = std deviations for bhat  (nvar x m)
%        results.yhat   = yhat                     (nobs x m)
%        results.resid  = residuals                (nobs x m)
%        results.sige   = e'*e/(n-k)               (1 x m)
%        results.rsqr   = rsquared                 (1 x m)
%        results.nobs   = nobs
%        results.nvar   = nvars
%        results.y      = y data                   (nobs x m)
% NOT computed in this version (the code is commented out below):
%        results.rbar, results.dw, results.bint
% Warning: R-square and the F statistic are not well-defined unless x has a
% column of ones.
%---------------------------------------------------
% SEE ALSO: prt(results), plt(results)
%---------------------------------------------------
% Changed by Xiangzhong Fang from the program by
% James P. LeSage, Dept of Economics
% University of Toledo
% 2801 W. Bancroft St,
% Toledo, OH 43606
% jlesage@spatial-econometrics.com
%
% Barry Dillon (CICG Equity)
% added the 95% confidence intervals on bhat

if (nargin ~= 2)
    error('Wrong # of arguments to ols');
end
[nobs, nvar] = size(x);
[nobs2, m] = size(y); %#ok<ASGLU>
if (nobs ~= nobs2)
    error('x and y must have same # obs in ols');
end

results.meth = 'ols';
results.y = y;
results.nobs = nobs;
results.nvar = nvar;

% inv(x'*x): via the economy-size QR factor for moderate nobs, directly from
% the normal equations for very large problems.
if nobs < 10000
    [~, r] = qr(x, 0);
    xpxi = (r'*r)\eye(nvar);
else
    xpxi = (x'*x)\eye(nvar);
end

results.beta = xpxi*(x'*y);
results.yhat = x*results.beta;
results.resid = y - results.yhat;
sigu = sum(results.resid.*results.resid);   % residual sum of squares, 1 x m
results.sige = sigu/(nobs-nvar);
% Outer product (nvar x 1)*(1 x m): variance of every coefficient for every
% column of y.
tmp = (diag(xpxi))*(results.sige);
sigb = sqrt(tmp);
results.bstd = sigb;
%tcrit=-tinv(1-.025,nobs);
%results.bint=[results.beta-tcrit.*sigb, results.beta+tcrit.*sigb];
results.tstat = results.beta./sigb;

ym = y - repmat(mean(y),nobs,1);
rsqr1 = sigu;
rsqr2 = sum(ym.*ym);                        % total sum of squares, 1 x m
results.rsqr = 1.0 - rsqr1./rsqr2;          % r-squared
%rsqr1 = rsqr1/(nobs-nvar);
%rsqr2 = rsqr2/(nobs-1.0);
%if rsqr2 ~= 0
%results.rbar = 1 - (rsqr1/rsqr2); % rbar-squared
%else
%    results.rbar = results.rsqr;
%end;

% Two-sided p-value from the lower tail. The former 2*(1-tcdf(|t|,df))
% cancels to exactly 0 once the p-value drops below about 1e-16.
results.pvalue = 2*tcdf(-abs(results.tstat), nobs-nvar);   % Fang

%ediff = results.resid(2:nobs) - results.resid(1:nobs-1);
%results.dw = (ediff'*ediff)/sigu; % durbin-watson
end