/* Supplementary material for Faulhammer et al. (2000) Proc. Natl. Acad. Sci. USA 97 (4), 1385-1389.
   PERMUTE */
#include<stdio.h>
#include<stdlib.h>
#include<math.h>
/* Tunable parameters of the strand design. main() builds every strand as
   prefix + BITS bit words of BIT_L nucleotides, separated by BITS-1 spacers
   of SPACER_L nucleotides, + suffix. */
#define BITS 10 /*NUMBER OF BITS WANT TO ENCODE*/
#define SPACER_L 5 /*SIZE OF SPACER IN BETWEEN BITS*/
#define BIT_L 15 /*NUMBER OF NUCLEOTIDES ENCODING EACH BIT*/
#define WIN_SIZE 20 /*WHEN COMPARING TWO SEQUENCES, SIZE OF WINDOW FOR COMPARISON; ALSO THE CAPACITY OF THE POSITION LISTS HANDED TO fixer()*/
#define HAMMING 15 /*NUMBER OF EQUAL POSITIONS BETWEEN TWO WINDOWS AT WHICH main() REQUESTS A CORRECTION (THE MATCH COUNT IS COMPARED WITH >=)*/
#define SUB_STR 7 /*LENGTH OF AN IDENTICAL CONTIGUOUS SUBSTRING AT WHICH main() REQUESTS A CORRECTION*/
#define NUC_ROW 5 /*LENGTH OF A RUN OF ONE NUCLEOTIDE AT WHICH main() REQUESTS A CORRECTION (COMPARED WITH >=)*/
#define BP_WIN_SIZE 8 /*BASE-PAIRING CHECK: main() COMPARES OFFSETS 0..BP_WIN_SIZE ON BOTH SIDES OF A CENTRE POSITION*/
#define BP_HAMMING 8 /*BASE-PAIRING CHECK: NUMBER OF PAIRING POSITIONS IN THAT WINDOW AT WHICH main() REQUESTS A CORRECTION*/
/*
 * fixer: overwrite up to two nucleotides named in a list of offending
 * positions with a randomly chosen 'A', 'T' or 'C'.
 *
 * fix_error   - list of indices into curr_pick, terminated by -5 or by
 *               reaching WIN_SIZE entries. The list is only read; it is
 *               left unchanged for the caller.
 * P           - spacer table; not used here, kept for the call signature.
 * X           - bit-word table X[value][bit][offset]; the word currently
 *               selected for the affected bit is modified in place.
 * curr_assign - curr_assign[bit] is the value (0/1) whose word is currently
 *               placed in curr_pick for that bit.
 * curr_pick   - the assembled strand; the same nucleotide is written here
 *               so it stays in step with X.
 *
 * Returns the number of nucleotides replaced (0, 1 or 2).
 *
 * A position is eligible only if it lies after the 24-character prefix,
 * inside the bits/spacers region, and at offset 2..12 of a bit word;
 * spacer positions and the outer offsets of each word are never touched.
 */
int fixer(int fix_error[WIN_SIZE],char P[BITS-1][SPACER_L],char X[2][BITS][BIT_L],int curr_assign[],char curr_pick[BITS*(BIT_L)+(BITS-1)*SPACER_L+32+24])
{
	static const char replacement[3]={'A','T','C'};
	int fix_bit,fix_char,pos,num_corr=0;
	int k=0;
	char nucleotide;

	(void)P;
	/* Test k against WIN_SIZE first so a full list is never read past its end. */
	while((k<WIN_SIZE)&&(fix_error[k]!=-5)&&(num_corr<2))
	{
		/* Position relative to the first bit word. Computed in a local so the
		   caller's list keeps its original strand indices. */
		pos=fix_error[k]-24;
		if((pos>0)&&(pos<(BITS*BIT_L+(BITS-1)*SPACER_L)))
		{
			/* Each bit word plus its trailing spacer occupies BIT_L+SPACER_L characters. */
			fix_bit=pos/(BIT_L+SPACER_L);
			fix_char=pos%(BIT_L+SPACER_L);
			if((fix_char<13)&&(fix_char>1))
			{
				nucleotide=replacement[lrand48()%3];
				curr_pick[pos+24]=nucleotide;
				X[curr_assign[fix_bit]][fix_bit][fix_char]=nucleotide;
				num_corr++;
			}
		}
		k++;
	}
	return(num_corr);
}
main()
{
char P[BITS-
1][SPACER_L]={"TCTAC","ATAAC","CTTAA","TTTAC","TACAA","TCCTT","TCAAT","TCCAA","AC
ACA"};
char
X[2][BITS][BIT_L]={{"TCCTCACATTACTTA","ACTTCCTTTATATCC","TTATAACAAACAT
CC","TTAAAATCTTCCCTC","CTATTTATCCACACC","GCTTCAAACAATTCC","AACTCTCAAAT
TCAA","CTAACCTTTACTTCA","CATTCCTTATCCCAC","CACCCTTTCTCCTCT"},{"CTCTTAC
TCAATTCT","CATATCAACATCTTA","ATCCTCCACTTCACA","ACATAACCCTCTTCA","ACCTT
ACTTTCCATA","GTACATTCTCCCTAC","CATAATCTTATATTC","ATAATCACATACTTC","TCC
ACCAACTACCTA","TTTTAAATTTCACAA"}};
>
char prefix[24]="GGCATCATCATCATCTCGAGAATT",
suffix[32]="CCTTGGTCATTAGGATCCCGTAGTAGTAGTAG";
char curr_pick[BITS*(BIT_L)+(BITS-1)*SPACER_L+32+24];
int
i,j,k,l,num_cor,c_count,currvalue,remain,currunit,fix_bit,fix_char,
rand_char,sta1,sta2,found=0,spacer_unique=0,nuc_repeat=1,fix,curr_
assign[BITS],bits_unique=0, fix_error[WIN_SIZE], fix_error2[WIN_SIZE];
long int temp,trial=0;
found=1;
printf("The prefix is:");
for(i=0;i<24;i++) printf("%c",prefix[i]); printf("\n");
printf("The suffix is:");
for(i=0;i<32;i++) printf("%c",suffix[i]); printf("\n");
found=0;
while(found==0)
{
found=1;
/*confirm that each spacer is different*/
while(spacer_unique==0)
{
spacer_unique=1;
for(i=0;i<BITS-1;i++)
for(j=(i+1);j<BITS-1;j++)
{
k=0;
while((P[i][k]==P[j][k])&&(k<=(SPACER_L-1))) k++;
if (k==SPACER_L)
{
printf("Spacer %d was found to be equal to spacer %d\n",i+1,j+1);
spacer_unique=0;
for(k=0;k<SPACER_L;k++) P[i][k]='X';
temp=lrand48()%2;
c_count=1+temp;
for(k=0;k<c_count;k++)
{
temp=lrand48()%SPACER_L;
if (P[i][temp]!='C') P[i][temp]='C';
else k--;
}
for(k=0;k<SPACER_L;k++)
{
if (P[i][k]!='C')
{
temp=lrand48()%2;
if (temp==0) P[i][k]='A';
if (temp==1) P[i][k]='T';
}
}
printf("Correction for the spacer %d, the string is:",j+1);
for(k=0;k<SPACER_L;k++) printf("%c",P[j][k]); printf("\n");
}
}
}
bits_unique=0;
while(bits_unique==0)
{
bits_unique=1;
for(i=0;i<BITS;i++)
for(j=7-BIT_L;j<BIT_L-7;j++)
{
temp=0;
for(k=0;k<BIT_L;k++)
if ((j+k<BIT_L)&&(j>=0)&&(X[0][i][j+k]==X[1][i][k]))
{
fix_error[temp]=k;
fix_error2[temp]=j+k;
temp++;
}
k=0;
num_cor=0;
if ((temp>=7))
{
while (num_cor<2)
{
if((fix_error[k]<13)&&(fix_error[k]>1))
{
rand_char=lrand48()%3;
if(rand_char==0) X[1][i][fix_error[k]]='A';
if(rand_char==1) X[1][i][fix_error[k]]='T';
if(rand_char==2) X[1][i][fix_error[k]]='C';
num_cor++;
bits_unique=0;
}
k++;
}
}
}
}
for(i=0;i<24;i++) curr_pick[i]=prefix[i];
for(i=0;i<32;i++) curr_pick[i+BITS*(BIT_L)+(BITS-
1)*SPACER_L+24]=suffix[i];
if (found==1) for(currvalue=0;currvalue<=(int)(pow(2.0,(double)BITS)-
1);currvalue++)
{
remain=currvalue;
for(j=0;j<BITS;j++)
{
currunit=(int)pow(2,(BITS-(j+1)));
if(((remain/currunit)!=0)&&(remain>=currunit))
{temp=1; remain-=currunit;}
else temp=0;
for(k=0;k<BIT_L;k++)
curr_pick[SPACER_L*(j)+BIT_L*j+24+k]=X[temp][j][k];
curr_assign[j]=temp;
if (j!=BITS-1)
for(k=0;k<SPACER_L;k++)
curr_pick[(BIT_L*(j+1)+SPACER_L*j)+k+24]=P[j][k];
}
if (found==1)
{
printf("the current pick,representing %d:",currvalue);
for(j=0;j<(BITS*(BIT_L)+(BITS-1)*SPACER_L+32+24);j++)
printf("%c",curr_pick[j]); printf("\n");
}
/*Check current pick to see if it has a run of more than
NUC_ROW As Cs or Ts in it*/
temp=1;
if ( found==1)
for(j=0;j<(BITS*(BIT_L)+(BITS-1)*SPACER_L+32+24-1);j++)
{
if (curr_pick[j]==curr_pick[j+1])
{
fix_error[temp-1]=j;
temp++;
}
else
{
temp=1;
for(k=0;k<10;k++) fix_error[k]=-5;
}
if
((temp>=NUC_ROW)||((temp==4)&&(curr_pick[fix_error[0]]=='C')))
{
printf("Correcting Nucleotides found in a row @
position %d\n", fix_error[0]);
found=0;
k=0;
fixer(fix_error,P,X,curr_assign,curr_pick);
}
}
/*Check to see if there is a Substring within the strand of
length SUB_STR identical to another substring within the strand*/
if (found==1)
for(sta1=0;sta1<(BITS*(BIT_L)+(BITS-1)*SPACER_L+32+24-
SUB_STR);sta1++)
for(sta2=sta1+SUB_STR;sta2<(BITS*(BIT_L)+(BITS-
1)*SPACER_L+32+24-SUB_STR);sta2++)
> {
for(k=0;k<=SUB_STR;k++)
{
fix_error[k]=-5;
fix_error2[k]=-5;
}
temp=0;
i=sta1; j=sta2;
while ((curr_pick[i++]==curr_pick[j++])&&(i-sta1<=SUB_STR))
{
fix_error[temp]=i-1;
fix_error2[temp++]=j-1;
}
if(temp==SUB_STR)
{
printf("position %d matches position %d for %d
contiguous bases--correcting it\n",sta1,sta2,SUB_STR);
found=0;
if (fixer(fix_error2,P,X,curr_assign,curr_pick)==0)
fixer(fix_error,P,X,curr_assign,curr_pick);
}
}
/*Check to see if there is a match within tolerance HAMMING
over a window size WIN_SIZE*/
if (found==1)
for(sta1=0;sta1<(BITS*(BIT_L)+(BITS-1)*SPACER_L+32+24-
HAMMING);sta1++)
for(sta2=sta1+8;sta2<(BITS*(BIT_L)+(BITS-1)*SPACER_L+32+24-
HAMMING);sta2++)
{
for(k=0;k<=HAMMING;k++)
{
fix_error[k]=-5;
fix_error2[k]=-5;
}
temp=0;
i=sta1;j=sta2;
while((i-sta1<WIN_SIZE)&&(j<BITS*(BIT_L+SPACER_L)))
if(curr_pick[i++]==curr_pick[j++])
{
fix_error[temp]=i-1;
fix_error2[temp++]=j-1;
}
if(temp>=HAMMING)
{
printf("%d Hamming distance match over %d bases at
position %d and %d\n",temp,WIN_SIZE,sta1,sta2);
found=0;
if (fixer(fix_error2,P,X,curr_assign,curr_pick)==0)
fixer(fix_error,P,X,curr_assign,curr_pick);
}
}
if(found==1)
for(sta1=(BITS*(BIT_L)+(BITS-1)*SPACER_L+32+24-
BP_HAMMING);sta1>=0;sta1--)
for(sta2=1;(sta2+sta1)<(BITS*(BIT_L)+(BITS-1)*SPACER_L+32+24-
BP_HAMMING)&&((sta1-sta2-BP_WIN_SIZE)>0);sta2++)
{
for(k=0;k<=BP_HAMMING;k++) { fix_error[k]=-5; fix_error2[k]=-5;}
temp=0;
for(i=0;i<=BP_WIN_SIZE;i++)
if(((curr_pick[sta1+sta2+i]=='A')&&(curr_pick[sta1-sta2-
i]=='T'))||((curr_pick[sta1+sta2+i]=='T')&&(curr_pick[sta1-sta2-
i]=='A'))||((curr_pick[sta1+sta2+i]=='C')&&(curr_pick[sta1-sta2-
i]=='G'))||((curr_pick[sta1+sta2+i]=='G')&&(curr_pick[sta1-sta2-
i]=='C'))||((curr_pick[sta1+sta2+i]=='G')&&(curr_pick[sta1-sta2-
i]=='T'))||((curr_pick[sta1+sta2+i]=='T')&&(curr_pick[sta1-sta2-i]=='G')))
{
fix_error[temp]=sta1-sta2-i;
fix_error2[temp++]=sta1+sta2+i;
}
if(temp>=BP_HAMMING)
{
printf("%d base pairing match over %d bases at
position %d and %d\n",temp,BP_WIN_SIZE,sta1+sta2,sta1-sta2);
found=0;
if (fixer(fix_error2,P,X,curr_assign,curr_pick)==0)
fixer(fix_error,P,X,curr_assign,curr_pick);
}
}
}
}
for(i=0;i<2;i++)
for(j=0;j<BITS;j++)
{
printf("For the bit %d set to %d, the string is:",j+1,i);
for(k=0;k<BIT_L;k++) printf("%c",X[i][j][k]);
printf("\n");
}
for(i=0;i<(BITS-1);i++)
{
printf("For the spacer %d the string is:", i+1);
for(j=0;j<SPACER_L;j++) printf("%c",P[i][j]);
printf("\n");
}
}