Skip to main content
. 2009 Aug 27;10(Suppl 8):S4. doi: 10.1186/1471-2105-10-S8-S4

Table 1.

Regular expression patterns for the detection of residue mentions in text. The patterns recognise single (SITE) or multiple (SITES) wild-type residue sites, a sequence range or residue pair (RANGE/PAIR), and point mutations (MUTATION). The set covers abbreviated notations of residues as well as grammatical expressions found in text.

RANGE-TO = ("-"+ ("to" "-"+)? | "to");
CONVERT-TO = ("to"|"-"+">"?);
XAA = ("X"|"XAA"|"xaa");
POS = [1-9][0-9]*;
RESN1 = [ARNDCQEGHILKMFPSTWYVOUBZX];
RESN3 = ([aA]la|ALA | [aA]rg|ARG | [aA]sn|ASN | [aA]sp|ASP | [cC]ys|CYS
| [gG]ln|GLN | [gG]lu|GLU | [gG]ly|GLY | [hH]is|HIS | [iI]le|ILE
| [lL]eu|LEU | [lL]ys|LYS | [mM]et|MET | [pP]he|PHE | [pP]ro|PRO
| [sS]er|SER | [tT]hr|THR | [tT]rp|TRP | [tT]yr|TYR | [vV]al|VAL
| [pP]yl|PYL | [sS]ec|SEC | [aA]sx|ASX | [gG]lx|GLX | [xX]aa|XAA);
RESNF = ([aA]lanine | [aA]rginine | [aA]sparagine | [aA]spart(ate|ic acid) | [cC]ysteine
| [gG]lutamine | [gG]lutam(ate|ic acid) | [gG]lycine | [hH]istidine | [iI]soleucine
| [lL]eucine | [lL]ysine | [mM]ethionine | [pP]henylalanine | [pP]roline
| [sS]erine | [tT]hreonine | [tT]ryptophan | [tT]yrosine | [vV]aline
| [pP]yrrolysine | [sS]elenocysteine | [aA]spartic acid or [aA]sparagine
| [gG]lutamic acid or [gG]lutamine);
SITE = ((RESN3 | RESNF) POS "residue"?
| (RESN3 | RESNF)"-"+ POS "residue"?
| (RESN3 | RESNF)"residue"? "at position"? POS "residue"?
| (RESN3 | RESNF)"("POS")" "residue"?
|"amino acid"? "residue" "at position"? POS
|"amino acid" "residue"? "at position"? POS
| RESNF "residue" POS);
SITES = (RESNF"s"((","|"and"|"or") RESNF"s")*
| RESNF"s"? ("at position" "s"?)? (","|"and"|"or") (("at position" "s"?)? (","|"and"|"or") POS)+
| RESNF "residue" "s"?
| RESN3 "residue" "s"? ("at position" "s"?)? POS (("at position" "s"?)? (","|"and" | "or") POS)+
| RESN3 "residue" "s"?
|"residue" "s"? ("at position" "s"?)? POS ((","|"and"|"or") POS)+
| (RESN3 | RESNF)"for"(RESN3 | RESNF)"at position"POS ((","|"and"|"or") POS)+
| RESNF ("," | "and" | "or") POS)* "residue" "s"?);
RANGE/PAIR = ("residue" "s"? ("," | "and" | "or") RANGE-TO POS)+
|"amino acid" "residue"? "s"? ("," | "and" | "or") RANGE-TO POS)+
| ("residue" "s"?)? "at position" "s"? ("," | "and" | "or") RANGE-TO POS)+
| RESI RANGE-TO RESI);
MUTATION = (RESN1 POS RESN1
| RESN1 "-" POS "-" RESN1
| RESN1 "(" POS ")" RESN1
| RESI CONVERT-TO (RESN3 | RESNF)
| RESI RESN3
|"from" (RESNF | RESN3) CONVERT-TO (RESNF | RESN3)"at position"POS
| (RESN3 | RESNF) "for" (RESN3 | RESNF) "at position" POS
| RESI ("-"+ | CONVERT-TO) RESI "substitution");