| Algorithm 1 GSG Pseudo-code |
| Input: |
| Training Set(D) = {Set(min), Set(max)} |
| # N is the number of fault class samples. |
| # M is the number of normal class samples. |
| Output: |
| A training Set(D′) with more fault class samples than the original training Set(D). |
| 1: r = N/M # Calculate the class imbalance rate of the original dataset. |
| 2: b = random(r,1) |
| 3: a = int(M*b-N) # Calculate the number of samples to be expanded in Set(min). |
| 4: C = BIC(Set(min)) # Determine an optimal number of components. |
| 5: Gi = GMM(Set(min), C) # Use GMM to cluster Set(min) into C clusters. |
| 6: for i ←1 to C do |
| 7: Set(G″i) = {} # Create C empty sample sets. |
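| 8: if num(Gi) > 1 then # Condition lost in extraction; presumably Gi must hold at least two samples for SMOTE to interpolate — confirm against the original paper. |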
| 9: Set(xnew_k) = SMOTE(Gi, int(a* num(Gi)/N)) |
| # Use SMOTE to synthesize new samples based on the Gi cluster samples. |
| 10: G′i = GMM(Set(min) + Set(xnew_k), C) |
| # Use GMM again to cluster Set(min) and Set(xnew_k) into C clusters. |
| 11: for k ←1 to int(a* num(Gi)/N) do |
| 12: if xnew_k in G′i then # The xnew_k is the k-th sample in Set(x_new). |
| 13: Add the xnew_k to Set(G″i) # Add the xnew_k sample to Set(G″i). |
| 14: else if xnew_k not in G′i then |
| 15: Remove the xnew_k |
| 16: end if |
| 17: end if |
| 18: end for |
| 19: end if |
| 20: Set(N′) = concatenate (Set(G″i)) |
| 21: end for |
| 22: Set(D′) = concatenate (Set(max), Set(min), Set(N′)) |
| 23: return Set(D′) |