|
Algorithm 2: QGA for Plagiarism Detection
|
|
Input: Dataset Xsrc; Suspicious Document Xsusp; QGA Parameters, WordNet |
-
1-
while n < size of documents do
-
2-
S ← Sentence Segmentation (Xsrc)
-
3-
y ←0
-
4-
while y < size of S do // i.e., while sentence S[y] ≠ NULL
-
5-
T ← Tokenization (S)
-
6-
z←0
-
7-
while z < size of T do
-
8-
M ← POS Tagging (T)
-
9-
N ← Lemmatization (M)
-
10-
z++
-
11-
end
-
12-
tf-isf (N)
-
13-
y++
-
14-
end
-
15-
n++
-
16-
end
-
17-
t ← 0
-
18-
while termination condition not satisfied do
-
19-
t ← t+1
-
20-
Call Algorithm 1 // QGA Procedure
-
21-
Return Best_Pop ←New_Pop // Store the best solution among P(t)
-
22-
end
-
23-
sim1← sum of words in Xsusp // the number of common word-level concepts in Xsusp
-
24-
sim2← sum of words in Xsrc // the number of common word-level concepts in Xsrc
-
25-
-
26-
Doc. Status ← Plagiarized
-
27-
end
-
28-
For each suspicious-source word pair (wq,wk) //To compute the semantic similarity
-
29-
-
-
30-
of each word.
-
31-
- Only synsets in the same POS class as the word are retrieved for these lists
-
32-
end
-
33-
Count ← the number of common words between the compared suspicious and source sentences
// the source sentence is taken from the best set of selected source sentences
// extracted by the QGA procedure
-
34-
If count > τ
-
35-
Doc. Status ← Plagiarized
-
36-
end
-
37-
Else
-
38-
Doc. Status ← Not plagiarized
-
39-
end
-
40-
Output = Doc. Status
|