Algorithm 2: Spatial-Semantic Decoder (SSD)
 1: Input: Test keyframe, Reference set, Detection threshold, Similarity threshold θ
 2: Output: Educational scene graph G
 3: // Step 1: Open-World Object Proposal Model
 4:     …
 5: // Initialize reference embeddings
 6:     …
 7: for each … do
 8:     …
 9:     Store … in reference database
10: end for
11: Initialize B ← ∅ // Set of detected bounding boxes
12: for each patch embedding … do
13:     for each … do
14:         Compute similarity ← …
15:         if similarity > … then
16:             …
17:             …
18:             break
19:         end if
20:     end for
21: end for
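The Step 1 proposal loop (steps 11–21) matches each patch embedding against the stored reference embeddings and keeps the patch's bounding box when a match clears the similarity threshold. A minimal Python sketch follows; the cosine-similarity metric and all names (`propose_objects`, `reference_db`, the `(box, label, score)` tuple layout) are illustrative assumptions, since the algorithm's exact expressions were lost:

```python
import numpy as np

def propose_objects(patch_embeddings, patch_boxes, reference_db, sim_threshold):
    """Step-1 sketch: each patch embedding is compared against every
    reference embedding; the first reference whose similarity exceeds
    the threshold claims the patch, and its box is added to B."""
    B = []  # set of detected bounding boxes (step 11)
    for emb, box in zip(patch_embeddings, patch_boxes):
        for label, ref_emb in reference_db.items():
            # cosine similarity -- one plausible choice of metric
            sim = float(np.dot(emb, ref_emb)
                        / (np.linalg.norm(emb) * np.linalg.norm(ref_emb) + 1e-8))
            if sim > sim_threshold:
                B.append((box, label, sim))
                break  # mirrors the `break` at step 18: first match wins
    return B
```

The early `break` means each patch contributes at most one detection, matching the control flow of steps 15–19.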
22: // Step 2: Pixel-Accurate Mask Refiner
23: Initialize M ← ∅
24: for each … do
25:     …
26:     …
27:     …
28: end for
29: // Step 3: Entropy reduction, verification, semantic graph formation, and annotation
30: Initialize G ← ConstructSceneGraph(M) using verified region labels
31: for each … do
32:     …
33:     …
34:     …
35:     …
36:     for each … do
37:         …
38:         if … then
39:             …
40:             …
41:         end if
42:     end for
43:     if … then
44:         …
45:     else
46:         …
47:     end if
48: end for
49: return G
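ConstructSceneGraph (step 30) is not spelled out above. A minimal sketch under stated assumptions: one node per verified region label from M, and directed edges from simple spatial predicates between region bounding boxes. The `left_of`/`above` relation set and all names here are hypothetical, not the paper's definition:

```python
def construct_scene_graph(masks):
    """Sketch of ConstructSceneGraph: nodes carry verified region
    labels; edges encode assumed pairwise spatial relations computed
    from bounding-box centers.

    `masks` is a list of (label, box) pairs with box = (x_min, y_min,
    x_max, y_max)."""
    nodes = [{"id": i, "label": label, "box": box}
             for i, (label, box) in enumerate(masks)]
    edges = []
    for a in nodes:
        for b in nodes:
            if a["id"] == b["id"]:
                continue
            ax = (a["box"][0] + a["box"][2]) / 2  # center x of region a
            bx = (b["box"][0] + b["box"][2]) / 2
            ay = (a["box"][1] + a["box"][3]) / 2  # center y of region a
            by = (b["box"][1] + b["box"][3]) / 2
            # assumed spatial predicates; the actual relation set is unknown
            if ax < bx:
                edges.append((a["id"], "left_of", b["id"]))
            if ay < by:
                edges.append((a["id"], "above", b["id"]))
    return {"nodes": nodes, "edges": edges}
```

The inner loop over node pairs mirrors the nested loop of steps 36–42, where candidate relations are tested before being committed to G.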