-
Notifications
You must be signed in to change notification settings - Fork 65
Expand file tree
/
Copy pathMediaStructure.php
More file actions
154 lines (140 loc) · 3.71 KB
/
MediaStructure.php
File metadata and controls
154 lines (140 loc) · 3.71 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
<?php
declare( strict_types = 1 );
namespace Wikimedia\Parsoid\Core;
use Wikimedia\Parsoid\DOM\Element;
use Wikimedia\Parsoid\DOM\Node;
use Wikimedia\Parsoid\Utils\DiffDOMUtils;
use Wikimedia\Parsoid\Utils\DOMUtils;
use Wikimedia\Parsoid\Utils\WTUtils;
use Wikimedia\Parsoid\Wikitext\Consts;
/**
* All media should have a fixed structure:
*
* ```
* <containerElt>
* <linkElt><mediaElt /></linkElt>
* <captionElt>...</captionElt>
* </containerElt>
* ```
*
* Pull out this fixed structure, being as generous as possible with
* possibly-broken HTML.
*/
class MediaStructure {
	/**
	 * Outermost wrapper of the media.
	 *
	 * Node names: figure, span
	 *
	 * @var ?Element
	 */
	public $containerElt;

	/**
	 * Element wrapping the media element, when there is one.
	 *
	 * Node names: a, span
	 *
	 * @var ?Element
	 */
	public $linkElt;

	/**
	 * The media element itself.
	 *
	 * Node names: img, audio, video, span
	 *
	 * @var ?Element
	 */
	public $mediaElt;

	/**
	 * Caption element; only looked up when the container is a figure.
	 *
	 * Node names: figcaption
	 *
	 * @var ?Element
	 */
	public $captionElt;

	/**
	 * Store the pieces of the structure and, for a figure container,
	 * locate its figcaption.
	 *
	 * @param Element $mediaElt
	 * @param ?Element $linkElt
	 * @param ?Element $containerElt
	 */
	public function __construct(
		Element $mediaElt,
		?Element $linkElt = null,
		?Element $containerElt = null
	) {
		$this->containerElt = $containerElt;
		$this->linkElt = $linkElt;
		$this->mediaElt = $mediaElt;

		$hasFigureContainer = $containerElt !== null &&
			DOMUtils::nodeName( $containerElt ) === 'figure';
		if ( $hasFigureContainer ) {
			// FIXME: Support last child, which is not the linkElt, as the caption?
			$this->captionElt = DOMCompat::querySelector( $containerElt, 'figcaption' );
		}
	}

	/**
	 * Whether the media was rendered as a span, which happens when info
	 * for the title could not be fetched and the media was treated as
	 * missing.
	 *
	 * @return bool
	 */
	public function isRedLink(): bool {
		$mediaTag = DOMUtils::nodeName( $this->mediaElt );
		return $mediaTag === 'span';
	}

	/**
	 * Read the "resource" attribute of the media element.
	 *
	 * @return ?string the resource name if it exists, otherwise null
	 */
	public function getResource(): ?string {
		$resource = DOMCompat::getAttribute( $this->mediaElt, 'resource' );
		return $resource;
	}

	/**
	 * Read the "alt" attribute of the media element.
	 *
	 * @return ?string The alt text if it exists, otherwise null
	 */
	public function getAlt(): ?string {
		$alt = DOMCompat::getAttribute( $this->mediaElt, 'alt' );
		return $alt;
	}

	/**
	 * Read the "href" attribute of the link element.
	 *
	 * @return ?string The media href if it exists, otherwise null
	 *  (including when there is no link element at all).
	 */
	public function getMediaUrl(): ?string {
		if ( !$this->linkElt ) {
			return null;
		}
		return DOMCompat::getAttribute( $this->linkElt, 'href' );
	}

	/**
	 * Try to extract the container / link / media structure rooted at
	 * the given node.
	 *
	 * @param Node $node Candidate container node
	 * @return ?MediaStructure null when the node does not have a
	 *  recognizable media structure
	 */
	public static function parse( Node $node ): ?MediaStructure {
		// Be a bit more liberal than WTUtils::isGeneratedFigure to support
		// serializing arbitrary HTML and old Flow boards
		$looksLikeMedia = DOMUtils::nodeName( $node ) === 'figure' ||
			WTUtils::isInlineMedia( $node );
		if ( !$looksLikeMedia ) {
			return null;
		}
		'@phan-var Element $node'; // @var Element $node

		// Try being lenient, maybe there was a content model violation when
		// parsing and an active formatting element was reopened in the
		// wrapper: descend through formatting elements (other than <a>)
		// until something else is found.
		$candidate = DiffDOMUtils::firstNonSepChild( $node );
		while ( $candidate instanceof Element ) {
			$tag = DOMUtils::nodeName( $candidate );
			if ( $tag === 'a' || !isset( Consts::$HTML['FormattingTags'][$tag] ) ) {
				break;
			}
			$candidate = DiffDOMUtils::firstNonSepChild( $candidate );
		}

		// Nothing usable was found under the container.
		if ( !( $candidate instanceof Element ) ) {
			return null;
		}

		if ( in_array( DOMUtils::nodeName( $candidate ), [ 'a', 'span' ], true ) ) {
			// Regular case: the candidate is the link wrapper and the media
			// element is its first non-separator child.
			$linkElt = $candidate;
			$mediaElt = DiffDOMUtils::firstNonSepChild( $linkElt );
		} else {
			// Try being lenient, maybe this is the media element and we don't
			// have a link elt. See the test, "Image: from basic HTML (1)"
			$linkElt = null;
			$mediaElt = $candidate;
		}

		if ( !( $mediaElt instanceof Element ) ) {
			return null;
		}
		$allowedMediaTags = [ 'audio', 'img', 'span', 'video' ];
		if ( !in_array( DOMUtils::nodeName( $mediaElt ), $allowedMediaTags, true ) ) {
			return null;
		}

		return new self( $mediaElt, $linkElt, $node );
	}
}