@@ -1083,35 +1083,31 @@ public function parseH(\DOMElement $e, $is_backcompat = false, $has_nested_mf =
1083
1083
1084
1084
}
1085
1085
1086
- // Check for u- url
1087
- if (! array_key_exists ( ' url ' , $ return ) && ! $ is_backcompat ) {
1088
- $ url = null ;
1089
- // Look for img @src
1090
- if ($ e ->tagName == 'a ' or $ e ->tagName == 'area ' ) {
1091
- $ url = $ e ->getAttribute ('href ' );
1092
- }
1093
-
1094
- // Look for nested a @ href
1095
- foreach ( $ this -> xpath -> query ( './a[count(preceding-sibling::a)+ count(following-sibling::a)=0] ' , $ e ) as $ em ) {
1096
- $ emNames = mfNamesFromElement ( $ em , ' h- ' );
1097
- if ( empty ( $ emNames )) {
1098
- $ url = $ em -> getAttribute ( ' href ' );
1099
- break ;
1100
- }
1101
- }
1102
-
1103
- // Look for nested area @src
1104
- foreach ( $ this ->xpath ->query (' ./area[count(preceding-sibling::area)+count(following-sibling::area)=0] ' , $ e ) as $ em ) {
1105
- $ emNames = mfNamesFromElement ( $ em , ' h- ' );
1106
- if ( empty ( $ emNames )) {
1107
- $ url = $ em -> getAttribute ( ' href ' ) ;
1108
- break ;
1086
+ // Do we need to imply a url property?
1087
+ // if no explicit " url" property, and no other explicit u-* properties, and no nested microformats
1088
+ if (! array_key_exists ( ' url ' , $ return ) && ! in_array ( ' u- ' , $ prefixes ) && ! $ has_nested_mf && ! $ is_backcompat ) {
1089
+ // a.h-x[href] or area.h-x[href]
1090
+ if (( $ e ->tagName === 'a ' || $ e ->tagName === 'area ' ) && $ e -> hasAttribute ( ' href ' ) ) {
1091
+ $ return [ ' url ' ][] = $ this -> resolveUrl ( $ e ->getAttribute ('href ' ) );
1092
+ } else {
1093
+ $ xpaths = array (
1094
+ // .h-x>a[ href]:only-of-type:not[.h-*]
1095
+ './a[not(contains(concat(" ", @class), " h-")) and count(../a) = 1 and @href] ' ,
1096
+ // .h-x>area[href]:only-of-type:not[.h-*]
1097
+ ' ./area[not(contains(concat(" ", @class), " h-")) and count(../area) = 1 and @href] ' ,
1098
+ // .h-x>:only-child:not[.h-*]>a[ href]:only-of-type:not[.h-*]
1099
+ ' ./*[not(contains(concat(" ", @class), " h-")) and count(../*) = 1 and count(a) = 1]/a[not(contains(concat(" ", @class), " h-")) and @href] ' ,
1100
+ // .h-x>:only-child:not[.h-*]>area[href]:only-of-type:not[.h-*]
1101
+ ' ./*[not(contains(concat(" ", @class), " h-")) and count(../*) = 1 and count(area) = 1]/area[not(contains(concat(" ", @class), " h-")) and @href] '
1102
+ );
1103
+ foreach ( $ xpaths as $ xpath ) {
1104
+ $ url = $ this ->xpath ->query ($ xpath , $ e );
1105
+ if ( $ url !== false && $ url -> length === 1 ) {
1106
+ $ return [ ' url ' ][] = $ this -> resolveUrl ( $ url -> item ( 0 )-> getAttribute ( ' href ' ));
1107
+ break ;
1108
+ }
1109
1109
}
1110
1110
}
1111
-
1112
- if (!is_null ($ url )) {
1113
- $ return ['url ' ][] = $ this ->resolveUrl ($ url );
1114
- }
1115
1111
}
1116
1112
1117
1113
// Make sure things are unique and in alphabetical order
0 commit comments