1
1
import { getPossibleElementByQuerySelector } from './get-possible-element-by-query-selector.mjs'
2
-
3
- function getArea ( e ) {
4
- const rect = e . getBoundingClientRect ( )
5
- return rect . width * rect . height
6
- }
2
+ import { Readability , isProbablyReaderable } from '@mozilla/readability'
7
3
8
4
const adapters = {
9
5
'scholar.google' : [ '#gs_res_ccl_mid' ] ,
@@ -17,6 +13,11 @@ const adapters = {
17
13
'new.qq.com' : [ '.content-article' ] ,
18
14
}
19
15
16
/**
 * Returns the area of an element's bounding client rectangle.
 *
 * @param {Element} e - Object exposing `getBoundingClientRect()`.
 * @returns {number} `width * height` of the rectangle reported for `e`.
 */
function getArea(e) {
  const { width, height } = e.getBoundingClientRect()
  return width * height
}
20
+
20
21
function findLargestElement ( e ) {
21
22
if ( ! e ) {
22
23
return null
@@ -42,22 +43,39 @@ function findLargestElement(e) {
42
43
return largestElement
43
44
}
44
45
45
- export function getCoreContentText ( ) {
46
- function getTextFrom ( e ) {
47
- return e . innerText || e . textContent
48
- }
46
/**
 * Reads the text of a node, preferring `innerText` and falling back to
 * `textContent` when `innerText` is falsy (empty, null or undefined).
 *
 * @param {{innerText?: string, textContent?: string}} e - Node-like object.
 * @returns {string} `e.innerText` when truthy, otherwise `e.textContent`.
 */
function getTextFrom(e) {
  return e.innerText || e.textContent
}
49
49
50
/**
 * Compacts extracted page text by trimming it and deleting filler sequences.
 *
 * Steps run in this order, each on the result of the previous one:
 *   1. trim leading/trailing whitespace
 *   2. delete every ' ' occurrence
 *   3. delete every tab
 *   4. delete every '\n\n' pair (a lone '\n' is kept)
 *   5. delete every ',,' pair (a lone ',' is kept)
 *
 * @param {string} text - Raw text to compact.
 * @returns {string} The compacted text; '' when `text` is null or undefined.
 */
function postProcessText(text) {
  // Guard: a missing text source yields an empty result instead of a TypeError.
  if (text == null) return ''

  // NOTE(review): the first replacement removes every single space, which joins
  // words in space-delimited languages — confirm this literal is intended.
  return text
    .trim()
    .replaceAll(' ', '')
    .replaceAll('\t', '')
    .replaceAll('\n\n', '')
    .replaceAll(',,', '')
}
58
+
59
+ export function getCoreContentText ( ) {
50
60
for ( const [ siteName , selectors ] of Object . entries ( adapters ) ) {
51
61
if ( location . hostname . includes ( siteName ) ) {
52
62
const element = getPossibleElementByQuerySelector ( selectors )
53
- if ( element ) return getTextFrom ( element )
63
+ if ( element ) return postProcessText ( getTextFrom ( element ) )
54
64
break
55
65
}
56
66
}
57
67
58
68
const element = document . querySelector ( 'article' )
59
69
if ( element ) {
60
- return getTextFrom ( element )
70
+ return postProcessText ( getTextFrom ( element ) )
71
+ }
72
+
73
+ if ( isProbablyReaderable ( document ) ) {
74
+ let article = new Readability ( document . cloneNode ( true ) , {
75
+ keepClasses : true ,
76
+ } ) . parse ( )
77
+ console . log ( 'readerable' )
78
+ return postProcessText ( article . textContent )
61
79
}
62
80
63
81
const largestElement = findLargestElement ( document . body )
@@ -79,5 +97,5 @@ export function getCoreContentText() {
79
97
ret = getTextFrom ( largestElement )
80
98
console . log ( 'use first' )
81
99
}
82
- return ret . trim ( ) . replaceAll ( ' ' , '' ) . replaceAll ( '\n\n' , '' ) . replaceAll ( ',,' , '' )
100
+ return postProcessText ( ret )
83
101
}
0 commit comments