Skip to content

Commit 19b3ce6

Browse files
authored
Merge pull request #588 from moritamori/adding-tests-for-onxml
Change the type assertion for Attr
2 parents 14f855a + ee6c26f commit 19b3ce6

File tree

4 files changed

+60
-6
lines changed

4 files changed

+60
-6
lines changed

colly_test.go

Lines changed: 56 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -66,6 +66,17 @@ func newTestServer() *httptest.Server {
6666
`))
6767
})
6868

69+
mux.HandleFunc("/xml", func(w http.ResponseWriter, r *http.Request) {
70+
w.Header().Set("Content-Type", "application/xml")
71+
w.Write([]byte(`<?xml version="1.0" encoding="UTF-8"?>
72+
<page>
73+
<title>Test Page</title>
74+
<paragraph type="description">This is a test page</paragraph>
75+
<paragraph type="description">This is a test paragraph</paragraph>
76+
</page>
77+
`))
78+
})
79+
6980
mux.HandleFunc("/login", func(w http.ResponseWriter, r *http.Request) {
7081
if r.Method == "POST" {
7182
w.Header().Set("Content-Type", "text/html")
@@ -1118,7 +1129,7 @@ func TestHTMLElement(t *testing.T) {
11181129
}
11191130
}
11201131

1121-
func TestCollectorOnXML(t *testing.T) {
1132+
func TestCollectorOnXMLWithHtml(t *testing.T) {
11221133
ts := newTestServer()
11231134
defer ts.Close()
11241135

@@ -1162,6 +1173,50 @@ func TestCollectorOnXML(t *testing.T) {
11621173
}
11631174
}
11641175

1176+
func TestCollectorOnXMLWithXML(t *testing.T) {
1177+
ts := newTestServer()
1178+
defer ts.Close()
1179+
1180+
c := NewCollector()
1181+
1182+
titleCallbackCalled := false
1183+
paragraphCallbackCount := 0
1184+
1185+
c.OnXML("//page/title", func(e *XMLElement) {
1186+
titleCallbackCalled = true
1187+
if e.Text != "Test Page" {
1188+
t.Error("Title element text does not match, got", e.Text)
1189+
}
1190+
})
1191+
1192+
c.OnXML("//page/paragraph", func(e *XMLElement) {
1193+
paragraphCallbackCount++
1194+
if e.Attr("type") != "description" {
1195+
t.Error("Failed to get paragraph's type attribute")
1196+
}
1197+
})
1198+
1199+
c.OnXML("/page", func(e *XMLElement) {
1200+
if e.ChildAttr("paragraph", "type") != "description" {
1201+
t.Error("Invalid type value")
1202+
}
1203+
classes := e.ChildAttrs("paragraph", "type")
1204+
if len(classes) != 2 {
1205+
t.Error("Invalid type values")
1206+
}
1207+
})
1208+
1209+
c.Visit(ts.URL + "/xml")
1210+
1211+
if !titleCallbackCalled {
1212+
t.Error("Failed to call OnXML callback for <title> tag")
1213+
}
1214+
1215+
if paragraphCallbackCount != 2 {
1216+
t.Error("Failed to find all <paragraph> tags")
1217+
}
1218+
}
1219+
11651220
func TestCollectorVisitWithTrace(t *testing.T) {
11661221
ts := newTestServer()
11671222
defer ts.Close()

go.mod

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@ require (
66
github.com/PuerkitoBio/goquery v1.5.1
77
github.com/andybalholm/cascadia v1.2.0 // indirect
88
github.com/antchfx/htmlquery v1.2.3
9-
github.com/antchfx/xmlquery v1.3.3
9+
github.com/antchfx/xmlquery v1.3.4
1010
github.com/gobwas/glob v0.2.3
1111
github.com/golang/protobuf v1.4.2 // indirect
1212
github.com/jawher/mow.cli v1.1.0

go.sum

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -7,8 +7,8 @@ github.com/andybalholm/cascadia v1.2.0 h1:vuRCkM5Ozh/BfmsaTm26kbjm0mIOM3yS5Ek/F5
77
github.com/andybalholm/cascadia v1.2.0/go.mod h1:YCyR8vOZT9aZ1CHEd8ap0gMVm2aFgxBp0T0eFw1RUQY=
88
github.com/antchfx/htmlquery v1.2.3 h1:sP3NFDneHx2stfNXCKbhHFo8XgNjCACnU/4AO5gWz6M=
99
github.com/antchfx/htmlquery v1.2.3/go.mod h1:B0ABL+F5irhhMWg54ymEZinzMSi0Kt3I2if0BLYa3V0=
10-
github.com/antchfx/xmlquery v1.3.3 h1:HYmadPG0uz8CySdL68rB4DCLKXz2PurCjS3mnkVF4CQ=
11-
github.com/antchfx/xmlquery v1.3.3/go.mod h1:64w0Xesg2sTaawIdNqMB+7qaW/bSqkQm+ssPaCMWNnc=
10+
github.com/antchfx/xmlquery v1.3.4 h1:RuhsI4AA5Ma4XoXhaAr2VjJxU0Xp0W2zy/f9ZIpsF4s=
11+
github.com/antchfx/xmlquery v1.3.4/go.mod h1:64w0Xesg2sTaawIdNqMB+7qaW/bSqkQm+ssPaCMWNnc=
1212
github.com/antchfx/xpath v1.1.6/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk=
1313
github.com/antchfx/xpath v1.1.10 h1:cJ0pOvEdN/WvYXxvRrzQH9x5QWKpzHacYO8qzCcDYAg=
1414
github.com/antchfx/xpath v1.1.10/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk=

xmlelement.go

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,6 @@
1515
package colly
1616

1717
import (
18-
"encoding/xml"
1918
"strings"
2019

2120
"github.com/antchfx/htmlquery"
@@ -76,7 +75,7 @@ func (h *XMLElement) Attr(k string) string {
7675
}
7776
}
7877
} else {
79-
for _, a := range h.attributes.([]xml.Attr) {
78+
for _, a := range h.attributes.([]xmlquery.Attr) {
8079
if a.Name.Local == k {
8180
return a.Value
8281
}

0 commit comments

Comments
 (0)