-
Notifications
You must be signed in to change notification settings - Fork 20
Expand file tree
/
Copy pathregular.go
More file actions
124 lines (99 loc) · 3.73 KB
/
regular.go
File metadata and controls
124 lines (99 loc) · 3.73 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
package rss
import (
"context"
"encoding/xml"
"io"
"net/http"
"github.com/paulrosania/go-charset/charset"
)
// Channel is the decoded form of an RSS 2.0 <channel> element: the feed's
// descriptive metadata together with the items it carries. Every field is
// populated from the child element named in its xml struct tag.
type Channel struct {
	// Title is the channel's name (<title>).
	Title string `xml:"title"`

	// Link is the URL of the HTML website the channel belongs to (<link>).
	Link string `xml:"link"`

	// Description is a short phrase or sentence about the channel
	// (<description>).
	Description string `xml:"description"`

	// Language is the language the channel is written in (<language>).
	Language string `xml:"language"`

	// LastBuildDate is when the channel content last changed
	// (<lastBuildDate>), decoded through the package's Date type.
	LastBuildDate Date `xml:"lastBuildDate"`

	// Item collects every <item> child of the channel, in document order.
	Item []Item `xml:"item"`
}
// ItemEnclosure is the decoded form of an <enclosure> element, which attaches
// a media file to an RSS item. Both fields are read from XML attributes of
// the element rather than from child elements.
type ItemEnclosure struct {
	// URL is where the enclosed file is located (the "url" attribute).
	URL string `xml:"url,attr"`

	// Type is the MIME type of the enclosed file (the "type" attribute).
	Type string `xml:"type,attr"`
}
// Item is the decoded form of one <item> element of an RSS channel, typically
// a single story, article, or similar piece of content. Every field is
// populated from the child element named in its xml struct tag.
type Item struct {
	// Title is the item's title (<title>).
	Title string `xml:"title"`

	// Link is the item's URL (<link>).
	Link string `xml:"link"`

	// Comments is the URL of a page holding comments on the item (<comments>).
	Comments string `xml:"comments"`

	// PubDate is the item's publication date (<pubDate>), decoded through the
	// package's Date type.
	PubDate Date `xml:"pubDate"`

	// GUID is a string that uniquely identifies the item (<guid>).
	GUID string `xml:"guid"`

	// Category lists the categories the item belongs to, one entry per
	// <category> element.
	Category []string `xml:"category"`

	// Enclosure lists the media files attached to the item, one entry per
	// <enclosure> element.
	Enclosure []ItemEnclosure `xml:"enclosure"`

	// Description is a synopsis of the item (<description>).
	Description string `xml:"description"`

	// Author is the e-mail address of the item's author (<author>).
	Author string `xml:"author"`

	// Content is the item's full content, when the feed supplies it.
	// NOTE(review): the tag matches elements whose local name is "content";
	// the common <content:encoded> extension has local name "encoded" and
	// would not land here — confirm which feeds this field targets.
	Content string `xml:"content"`

	// FullText is the complete text of the item, read from an element whose
	// local name is "full-text".
	FullText string `xml:"full-text"`
}
// cancelableReader adapts an io.Reader so that a consumer pulling from it
// observes context cancellation: once ctx is done, Read stops delegating to r
// and reports the context's error instead. The context is held in the struct
// only for the lifetime of a single ParseRegular call.
type cancelableReader struct {
	ctx context.Context
	r   io.Reader
}

// Read returns ctx.Err() if the context is done; otherwise it forwards the
// call to the wrapped reader. A Read that is already blocked inside the
// wrapped reader is not interrupted.
func (c cancelableReader) Read(p []byte) (int, error) {
	if err := c.ctx.Err(); err != nil {
		return 0, err
	}
	return c.r.Read(p)
}

// ParseRegular parses an RSS 2.0 feed from r and returns its channel.
//
// The document is decoded with encoding/xml; the <channel> child of the root
// element is unmarshalled into a Channel. The decoder's CharsetReader is set
// to charset.NewReader from go-charset, so documents that declare a
// non-UTF-8 encoding are converted while being read.
//
// Cancellation: ctx is checked before decoding starts and again before every
// read from r, so cancelling ctx stops the parse at the next read. A read
// that is already blocked inside r is not interrupted. When decoding fails
// and ctx is done, the context's error is returned in preference to the
// decoder's error.
//
// On failure the returned *Channel is nil. The reader is not closed; the
// caller remains responsible for it. The decoder buffers its input, so it may
// consume bytes from r beyond the end of the XML document.
func ParseRegular(ctx context.Context, r io.Reader) (*Channel, error) {
	// Fail fast when the caller has already given up.
	if err := ctx.Err(); err != nil {
		return nil, err
	}

	// Route every read through the context check so cancellation is honoured
	// during decoding, not just before it.
	xmlDecoder := xml.NewDecoder(cancelableReader{ctx: ctx, r: r})
	xmlDecoder.CharsetReader = charset.NewReader

	var rss struct {
		Channel Channel `xml:"channel"`
	}
	if err := xmlDecoder.Decode(&rss); err != nil {
		// Intermediate readers may wrap or replace the error; surface the
		// cancellation cause directly so callers can test it with errors.Is.
		if ctxErr := ctx.Err(); ctxErr != nil {
			return nil, ctxErr
		}
		return nil, err
	}
	return &rss.Channel, nil
}
// Regular parses an RSS 2.0 feed carried in the body of an HTTP response.
//
// It is a convenience wrapper around ParseRegular: the response body is
// handed to ParseRegular together with ctx, and the resulting channel and
// error are returned unchanged.
//
// The response body is closed when Regular returns, whether or not parsing
// succeeded, so the caller must not close it again or read from it
// afterwards. resp and resp.Body are used without nil checks.
func Regular(ctx context.Context, resp *http.Response) (*Channel, error) {
	body := resp.Body
	// Release the body on every return path.
	defer body.Close()

	return ParseRegular(ctx, body)
}