Skip to content

Commit c2821ff

Browse files
melnary and fmartingr authored
feat: allow import of newer pocket data export files in csv format (#1023)
* feat: allow import of newer pocket data export files in csv format

  Signed-off-by: Mel <einebeere@gmail.com>

* fix: linter error

  Signed-off-by: Mel <einebeere@gmail.com>

---------

Signed-off-by: Mel <einebeere@gmail.com>
Co-authored-by: Felipe Martin <812088+fmartingr@users.noreply.github.com>
1 parent 87bc7a8 commit c2821ff

1 file changed

Lines changed: 144 additions & 43 deletions

File tree

internal/cmd/pocket.go

Lines changed: 144 additions & 43 deletions
Original file line number | Diff line number | Diff line change
@@ -1,22 +1,29 @@
11
package cmd
22

33
import (
4+
"context"
5+
"encoding/csv"
6+
"errors"
47
"fmt"
58
"os"
9+
"path/filepath"
10+
"regexp"
11+
"slices"
612
"strconv"
713
"strings"
814
"time"
915

1016
"github.com/PuerkitoBio/goquery"
1117
"github.com/go-shiori/shiori/internal/core"
18+
"github.com/go-shiori/shiori/internal/database"
1219
"github.com/go-shiori/shiori/internal/model"
1320
"github.com/spf13/cobra"
1421
)
1522

1623
func pocketCmd() *cobra.Command {
1724
cmd := &cobra.Command{
1825
Use: "pocket source-file",
19-
Short: "Import bookmarks from Pocket's exported HTML file",
26+
Short: "Import bookmarks from Pocket's data export file",
2027
Args: cobra.ExactArgs(1),
2128
Run: pocketHandler,
2229
}
@@ -25,17 +32,43 @@ func pocketCmd() *cobra.Command {
2532
}
2633

2734
func pocketHandler(cmd *cobra.Command, args []string) {
28-
_, deps := initShiori(cmd.Context(), cmd)
35+
ctx := cmd.Context()
36+
_, deps := initShiori(ctx, cmd)
2937

3038
// Open pocket's file
31-
srcFile, err := os.Open(args[0])
39+
filePath := args[0]
40+
srcFile, err := os.Open(filePath)
3241
if err != nil {
3342
cError.Println(err)
3443
os.Exit(1)
3544
}
3645
defer srcFile.Close()
3746

38-
// Parse pocket's file
47+
var bookmarks []model.BookmarkDTO
48+
switch filepath.Ext(filePath) {
49+
case ".html":
50+
bookmarks = parseHtmlExport(ctx, deps.Database, srcFile)
51+
case ".csv":
52+
bookmarks = parseCsvExport(ctx, deps.Database, srcFile)
53+
default:
54+
cError.Println("Invalid file format. Only HTML and CSV are supported.")
55+
os.Exit(1)
56+
}
57+
58+
// Save bookmark to database
59+
bookmarks, err = deps.Database.SaveBookmarks(ctx, true, bookmarks...)
60+
if err != nil {
61+
cError.Printf("Failed to save bookmarks: %v\n", err)
62+
os.Exit(1)
63+
}
64+
65+
// Print imported bookmarks
66+
fmt.Println()
67+
printBookmarks(bookmarks...)
68+
}
69+
70+
// Parse bookmarks from HTML file
71+
func parseHtmlExport(ctx context.Context, db database.DB, srcFile *os.File) []model.BookmarkDTO {
3972
bookmarks := []model.BookmarkDTO{}
4073
mapURL := make(map[string]struct{})
4174

@@ -49,69 +82,137 @@ func pocketHandler(cmd *cobra.Command, args []string) {
4982
// Get metadata
5083
title := a.Text()
5184
url, _ := a.Attr("href")
52-
strTags, _ := a.Attr("tags")
53-
strModified, _ := a.Attr("time_added")
54-
intModified, _ := strconv.ParseInt(strModified, 10, 64)
55-
modified := time.Unix(intModified, 0)
56-
57-
// Clean up URL
58-
var err error
59-
url, err = core.RemoveUTMParams(url)
85+
tagsStr, _ := a.Attr("tags")
86+
timeAddedStr, _ := a.Attr("time_added")
87+
88+
title, url, timeAdded, tags, err := verifyMetadata(title, url, timeAddedStr, tagsStr)
6089
if err != nil {
61-
cError.Printf("Skip %s: URL is not valid\n", url)
90+
cError.Printf("Skip %s: %v\n", url, err)
6291
return
6392
}
6493

65-
// Make sure title is valid Utf-8
66-
title = validateTitle(title, url)
67-
68-
// Check if the URL already exist before, both in bookmark
69-
// file or in database
70-
if _, exist := mapURL[url]; exist {
71-
cError.Printf("Skip %s: URL already exists\n", url)
94+
if err = handleDuplicates(ctx, db, mapURL, url); err != nil {
95+
cError.Printf("Skip %s: %v\n", url, err)
7296
return
7397
}
7498

75-
_, exist, err := deps.Database.GetBookmark(cmd.Context(), 0, url)
76-
if err != nil {
77-
cError.Printf("Skip %s: Get Bookmark fail, %v", url, err)
78-
return
99+
// Add item to list
100+
bookmark := model.BookmarkDTO{
101+
URL: url,
102+
Title: title,
103+
ModifiedAt: timeAdded.Format(model.DatabaseDateFormat),
104+
CreatedAt: timeAdded.Format(model.DatabaseDateFormat),
105+
Tags: tags,
79106
}
80107

81-
if exist {
82-
cError.Printf("Skip %s: URL already exists\n", url)
83-
mapURL[url] = struct{}{}
84-
return
85-
}
108+
mapURL[url] = struct{}{}
109+
bookmarks = append(bookmarks, bookmark)
110+
})
111+
112+
return bookmarks
113+
}
114+
115+
// Parse bookmarks from CSV file
116+
func parseCsvExport(ctx context.Context, db database.DB, srcFile *os.File) []model.BookmarkDTO {
117+
bookmarks := []model.BookmarkDTO{}
118+
mapURL := make(map[string]struct{})
86119

87-
// Get bookmark tags
88-
tags := []model.Tag{}
89-
for _, strTag := range strings.Split(strTags, ",") {
90-
if strTag != "" {
91-
tags = append(tags, model.Tag{Name: strTag})
120+
reader := csv.NewReader(srcFile)
121+
records, err := reader.ReadAll()
122+
if err != nil {
123+
cError.Println(err)
124+
os.Exit(1)
125+
}
126+
127+
for i, cols := range records {
128+
// Check and skip header
129+
if i == 0 {
130+
expected := []string{"title", "url", "time_added", "cursor", "tags", "status"}
131+
if slices.Compare(cols, expected) != 0 {
132+
cError.Printf("Invalid CSV format. Header must be: %s\n", strings.Join(expected, ","))
133+
os.Exit(1)
92134
}
135+
continue
136+
}
137+
138+
// Get metadata
139+
title, url, timeAdded, tags, err := verifyMetadata(cols[0], cols[1], cols[2], cols[4])
140+
if err != nil {
141+
cError.Printf("Skip %s: %v\n", url, err)
142+
continue
143+
}
144+
145+
if err = handleDuplicates(ctx, db, mapURL, url); err != nil {
146+
cError.Printf("Skip %s: %v\n", url, err)
147+
continue
93148
}
94149

95150
// Add item to list
96151
bookmark := model.BookmarkDTO{
97152
URL: url,
98153
Title: title,
99-
ModifiedAt: modified.Format(model.DatabaseDateFormat),
154+
ModifiedAt: timeAdded.Format(model.DatabaseDateFormat),
155+
CreatedAt: timeAdded.Format(model.DatabaseDateFormat),
100156
Tags: tags,
101157
}
102158

103159
mapURL[url] = struct{}{}
104160
bookmarks = append(bookmarks, bookmark)
105-
})
161+
}
106162

107-
// Save bookmark to database
108-
bookmarks, err = deps.Database.SaveBookmarks(cmd.Context(), true, bookmarks...)
163+
return bookmarks
164+
}
165+
166+
// Parse metadata and verify it's validity
167+
func verifyMetadata(title, url, timeAddedStr, tags string) (string, string, time.Time, []model.Tag, error) {
168+
// Clean up URL
169+
var err error
170+
url, err = core.RemoveUTMParams(url)
109171
if err != nil {
110-
cError.Printf("Failed to save bookmarks: %v\n", err)
111-
os.Exit(1)
172+
err = fmt.Errorf("URL is not valid, %w", err)
173+
return "", "", time.Time{}, nil, err
112174
}
113175

114-
// Print imported bookmark
115-
fmt.Println()
116-
printBookmarks(bookmarks...)
176+
// Make sure title is valid Utf-8
177+
title = validateTitle(title, url)
178+
179+
// Parse time added
180+
timeAddedInt, err := strconv.ParseInt(timeAddedStr, 10, 64)
181+
if err != nil {
182+
err = fmt.Errorf("Invalid time added, %w", err)
183+
return "", "", time.Time{}, nil, err
184+
}
185+
timeAdded := time.Unix(timeAddedInt, 0)
186+
187+
// Get bookmark tags
188+
tagsList := []model.Tag{}
189+
// We need to split tags by both comma or pipe,
190+
// because Pocket's CSV export use pipe as separator,
191+
// while HTML export use comma.
192+
for _, tag := range regexp.MustCompile(`[,|]`).Split(tags, -1) {
193+
if tag != "" {
194+
tagsList = append(tagsList, model.Tag{Name: tag})
195+
}
196+
}
197+
198+
return title, url, timeAdded, tagsList, nil
199+
}
200+
201+
// Checks if the URL already exist, both in bookmark
202+
// file or in database
203+
func handleDuplicates(ctx context.Context, db database.DB, mapURL map[string]struct{}, url string) error {
204+
if _, exists := mapURL[url]; exists {
205+
return errors.New("URL already exists")
206+
}
207+
208+
_, exists, err := db.GetBookmark(ctx, 0, url)
209+
if err != nil {
210+
return fmt.Errorf("Failed getting bookmark, %w", err)
211+
}
212+
213+
if exists {
214+
return errors.New("URL already exists")
215+
}
216+
217+
return nil
117218
}

0 commit comments

Comments
 (0)