11package cmd
22
33import (
4+ "context"
5+ "encoding/csv"
6+ "errors"
47 "fmt"
58 "os"
9+ "path/filepath"
10+ "regexp"
11+ "slices"
612 "strconv"
713 "strings"
814 "time"
915
1016 "github.com/PuerkitoBio/goquery"
1117 "github.com/go-shiori/shiori/internal/core"
18+ "github.com/go-shiori/shiori/internal/database"
1219 "github.com/go-shiori/shiori/internal/model"
1320 "github.com/spf13/cobra"
1421)
1522
1623func pocketCmd () * cobra.Command {
1724 cmd := & cobra.Command {
1825 Use : "pocket source-file" ,
19- Short : "Import bookmarks from Pocket's exported HTML file" ,
26+ Short : "Import bookmarks from Pocket's data export file" ,
2027 Args : cobra .ExactArgs (1 ),
2128 Run : pocketHandler ,
2229 }
@@ -25,17 +32,43 @@ func pocketCmd() *cobra.Command {
2532}
2633
2734func pocketHandler (cmd * cobra.Command , args []string ) {
28- _ , deps := initShiori (cmd .Context (), cmd )
35+ ctx := cmd .Context ()
36+ _ , deps := initShiori (ctx , cmd )
2937
3038 // Open pocket's file
31- srcFile , err := os .Open (args [0 ])
39+ filePath := args [0 ]
40+ srcFile , err := os .Open (filePath )
3241 if err != nil {
3342 cError .Println (err )
3443 os .Exit (1 )
3544 }
3645 defer srcFile .Close ()
3746
38- // Parse pocket's file
47+ var bookmarks []model.BookmarkDTO
48+ switch filepath .Ext (filePath ) {
49+ case ".html" :
50+ bookmarks = parseHtmlExport (ctx , deps .Database , srcFile )
51+ case ".csv" :
52+ bookmarks = parseCsvExport (ctx , deps .Database , srcFile )
53+ default :
54+ cError .Println ("Invalid file format. Only HTML and CSV are supported." )
55+ os .Exit (1 )
56+ }
57+
58+ // Save bookmark to database
59+ bookmarks , err = deps .Database .SaveBookmarks (ctx , true , bookmarks ... )
60+ if err != nil {
61+ cError .Printf ("Failed to save bookmarks: %v\n " , err )
62+ os .Exit (1 )
63+ }
64+
65+ // Print imported bookmarks
66+ fmt .Println ()
67+ printBookmarks (bookmarks ... )
68+ }
69+
70+ // Parse bookmarks from HTML file
71+ func parseHtmlExport (ctx context.Context , db database.DB , srcFile * os.File ) []model.BookmarkDTO {
3972 bookmarks := []model.BookmarkDTO {}
4073 mapURL := make (map [string ]struct {})
4174
@@ -49,69 +82,137 @@ func pocketHandler(cmd *cobra.Command, args []string) {
4982 // Get metadata
5083 title := a .Text ()
5184 url , _ := a .Attr ("href" )
52- strTags , _ := a .Attr ("tags" )
53- strModified , _ := a .Attr ("time_added" )
54- intModified , _ := strconv .ParseInt (strModified , 10 , 64 )
55- modified := time .Unix (intModified , 0 )
56-
57- // Clean up URL
58- var err error
59- url , err = core .RemoveUTMParams (url )
85+ tagsStr , _ := a .Attr ("tags" )
86+ timeAddedStr , _ := a .Attr ("time_added" )
87+
88+ title , url , timeAdded , tags , err := verifyMetadata (title , url , timeAddedStr , tagsStr )
6089 if err != nil {
61- cError .Printf ("Skip %s: URL is not valid \n " , url )
90+ cError .Printf ("Skip %s: %v \n " , url , err )
6291 return
6392 }
6493
65- // Make sure title is valid Utf-8
66- title = validateTitle (title , url )
67-
68- // Check if the URL already exist before, both in bookmark
69- // file or in database
70- if _ , exist := mapURL [url ]; exist {
71- cError .Printf ("Skip %s: URL already exists\n " , url )
94+ if err = handleDuplicates (ctx , db , mapURL , url ); err != nil {
95+ cError .Printf ("Skip %s: %v\n " , url , err )
7296 return
7397 }
7498
75- _ , exist , err := deps .Database .GetBookmark (cmd .Context (), 0 , url )
76- if err != nil {
77- cError .Printf ("Skip %s: Get Bookmark fail, %v" , url , err )
78- return
99+ // Add item to list
100+ bookmark := model.BookmarkDTO {
101+ URL : url ,
102+ Title : title ,
103+ ModifiedAt : timeAdded .Format (model .DatabaseDateFormat ),
104+ CreatedAt : timeAdded .Format (model .DatabaseDateFormat ),
105+ Tags : tags ,
79106 }
80107
81- if exist {
82- cError .Printf ("Skip %s: URL already exists\n " , url )
83- mapURL [url ] = struct {}{}
84- return
85- }
108+ mapURL [url ] = struct {}{}
109+ bookmarks = append (bookmarks , bookmark )
110+ })
111+
112+ return bookmarks
113+ }
114+
115+ // Parse bookmarks from CSV file
116+ func parseCsvExport (ctx context.Context , db database.DB , srcFile * os.File ) []model.BookmarkDTO {
117+ bookmarks := []model.BookmarkDTO {}
118+ mapURL := make (map [string ]struct {})
86119
87- // Get bookmark tags
88- tags := []model.Tag {}
89- for _ , strTag := range strings .Split (strTags , "," ) {
90- if strTag != "" {
91- tags = append (tags , model.Tag {Name : strTag })
120+ reader := csv .NewReader (srcFile )
121+ records , err := reader .ReadAll ()
122+ if err != nil {
123+ cError .Println (err )
124+ os .Exit (1 )
125+ }
126+
127+ for i , cols := range records {
128+ // Check and skip header
129+ if i == 0 {
130+ expected := []string {"title" , "url" , "time_added" , "cursor" , "tags" , "status" }
131+ if slices .Compare (cols , expected ) != 0 {
132+ cError .Printf ("Invalid CSV format. Header must be: %s\n " , strings .Join (expected , "," ))
133+ os .Exit (1 )
92134 }
135+ continue
136+ }
137+
138+ // Get metadata
139+ title , url , timeAdded , tags , err := verifyMetadata (cols [0 ], cols [1 ], cols [2 ], cols [4 ])
140+ if err != nil {
141+ cError .Printf ("Skip %s: %v\n " , url , err )
142+ continue
143+ }
144+
145+ if err = handleDuplicates (ctx , db , mapURL , url ); err != nil {
146+ cError .Printf ("Skip %s: %v\n " , url , err )
147+ continue
93148 }
94149
95150 // Add item to list
96151 bookmark := model.BookmarkDTO {
97152 URL : url ,
98153 Title : title ,
99- ModifiedAt : modified .Format (model .DatabaseDateFormat ),
154+ ModifiedAt : timeAdded .Format (model .DatabaseDateFormat ),
155+ CreatedAt : timeAdded .Format (model .DatabaseDateFormat ),
100156 Tags : tags ,
101157 }
102158
103159 mapURL [url ] = struct {}{}
104160 bookmarks = append (bookmarks , bookmark )
105- })
161+ }
106162
107- // Save bookmark to database
108- bookmarks , err = deps .Database .SaveBookmarks (cmd .Context (), true , bookmarks ... )
163+ return bookmarks
164+ }
165+
166+ // Parse metadata and verify it's validity
167+ func verifyMetadata (title , url , timeAddedStr , tags string ) (string , string , time.Time , []model.Tag , error ) {
168+ // Clean up URL
169+ var err error
170+ url , err = core .RemoveUTMParams (url )
109171 if err != nil {
110- cError . Printf ( "Failed to save bookmarks: %v \n " , err )
111- os . Exit ( 1 )
172+ err = fmt . Errorf ( "URL is not valid, %w " , err )
173+ return "" , "" , time. Time {}, nil , err
112174 }
113175
114- // Print imported bookmark
115- fmt .Println ()
116- printBookmarks (bookmarks ... )
176+ // Make sure title is valid Utf-8
177+ title = validateTitle (title , url )
178+
179+ // Parse time added
180+ timeAddedInt , err := strconv .ParseInt (timeAddedStr , 10 , 64 )
181+ if err != nil {
182+ err = fmt .Errorf ("Invalid time added, %w" , err )
183+ return "" , "" , time.Time {}, nil , err
184+ }
185+ timeAdded := time .Unix (timeAddedInt , 0 )
186+
187+ // Get bookmark tags
188+ tagsList := []model.Tag {}
189+ // We need to split tags by both comma or pipe,
190+ // because Pocket's CSV export use pipe as separator,
191+ // while HTML export use comma.
192+ for _ , tag := range regexp .MustCompile (`[,|]` ).Split (tags , - 1 ) {
193+ if tag != "" {
194+ tagsList = append (tagsList , model.Tag {Name : tag })
195+ }
196+ }
197+
198+ return title , url , timeAdded , tagsList , nil
199+ }
200+
201+ // Checks if the URL already exist, both in bookmark
202+ // file or in database
203+ func handleDuplicates (ctx context.Context , db database.DB , mapURL map [string ]struct {}, url string ) error {
204+ if _ , exists := mapURL [url ]; exists {
205+ return errors .New ("URL already exists" )
206+ }
207+
208+ _ , exists , err := db .GetBookmark (ctx , 0 , url )
209+ if err != nil {
210+ return fmt .Errorf ("Failed getting bookmark, %w" , err )
211+ }
212+
213+ if exists {
214+ return errors .New ("URL already exists" )
215+ }
216+
217+ return nil
117218}
0 commit comments