@@ -17,7 +17,6 @@ import (
1717 "log"
1818 "net/url"
1919 "os"
20- "text/template"
2120
2221 "github.com/gen2brain/go-fitz"
2322 "github.com/readium/readium-lcp-server/rwpm"
@@ -229,6 +228,11 @@ func (reader *RPFReader) ConformsTo() string {
229228 return reader .manifest .Metadata .ConformsTo
230229}
231230
231+ // Title returns the manifest title stored under the "und" key; no other key is consulted
232+ func (reader * RPFReader ) Title () string {
233+ return reader .manifest .Metadata .Title ["und" ]
234+ }
235+
232236// Close closes a Readium Package Reader
233237func (reader * RPFReader ) Close () error {
234238 return reader .zipArchive .Close ()
@@ -397,7 +401,7 @@ func OpenRPF(name string) (*RPFReader, error) {
397401
398402// BuildRPFFromPDF builds a Readium Package (rwpp) which embeds a PDF file and a cover
399403// the cover file extracted from the PDF is not deleted by this function
400- func BuildRPFFromPDF (inputPath , packagePath , coverPath string ) (RWPInfo , error ) {
404+ func BuildRPFFromPDF (inputPath , packagePath , coverPath string , pdfNoMeta bool ) (RWPInfo , error ) {
401405
402406 var rwpInfo RWPInfo
403407
@@ -456,69 +460,71 @@ func BuildRPFFromPDF(inputPath, packagePath, coverPath string) (RWPInfo, error)
456460 return rwpInfo , err
457461 }
458462
459- // inject a Readium manifest into the zip output
460- manifest := `
461- {
462- "@context": "https://readium.org/webpub-manifest/context.jsonld"
463- ,
464- "metadata": {
465- "@type": "http://schema.org/Book",
466- "conformsTo": "https://readium.org/webpub-manifest/profiles/pdf",
467- "title": "{{.Title}}",
468- "author": "{{.Author}}",
469- "subject": "{{.Subject}}",
470- "numberOfPages": {{.NumPages}}
471- },
472- "readingOrder": [
473- {
474- "href": "publication.pdf", "title": "publication", "type": "application/pdf"
475- }
476- ],
477- "resources": [
478- {
479- "rel": "cover", "href": "cover.jpg", "type": "image/jpeg"
480- }
481- ]
482- }
483- `
484-
485463 manifestWriter , err := zipWriter .Create (ManifestLocation )
486464 if err != nil {
487465 return rwpInfo , err
488466 }
489467
490- tmpl , err := template .New ("manifest" ).Parse (manifest )
491- if err != nil {
492- return rwpInfo , err
468+ // create simple manifest object
469+ var manifest rwpm.Publication
470+
471+ manifest .Context .Add ("https://readium.org/webpub-manifest/context.jsonld" )
472+ manifest .Metadata .Type = "http://schema.org/Book"
473+ manifest .Metadata .ConformsTo = "https://readium.org/webpub-manifest/profiles/pdf"
474+
475+ // number of pages is needed to display progress in the reader
476+ manifest .Metadata .NumberOfPages = rwpInfo .NumPages
477+
478+ // PDF metadata can be so unreliable that the caller may want to ignore it
479+ if pdfNoMeta {
480+ // we still need a title
481+ filename := filepath .Base (inputPath )
482+ rwpInfo .Title = strings .TrimSuffix (filename , filepath .Ext (filename )) // default title
483+ // remove underscores, hyphens, dots which are frequent in PDF file names
484+ rwpInfo .Title = strings .ReplaceAll (rwpInfo .Title , "_" , " " )
485+ rwpInfo .Title = strings .ReplaceAll (rwpInfo .Title , "-" , " " )
486+ rwpInfo .Title = strings .ReplaceAll (rwpInfo .Title , "." , " " )
487+ rwpInfo .Title = strings .TrimSpace (rwpInfo .Title )
488+ manifest .Metadata .Title .Set ("und" , rwpInfo .Title )
489+ // otherwise (else branch below), add PDF metadata to the manifest
490+ } else {
491+ // remove underscores, hyphens, stars which are frequent in PDF titles
492+ rwpInfo .Title = strings .ReplaceAll (rwpInfo .Title , "_" , " " )
493+ rwpInfo .Title = strings .ReplaceAll (rwpInfo .Title , "-" , " " )
494+ rwpInfo .Title = strings .ReplaceAll (rwpInfo .Title , "*" , " " )
495+ rwpInfo .Title = strings .TrimSpace (rwpInfo .Title )
496+ if rwpInfo .Title == "" {
497+ rwpInfo .Title = "No Title Available" // default title
498+ }
499+ manifest .Metadata .Title .Set ("und" , rwpInfo .Title )
500+ // there is zero or one author/subject in the PDF metadata
501+ if len (rwpInfo .Author ) != 0 {
502+ manifest .Metadata .Author .AddName (rwpInfo .Author [0 ])
503+ }
504+ if len (rwpInfo .Subject ) != 0 {
505+ manifest .Metadata .Subject .Add (rwpm.Subject {Name : rwpInfo .Subject [0 ]})
506+ }
493507 }
494508
495- // remove underscores, hyphens, stars which are frequent in PDF titles
496- rwpInfo .Title = strings .ReplaceAll (rwpInfo .Title , "_" , " " )
497- rwpInfo .Title = strings .ReplaceAll (rwpInfo .Title , "-" , " " )
498- rwpInfo .Title = strings .ReplaceAll (rwpInfo .Title , "*" , " " )
499- rwpInfo .Title = strings .TrimSpace (rwpInfo .Title )
500- if rwpInfo .Title == "" {
501- rwpInfo .Title = "No Title Available" // default title
502- }
503- // there is zero or one author/subject in the PDF metadata
504- if len (rwpInfo .Author ) == 0 {
505- rwpInfo .Author = []string {"unknown" }
506- }
507- if len (rwpInfo .Subject ) == 0 {
508- rwpInfo .Subject = []string {"unknown" }
509- }
510- params := struct {
511- Title string
512- Author string
513- Subject string
514- NumPages int
515- }{
516- Title : rwpInfo .Title ,
517- Author : rwpInfo .Author [0 ],
518- Subject : rwpInfo .Subject [0 ],
519- NumPages : rwpInfo .NumPages ,
520- }
521- err = tmpl .Execute (manifestWriter , params )
509+ manifest .ReadingOrder = []rwpm.Link {
510+ {
511+ Href : "publication.pdf" ,
512+ Title : "publication" ,
513+ Type : "application/pdf" ,
514+ },
515+ }
516+ manifest .Resources = []rwpm.Link {
517+ {
518+ Rel : []string {"cover" },
519+ Href : "cover.jpg" ,
520+ Type : "image/jpeg" ,
521+ },
522+ }
523+
524+ // marshal and write manifest as JSON
525+ encoder := json .NewEncoder (manifestWriter )
526+ encoder .SetIndent ("" , " " )
527+ err = encoder .Encode (manifest )
522528 if err != nil {
523529 return rwpInfo , err
524530 }
@@ -550,8 +556,14 @@ func extractRWPInfo(inputPath, coverPath string) (RWPInfo, error) {
550556 // extract PDF metadata and number of pages
551557 metadata := doc .Metadata ()
552558 rwpInfo .Title = cleanNulls (metadata ["title" ])
553- rwpInfo .Author = []string {cleanNulls (metadata ["author" ])}
554- rwpInfo .Subject = []string {cleanNulls (metadata ["subject" ])}
559+ author := cleanNulls (metadata ["author" ])
560+ if author != "" {
561+ rwpInfo .Author = []string {author }
562+ }
563+ subject := cleanNulls (metadata ["subject" ])
564+ if subject != "" {
565+ rwpInfo .Subject = []string {subject }
566+ }
555567 rwpInfo .NumPages = doc .NumPage ()
556568
557569 if coverPath == "" {
0 commit comments