dmikhr · dmikhr · May 7, 2025 · Apr 25, 2025 · Apr 25, 2025 · Apr 25, 2025
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,5 @@
+/bin/
+/tmp/
+/.idea/
+
+.DS_Store
diff --git a/.golangci.yaml b/.golangci.yaml
@@ -0,0 +1,36 @@
+# golangci-lint (v1 configuration format) settings for this repository.
+run:
+  concurrency: 8
+  timeout: 5m
+  issues-exit-code: 1
+  tests: true
+
+# `exclude-dirs` is an option of the top-level `issues` section;
+# nested under `run` it is not picked up by the linter.
+issues:
+  exclude-dirs:
+    - bin
+    - tmp
+
+output:
+  formats:
+    - format: colored-line-number
+  print-issued-lines: true
+  print-linter-name: true
+
+linters-settings:
+  govet:
+    # The shadow analyzer is not part of govet's default set; it has to be
+    # switched on through the `enable` list.
+    enable:
+      - shadow
+  dupl:
+    threshold: 100
+  goconst:
+    min-len: 2
+    min-occurrences: 2
+
+# Only the linters listed under `enable` are run.
+linters:
+  disable-all: true
+  enable:
+    - errcheck
+    - goconst
+    - goimports
+    - gosec
+    - govet
+    - ineffassign
+    - revive
+    - typecheck
+    - unused
diff --git a/Makefile b/Makefile
@@ -0,0 +1,27 @@
+# Directory that `go install` places the pinned tools into (passed as GOBIN below).
+LOCAL_BIN:=$(CURDIR)/bin
+
+.PHONY: third_party_licenses go-lic-install lint install-golangci-lint test build build-run run
+
+# Compile and run the main package located in ./cmd.
+# `go run` expects a package (or .go files), not a path to a built binary.
+run:
+	go run ./cmd
+
+# Build the main package in ./cmd into ./bin/pdfjuicer.
+build:
+	go build -o ./bin/pdfjuicer ./cmd
+
+# Build the binary and start it right away.
+build-run:
+	go build -o ./bin/pdfjuicer ./cmd && ./bin/pdfjuicer
+
+# Run all tests in verbose mode.
+test:
+	go test ./... -v
+
+# Install the pinned golangci-lint version into $(LOCAL_BIN).
+install-golangci-lint:
+	GOBIN=$(LOCAL_BIN) go install github.com/golangci/golangci-lint/cmd/golangci-lint@v1.64.8
+
+# Lint every package with the locally installed golangci-lint.
+lint:
+	$(LOCAL_BIN)/golangci-lint run ./...
+
+# Install the pinned go-licenses version into $(LOCAL_BIN).
+go-lic-install:
+	GOBIN=$(LOCAL_BIN) go install github.com/google/go-licenses@v1.6.0
+
+# Save the licenses of all dependencies into ./THIRD_PARTY_LICENSES.
+third_party_licenses:
+	$(LOCAL_BIN)/go-licenses save ./... --save_path=./THIRD_PARTY_LICENSES
diff --git a/README.md b/README.md
@@ -1 +1,129 @@
 # pdfjuicer
+
+![demo](assets/pdfjuicer_action.png)
+
+A fast and flexible command-line tool for extracting pages from PDF documents as high-quality images. This utility allows you to quickly convert PDF content into various image formats with extensive customization options.
+
+✅ Extract specific pages or page ranges
+
+✅ Control output image format and quality
+
+✅ Scale images to desired dimensions
+
+✅ Generate thumbnails for quick previews and web use
+
+✅ Asynchronous processing for faster extraction
+
+Built with Go, the app leverages the speed of a compiled language and concurrent processing via goroutines.
+
+
+## About
+Pdfjuicer was designed with content creators and educators in mind, providing easy ways to extract visual content from PDF documents for presentations, course materials, and digital content creation.
+
+Whether you're creating educational materials, preparing digital content, or need to quickly extract visuals from documentation, **Pdfjuicer** provides a straightforward command-line interface to transform your PDFs into usable image assets.
+
+## Example use cases
+
+### Content Creation for Digital Platforms
+
+- **Social Media** - Transform PDF visuals into shareable graphics
+- **E-learning platforms** - Convert PDF content into image assets for online courses
+- **Infographic creation** - Pull charts and diagrams as starting points for custom infographics
+- **Telegram Channels** - Create image-based content for Telegram groups and channels
+- **Blog illustrations** - Extract relevant images from research papers or reports
+
+The built-in thumbnail generation feature creates perfect preview images for content libraries, ensuring students and followers can quickly identify relevant materials before clicking through to full-size content.
+
+### AI and Automation Integration
+The command-line interface makes Pdfjuicer an ideal component in automated workflows:
+
+- **OCR Processing Pipelines** - Extract PDF pages as images that can be fed directly into text recognition engines for data extraction
+- **n8n Workflow Integration** - Seamlessly incorporate into n8n automation workflows with simple command execution nodes
+- **Batch Processing Systems** - Schedule regular extraction jobs for new documents in content management systems
+
+The tool's CLI nature enables headless operation in server environments, making it easy to integrate with AI processing pipelines where PDFs need to be converted to image format before analysis, classification, or text extraction.
+
+### Professional Applications
+- **Creating presentations** - Extract diagrams and illustrations for presentations
+- **Documentation** - Create visual guides from technical manuals
+- **Portfolio creation** - Showcase design work originally saved in PDF format
+
+## Commands
+Specify source file and output folder
+  ```
+  -s, --source string    Specify path to source file (pdf)
+  -o, --output string    Specify output folder path
+  -x, --postfix string   Postfix for a filename
+  -p, --prefix string    Prefix for a filename (default "page")
+  ```
+
+Specify particular pages or ranges for extraction
+  ```
+  -P, --pages string     Use this flag to extract specific pages, 
+                         example: 2,3,6-8,10
+  ```
+
+Extracted images settings
+  ```
+  -C, --scale float      Specify image scaling down factor,
+                         for example 5 means output image will be
+                         5 times smaller than original image (default 1)
+  -S, --size string      Specify image size, example 640x480, 
+                         if not specified will output default size from document
+  -F, --format string    Specify output image format (png/jpg) (default "png")
+  ```
+
+Thumbnails settings
+  ```
+  -t, --thumb            enable thumbnails generation
+  -c, --tscale float     Specify thumbnails scaling down factor, 
+                         for example 5 means thumbnail will be 5 times smaller 
+                         than original image (default 10)
+  -z, --tsize string     Specify thumbnails size e.g. 64x64
+  ```
+
+Miscellaneous
+  ```
+  -v, --version          Show version
+  -q, --quiet            Quiet mode (no progress bar, no colored output)
+  -w, --workers int      Set number of asynchronous workers (default N*)
+  ```
+
+*The default number of asynchronous workers (N) equals the number of logical CPU cores in your computer.
+
+## Usage examples
+
+See help by calling app either without parameters
+```
+pdfjuicer
+```
+or call app with flag `--help` or `-h`
+
+Extract all pages from pdf document as images using default settings (original image size):
+
+```sh
+pdfjuicer -s ./tmp/test.pdf -o ./media/pics
+```
+
+Extract in quiet mode: no progress bar and no output formatting. This is more convenient when the app is called from another program.
+```sh
+pdfjuicer -s ./tmp/test.pdf -o ./media/pics --quiet
+```
+
+Extract pages 1 to 3 from `./tmp/test.pdf`, save the images to `./media/pics`, and reduce their size by a factor of 5:
+```sh
+pdfjuicer -s ./tmp/test.pdf -o ./media/pics --pages=1-3 --scale=5
+```
+
+Extract pages 2-5 (`-P`) from a pdf document as images using default settings, with thumbnails enabled (`-t`), using shorthand flags:
+```sh
+pdfjuicer -s ./tmp/test.pdf -o ./media/pics -t -P=2-5
+```
+
+Extract pages 3,5,7-10,15,20-22 in jpg format with specific image and thumbnails sizes
+```sh
+pdfjuicer -s ./tmp/test.pdf -o ./media/pics -t --pages=3,5,7-10,15,20-22 --size=512x256 --tsize=128x64 --format=jpg
+```
+
+
+
diff --git a/assets/pdfjuicer_action.png b/assets/pdfjuicer_action.png
diff --git a/cmd/color.go b/cmd/color.go
@@ -0,0 +1,39 @@
+package main
+
+import (
+	"fmt"
+	"reflect"
+)
+
+// Color is an ANSI escape sequence, stored as a string, used to format or color terminal text.
+type Color string
+
+const (
+	// Reset is the ANSI sequence that restores the default terminal formatting.
+	Reset Color = "\033[0m"
+	// Bold is the ANSI bold sequence; unlike its siblings it is an untyped string constant, not a Color.
+	Bold = "\033[1m"
+	// ColorGreen is the ANSI sequence that switches the terminal text color to green.
+	ColorGreen Color = "\033[32m"
+)
+
+// color wraps text in ANSI escape sequences: the given color first, then the
+// bold sequence when bold is set, then the text itself, and finally Reset.
+// When noFormat is true the text is returned unchanged.
+func color(text string, color Color, bold, noFormat bool) string {
+	switch {
+	case noFormat:
+		return text
+	case bold:
+		return string(color) + Bold + text + string(Reset)
+	default:
+		return string(color) + text + string(Reset)
+	}
+}
+
+// formattedLabel constrains fbg to values whose underlying type is string or float64.
+type formattedLabel interface {
+	~string | ~float64
+}
+
+// fbg renders label in bold green, or as plain text when noFormat is true.
+// String values are used as-is; float values are formatted with two decimals.
+//
+// The constraint also admits named types (for example Color), so the value is
+// dispatched on its reflect.Kind. A type assertion on the exact types string
+// and float64 would panic for any named type.
+func fbg[T formattedLabel](label T, noFormat bool) string {
+	v := reflect.ValueOf(label)
+	if v.Kind() == reflect.String {
+		return color(v.String(), ColorGreen, true, noFormat)
+	}
+	// The constraint leaves float64-based types as the only other possibility.
+	return color(fmt.Sprintf("%.2f", v.Float()), ColorGreen, true, noFormat)
+}
diff --git a/cmd/info.go b/cmd/info.go
@@ -0,0 +1,19 @@
+package main
+
+import "fmt"
+
+const (
+	version    = "1.0.0"                               // version reported in the about text
+	author     = "Dmitrii Khramtsov"                   // author credited in the about text
+	repository = "https://github.com/dmikhr/pdfjuicer" // project URL shown in the about text
+	license    = "AGPL-3.0"                            // license name shown in the about text
+)
+
+func about() string {
+	return fmt.Sprintf(`pdfjuicer v%s
+A command-line tool for extracting pages from a PDF file as images.
+Author: %s
+Repository: %s
+License: %s
+`, version, author, repository, license)
+}