diff --git a/System-Automation-Scripts/PDF_Tables_To_CSV/.gitignore b/System-Automation-Scripts/PDF_Tables_To_CSV/.gitignore new file mode 100644 index 00000000..0f7c8b11 --- /dev/null +++ b/System-Automation-Scripts/PDF_Tables_To_CSV/.gitignore @@ -0,0 +1,9 @@ +# Vscode files +.vscode + +# Sample Files +sample.pdf +sample2.pdf + +# Python +__pycache__ \ No newline at end of file diff --git a/System-Automation-Scripts/PDF_Tables_To_CSV/README.md b/System-Automation-Scripts/PDF_Tables_To_CSV/README.md new file mode 100644 index 00000000..c0c4b635 --- /dev/null +++ b/System-Automation-Scripts/PDF_Tables_To_CSV/README.md @@ -0,0 +1,22 @@ +# PDF to CSV +This script will convert the tables in the PDF file into CSV files. Each CSV file has one table from the PDF, and the number of CSV files equals the number of tables in the PDF. + +# Requirements +`pip install tabula-py pandas` + +# How to use? +Just use the following command to run the script: + +`python app.py location_of_pdf pages` + +Pages have two options: +- 'all' will extract tables from the whole PDF +- specific page (ex 1,2,54..) 
"""Convert the tables in a PDF file into CSV files, one CSV per table.

Usage:
    python app.py location_of_pdf pages

``pages`` has two options:
    - ``all`` extracts tables from the whole PDF
    - a specific page (for example ``45``) extracts tables from that page

Examples:
    python app.py sample.pdf all
    python app.py sample2.pdf 45
"""
import sys

import pandas as pd
import tabula


def extract(path, number_pages):
    """Save every table tabula finds in a PDF as its own CSV file.

    Args:
        path: Location of the PDF; forwarded unchanged to
            ``tabula.read_pdf``.
        number_pages: Page selection forwarded unchanged as tabula's
            ``pages`` argument (e.g. ``"all"`` or ``"45"``).

    Returns:
        None. Each table is written to ``Table- <n>.csv`` (numbered from 1
        in the order tabula returns them), relative to the current working
        directory, and progress is reported on stdout.
    """
    tables = tabula.read_pdf(path, multiple_tables=True, pages=number_pages)

    # Guard clause: nothing to write when tabula reports no tables.
    if not tables:
        print("No tables found !")
        return

    for count, table in enumerate(tables, start=1):
        print(f"Saving file -{count}")
        # File name (including the space after the dash) is kept as-is so
        # existing consumers of the output keep finding the same files.
        table.to_csv(f'Table- {count}.csv')
    print("All tables saved as seperate files !")


if __name__ == "__main__":
    # Fail with a usage hint instead of a bare IndexError when an argument
    # is missing; extra arguments are still ignored as before.
    if len(sys.argv) < 3:
        sys.exit(f"Usage: python {sys.argv[0]} location_of_pdf pages")
    extract(sys.argv[1], sys.argv[2])