From 4998765b5d9af555777a41440a9aaaba6e2bcc36 Mon Sep 17 00:00:00 2001 From: Erfan Imani Date: Tue, 12 Feb 2019 17:36:03 +0800 Subject: [PATCH 1/2] PROOF OF CONCEPT - added md5 hash based check to prevent image duplication upon successive product CSV imports. --- .../Model/Import/Product.php | 83 +++++++++++++++---- 1 file changed, 69 insertions(+), 14 deletions(-) diff --git a/app/code/Magento/CatalogImportExport/Model/Import/Product.php b/app/code/Magento/CatalogImportExport/Model/Import/Product.php index 908cf66e3f64d..a56302c4bf09c 100644 --- a/app/code/Magento/CatalogImportExport/Model/Import/Product.php +++ b/app/code/Magento/CatalogImportExport/Model/Import/Product.php @@ -1629,8 +1629,12 @@ protected function _saveProducts() $uploadedImages = []; $previousType = null; $prevAttributeSet = null; + $importDir = $this->_mediaDirectory->getAbsolutePath($this->getImportDir()); + $existingImages = $this->getExistingImages($bunch); + $this->addImageHashes($existingImages); + foreach ($bunch as $rowNum => $rowData) { // reset category processor's failed categories array $this->categoryProcessor->clearFailedCategories(); @@ -1791,17 +1795,37 @@ protected function _saveProducts() $position = 0; foreach ($rowImages as $column => $columnImages) { foreach ($columnImages as $columnImageKey => $columnImage) { - if (!isset($uploadedImages[$columnImage])) { - $uploadedFile = $this->uploadMediaFiles($columnImage); - $uploadedFile = $uploadedFile ?: $this->getSystemFile($columnImage); - if ($uploadedFile) { - $uploadedImages[$columnImage] = $uploadedFile; + $hash = md5_file($importDir . DIRECTORY_SEPARATOR . $columnImage); + + if (!isset($existingImages[$rowSku])) { + $imageAlreadyExists = false; + } else { + $imageAlreadyExists = array_reduce($existingImages[$rowSku], function ($exists, $file) use ($hash) { + if ($exists) { + return $exists; + } + if ($file['hash'] === $hash) { + return $file['value']; + } + return $exists; + }, ''); + } + + if ($imageAlreadyExists) { + $uploadedFile = $imageAlreadyExists; + } else { + if (!isset($uploadedImages[$columnImage])) { + $uploadedFile = $this->uploadMediaFiles($columnImage); + $uploadedFile = $uploadedFile ?: $this->getSystemFile($columnImage); + if ($uploadedFile) { + $uploadedImages[$columnImage] = $uploadedFile; + } else { + unset($rowData[$column]); + $this->skipRow($rowNum, ValidatorInterface::ERROR_MEDIA_URL_NOT_ACCESSIBLE); + } } else { - unset($rowData[$column]); - $this->skipRow($rowNum, ValidatorInterface::ERROR_MEDIA_URL_NOT_ACCESSIBLE); + $uploadedFile = $uploadedImages[$columnImage]; } - } else { - $uploadedFile = $uploadedImages[$columnImage]; } if ($uploadedFile && $column !== self::COL_MEDIA_IMAGE) { @@ -1975,6 +1999,23 @@ protected function _saveProducts() return $this; } + /** + * Generate md5 hashes for existing images for comparison with newly uploaded images. + * + * @param array $images + */ + public function addImageHashes(&$images) { + $dirConfig = DirectoryList::getDefaultConfig(); + $dirAddon = $dirConfig[DirectoryList::MEDIA][DirectoryList::PATH]; + $productPath = $this->_mediaDirectory->getAbsolutePath($dirAddon . '/catalog/product'); + + foreach ($images as $sku => $files) { + foreach ($files as $path => $file) { + $images[$sku][$path]['hash'] = md5_file($productPath . $file['value']); + } + } + } + /** * Prepare array with image states (visible or hidden from product page) * @@ -2110,6 +2151,24 @@ protected function _saveProductTierPrices(array $tierPriceData) return $this; } + /** + * Returns the import directory if specified or a default import directory (media/import). + * + * @return string + */ + protected function getImportDir() + { + $dirConfig = DirectoryList::getDefaultConfig(); + $dirAddon = $dirConfig[DirectoryList::MEDIA][DirectoryList::PATH]; + + if (!empty($this->_parameters[Import::FIELD_NAME_IMG_FILE_DIR])) { + $tmpPath = $this->_parameters[Import::FIELD_NAME_IMG_FILE_DIR]; + } else { + $tmpPath = $dirAddon . '/' . $this->_mediaDirectory->getRelativePath('import'); + } + return $tmpPath; + } + /** * Returns an object for upload a media files * @@ -2126,11 +2185,7 @@ protected function _getUploader() $dirConfig = DirectoryList::getDefaultConfig(); $dirAddon = $dirConfig[DirectoryList::MEDIA][DirectoryList::PATH]; - if (!empty($this->_parameters[Import::FIELD_NAME_IMG_FILE_DIR])) { - $tmpPath = $this->_parameters[Import::FIELD_NAME_IMG_FILE_DIR]; - } else { - $tmpPath = $dirAddon . '/' . $this->_mediaDirectory->getRelativePath('import'); - } + $tmpPath = $this->getImportDir(); if (!$this->_fileUploader->setTmpDir($tmpPath)) { throw new LocalizedException( From 7f7d7d1eb51a695f56f11a8646c9d58a28fda0f9 Mon Sep 17 00:00:00 2001 From: Erfan Imani Date: Wed, 22 May 2019 12:21:29 +0800 Subject: [PATCH 2/2] Fixed md5_file warnings being thrown if file in CSV does not exist. --- .../Magento/CatalogImportExport/Model/Import/Product.php | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/app/code/Magento/CatalogImportExport/Model/Import/Product.php b/app/code/Magento/CatalogImportExport/Model/Import/Product.php index a56302c4bf09c..6682c24488223 100644 --- a/app/code/Magento/CatalogImportExport/Model/Import/Product.php +++ b/app/code/Magento/CatalogImportExport/Model/Import/Product.php @@ -1795,7 +1795,11 @@ protected function _saveProducts() $position = 0; foreach ($rowImages as $column => $columnImages) { foreach ($columnImages as $columnImageKey => $columnImage) { - $hash = md5_file($importDir . DIRECTORY_SEPARATOR . $columnImage); + $filename = $importDir . DIRECTORY_SEPARATOR . $columnImage; + $hash = ''; + if ($this->_mediaDirectory->isReadable($filename)) { + $hash = md5_file($filename); + } if (!isset($existingImages[$rowSku])) { $imageAlreadyExists = false;