Draft pull request (changes from 3 commits)
2 changes: 1 addition & 1 deletion .github/workflows/CI.yml
@@ -20,7 +20,7 @@ jobs:
build:
strategy:
matrix:
python-version: [ 3.7 ]
python-version: [ 3.10.7 ]
os: [ windows-latest, ubuntu-latest, macos-latest ]
runs-on: ${{ matrix.os }}
permissions:
1 change: 1 addition & 0 deletions .gitignore
@@ -12,3 +12,4 @@
*.vscode
venv/
coverage.xml
.python-version
11 changes: 5 additions & 6 deletions gdk/aws_clients/Greengrassv2Client.py
@@ -30,6 +30,10 @@ def get_component_version(self, component_arn) -> dict:
comp_list_response = self.client.list_component_versions(arn=component_arn)
return comp_list_response["componentVersions"]

def get_component(self, component_arn) -> dict:
comp_response = self.client.get_component(recipeOutputFormat='YAML', arn=component_arn)
return comp_response["recipe"]

def create_gg_component(self, file_path) -> None:
"""
Creates a GreengrassV2 private component version using its recipe.
@@ -38,12 +42,7 @@ def create_gg_component(self, file_path) -> None:
"""
with open(file_path, "r", encoding="utf-8") as f:
try:
response = self.client.create_component_version(inlineRecipe=f.read())
logging.info(
"Created private version '%s' of the component '%s' in the account.",
response.get("componentVersion"),
response.get("componentName"),
)
self.client.create_component_version(inlineRecipe=f.read())
except Exception:
logging.error("Failed to create a private version of the component using the recipe at '%s'.", file_path)
raise
200 changes: 195 additions & 5 deletions gdk/commands/component/PublishCommand.py
@@ -1,4 +1,8 @@
import logging
from pathlib import Path
import yaml
import json
import deepdiff
from gdk.commands.component.transformer.PublishRecipeTransformer import PublishRecipeTransformer

import gdk.commands.component.component as component
@@ -7,6 +11,7 @@
from gdk.aws_clients.S3Client import S3Client
from gdk.commands.Command import Command
from gdk.commands.component.config.ComponentPublishConfiguration import ComponentPublishConfiguration
from gdk.common.CaseInsensitive import CaseInsensitiveRecipeFile


class PublishCommand(Command):
@@ -29,6 +34,181 @@ def run(self):
)
raise

def _check_for_changes(self):
logging.info(f"Checking for changes in the component: {self.project_config.component_name}")

options = self.project_config.options
only_on_change = options.get("only_on_change", [])

if only_on_change:
latest_published_recipe = self._get_latest_published_recipe()
if not latest_published_recipe:
logging.info(f"No published recipe found for the component: {self.project_config.component_name}")
return True

if "RECIPE" in only_on_change:
logging.info("Checking for changes in the RECIPE.")
if self._diff_recipe(latest_published_recipe):
return True

if "ARTIFACTS" in only_on_change:
logging.info("Checking for changes in the ARTIFACTS.")
if self._diff_artifacts(latest_published_recipe):
return True

logging.info(f"No changes in the component: {self.project_config.component_name}")
return False

else:
logging.info(f"Publishing regardless of RECIPE or ARTIFACT diff: {self.project_config.component_name}")
Member commented:
I think we should default to publishing regardless of recipe/artifact diff if any specific component version is specified in the gdk-config file. IMO, we only care about checking diffs if the version is set to NEXT_PATCH, as this will keep incrementing the patch version and creating new components. If someone specifies an actual component version such as 1.0.0 or 2.0.0, we should attempt the publish to that version regardless of differences.

I'm thinking of a scenario where someone may be developing with NEXT_PATCH (say along a 1.0.0 major/minor version), then decide they want to publish the same unchanged component as 2.0.0 to move up a major version and test with other components looking for a version in that dependency range. Unless they remove the only_on_change part of their config, we wouldn't make the publish, as it would still be the same component as the latest patch version in the 1.0.0 line, which is still the latest. It seems more useful to me for GDK to attempt this publish regardless, rather than prevent a publish to an explicitly defined version. Of course, since only_on_change is not the default, they would have added it to their config file themselves at some point, so I'm open to hearing your thoughts on what the behavior should be when version isn't set to NEXT_PATCH.

@nbaillie (Author) commented on Dec 23, 2024:

Thinking as below for the scenarios:

version is NEXT_PATCH and only_on_change IS NOT set

  • Then continue to publish as per existing behavior.

version is NEXT_PATCH and only_on_change IS set

  • Then check for changes against the latest published version.
  • Only publish if there are changes.

version is NOT NEXT_PATCH and only_on_change IS NOT set

  • Then continue to publish as per existing behavior.

version is NOT NEXT_PATCH and only_on_change IS set (this part was not in the initial PR)

  • Then check for changes against the published version that is set by version.
  • If the version does not exist, publish.
  • If the version does exist, only publish if there are changes (more efficient).

--
Happy to leave the last one out and stick to existing behavior; let me know if you think it's needed or not, and I'll make the change.
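
A minimal sketch of the decision table in the first three scenarios above (the function name and signature are hypothetical; this reflects the behavior discussed in the thread, not necessarily the exact code in this diff):

```python
# Hypothetical sketch of the discussed behavior; not the actual GDK code path.
def should_check_for_changes(version: str, only_on_change: list) -> bool:
    # only_on_change not set: publish as per existing behavior, no diff check.
    if not only_on_change:
        return False
    # only_on_change set and version is NEXT_PATCH: diff against the latest
    # published version and publish only if something changed.
    # For an explicitly pinned version, publish regardless (existing behavior).
    return version == "NEXT_PATCH"
```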

Member replied:

The first three behaviors look good to me. For the last scenario (version is NOT NEXT_PATCH and only_on_change IS set), if the version does exist, then even if there are changes, those changes would never be reflected in the component version, as we get something along the lines of the following error:

botocore.errorfactory.ConflictException: An error occurred (ConflictException) when calling the CreateComponentVersion operation: Component [componentname : componentversion] for account [000000000000] already exists with state: [DEPLOYABLE]

A customer would have to delete this component version before GDK could publish to it again with changes. The GDK currently doesn't do this, and we typically recommend that customers create new component versions when they have changes, so I think this error is sufficient as it describes the issue clearly. Whether or not we check for changes, the component version remains unchanged if it already existed. I think leaving the last scenario out and just sticking to existing behavior would essentially have the same effect, since the new publish doesn't actually change the component version even if there are changes.

Maybe an improvement here could be to warn in the GDK output if the S3 artifacts have changed but the version already existed, since modified artifacts that are uploaded could have a different hash from the artifact details in the existing cloud component version's recipe. But this would be a separate improvement, not closely tied to the new only_on_change feature.
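
For what it's worth, a rough sketch of such a warning, assuming the helpers added in this diff (the function itself is hypothetical and not part of the PR):

```python
import logging
from gdk.common import utils

def warn_if_artifacts_drifted(build_artifacts_dir, published_recipe):
    """Warn when a local artifact's hash differs from the published recipe's Digest."""
    for artifact_path in build_artifacts_dir.iterdir():
        for manifest in published_recipe.get("Manifests", []):
            for artifact in manifest.get("Artifacts", []):
                uri = artifact.get("URI", artifact.get("Uri", ""))
                if uri.split("/")[-1] != artifact_path.name:
                    continue
                if artifact.get("Digest") != utils.artifact_encoded_hash(artifact_path):
                    logging.warning(
                        "Artifact '%s' differs from the already-published component version; "
                        "the existing cloud version will not be updated.",
                        artifact_path.name,
                    )
```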

return True

def _diff_artifacts(self, latest_published_recipe):
build_artifacts = list(self.project_config.gg_build_component_artifacts_dir.iterdir())

for build_artifact in build_artifacts:
artifact_found_in_latest_manifest = False
for latest_p_manifest in latest_published_recipe.get("Manifests", []):
for latest_p_artifact in latest_p_manifest.get("Artifacts", []):
if latest_p_artifact.get("URI", latest_p_artifact.get("Uri")).split("/")[-1] == build_artifact.name:
artifact_found_in_latest_manifest = True
if latest_p_artifact.get("Digest", None) != utils.artifact_encoded_hash(build_artifact):
logging.info(f"Changes found in the artifact: {build_artifact}")
return True

if not artifact_found_in_latest_manifest:
logging.info("Could not find find the published artifact: {build_artifact}")
return True

logging.info("No Changes found in the artifacts")
return False

def _check_recipe_structure(self, recipe, latest_published_recipe):
"""Check for structural changes in the recipe."""
diff = deepdiff.DeepDiff(
recipe,
latest_published_recipe,
ignore_type_subclasses=True,
exclude_paths=["root['ComponentVersion']"],
exclude_regex_paths=[
"^root\['Manifests'\]\[\d\]\['Artifacts'\]\[\d\]", # noqa: W605
"^root\['Manifests'\]\[\d\]\['Artifacts'\]" # noqa: W605
]
)

logging.debug(f"Recipe diff: {diff}")

if "dictionary_item_added" in diff.keys():
for item in diff["dictionary_item_added"]:
if item not in ["root['ComponentType']", "root['Lifecycle']"]:
logging.info(f"Changes found: dictionary_item_added: {item}")
return True

if "dictionary_item_removed" in diff.keys():
logging.info("Changes found: dictionary_item_removed")
return True

if "values_changed" in diff.keys():
logging.info(f"Changes found: values_changed: {json.dumps(diff['values_changed'], indent=2)}")
return True

return False

def _check_manifests(self, recipe, latest_published_recipe):
"""Check for changes in manifests and artifacts."""
manifests = recipe.get("Manifests", [])
latest_manifests = latest_published_recipe.get("Manifests", [])

if len(manifests) != len(latest_manifests):
logging.info("Changes found in the number of defined Manifests")
return True

for idm, manifest in enumerate(manifests):
if self._check_artifacts(manifest, latest_manifests[idm]):
return True

return False

def _check_artifacts(self, manifest, latest_manifest):
"""Check for changes in artifacts."""
artifacts = manifest.get("Artifacts", [])
latest_artifacts = latest_manifest.get("Artifacts", [])

if len(artifacts) != len(latest_artifacts):
logging.info("Changes found in the number of defined Artifacts")
return True

if not artifacts:
return False

for ida, artifact in enumerate(artifacts):
if self._check_single_artifact(artifact, latest_artifacts[ida]):
return True

return False

def _check_single_artifact(self, artifact, latest_artifact):
"""Check for changes in a single artifact."""
recipe_uri = artifact.get("URI", artifact.get("Uri"))
recipe_unarchive = artifact.get("Unarchive", "")

latest_uri = latest_artifact.get("URI", latest_artifact.get("Uri"))
latest_unarchive = latest_artifact.get("Unarchive", "")
latest_published_version = self.project_config.latest_published_component_version

# Check URI changes
recipe_version = self.project_config.component_version
if recipe_uri.replace(recipe_version, '<VERSION>') != latest_uri.replace(latest_published_version, '<VERSION>'):
logging.info(f"Changes found: Artifact URI: local: {recipe_uri}")
logging.info(f"Changes found: Artifact URI: in gg: {latest_uri}")
return True

# Check unarchive changes
if recipe_unarchive != latest_unarchive:
logging.info(f"Changes found: Artifact Unarchive: local: {recipe_unarchive}")
logging.info(f"Changes found: Artifact Unarchive: in gg: {latest_unarchive}")
return True

return False

def _log_recipe_comparison(self, recipe, latest_published_recipe):
"""Log the recipe comparison details."""
logging.debug(f"Recipe Local: {json.dumps(recipe,indent=2)}")
logging.debug(f"Recipe in Greengrass: {json.dumps(latest_published_recipe,indent=2)}")

def _diff_recipe(self, latest_published_recipe):
recipe = self._get_recipe().to_dict()

self._log_recipe_comparison(recipe, latest_published_recipe)

if not latest_published_recipe:
logging.info(f"No published recipe found for the component to check against: {self.project_config.component_name}")
return True

if self._check_recipe_structure(recipe, latest_published_recipe):
return True

if self._check_manifests(recipe, latest_published_recipe):
return True

logging.info("No Changes found in the recipe")
return False

def _get_latest_published_recipe(self):
if self.project_config.latest_published_component_version:
component = f"components:{self.project_config.component_name}"
version = f"versions:{self.project_config.latest_published_component_version}"
component_with_version = f"{component}:{version}"
arn = f"arn:aws:greengrass:{self.project_config.region}:{self.project_config.account_num}:{component_with_version}"
return yaml.safe_load(self.greengrass_client.get_component(arn).decode('utf8'))
else:
return None

def _get_recipe(self):
recipe_path = Path(self.project_config.publish_recipe_file)
return CaseInsensitiveRecipeFile().read(recipe_path)

def try_build(self):
# TODO: This method should just warn and proceed. It should not build the component.
component_name = self.project_config.component_name
@@ -41,14 +221,24 @@ def try_build(self):

def _publish_component_version(self, component_name, component_version):
logging.info("Publishing the component '%s' with the given project configuration.", component_name)
logging.info("Uploading the component built artifacts to s3 bucket.")
self.upload_artifacts_s3()

logging.info("Updating the component recipe %s-%s.", component_name, component_version)
logging.info("Transform the component recipe %s-%s.", component_name, component_version)
PublishRecipeTransformer(self.project_config).transform()
if self._check_for_changes():

logging.info("Creating a new greengrass component %s-%s.", component_name, component_version)
self.greengrass_client.create_gg_component(self.project_config.publish_recipe_file)
logging.info("Uploading the component built artifacts to s3 bucket.")
self.upload_artifacts_s3()

logging.info("Creating a new greengrass component version %s-%s.", component_name, component_version)
self.greengrass_client.create_gg_component(self.project_config.publish_recipe_file)
logging.info("Latest published version is now: %s-%s", component_name, self.project_config.component_version)
else:
logging.info("No changes found in the component. Skipping the publish step.")
logging.info(
"Latest published version remains: %s-%s",
component_name,
self.project_config.latest_published_component_version
)

def upload_artifacts_s3(self) -> None:
"""
37 changes: 29 additions & 8 deletions gdk/commands/component/config/ComponentPublishConfiguration.py
@@ -17,6 +17,7 @@ def __init__(self, _args) -> None:
self.options = self._get_options()
self.account_num = self.get_account_number()
self.region = self._get_region()
self.latest_published_component_version = self.get_latest_published_component_version(self.region)
self.bucket = self._get_bucket(self.region, self.account_num)
self.component_version = self.get_component_version(self.region)
self.publisher = self.component_config.get("author", "")
@@ -37,15 +38,19 @@ def _get_region(self):
def _validated_region(self, region):
if region == "":
raise ValueError("Region cannot be empty. Please provide a valid region.")
return region

def _check_for_latest_published_component_version(self, region):
component_arn = self._get_component_arn(region)
version = None
try:
Greengrassv2Client(region).get_component_version(component_arn)
version = Greengrassv2Client(region).get_highest_cloud_component_version(component_arn)
except exceptions.EndpointConnectionError:
raise ValueError("Greengrass does not exist in %s region. Please provide a valid region.", region)
except Exception as e:
logging.error("Error occurred while checking Greengrass availability: %s", e)
raise e
return region
return version

def _get_bucket(self, _region, _account):
_bucket = ""
@@ -89,6 +94,24 @@ def _read_from_file(self, _opts_path: Path):
with open(_opts_path.resolve(), "r", encoding="utf-8") as file:
return json.loads(file.read())

def get_latest_published_component_version(self, _region) -> str:
logging.debug("Fetching private components from the account.")
try:
c_name = self.component_name
c_latest_current_version = self._check_for_latest_published_component_version(_region)
if not c_latest_current_version:
logging.info(
"No private version of the component '%s' exist in the account.", c_name
)

return None
logging.debug("Found latest version '%s' of the component '%s' in the account.", c_latest_current_version, c_name)

return c_latest_current_version
except Exception:
logging.error("Failed to calculate the latest published version of the component.")
raise

def get_component_version(self, _region):
_version = self.component_config.get("version")
if not _version:
@@ -110,22 +133,20 @@ def _get_next_version(self, _region) -> str:
the fallback version.
"""
fallback_version = "1.0.0"
logging.debug("Fetching private components from the account.")
try:
c_name = self.component_name
component_arn = self._get_component_arn(_region)
c_next_patch_version = Greengrassv2Client(_region).get_highest_cloud_component_version(component_arn)
if not c_next_patch_version:
c_latest_published_version = self.latest_published_component_version
if not c_latest_published_version:
logging.info(
"No private version of the component '%s' exist in the account. Using '%s' as the next version to create.",
c_name,
fallback_version,
)

return fallback_version
logging.debug("Found latest version '%s' of the component '%s' in the account.", c_next_patch_version, c_name)
logging.debug("Found latest version '%s' of the component '%s' in account.", c_latest_published_version, c_name)

next_version = utils.get_next_patch_version(c_next_patch_version)
next_version = utils.get_next_patch_version(c_latest_published_version)
logging.info("Using '%s' as the next version of the component '%s' to create.", next_version, c_name)
return next_version
except Exception:
9 changes: 9 additions & 0 deletions gdk/commands/config/update/ConfigChecker.py
@@ -140,6 +140,15 @@ def is_valid_publish_options(self, input_value):
if not isinstance(_file_upload_args, dict):
return False

_only_on_change = input_object.get("only_on_change", [])
if not isinstance(_only_on_change, list):
return False
for item in _only_on_change:
if not isinstance(item, str):
return False
elif item not in ["ARTIFACTS", "GDK_CONFIG", "RECIPE"]:
return False

return True

def is_valid_gdk_version(self, input_value):
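For illustration, a minimal sketch of a config fragment the only_on_change validation above would accept, with the check mirrored as a standalone function (the dict shape and function are illustrative, not part of the PR):

```python
# Hypothetical publish options, as they might appear under "publish" -> "options"
# in gdk-config.json (shown here as a Python dict).
publish_options = {
    "only_on_change": ["RECIPE", "ARTIFACTS"],  # any subset of ARTIFACTS, GDK_CONFIG, RECIPE
}

def is_valid_only_on_change(options: dict) -> bool:
    # Mirrors the validation added above: must be a list of known strings.
    value = options.get("only_on_change", [])
    if not isinstance(value, list):
        return False
    return all(isinstance(item, str) and item in ("ARTIFACTS", "GDK_CONFIG", "RECIPE") for item in value)

print(is_valid_only_on_change(publish_options))                 # True
print(is_valid_only_on_change({"only_on_change": "RECIPE"}))    # False: must be a list
```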
6 changes: 4 additions & 2 deletions gdk/common/GithubUtils.py
@@ -6,5 +6,7 @@ def get_latest_release_name(self, owner, repository):
latest_release_api_url = f"https://api.github.com/repos/{owner}/{repository}/releases/latest"
response = requests.get(latest_release_api_url)
if response.status_code != 200:
response.raise_for_status()
return response.json().get("name") # We typically name our GTF releases by the version.
if response.status_code == 403:
if not response.json().get("message").find("API rate limit exceeded"):
response.raise_for_status()
return response.json().get("name", None) # We typically name our GTF releases by the version.
12 changes: 12 additions & 0 deletions gdk/common/utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import hashlib
import base64
import logging
import shutil
from pathlib import Path
@@ -156,6 +158,16 @@ def convertToLowercase(value):
return str.lower(value)


def artifact_encoded_hash(file_path):
with open(file_path, "rb") as f:
file_hash = hashlib.sha256()
chunk = f.read(4096)
while chunk:
file_hash.update(chunk)
chunk = f.read(4096)
return base64.b64encode(file_hash.digest()).decode("utf-8")


error_line = "\n=============================== ERROR ===============================\n"
help_line = "\n=============================== HELP ===============================\n"
current_directory = Path(".").resolve()
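A short usage sketch of the new artifact_encoded_hash helper (the file path and recipe fragment are illustrative, not taken from the PR):

```python
from gdk.common import utils

# Hypothetical locally built artifact; the path is illustrative.
local_digest = utils.artifact_encoded_hash("greengrass-build/artifacts/com.example.Hello/1.0.1/hello.zip")

# Artifact entry as it might appear in a recipe fetched from Greengrass.
published_artifact = {
    "Uri": "s3://my-bucket/com.example.Hello/1.0.0/hello.zip",
    "Digest": "<base64-encoded SHA-256 from the cloud recipe>",
}

if local_digest != published_artifact.get("Digest"):
    print("Artifact contents changed since the last published version.")
```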