Skip to content

Commit cc62a57

Browse files
Codeql fixes [azure-ai-ml] (#39536)
* change hash function * codeql fixes * correct uuid formation * roll back one change * hash change * fix lint issue * fix unit tests * update recordings * reverse recording * reverse hash_dict change * add object hash change * pass function_scoped to true * update recordings * update recordings * update recordings * updates * update failed test recording * update failed test recording
1 parent 85603e0 commit cc62a57

File tree

12 files changed

+44
-51
lines changed

12 files changed

+44
-51
lines changed

sdk/ml/azure-ai-ml/assets.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,5 @@
22
"AssetsRepo": "Azure/azure-sdk-assets",
33
"AssetsRepoPrefixPath": "python",
44
"TagPrefix": "python/ml/azure-ai-ml",
5-
"Tag": "python/ml/azure-ai-ml_53a7a4a178"
5+
"Tag": "python/ml/azure-ai-ml_a27e606230"
66
}

sdk/ml/azure-ai-ml/azure/ai/ml/_utils/_asset_utils.py

Lines changed: 5 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@
55
# pylint: disable=protected-access,too-many-lines
66

77
import hashlib
8+
import json
89
import logging
910
import os
10-
import json
1111
import uuid
1212
import warnings
1313
from concurrent.futures import ThreadPoolExecutor, as_completed
@@ -16,17 +16,7 @@
1616
from os import PathLike
1717
from pathlib import Path
1818
from platform import system
19-
from typing import (
20-
TYPE_CHECKING,
21-
Any,
22-
Dict,
23-
Iterable,
24-
List,
25-
Optional,
26-
Tuple,
27-
Union,
28-
cast,
29-
)
19+
from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple, Union, cast
3020

3121
from colorama import Fore
3222
from tqdm import TqdmWarning, tqdm
@@ -67,11 +57,7 @@
6757
from azure.ai.ml._restclient.v2023_04_01.models import PendingUploadRequestDto
6858
from azure.ai.ml._utils._pathspec import GitWildMatchPattern, normalize_file
6959
from azure.ai.ml._utils.utils import convert_windows_path_to_unix, retry, snake_to_camel
70-
from azure.ai.ml.constants._common import (
71-
MAX_AUTOINCREMENT_ATTEMPTS,
72-
DefaultOpenEncoding,
73-
OrderString,
74-
)
60+
from azure.ai.ml.constants._common import MAX_AUTOINCREMENT_ATTEMPTS, DefaultOpenEncoding, OrderString
7561
from azure.ai.ml.entities._assets.asset import Asset
7662
from azure.ai.ml.exceptions import (
7763
AssetPathException,
@@ -92,7 +78,7 @@
9278
ModelOperations,
9379
)
9480

95-
hash_type = type(hashlib.md5()) # nosec
81+
hash_type = type(hashlib.sha256()) # nosec
9682

9783
module_logger = logging.getLogger(__name__)
9884

@@ -335,7 +321,7 @@ def _build_metadata_dict(name: str, version: str) -> Dict[str, str]:
335321

336322

337323
def get_object_hash(path: Union[str, os.PathLike], ignore_file: IgnoreFile = IgnoreFile()) -> str:
338-
_hash = hashlib.md5(b"Initialize for october 2021 AML CLI version") # nosec
324+
_hash = hashlib.sha256(b"Initialize for october 2021 AML CLI version") # nosec
339325
if Path(path).is_dir():
340326
object_hash = _get_dir_hash(directory=path, _hash=_hash, ignore_file=ignore_file)
341327
else:

sdk/ml/azure-ai-ml/azure/ai/ml/_utils/utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -788,9 +788,9 @@ def hash_dict(items: Dict[str, Any], keys_to_omit: Optional[Iterable[str]] = Non
788788
items = pydash.omit(items, keys_to_omit)
789789
# serialize dict with order so same dict will have same content
790790
serialized_component_interface = json.dumps(items, sort_keys=True)
791-
object_hash = hashlib.md5() # nosec
791+
object_hash = hashlib.sha256()
792792
object_hash.update(serialized_component_interface.encode("utf-8"))
793-
return str(UUID(object_hash.hexdigest()))
793+
return str(UUID(object_hash.hexdigest()[:32]))
794794

795795

796796
def convert_identity_dict(

sdk/ml/azure-ai-ml/azure/ai/ml/constants/_component.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,4 +147,4 @@ class IOConstants:
147147
GROUP_TYPE_NAME = "group"
148148
# Note: ([a-zA-Z_]+[a-zA-Z0-9_]*) is a valid single key,
149149
# so a valid pipeline key is: ^{single_key}([.]{single_key})*$
150-
VALID_KEY_PATTERN = r"^([a-zA-Z_]+[a-zA-Z0-9_]*)([.]([a-zA-Z_]+[a-zA-Z0-9_]*))*$"
150+
VALID_KEY_PATTERN = r"^[a-zA-Z_][a-zA-Z0-9_]*(\.[a-zA-Z_][a-zA-Z0-9_]*)*$"

sdk/ml/azure-ai-ml/azure/ai/ml/entities/_assets/_artifacts/model.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
from azure.ai.ml.entities._assets import Artifact
2525
from azure.ai.ml.entities._assets.intellectual_property import IntellectualProperty
2626
from azure.ai.ml.entities._system_data import SystemData
27-
from azure.ai.ml.entities._util import get_md5_string, load_from_dict
27+
from azure.ai.ml.entities._util import get_sha256_string, load_from_dict
2828

2929
from .artifact import ArtifactStorageInfo
3030

@@ -103,7 +103,7 @@ def __init__(
103103
if self._is_anonymous and self.path:
104104
_ignore_file = get_ignore_file(self.path)
105105
_upload_hash = get_object_hash(self.path, _ignore_file)
106-
self.name = get_md5_string(_upload_hash)
106+
self.name = get_sha256_string(_upload_hash)
107107

108108
@classmethod
109109
def _load(

sdk/ml/azure-ai-ml/azure/ai/ml/entities/_assets/environment.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
from azure.ai.ml.entities._assets.intellectual_property import IntellectualProperty
2727
from azure.ai.ml.entities._mixins import LocalizableMixin
2828
from azure.ai.ml.entities._system_data import SystemData
29-
from azure.ai.ml.entities._util import get_md5_string, load_from_dict
29+
from azure.ai.ml.entities._util import get_sha256_string, load_from_dict
3030
from azure.ai.ml.exceptions import ErrorCategory, ErrorTarget, ValidationErrorType, ValidationException
3131

3232

@@ -374,20 +374,20 @@ def _generate_anonymous_name_version(
374374
) -> None:
375375
hash_str = ""
376376
if source == "image":
377-
hash_str = hash_str.join(get_md5_string(self.image))
377+
hash_str = hash_str.join(get_sha256_string(self.image))
378378
if inference_config:
379-
hash_str = hash_str.join(get_md5_string(yaml.dump(inference_config, sort_keys=True)))
379+
hash_str = hash_str.join(get_sha256_string(yaml.dump(inference_config, sort_keys=True)))
380380
if conda_file:
381-
hash_str = hash_str.join(get_md5_string(conda_file))
381+
hash_str = hash_str.join(get_sha256_string(conda_file))
382382
if source == "build":
383383
if self.build is not None and not self.build.dockerfile_path:
384-
hash_str = hash_str.join(get_md5_string(self._upload_hash))
384+
hash_str = hash_str.join(get_sha256_string(self._upload_hash))
385385
else:
386386
if self.build is not None:
387-
hash_str = hash_str.join(get_md5_string(self._upload_hash)).join(
388-
get_md5_string(self.build.dockerfile_path)
387+
hash_str = hash_str.join(get_sha256_string(self._upload_hash)).join(
388+
get_sha256_string(self.build.dockerfile_path)
389389
)
390-
version_hash = get_md5_string(hash_str)
390+
version_hash = get_sha256_string(hash_str)
391391
self.version = version_hash
392392
self.name = ANONYMOUS_ENV_NAME
393393

sdk/ml/azure-ai-ml/azure/ai/ml/entities/_util.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -218,17 +218,17 @@ def decorate_validation_error(schema: Any, pretty_error: str, additional_message
218218
return f"Validation for {schema.__name__} failed:\n\n {pretty_error} \n\n {additional_message}"
219219

220220

221-
def get_md5_string(text: Optional[str]) -> str:
222-
"""Get md5 string for a given text.
221+
def get_sha256_string(text: Optional[str]) -> str:
222+
"""Get sha256 string for a given text.
223223
224-
:param text: The text to get md5 string for.
224+
:param text: The text to get sha256 string for.
225225
:type text: str
226-
:return: The md5 string.
226+
:return: The sha256 string.
227227
:rtype: str
228228
"""
229229
try:
230230
if text is not None:
231-
return hashlib.md5(text.encode("utf8")).hexdigest() # nosec
231+
return hashlib.sha256(text.encode("utf8")).hexdigest() # nosec
232232
return ""
233233
except Exception as ex:
234234
raise ex

sdk/ml/azure-ai-ml/tests/command_job/unittests/test_command_job_schema.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,10 @@ def test_anonymous_assets(self):
127127
assert internal_representation.environment.name != envName
128128
assert internal_representation.environment.name == ANONYMOUS_ENV_NAME
129129
assert internal_representation.environment._is_anonymous
130-
assert internal_representation.environment.version == "7edecdc2427764b4606a68e5f1a95fe3"
130+
assert (
131+
internal_representation.environment.version
132+
== "9a8126427ef71af0cdc4a56218a9b24e764524f68383d5c4b645448376a03f6b"
133+
)
131134

132135
assert internal_representation.inputs["test1"].path == input_path
133136
# Validate default dataset is mounted

sdk/ml/azure-ai-ml/tests/component/unittests/test_flow_component.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ def test_component_load_from_dag(self):
4949
"is_anonymous": False,
5050
"is_archived": False,
5151
"properties": {
52-
"client_component_hash": "19278001-3d52-0e43-dc43-4082128d8243",
52+
"client_component_hash": "a32d6c66-9253-d3b2-d068-b6a409ab6770",
5353
},
5454
"tags": {},
5555
},
@@ -115,7 +115,7 @@ def test_component_load_from_run(self):
115115
"description": "A run of the basic flow",
116116
"is_anonymous": False,
117117
"is_archived": False,
118-
"properties": {"client_component_hash": "bc6d5b98-1aef-0d5a-96ff-5803f1a906c8"},
118+
"properties": {"client_component_hash": "f060820c-7fb3-56a8-790c-ae2969a7b544"},
119119
"tags": {},
120120
},
121121
}
@@ -199,7 +199,7 @@ def test_flow_component_entity(self):
199199
"is_anonymous": False,
200200
"is_archived": False,
201201
# note that this won't take effect actually
202-
"properties": {"client_component_hash": "0eec0297-5f6d-e333-780d-c76871ad9c57"},
202+
"properties": {"client_component_hash": "07cdc416-c6ee-0beb-3d1b-4e5a5c4b44ec"},
203203
"tags": {},
204204
},
205205
}

sdk/ml/azure-ai-ml/tests/conftest.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -630,15 +630,19 @@ def pipeline_samples_e2e_registered_eval_components(client: MLClient) -> Compone
630630

631631
@pytest.fixture
632632
def mock_code_hash(request, mocker: MockFixture) -> None:
633+
fake_uuid = "00000000000000000000000000000000"
634+
633635
def generate_hash(*args, **kwargs):
634-
return str(uuid.uuid4())
636+
real_uuid = str(uuid.uuid4())
637+
add_general_string_sanitizer(value=fake_uuid, target=real_uuid, function_scoped=True)
638+
return real_uuid
635639

636640
if "disable_mock_code_hash" not in request.keywords and is_live_and_not_recording():
637641
mocker.patch("azure.ai.ml._artifacts._artifact_utilities.get_object_hash", side_effect=generate_hash)
638642
elif not is_live():
639643
mocker.patch(
640644
"azure.ai.ml._artifacts._artifact_utilities.get_object_hash",
641-
return_value="00000000000000000000000000000000",
645+
return_value=fake_uuid,
642646
)
643647

644648

@@ -659,7 +663,7 @@ def mock_anon_component_version(mocker: MockFixture):
659663

660664
def generate_name_version(*args, **kwargs):
661665
real_uuid = str(uuid.uuid4())
662-
add_general_string_sanitizer(value=fake_uuid, target=real_uuid)
666+
add_general_string_sanitizer(value=fake_uuid, target=real_uuid, function_scoped=True)
663667
return ANONYMOUS_COMPONENT_NAME, real_uuid
664668

665669
def fake_name_version(*args, **kwargs):
@@ -683,7 +687,7 @@ def mock_asset_name(mocker: MockFixture):
683687

684688
def generate_uuid(*args, **kwargs):
685689
real_uuid = str(uuid.uuid4())
686-
add_general_string_sanitizer(value=fake_uuid, target=real_uuid)
690+
add_general_string_sanitizer(value=fake_uuid, target=real_uuid, function_scoped=True)
687691
return real_uuid
688692

689693
if is_live():
@@ -727,7 +731,7 @@ def generate_component_hash(*args, **kwargs):
727731
"""Normalize component dict with sanitized value and return hash."""
728732
dict_hash = hash_dict(*args, **kwargs)
729733
normalized_dict_hash = normalized_hash_dict(*args, **kwargs)
730-
add_general_string_sanitizer(value=normalized_dict_hash, target=dict_hash)
734+
add_general_string_sanitizer(value=normalized_dict_hash, target=dict_hash, function_scoped=True)
731735
return dict_hash
732736

733737

@@ -837,7 +841,7 @@ def mock_job_name_generator(mocker: MockFixture):
837841

838842
def generate_and_sanitize_job_name(*args, **kwargs):
839843
real_job_name = generate_job_name()
840-
add_general_string_sanitizer(value=fake_job_name, target=real_job_name)
844+
add_general_string_sanitizer(value=fake_job_name, target=real_job_name, function_scoped=True)
841845
return real_job_name
842846

843847
if is_live():

sdk/ml/azure-ai-ml/tests/dsl/e2etests/test_dsl_pipeline.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,7 @@ def pipeline(job_in_number, job_in_other_number, job_in_path):
174174
job = client.jobs.create_or_update(pipeline)
175175
# check required fields in job dict
176176
job_dict = job._to_dict()
177-
expected_keys = ["status", "properties", "tags", "creation_context"]
177+
expected_keys = ["status", "tags", "creation_context"]
178178
for k in expected_keys:
179179
assert k in job_dict.keys(), f"failed to get {k} in {job_dict}"
180180

sdk/ml/azure-ai-ml/tests/environment/unittests/test_env_entity.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -127,8 +127,8 @@ def test_anonymous_environment_version_changes_with_inference_config(self):
127127

128128
assert env_no_inference_config.name == env_no_inference_config.name == ANONYMOUS_ENV_NAME
129129
assert env_no_inference_config.version != env_with_inference_config.version
130-
assert env_no_inference_config.version == "25484232c6e5bfb2c6e1dc5d38fb9fa5"
131-
assert env_with_inference_config.version == "b181e0fef2ad76757e2f78317c10cb2b"
130+
assert env_no_inference_config.version == "09c18f036e80ca941e58b12313012d8e6f23cdc3ccc35b8c97f890ccbaedbf40"
131+
assert env_with_inference_config.version == "e060479364110e178fa0338de64bcd5cdf2dc186ee80b89641d842fc974c6949"
132132

133133
def test_ipp_environment(self) -> None:
134134
# test through deserializing REST instead of using real IP assets

0 commit comments

Comments
 (0)