diff --git a/invenio_rdm_records/resources/serializers/dcat/__init__.py b/invenio_rdm_records/resources/serializers/dcat/__init__.py index a62f9b4d9..92e48b5c5 100644 --- a/invenio_rdm_records/resources/serializers/dcat/__init__.py +++ b/invenio_rdm_records/resources/serializers/dcat/__init__.py @@ -80,16 +80,14 @@ def access_url(file): "{{{dcat}}}mediaType": media_type, "{{{dcat}}}byteSize": byte_size, "{{{dcat}}}accessURL": access_url, - # TODO: there's also "spdx:checksum", but it's not in the W3C spec yet } - for f in files: + for file in files: dist_wrapper = ET.SubElement(root[0], "{{{dcat}}}distribution".format(**ns)) dist = ET.SubElement(dist_wrapper, "{{{dcat}}}Distribution".format(**ns)) for tag, func in files_fields.items(): - tag_value = func(f) - + tag_value = func(file) if tag_value: el = ET.SubElement(dist, tag.format(**ns)) if isinstance(tag_value, str): @@ -97,6 +95,30 @@ def access_url(file): if isinstance(tag_value, dict): el.attrib.update(tag_value) + checksum = file.get("checksum") + if checksum: + algorithm, _, value = checksum.rpartition(":") + spdx_checksum_el = ET.SubElement( + dist, "{{{spdx}}}checksum".format(**ns) + ) + spdx_checksum_obj = ET.SubElement( + spdx_checksum_el, "{{{spdx}}}Checksum".format(**ns) + ) + + algo_el = ET.SubElement( + spdx_checksum_obj, + "{{{spdx}}}algorithm".format(**ns), + ) + algo_el.attrib["{{{rdf}}}resource".format(**ns)] = ( + f"http://spdx.org/rdf/terms#checksumAlgorithm_{algorithm or 'md5'}" + ) + + value_el = ET.SubElement( + spdx_checksum_obj, + "{{{spdx}}}checksumValue".format(**ns), + ) + value_el.text = value + def add_missing_creatibutor_links(self, rdf_tree): """Add missing `rdf:about` attributes to within and and within .""" namespaces = rdf_tree.nsmap diff --git a/invenio_rdm_records/resources/serializers/dcat/datacite-to-dcat-ap.xsl b/invenio_rdm_records/resources/serializers/dcat/datacite-to-dcat-ap.xsl index 2d60621e3..791a84ecf 100644 --- 
a/invenio_rdm_records/resources/serializers/dcat/datacite-to-dcat-ap.xsl +++ b/invenio_rdm_records/resources/serializers/dcat/datacite-to-dcat-ap.xsl @@ -53,6 +53,7 @@ xmlns:dct = "http://purl.org/dc/terms/" xmlns:dctype = "http://purl.org/dc/dcmitype/" xmlns:dcat = "http://www.w3.org/ns/dcat#" + xmlns:spdx = "http://spdx.org/rdf/terms#" xmlns:foaf = "http://xmlns.com/foaf/0.1/" xmlns:gsp = "http://www.opengis.net/ont/geosparql#" xmlns:locn = "http://www.w3.org/ns/locn#" diff --git a/invenio_rdm_records/resources/serializers/dcat/schema.py b/invenio_rdm_records/resources/serializers/dcat/schema.py index 532afcc96..d691f945b 100644 --- a/invenio_rdm_records/resources/serializers/dcat/schema.py +++ b/invenio_rdm_records/resources/serializers/dcat/schema.py @@ -50,6 +50,7 @@ def get_files(self, obj): access_url=access_url, download_url=url, key=value["key"], + checksum=value.get("checksum"), ) ) diff --git a/tests/resources/serializers/test_dcat_serializer.py b/tests/resources/serializers/test_dcat_serializer.py index 7eda62d38..098c150cf 100644 --- a/tests/resources/serializers/test_dcat_serializer.py +++ b/tests/resources/serializers/test_dcat_serializer.py @@ -40,6 +40,7 @@ def test_dcat_serializer(running_app, full_record_to_dict): 'xmlns:dct="http://purl.org/dc/terms/" ' 'xmlns:dctype="http://purl.org/dc/dcmitype/" ' 'xmlns:dcat="http://www.w3.org/ns/dcat#" ' + 'xmlns:spdx="http://spdx.org/rdf/terms#" ' 'xmlns:foaf="http://xmlns.com/foaf/0.1/" ' 'xmlns:gsp="http://www.opengis.net/ont/geosparql#" ' 'xmlns:locn="http://www.w3.org/ns/locn#" ' @@ -253,6 +254,13 @@ def test_dcat_serializer(running_app, full_record_to_dict): " 9\n" " \n' + " \n" + " \n" + " \n' + " e795abeef2c38de2b064be9f6364ceae\n" + " \n" + " \n" " \n" " \n" " \n"