Skip to content

Support conan in packagedcode #3650

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Feb 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,13 @@ Changes in Output Data Structure:
- Upgraded spdx-tools dependency to v0.8.
See https://github.com/nexB/scancode-toolkit/issues/3455

Support for Conan package parser:

- We now support the parsing of Conan manifest files, such as
`conanfile.py`, as described here https://docs.conan.io/2.0/reference/conanfile.html.
We also support source extraction from `conandata.yml`, as described here
https://docs.conan.io/2/tutorial/creating_packages/handle_sources_in_packages.html#using-the-conandata-yml-file.


v32.0.8 - 2023-10-11
------------------------
Expand Down
12 changes: 12 additions & 0 deletions docs/source/reference/available_package_parsers.rst
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,18 @@ parsers in scancode-toolkit during documentation builds.
- ``php_composer_lock``
- PHP
- https://getcomposer.org/doc/01-basic-usage.md#commit-your-composer-lock-file-to-version-control
* - conan external source
- ``*/conandata.yml``
- ``conan``
- ``conan_conandata_yml``
- C++
- https://docs.conan.io/2/tutorial/creating_packages/handle_sources_in_packages.html#using-the-conandata-yml-file
* - conan recipe
- ``*/conanfile.py``
- ``conan``
- ``conan_conanfile_py``
- C++
- https://docs.conan.io/2.0/reference/conanfile.html
* - Conda meta.yml manifest
- ``*/meta.yaml``
- ``conda``
Expand Down
4 changes: 4 additions & 0 deletions src/packagedcode/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from packagedcode import debian_copyright
from packagedcode import distro
from packagedcode import conda
from packagedcode import conan
from packagedcode import cocoapods
from packagedcode import cran
from packagedcode import freebsd
Expand Down Expand Up @@ -77,6 +78,9 @@
conda.CondaYamlHandler,
conda.CondaMetaYamlHandler,

conan.ConanFileHandler,
conan.ConanDataHandler,

cran.CranDescriptionFileHandler,

debian_copyright.DebianCopyrightFileInPackageHandler,
Expand Down
297 changes: 297 additions & 0 deletions src/packagedcode/conan.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,297 @@
# Copyright (c) nexB Inc. and others. All rights reserved.
# ScanCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/nexB/scancode-toolkit for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

import ast
import io
import logging
import os

import saneyaml
from packageurl import PackageURL

from packagedcode import models

"""
Handle conanfile recipes for conan packages
https://docs.conan.io/2/reference/conanfile.html
"""


# Tracing is enabled when the SCANCODE_DEBUG_PACKAGE environment variable is
# set to any non-empty value.
SCANCODE_DEBUG_PACKAGE = os.environ.get("SCANCODE_DEBUG_PACKAGE", False)

TRACE = SCANCODE_DEBUG_PACKAGE


def logger_debug(*args):
    """
    Do nothing. This is the default tracer used when TRACE is disabled; it is
    replaced below by a real logging function when TRACE is enabled.
    """
    pass


logger = logging.getLogger(__name__)

if TRACE:
    import sys

    logging.basicConfig(stream=sys.stdout)
    logger.setLevel(logging.DEBUG)

    def logger_debug(*args):
        """
        Log ``args`` as a single space-separated DEBUG message. Strings are
        logged as-is and any other object is logged using its repr().
        """
        # A conditional expression is used rather than "x and a or b" so that
        # empty strings are kept as-is instead of being logged as "''".
        return logger.debug(
            " ".join(a if isinstance(a, str) else repr(a) for a in args)
        )


class ConanFileParser(ast.NodeVisitor):
    """
    Collect package metadata from the AST of a ``conanfile.py`` recipe.

    Visiting a node (typically the recipe class definition) records the
    literal values assigned to well-known recipe attributes, and the literal
    first argument of every ``self.requires(...)`` call found in the tree.
    Only literal constants are collected: computed values are ignored.
    """

    # Map a recipe attribute name to the parser attribute that stores it.
    _ATTRIBUTE_MAPPING = {
        "name": "name",
        "version": "version",
        "description": "description",
        "author": "author",
        "homepage": "homepage_url",
        "url": "vcs_url",
        "license": "license",
        "topics": "keywords",
        "requires": "requires",
    }

    # Recipe attributes accumulated as a list of strings. They may be written
    # in a recipe as a single string, a tuple or a list.
    _LIST_ATTRIBUTES = frozenset(("topics", "requires", "license"))

    def __init__(self):
        self.name = None
        self.version = None
        self.description = None
        self.author = None
        self.homepage_url = None
        self.vcs_url = None
        self.license = []
        self.keywords = []
        self.requires = []

    def to_dict(self):
        """
        Return a mapping of all the collected recipe metadata.
        """
        return {
            "name": self.name,
            "version": self.version,
            "description": self.description,
            "author": self.author,
            "homepage_url": self.homepage_url,
            "vcs_url": self.vcs_url,
            "license": self.license,
            "keywords": self.keywords,
            "requires": self.requires,
        }

    def visit_Assign(self, node):
        """
        Record the literal value of an assignment to a known recipe attribute.
        Assignments to other names, or of non-literal values, are ignored.
        """
        if not node.targets or not isinstance(node.targets[0], ast.Name):
            return

        variable_name = node.targets[0].id
        attribute_name = self._ATTRIBUTE_MAPPING.get(variable_name)
        if not attribute_name:
            return

        value = node.value

        if variable_name in self._LIST_ATTRIBUTES:
            if isinstance(value, (ast.Tuple, ast.List)):
                elements = value.elts
            elif isinstance(value, ast.Constant):
                elements = [value]
            else:
                return
            # Keep only string literals: anything else (None, numbers, nested
            # expressions) cannot be a license, topic or requirement string.
            getattr(self, attribute_name).extend(
                element.value
                for element in elements
                if isinstance(element, ast.Constant)
                and isinstance(element.value, str)
            )
        elif isinstance(value, ast.Constant):
            # Scalar attributes only accept a single constant; a tuple has no
            # single value to store and is skipped.
            setattr(self, attribute_name, value.value)

    def visit_Call(self, node):
        """
        Record the requirement string of a ``self.requires("name/version")``
        call. Any other call is ignored.
        """
        func = node.func
        if not (isinstance(func, ast.Attribute) and isinstance(func.value, ast.Name)):
            return
        if func.value.id != "self" or func.attr != "requires" or not node.args:
            return

        requirement = node.args[0]
        if isinstance(requirement, ast.Constant) and isinstance(requirement.value, str):
            self.requires.append(requirement.value)


class ConanFileHandler(models.DatafileHandler):
    """
    Handle ``conanfile.py`` Conan recipes.
    """
    datasource_id = "conan_conanfile_py"
    path_patterns = ("*/conanfile.py",)
    default_package_type = "conan"
    default_primary_language = "C++"
    description = "conan recipe"
    documentation_url = "https://docs.conan.io/2.0/reference/conanfile.html"

    @classmethod
    def _parse(cls, conan_recipe):
        """
        Return a PackageData built from the ``conan_recipe`` source text of a
        conanfile.py, or None if the text is not valid Python or does not
        define a top-level class whose first base is named ``ConanFile``.
        """
        # Only ast.parse() can raise a SyntaxError here, so keep the try
        # body limited to that call.
        try:
            tree = ast.parse(conan_recipe)
        except SyntaxError as e:
            if TRACE:
                logger_debug(f"Syntax error in conan recipe: {e}")
            return

        recipe_class_def = next(
            (
                node
                for node in tree.body
                if isinstance(node, ast.ClassDef)
                and node.bases
                and isinstance(node.bases[0], ast.Name)
                and node.bases[0].id == "ConanFile"
            ),
            None,
        )

        # Without a recipe class there is nothing to visit: visiting None
        # would fail with an AttributeError.
        if recipe_class_def is None:
            if TRACE:
                logger_debug("ConanFileHandler: parse: no ConanFile class found")
            return

        parser = ConanFileParser()
        parser.visit(recipe_class_def)

        if TRACE:
            logger_debug(f"ConanFileHandler: parse: package: {parser.to_dict()}")

        dependencies = get_dependencies(parser.requires)

        return models.PackageData(
            datasource_id=cls.datasource_id,
            type=cls.default_package_type,
            primary_language=cls.default_primary_language,
            namespace=None,
            name=parser.name,
            version=parser.version,
            description=parser.description,
            homepage_url=parser.homepage_url,
            keywords=parser.keywords,
            extracted_license_statement=parser.license,
            dependencies=dependencies,
        )

    @classmethod
    def parse(cls, location):
        """
        Yield a PackageData for the conanfile.py at ``location``. Yield nothing
        if the recipe cannot be parsed.
        """
        with io.open(location, encoding="utf-8") as loc:
            conan_recipe = loc.read()

        package_data = cls._parse(conan_recipe)
        # _parse() returns None for an unparsable recipe: do not yield that.
        if package_data is not None:
            yield package_data


class ConanDataHandler(models.DatafileHandler):
    """
    Handle ``conandata.yml`` files that list the external sources of a Conan
    recipe.
    """
    datasource_id = "conan_conandata_yml"
    path_patterns = ("*/conandata.yml",)
    default_package_type = "conan"
    default_primary_language = "C++"
    description = "conan external source"
    documentation_url = (
        "https://docs.conan.io/2/tutorial/creating_packages/"
        "handle_sources_in_packages.html#using-the-conandata-yml-file"
    )

    @classmethod
    def parse(cls, location):
        """
        Yield one PackageData for each version listed under the ``sources``
        key of the conandata.yml at ``location`` that has a source ``url``.
        Yield nothing if the file is empty or has no usable ``sources``.
        """
        with io.open(location, encoding="utf-8") as loc:
            conan_data = loc.read()

        conan_data = saneyaml.load(conan_data)
        # An empty or scalar-only YAML document does not load as a mapping.
        if not isinstance(conan_data, dict):
            return

        # "sources:" with no value loads as None rather than a mapping.
        sources = conan_data.get("sources") or {}
        if not isinstance(sources, dict):
            return

        for version, source in sources.items():
            # Skip entries that are not a mapping with url/sha256 keys.
            if not isinstance(source, dict):
                continue

            sha256 = source.get("sha256", None)
            source_urls = source.get("url")
            if not source_urls:
                continue

            # "url" is either a single URL or a list of URLs: use the
            # first URL of a list.
            url = None
            if isinstance(source_urls, str):
                url = source_urls
            elif isinstance(source_urls, list):
                url = source_urls[0]

            yield models.PackageData(
                datasource_id=cls.datasource_id,
                type=cls.default_package_type,
                primary_language=cls.default_primary_language,
                namespace=None,
                version=version,
                download_url=url,
                sha256=sha256,
            )

    @classmethod
    def assemble(
        cls, package_data, resource, codebase, package_adder=models.add_to_package
    ):
        """
        Yield a Package, if one can be built, and then the ``resource``.

        A ``conandata.yml`` only contains the ``version`` and ``download_url``
        of a package: use the package data of a sibling ``conanfile.py`` to
        enhance the package metadata.
        """
        # Design note: conanfile.py can exist on its own, but conandata.yml
        # should not exist in isolation. This is why the custom assembly that
        # combines both files is only needed here and not in a base class
        # shared with ConanFileHandler.
        package_data_dict = package_data.to_dict()

        conanfile_resource = next(
            (r for r in resource.siblings(codebase) if r.name == "conanfile.py"),
            None,
        )

        conanfile_package_data = None
        if conanfile_resource is not None:
            conanfile_package_data = conanfile_resource.package_data

        if conanfile_package_data:
            recipe_data = conanfile_package_data[0]
            enhanced_fields = (
                "name",
                "description",
                "homepage_url",
                "keywords",
                "extracted_license_statement",
            )
            for field_name in enhanced_fields:
                package_data_dict[field_name] = recipe_data.get(field_name)

        datafile_path = resource.path
        pkg_data = models.PackageData.from_dict(package_data_dict)

        if pkg_data.purl:
            package = models.Package.from_package_data(
                package_data=pkg_data,
                datafile_path=datafile_path,
            )
            # Credit the recipe as a datafile only when there is one.
            if conanfile_resource is not None:
                package.datafile_paths.append(conanfile_resource.path)
                package.datasource_ids.append(ConanFileHandler.datasource_id)

            package.populate_license_fields()
            yield package

            cls.assign_package_to_resources(
                package=package,
                resource=resource,
                codebase=codebase,
                package_adder=package_adder,
            )
        yield resource


def is_constraint_resolved(constraint):
    """
    Return True if the ``constraint`` string is resolved, i.e., it specifies an
    exact version rather than a version range.

    A constraint is treated as a range if it contains any of the characters
    ``<``, ``>``, ``[`` or ``]``.
    """
    # The check is done one character at a time, so only single characters
    # belong here: ">=" and "<=" are already covered by ">" and "<".
    range_characters = frozenset("<>[]")
    return range_characters.isdisjoint(constraint)


def get_dependencies(requires):
    """
    Return a list of DependentPackage built from ``requires``, an iterable of
    Conan requirement strings such as "zlib/1.2.11" or "boost/[>=1.70]".

    The text after the first "/" is kept as the extracted requirement. It is
    also used as the purl version when it is an exact version and not a range.
    Entries that are not strings or that have no package name are skipped.
    """
    dependent_packages = []
    for req in requires:
        if not isinstance(req, str):
            continue

        # partition() does not fail on a requirement without a "/": such a
        # requirement is treated as a package name with no version constraint.
        name, _, constraint = req.partition("/")
        if not name:
            continue

        # A missing constraint is never a resolved, exact version.
        is_resolved = bool(constraint) and is_constraint_resolved(constraint)
        version = constraint if is_resolved else None
        purl = PackageURL(type="conan", name=name, version=version)
        dependent_packages.append(
            models.DependentPackage(
                purl=purl.to_string(),
                scope="install",
                is_runtime=True,
                is_optional=False,
                is_resolved=is_resolved,
                extracted_requirement=constraint or None,
            )
        )
    return dependent_packages
Loading