diff --git a/cyclonedx/model/bom.py b/cyclonedx/model/bom.py
index 7cb0081e..3e2734d9 100644
--- a/cyclonedx/model/bom.py
+++ b/cyclonedx/model/bom.py
@@ -816,13 +816,20 @@ def validate(self) -> bool:
         Deprecated without any replacement.
         """
         # !! deprecated function. have this as an part of the normalization process, like the BomRefDiscrimator
-        # 0. Make sure all Dependable have a Dependency entry
-        if self.metadata.component:
-            self.register_dependency(target=self.metadata.component)
-        for _c in self.components:
-            self.register_dependency(target=_c)
-        for _s in self.services:
-            self.register_dependency(target=_s)
+        # 0. Make sure all Dependable have a Dependency entry.
+        # Resolve "already registered" via a set of refs (O(1) per lookup)
+        # rather than the linear scan in register_dependency(); otherwise this
+        # loop is O(n^2) and dominates serialization time for large BOMs.
+        # see https://github.com/CycloneDX/cyclonedx-python-lib/issues/1006
+        _registered_refs = {_d.ref for _d in self._dependencies}
+        for _dependable in chain(
+            (self.metadata.component,) if self.metadata.component else (),
+            self.components,
+            self.services,
+        ):
+            if _dependable.bom_ref not in _registered_refs:
+                self._dependencies.add(Dependency(ref=_dependable.bom_ref))
+                _registered_refs.add(_dependable.bom_ref)
 
         # 1. Make sure dependencies are all in this Bom.
         component_bom_refs = set(map(lambda c: c.bom_ref, self._get_all_components())) | set(
diff --git a/tests/test_model_bom.py b/tests/test_model_bom.py
index 030ee4a5..a97231a0 100644
--- a/tests/test_model_bom.py
+++ b/tests/test_model_bom.py
@@ -18,6 +18,7 @@
 from collections.abc import Callable
 from random import shuffle
 from unittest import TestCase
+from unittest.mock import patch
 from uuid import uuid4
 
 from ddt import ddt, named_data
@@ -329,6 +330,62 @@ def test_register_dependency(self, dependencies: tuple[tuple[Component, tuple[Co
             for dd in d2:
                 self.assertIn(dd.bom_ref, bom_dep.dependencies_as_bom_refs())
 
+    def test_regression_issue_1006(self) -> None:
+        """regression test for issue #1006
+
+        ``Bom.validate()`` must register a Dependency entry for the metadata
+        component and every component/service, without quadratic behaviour.
+        see https://github.com/CycloneDX/cyclonedx-python-lib/issues/1006
+        """
+        n = 100
+        bom = Bom(metadata=BomMetaData(component=Component(name='root', bom_ref='root')))
+        for i in range(n):
+            bom.components.add(Component(name=f'c{i}', bom_ref=f'ref-{i}'))
+        with self.assertWarns(expected_warning=UserWarning):
+            bom.validate()
+        registered = {d.ref.value for d in bom.dependencies}
+        self.assertEqual(len(registered), n + 1)
+        self.assertIn('root', registered)
+        for i in range(n):
+            self.assertIn(f'ref-{i}', registered)
+
+    def test_regression_issue_1006_scales_linearly(self) -> None:
+        """regression test for issue #1006 -- guards the *complexity*.
+
+        The previous implementation located an existing dependency entry with a
+        linear scan in ``register_dependency()``, called once per component by
+        ``validate()`` -- making the pass O(n^2). This guards against that
+        regression without relying on wall-clock timing (flaky in CI): it counts
+        the ``BomRef`` equality comparisons performed during ``validate()`` for
+        ``n`` and ``2n`` components. A quadratic implementation roughly
+        quadruples the comparisons when the input doubles, a linear one only
+        roughly doubles them (measured: linear ~1.6k -> ~3.7k, ratio ~2.2;
+        quadratic ~22k -> ~84k, ratio ~3.9).
+        see https://github.com/CycloneDX/cyclonedx-python-lib/issues/1006
+        """
+        def _comparisons_for(n: int) -> int:
+            bom = Bom(metadata=BomMetaData(component=Component(name='root', bom_ref='root')))
+            for i in range(n):
+                bom.components.add(Component(name=f'c{i}', bom_ref=f'ref-{i}'))
+            count = 0
+            original_eq = BomRef.__eq__
+
+            def _counting_eq(self: BomRef, other: object) -> bool:
+                nonlocal count
+                count += 1
+                return original_eq(self, other)
+            # validate() emits a UserWarning for this BOM shape (asserted in
+            # test_regression_issue_1006); capture it here so it cannot leak.
+            with self.assertWarns(expected_warning=UserWarning), patch.object(BomRef, '__eq__', _counting_eq):
+                bom.validate()
+            return count
+
+        base = _comparisons_for(200)
+        doubled = _comparisons_for(400)
+        # linear => ratio ~2; quadratic => ratio ~4. Generous headroom; the
+        # constant +10 keeps it robust if `base` is ever near zero.
+        self.assertLess(doubled, base * 3 + 10)
+
     def test_regression_issue_539(self) -> None:
         """regression test for issue #539
         see https://github.com/CycloneDX/cyclonedx-python-lib/issues/539