From bd2b3733ee87bd918c9ea7ca8f871951f7f86bca Mon Sep 17 00:00:00 2001 From: jaseg Date: Fri, 5 Jul 2024 11:13:25 +0200 Subject: Move from bs4 to etree BeautifulSoup when using lxml in XML mode would mis-parse XML with very long attributes. Specifically, an element with about 18MB in its points attribute would make lxml not return anything past that point in the file. bs4 uses lxml, which uses libxml2. libxml2 has a config option for parsing "huge" files that increases buffer sizes and avoids this error, and this option is exposed in lxml, but AFAICT you can't tell bs4 to set it, and bs4 just silently swallows the error from lxml. Fixes one half of #46 --- gerbolyze/__init__.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/gerbolyze/__init__.py b/gerbolyze/__init__.py index 48ee4f7..40238ed 100755 --- a/gerbolyze/__init__.py +++ b/gerbolyze/__init__.py @@ -14,8 +14,8 @@ import warnings import shutil from zipfile import ZipFile, is_zipfile from pathlib import Path +from xml.etree import ElementTree -from bs4 import BeautifulSoup import numpy as np import click @@ -23,6 +23,11 @@ import gerbonara as gn __version__ = '3.1.8' +ET_NS= {'svg': 'http://www.w3.org/2000/svg', + 'inkscape': 'http://www.inkscape.org/namespaces/inkscape'} +SVG_NS = f'{{{ET_NS["svg"]}}}' +INKSCAPE_NS = f'{{{ET_NS["inkscape"]}}}' + @click.group() def cli(): pass @@ -71,16 +76,17 @@ def paste(input_gerbers, input_svg, output_gerbers, is_zip, with tempfile.NamedTemporaryFile(suffix='.svg') as processed_svg: run_cargo_command('usvg', *shlex.split(os.environ.get('USVG_OPTIONS', '')), input_svg, processed_svg.name) - with open(processed_svg.name) as f: - soup = BeautifulSoup(f.read(), features='xml') + et = ElementTree.parse(processed_svg) for (side, use), layer in [ *stack.graphic_layers.items(), (('drill', 'plated'), stack.drill_pth), (('drill', 'nonplated'), stack.drill_npth)]: logging.info(f'Layer {side} {use}') - if (soup_layer := soup.find('g', 
id=f'g-{side}-{use}')): - if not soup_layer.contents: + + et_layer = et.find(f".//{SVG_NS}g[@id='g-{side}-{use}']") + if et_layer is not None: + if not len(et_layer): logging.info(f' Corresponding overlay layer is empty. Skipping.') else: logging.info(f' Corresponding overlay layer not found. Skipping.') @@ -271,8 +277,9 @@ def convert(input_svg, output_gerbers, is_zip, dilate, curve_tolerance, subtract with tempfile.NamedTemporaryFile(suffix='.svg') as processed_svg: run_cargo_command('usvg', *shlex.split(os.environ.get('USVG_OPTIONS', '')), input_svg, processed_svg.name) - soup = BeautifulSoup(input_svg.read_text(), features='xml') - layers = {e.get('id'): e.get('inkscape:label') for e in soup.find_all('g', recursive=True)} + et = ElementTree.fromstring(input_svg.read_text()) + layers = {node.get(f'id'): node.get(f'{INKSCAPE_NS}label') + for node in et.findall(f'{SVG_NS}g')} stack = gn.LayerStack({}, None, None, [], board_name=input_svg.stem, original_path=input_svg) -- cgit