diff options
author | jaseg <git@jaseg.de> | 2024-07-05 11:13:25 +0200 |
---|---|---|
committer | jaseg <git@jaseg.de> | 2024-07-05 12:37:42 +0200 |
commit | bd2b3733ee87bd918c9ea7ca8f871951f7f86bca (patch) | |
tree | c348aa5d757abceafdd6ee1bd86f0ab5c4a705d5 | |
parent | 534c2e4ea3229c6ffef640ea8b55b0c18e02fd7b (diff) | |
download | gerbolyze-bd2b3733ee87bd918c9ea7ca8f871951f7f86bca.tar.gz gerbolyze-bd2b3733ee87bd918c9ea7ca8f871951f7f86bca.tar.bz2 gerbolyze-bd2b3733ee87bd918c9ea7ca8f871951f7f86bca.zip |
Move from bs4 to etree
BeautifulSoup, when using lxml in XML mode, would mis-parse XML with very
long attributes. Specifically, a <polygon> with about 18 MB in its points
attribute would make lxml not return anything past that point in the file.
bs4 uses lxml, which uses libxml2. libxml2 has a config option for
parsing "huge" files that increases buffer sizes and avoids this error.
This option is exposed in lxml, but as far as I can tell you can't tell
bs4 to set it, and bs4 just silently swallows the error from lxml.
Fixes one half of #46
-rwxr-xr-x | gerbolyze/__init__.py | 21 |
1 file changed, 14 insertions, 7 deletions
diff --git a/gerbolyze/__init__.py b/gerbolyze/__init__.py index 48ee4f7..40238ed 100755 --- a/gerbolyze/__init__.py +++ b/gerbolyze/__init__.py @@ -14,8 +14,8 @@ import warnings import shutil from zipfile import ZipFile, is_zipfile from pathlib import Path +from xml.etree import ElementTree -from bs4 import BeautifulSoup import numpy as np import click @@ -23,6 +23,11 @@ import gerbonara as gn __version__ = '3.1.8' +ET_NS= {'svg': 'http://www.w3.org/2000/svg', + 'inkscape': 'http://www.inkscape.org/namespaces/inkscape'} +SVG_NS = f'{{{ET_NS["svg"]}}}' +INKSCAPE_NS = f'{{{ET_NS["inkscape"]}}}' + @click.group() def cli(): pass @@ -71,16 +76,17 @@ def paste(input_gerbers, input_svg, output_gerbers, is_zip, with tempfile.NamedTemporaryFile(suffix='.svg') as processed_svg: run_cargo_command('usvg', *shlex.split(os.environ.get('USVG_OPTIONS', '')), input_svg, processed_svg.name) - with open(processed_svg.name) as f: - soup = BeautifulSoup(f.read(), features='xml') + et = ElementTree.parse(processed_svg) for (side, use), layer in [ *stack.graphic_layers.items(), (('drill', 'plated'), stack.drill_pth), (('drill', 'nonplated'), stack.drill_npth)]: logging.info(f'Layer {side} {use}') - if (soup_layer := soup.find('g', id=f'g-{side}-{use}')): - if not soup_layer.contents: + + et_layer = et.find(f".//{SVG_NS}g[@id='g-{side}-{use}']") + if et_layer is not None: + if not len(et_layer): logging.info(f' Corresponding overlay layer is empty. Skipping.') else: logging.info(f' Corresponding overlay layer not found. 
Skipping.') @@ -271,8 +277,9 @@ def convert(input_svg, output_gerbers, is_zip, dilate, curve_tolerance, subtract with tempfile.NamedTemporaryFile(suffix='.svg') as processed_svg: run_cargo_command('usvg', *shlex.split(os.environ.get('USVG_OPTIONS', '')), input_svg, processed_svg.name) - soup = BeautifulSoup(input_svg.read_text(), features='xml') - layers = {e.get('id'): e.get('inkscape:label') for e in soup.find_all('g', recursive=True)} + et = ElementTree.fromstring(input_svg.read_text()) + layers = {node.get(f'id'): node.get(f'{INKSCAPE_NS}label') + for node in et.findall(f'{SVG_NS}g')} stack = gn.LayerStack({}, None, None, [], board_name=input_svg.stem, original_path=input_svg) |