aboutsummaryrefslogtreecommitdiff
path: root/gerbolyze/__init__.py
diff options
context:
space:
mode:
author: jaseg <git@jaseg.de> 2024-07-05 11:13:25 +0200
committer: jaseg <git@jaseg.de> 2024-07-05 12:37:42 +0200
commit: bd2b3733ee87bd918c9ea7ca8f871951f7f86bca (patch)
tree: c348aa5d757abceafdd6ee1bd86f0ab5c4a705d5 /gerbolyze/__init__.py
parent: 534c2e4ea3229c6ffef640ea8b55b0c18e02fd7b (diff)
download: gerbolyze-bd2b3733ee87bd918c9ea7ca8f871951f7f86bca.tar.gz
gerbolyze-bd2b3733ee87bd918c9ea7ca8f871951f7f86bca.tar.bz2
gerbolyze-bd2b3733ee87bd918c9ea7ca8f871951f7f86bca.zip
Move from bs4 to etree
BeautifulSoup, when using lxml in XML mode, would mis-parse XML with very long attributes. Specifically, a <polygon> with about 18MB in its points attribute would make lxml not return anything past that point in the file. bs4 uses lxml, which uses libxml2. libxml2 has a config option for parsing "huge" files that increases buffer sizes and avoids this error, and this option is exposed in lxml, but AFAICT you can't tell bs4 to set it, and bs4 just silently swallows the error from lxml. Fixes one half of #46.
Diffstat (limited to 'gerbolyze/__init__.py')
-rwxr-xr-x gerbolyze/__init__.py 21
1 files changed, 14 insertions, 7 deletions
diff --git a/gerbolyze/__init__.py b/gerbolyze/__init__.py
index 48ee4f7..40238ed 100755
--- a/gerbolyze/__init__.py
+++ b/gerbolyze/__init__.py
@@ -14,8 +14,8 @@ import warnings
import shutil
from zipfile import ZipFile, is_zipfile
from pathlib import Path
+from xml.etree import ElementTree
-from bs4 import BeautifulSoup
import numpy as np
import click
@@ -23,6 +23,11 @@ import gerbonara as gn
__version__ = '3.1.8'
+ET_NS= {'svg': 'http://www.w3.org/2000/svg',
+ 'inkscape': 'http://www.inkscape.org/namespaces/inkscape'}
+SVG_NS = f'{{{ET_NS["svg"]}}}'
+INKSCAPE_NS = f'{{{ET_NS["inkscape"]}}}'
+
@click.group()
def cli():
pass
@@ -71,16 +76,17 @@ def paste(input_gerbers, input_svg, output_gerbers, is_zip,
with tempfile.NamedTemporaryFile(suffix='.svg') as processed_svg:
run_cargo_command('usvg', *shlex.split(os.environ.get('USVG_OPTIONS', '')), input_svg, processed_svg.name)
- with open(processed_svg.name) as f:
- soup = BeautifulSoup(f.read(), features='xml')
+ et = ElementTree.parse(processed_svg)
for (side, use), layer in [
*stack.graphic_layers.items(),
(('drill', 'plated'), stack.drill_pth),
(('drill', 'nonplated'), stack.drill_npth)]:
logging.info(f'Layer {side} {use}')
- if (soup_layer := soup.find('g', id=f'g-{side}-{use}')):
- if not soup_layer.contents:
+
+ et_layer = et.find(f".//{SVG_NS}g[@id='g-{side}-{use}']")
+ if et_layer is not None:
+ if not len(et_layer):
logging.info(f' Corresponding overlay layer is empty. Skipping.')
else:
logging.info(f' Corresponding overlay layer not found. Skipping.')
@@ -271,8 +277,9 @@ def convert(input_svg, output_gerbers, is_zip, dilate, curve_tolerance, subtract
with tempfile.NamedTemporaryFile(suffix='.svg') as processed_svg:
run_cargo_command('usvg', *shlex.split(os.environ.get('USVG_OPTIONS', '')), input_svg, processed_svg.name)
- soup = BeautifulSoup(input_svg.read_text(), features='xml')
- layers = {e.get('id'): e.get('inkscape:label') for e in soup.find_all('g', recursive=True)}
+ et = ElementTree.fromstring(input_svg.read_text())
+ layers = {node.get(f'id'): node.get(f'{INKSCAPE_NS}label')
+ for node in et.findall(f'{SVG_NS}g')}
stack = gn.LayerStack({}, None, None, [], board_name=input_svg.stem, original_path=input_svg)