Skip to content

Commit

Permalink
Output parse errors for the Rust part of the build step
Browse files Browse the repository at this point in the history
This fixes #290, by outputting the parse errors encountered by the Rust build step's parser. Previously they were being stored in the RcDom instance's errors vector, and ignored. Now they are threaded through to the final io::Result, and then output by main().

The hardest part of this was adding line numbers to the errors. Doing this necessitated creating a wrapper for RcDom, called RcDomWithLineNumbers, which implements TreeSink. Two of its methods, parse_error() and set_current_line(), are given custom behavior, while the many other methods just delegate to RcDom's implementation.

Additionally, this enables exact_errors as a parser option, which provides slightly more information in a couple of cases related to character references.
  • Loading branch information
domenic authored Apr 25, 2024
1 parent 6757955 commit 06326f7
Show file tree
Hide file tree
Showing 10 changed files with 303 additions and 42 deletions.
12 changes: 12 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ tokio = { version = "1", features = ["full"] }
html5ever = "0.26.0"
markup5ever_rcdom = "0.2.0"
regex = "1"
delegate = "0.12.0"

[dev-dependencies]
tempfile = "3"
15 changes: 10 additions & 5 deletions src/annotate_attributes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,7 @@ mod tests {
// reordered in the HTML spec).
let document = parse_document_async(
r#"
<!DOCTYPE html>
<h3>The a element</h3>
<dl class="element">
<dt>Categories
Expand All @@ -338,7 +339,7 @@ mod tests {
assert_eq!(
serialize_for_test(&[document]),
r#"
<html><head></head><body><h3>The a element</h3>
<!DOCTYPE html><html><head></head><body><h3>The a element</h3>
<dl class="element">
<dt>Categories
</dt><dd>Flow content
Expand Down Expand Up @@ -369,6 +370,7 @@ mod tests {
// i.e., the variant description is used where requested
let document = parse_document_async(
r#"
<!DOCTYPE html>
<h3>The a element</h3>
<dl class="element">
<dt><span data-x="concept-element-attributes">Content attributes</span>
Expand All @@ -390,7 +392,7 @@ mod tests {
assert_eq!(
serialize_for_test(&[document]),
r#"
<html><head></head><body><h3>The a element</h3>
<!DOCTYPE html><html><head></head><body><h3>The a element</h3>
<dl class="element">
<dt><span data-x="concept-element-attributes">Content attributes</span>
</dt><dd><code data-x="attr-a-href">href</code>
Expand All @@ -415,6 +417,7 @@ mod tests {
// Checks that the special rules for using : instead of an em dash work.
let document = parse_document_async(
r#"
<!DOCTYPE html>
<h3>The a element</h3>
<dl class="element">
<dt><span data-x="concept-element-attributes">Content attributes</span>
Expand All @@ -431,7 +434,7 @@ mod tests {
assert_eq!(
serialize_for_test(&[document]),
r#"
<html><head></head><body><h3>The a element</h3>
<!DOCTYPE html><html><head></head><body><h3>The a element</h3>
<dl class="element">
<dt><span data-x="concept-element-attributes">Content attributes</span>
</dt><dd>Also, the <code data-x="attr-a-name">name</code> attribute <span data-x="attr-a-name">has special semantics</span> on this element: Anchor name
Expand All @@ -450,6 +453,7 @@ mod tests {
// Checks that the special rules for joining any special semantics with a ; work.
let document = parse_document_async(
r#"
<!DOCTYPE html>
<h3>The a element</h3>
<dl class="element">
<dt><span data-x="concept-element-attributes">Content attributes</span>
Expand All @@ -467,7 +471,7 @@ mod tests {
assert_eq!(
serialize_for_test(&[document]),
r#"
<html><head></head><body><h3>The a element</h3>
<!DOCTYPE html><html><head></head><body><h3>The a element</h3>
<dl class="element">
<dt><span data-x="concept-element-attributes">Content attributes</span>
</dt><dd>Also, the <code data-x="attr-a-name">name</code> attribute <span data-x="attr-a-name">has special semantics</span> on this element: Anchor name; Name of the anchor
Expand All @@ -488,6 +492,7 @@ mod tests {
// repeating the description.
let document = parse_document_async(
r#"
<!DOCTYPE html>
<h3>The img element</h3>
<dl class="element">
<dt><span data-x="concept-element-attributes">Content attributes</span>
Expand All @@ -509,7 +514,7 @@ mod tests {
assert_eq!(
serialize_for_test(&[document]),
r#"
<html><head></head><body><h3>The img element</h3>
<!DOCTYPE html><html><head></head><body><h3>The img element</h3>
<dl class="element">
<dt><span data-x="concept-element-attributes">Content attributes</span>
</dt><dd><code data-x="attr-dim-width">width</code>
Expand Down
27 changes: 15 additions & 12 deletions src/boilerplate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -170,14 +170,16 @@ mod tests {
"<tr><td>en<td>English",
)
.await?;
let document =
parse_document_async("<table><!--BOILERPLATE languages-->".as_bytes()).await?;
let document = parse_document_async(
"<!DOCTYPE html><table><!--BOILERPLATE languages--></table>".as_bytes(),
)
.await?;
let mut proc = Processor::new(boilerplate_dir.path(), Path::new("."));
dom_utils::scan_dom(&document, &mut |h| proc.visit(h));
proc.apply().await?;
assert_eq!(
serialize_for_test(&[document]),
"<html><head></head><body><table><tbody><tr><td>en</td><td>English</td></tr></tbody></table></body></html>");
"<!DOCTYPE html><html><head></head><body><table><tbody><tr><td>en</td><td>English</td></tr></tbody></table></body></html>");
Ok(())
}

Expand All @@ -189,15 +191,16 @@ mod tests {
"data:text/html,Hello, world!",
)
.await?;
let document =
parse_document_async("<a href=\"<!--BOILERPLATE data.url-->\">hello</a>".as_bytes())
.await?;
let document = parse_document_async(
"<!DOCTYPE html><a href=\"<!--BOILERPLATE data.url-->\">hello</a>".as_bytes(),
)
.await?;
let mut proc = Processor::new(boilerplate_dir.path(), Path::new("."));
dom_utils::scan_dom(&document, &mut |h| proc.visit(h));
proc.apply().await?;
assert_eq!(
serialize_for_test(&[document]),
"<html><head></head><body><a href=\"data:text/html,Hello, world!\">hello</a></body></html>");
"<!DOCTYPE html><html><head></head><body><a href=\"data:text/html,Hello, world!\">hello</a></body></html>");
Ok(())
}

Expand All @@ -208,23 +211,23 @@ mod tests {
tokio::fs::write(example_dir.path().join("ex2"), "second").await?;
tokio::fs::write(example_dir.path().join("ignored"), "bad").await?;
let document =
parse_document_async("<pre>EXAMPLE ex1</pre><pre><code class=html>\nEXAMPLE ex2 </code></pre><p>EXAMPLE ignored</p>".as_bytes())
parse_document_async("<!DOCTYPE html><pre>EXAMPLE ex1</pre><pre><code class=html>\nEXAMPLE ex2 </code></pre><p>EXAMPLE ignored</p>".as_bytes())
.await?;
let mut proc = Processor::new(Path::new("."), example_dir.path());
dom_utils::scan_dom(&document, &mut |h| proc.visit(h));
proc.apply().await?;
assert_eq!(
serialize_for_test(&[document]),
"<html><head></head><body><pre>first</pre><pre><code class=\"html\">second</code></pre><p>EXAMPLE ignored</p></body></html>" );
"<!DOCTYPE html><html><head></head><body><pre>first</pre><pre><code class=\"html\">second</code></pre><p>EXAMPLE ignored</p></body></html>" );
Ok(())
}

#[tokio::test]
async fn test_errors_unsafe_paths() -> io::Result<()> {
let bad_path_examples = [
"<body><!--BOILERPLATE /etc/passwd-->",
"<body><pre data-x=\"<!--BOILERPLATE src/../../foo-->\"></pre>",
"<body><pre>EXAMPLE ../foo</pre>",
"<!DOCTYPE html><body><!--BOILERPLATE /etc/passwd-->",
"<!DOCTYPE html><body><pre data-x=\"<!--BOILERPLATE src/../../foo-->\"></pre>",
"<!DOCTYPE html><body><pre>EXAMPLE ../foo</pre>",
];
for example in bad_path_examples {
let document = parse_document_async(example.as_bytes()).await?;
Expand Down
28 changes: 18 additions & 10 deletions src/interface_index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ mod tests {
async fn test_two_interfaces_in_one_block() -> io::Result<()> {
let document = parse_document_async(
r#"
<!DOCTYPE html>
<pre><code class=idl>
interface <dfn interface>HTMLMarqueeElement</dfn> { ... }
interface <dfn interface>HTMLBlinkElement</dfn> { ... }
Expand All @@ -204,7 +205,7 @@ INSERT INTERFACES HERE
assert_eq!(
serialize_for_test(&[document]),
r#"
<html><head></head><body><pre><code class="idl">
<!DOCTYPE html><html><head></head><body><pre><code class="idl">
interface <dfn interface="">HTMLMarqueeElement</dfn> { ... }
interface <dfn interface="">HTMLBlinkElement</dfn> { ... }
</code></pre>
Expand All @@ -217,6 +218,7 @@ interface <dfn interface="">HTMLBlinkElement</dfn> { ... }
async fn test_two_interfaces_in_separate_blocks() -> io::Result<()> {
let document = parse_document_async(
r#"
<!DOCTYPE html>
<pre><code class=idl>
interface <dfn interface>HTMLMarqueeElement</dfn> { ... }
</code></pre>
Expand All @@ -235,7 +237,7 @@ INSERT INTERFACES HERE
assert_eq!(
serialize_for_test(&[document]),
r#"
<html><head></head><body><pre><code class="idl">
<!DOCTYPE html><html><head></head><body><pre><code class="idl">
interface <dfn interface="">HTMLMarqueeElement</dfn> { ... }
</code></pre>
<pre><code class="idl">
Expand All @@ -250,6 +252,7 @@ interface <dfn interface="">HTMLBlinkElement</dfn> { ... }
async fn interface_with_partial() -> io::Result<()> {
let document = parse_document_async(
r#"
<!DOCTYPE html>
<pre><code class=idl>
interface <dfn interface>HTMLMarqueeElement</dfn> { ... }
</code></pre>
Expand All @@ -268,7 +271,7 @@ INSERT INTERFACES HERE
assert_eq!(
serialize_for_test(&[document]),
r##"
<html><head></head><body><pre><code class="idl">
<!DOCTYPE html><html><head></head><body><pre><code class="idl">
interface <dfn interface="">HTMLMarqueeElement</dfn> { ... }
</code></pre>
<pre><code class="idl">
Expand All @@ -283,6 +286,7 @@ partial interface <span id="HTMLMarqueeElement-partial">HTMLMarqueeElement</span
async fn interface_with_two_partials() -> io::Result<()> {
let document = parse_document_async(
r#"
<!DOCTYPE html>
<pre><code class=idl>
interface <dfn interface>HTMLMarqueeElement</dfn> { ... }
partial interface <span id=HTMLMarqueeElement-partial>HTMLMarqueeElement</span> { ... }
Expand All @@ -300,7 +304,7 @@ INSERT INTERFACES HERE
assert_eq!(
serialize_for_test(&[document]),
r##"
<html><head></head><body><pre><code class="idl">
<!DOCTYPE html><html><head></head><body><pre><code class="idl">
interface <dfn interface="">HTMLMarqueeElement</dfn> { ... }
partial interface <span id="HTMLMarqueeElement-partial">HTMLMarqueeElement</span> { ... }
partial interface <span id="HTMLMarqueeElement-partial-2">HTMLMarqueeElement</span> { ... }
Expand All @@ -314,6 +318,7 @@ partial interface <span id="HTMLMarqueeElement-partial-2">HTMLMarqueeElement</sp
async fn only_partials() -> io::Result<()> {
let document = parse_document_async(
r#"
<!DOCTYPE html>
<pre><code class=idl>
partial interface <span id=HTMLMarqueeElement-partial>HTMLMarqueeElement</span> { ... }
partial interface <span id=HTMLMarqueeElement-partial-2>HTMLMarqueeElement</span> { ... }
Expand All @@ -330,7 +335,7 @@ INSERT INTERFACES HERE
assert_eq!(
serialize_for_test(&[document]),
r##"
<html><head></head><body><pre><code class="idl">
<!DOCTYPE html><html><head></head><body><pre><code class="idl">
partial interface <span id="HTMLMarqueeElement-partial">HTMLMarqueeElement</span> { ... }
partial interface <span id="HTMLMarqueeElement-partial-2">HTMLMarqueeElement</span> { ... }
</code></pre>
Expand All @@ -343,6 +348,7 @@ partial interface <span id="HTMLMarqueeElement-partial-2">HTMLMarqueeElement</sp
async fn marker_before() -> io::Result<()> {
let document = parse_document_async(
r#"
<!DOCTYPE html>
INSERT INTERFACES HERE
<pre><code class=idl>
interface <dfn interface>HTMLMarqueeElement</dfn> { ... }
Expand All @@ -357,20 +363,20 @@ interface <dfn interface>HTMLMarqueeElement</dfn> { ... }
proc.apply()?;
assert_eq!(
serialize_for_test(&[document]),
r##"
<html><head></head><body><ul class="brief"><li><code>HTMLMarqueeElement</code></li></ul>
r#"
<!DOCTYPE html><html><head></head><body><ul class="brief"><li><code>HTMLMarqueeElement</code></li></ul>
<pre><code class="idl">
interface <dfn interface="">HTMLMarqueeElement</dfn> { ... }
</code></pre></body></html>
"##
"#
.trim()
);
Ok(())
}

#[tokio::test]
async fn no_marker() -> io::Result<()> {
let document = parse_document_async("".as_bytes()).await?;
let document = parse_document_async("<!DOCTYPE html>".as_bytes()).await?;
let mut proc = Processor::new();
dom_utils::scan_dom(&document, &mut |h| proc.visit(h));
let result = proc.apply();
Expand All @@ -381,7 +387,8 @@ interface <dfn interface="">HTMLMarqueeElement</dfn> { ... }
#[tokio::test]
async fn duplicate_marker() -> io::Result<()> {
let document = parse_document_async(
"<div>INSERT INTERFACES HERE</div><div>INSERT INTERFACES HERE</div>".as_bytes(),
"<!DOCTYPE html><div>INSERT INTERFACES HERE</div><div>INSERT INTERFACES HERE</div>"
.as_bytes(),
)
.await?;
let mut proc = Processor::new();
Expand All @@ -395,6 +402,7 @@ interface <dfn interface="">HTMLMarqueeElement</dfn> { ... }
async fn duplicate_dfn() -> io::Result<()> {
let document = parse_document_async(
r#"
<!DOCTYPE html>
<pre><code class=idl>
interface <dfn interface>HTMLMarqueeElement</dfn> { ... }
interface <dfn interface>HTMLMarqueeElement</dfn> { ... }
Expand Down
10 changes: 10 additions & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,21 @@ mod dom_utils;
mod interface_index;
mod io_utils;
mod parser;
mod rcdom_with_line_numbers;
mod represents;
mod tag_omission;

/// Program entry point: awaits `run()` and reports any failure itself.
///
/// When `run()` returns an error, its `Display` text is written to stderr and
/// the process exits with status 1, rather than handing the `Err` back to the
/// runtime for default reporting.
#[tokio::main]
async fn main() -> io::Result<()> {
    match run().await {
        Ok(()) => Ok(()),
        Err(err) => {
            // Printing the error ourselves gives slightly prettier output.
            eprintln!("{}", err);
            std::process::exit(1)
        }
    }
}

async fn run() -> io::Result<()> {
// Since we're using Rc in the DOM implementation, we must ensure that tasks
// which act on it are confined to this thread.

Expand Down
Loading

0 comments on commit 06326f7

Please sign in to comment.