Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion shared/yeast-macros/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,21 @@ mod parse;
///
/// ```text
/// (_) - match any named node (skips unnamed tokens)
/// _ - match any node, named or unnamed
/// (kind) - match a named node of the given kind
/// ("literal") - match an unnamed token by its text
/// "literal" - shorthand for `("literal")`
/// (kind field: (pattern)) - match with named field
/// (kind (pat) (pat)...) - match unnamed children (after all fields)
/// (kind field: _) - bare `_` and bare literals work in field position too
/// (kind (pat) (pat)...) - match unnamed children
/// (pattern) @capture - capture the matched node
/// "literal" @capture - capture an unnamed token
/// _ @capture - capture any node
/// (pattern)* @capture - capture each repeated match
/// (pattern)? - zero or one
/// ```
///
/// Named fields and bare child patterns may be intermixed in any order.
#[proc_macro]
pub fn query(input: TokenStream) -> TokenStream {
let input2: TokenStream2 = input.into();
Expand Down
50 changes: 35 additions & 15 deletions shared/yeast-macros/src/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ fn parse_query_node(tokens: &mut Tokens) -> Result<TokenStream> {
}
}

/// Parse a query atom: `(kind fields...)` or `(kind fields... bare_children...)`.
/// Parse a query atom: a parenthesized node, a bare `_` (any node), or a
/// bare string literal (unnamed token).
/// Does not handle `@capture` — that's handled by the caller as a postfix.
fn parse_query_atom(tokens: &mut Tokens) -> Result<TokenStream> {
match tokens.peek() {
Expand All @@ -58,9 +59,17 @@ fn parse_query_atom(tokens: &mut Tokens) -> Result<TokenStream> {
}
Ok(result)
}
Some(TokenTree::Ident(id)) if *id == "_" => {
tokens.next();
Ok(quote! { yeast::query::QueryNode::Any { match_unnamed: true } })
}
Some(TokenTree::Literal(_)) => {
let lit = expect_literal(tokens)?;
Ok(quote! { yeast::query::QueryNode::UnnamedNode { kind: #lit } })
}
Some(tok) => Err(syn::Error::new_spanned(
tok.clone(),
"expected `(` in query; use `(_) @name` to capture a wildcard",
"expected `(`, `_`, or string literal in query",
)),
}
}
Expand All @@ -74,7 +83,7 @@ fn parse_query_node_inner(tokens: &mut Tokens) -> Result<TokenStream> {
)),
Some(TokenTree::Ident(id)) if *id == "_" => {
tokens.next();
Ok(quote! { yeast::query::QueryNode::Any() })
Ok(quote! { yeast::query::QueryNode::Any { match_unnamed: false } })
}
Some(TokenTree::Literal(_)) => {
let lit = expect_literal(tokens)?;
Expand All @@ -98,11 +107,14 @@ fn parse_query_node_inner(tokens: &mut Tokens) -> Result<TokenStream> {
}
}

/// Parse zero or more field specifications and trailing bare patterns.
/// Named fields: `name: pattern` or `name*: (list...)`.
/// Bare patterns (no field name) become implicit `child` field entries.
/// Parse zero or more field specifications and bare patterns.
/// Named fields: `name: pattern`. Bare patterns (no field name) become
/// implicit `child` field entries. Named fields and bare patterns may
/// appear in any order; bare patterns are accumulated and emitted as a
/// single `("child", ...)` entry.
fn parse_query_fields(tokens: &mut Tokens) -> Result<Vec<TokenStream>> {
let mut fields = Vec::new();
let mut bare_children: Vec<TokenStream> = Vec::new();
while tokens.peek().is_some() {
if peek_is_field(tokens) {
let field_name = expect_ident(tokens, "expected field name")?;
Expand All @@ -115,16 +127,21 @@ fn parse_query_fields(tokens: &mut Tokens) -> Result<Vec<TokenStream>> {
(#field_str, vec![yeast::query::QueryListElem::SingleNode(#child)])
});
} else {
// Bare patterns — collect as implicit `child` field
// Bare patterns — accumulate into the implicit `child` field.
// We don't break here, so we can interleave with named fields.
let elems = parse_query_list(tokens)?;
if !elems.is_empty() {
fields.push(quote! {
("child", vec![#(#elems),*])
});
if elems.is_empty() {
// Nothing more we can parse at this level.
break;
}
break;
bare_children.extend(elems);
}
}
if !bare_children.is_empty() {
fields.push(quote! {
("child", vec![#(#bare_children),*])
});
}
Ok(fields)
}

Expand Down Expand Up @@ -178,10 +195,11 @@ fn parse_query_list(tokens: &mut Tokens) -> Result<Vec<TokenStream>> {
continue;
}

// Check for string literal (unnamed node)
// Check for string literal (unnamed node), optionally followed by @capture
if peek_is_literal(tokens) {
let lit = expect_literal(tokens)?;
let node = quote! { yeast::query::QueryNode::UnnamedNode { kind: #lit } };
let node = maybe_wrap_capture(tokens, node)?;
let elem = maybe_wrap_repetition(
tokens,
quote! {
Expand All @@ -192,10 +210,12 @@ fn parse_query_list(tokens: &mut Tokens) -> Result<Vec<TokenStream>> {
continue;
}

// Check for bare _ (wildcard), possibly followed by @capture
// Check for bare `_` (any node, named or unnamed), possibly followed by @capture.
// This is distinct from `(_)`, which only matches named nodes; the distinction
// mirrors tree-sitter query semantics.
if peek_is_underscore(tokens) {
tokens.next();
let node = quote! { yeast::query::QueryNode::Any() };
let node = quote! { yeast::query::QueryNode::Any { match_unnamed: true } };
let node = maybe_wrap_capture(tokens, node)?;
let elem = maybe_wrap_repetition(
tokens,
Expand Down
35 changes: 28 additions & 7 deletions shared/yeast/doc/yeast.md
Original file line number Diff line number Diff line change
Expand Up @@ -103,19 +103,40 @@ Captures bind matched nodes to names for use in the transform. A capture
(identifier) @name // capture an identifier node
(_) @value // capture any named node
(identifier)* @items // capture each repeated match
("=") @op // capture an unnamed token by its text
"=" @op // shorthand for the line above
_ @anything // capture any node, named or unnamed
```

### Unnamed children
### Named vs unnamed children

Patterns that appear after all named fields match unnamed (positional)
children. Named node patterns like `(_)` automatically skip unnamed tokens
(keywords, operators, punctuation), matching tree-sitter semantics:
The two wildcard forms `(_)` and bare `_` differ:

- `(_)` matches only **named** nodes. When used as a positional pattern,
unnamed children (keywords, operators, punctuation) are skipped over.
- Bare `_` matches **any** node, named or unnamed, taking whatever is next
in the child list.

Bare child patterns are matched with **forward-scan** semantics: each
pattern advances through the remaining children until it finds one that
matches, skipping non-matching children along the way. So `(foo ("baz"))`
against a `foo` whose children are `[bar, baz]` succeeds — the matcher
scans past `bar` and matches `baz`. Because the iterator only ever moves
forward, subsequent patterns can never match children that appear earlier
in source order than already-matched ones.

For named-only patterns (`(_)`, `(some_kind ...)`), the scan additionally
skips past unnamed tokens without trying to match them, since they can
never match anyway.

Anchors (`.`) for forcing immediate adjacency, like in tree-sitter
queries, are not supported.

```rust
(for
pattern: (_) @pat // named field
value: (in (_) @val) // "in" token is skipped automatically
body: (do (_)* @body) // "do" and "end" tokens skipped
pattern: (_) @pat // named field, captures any named node
value: (in (_) @val) // "in" wrapper is a named node here
body: (do (_)* @body) // "do" and "end" tokens skipped by (_)
)
```

Expand Down
51 changes: 30 additions & 21 deletions shared/yeast/src/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,13 @@ use crate::{captures::Captures, Ast, Id};

#[derive(Debug, Clone)]
pub enum QueryNode {
Any(),
/// A wildcard. With `match_unnamed = false` (what `(_)` produces), it
/// only matches named nodes when used positionally — unnamed children
/// are skipped over. With `match_unnamed = true` (what bare `_` produces),
/// the wildcard consumes whatever the next child is, named or unnamed.
Any {
match_unnamed: bool,
},
Node {
kind: &'static str,
children: Vec<(&'static str, Vec<QueryListElem>)>,
Expand All @@ -24,7 +30,7 @@ impl QueryNode {
QueryNode::Node { kind, .. } => Some(kind),
QueryNode::UnnamedNode { kind } => Some(kind),
QueryNode::Capture { node, .. } => node.root_kind(),
QueryNode::Any() => None,
QueryNode::Any { .. } => None,
}
}
}
Expand All @@ -51,7 +57,7 @@ impl QueryNode {
/// semantics where `(_)` only matches named nodes.
fn matches_named_only(&self) -> bool {
match self {
QueryNode::Any() => true,
QueryNode::Any { match_unnamed } => !match_unnamed,
QueryNode::Node { .. } => true,
QueryNode::UnnamedNode { .. } => false,
QueryNode::Capture { node, .. } => node.matches_named_only(),
Expand All @@ -60,7 +66,7 @@ impl QueryNode {

pub fn do_match(&self, ast: &Ast, node: Id, matches: &mut Captures) -> Result<bool, String> {
match self {
QueryNode::Any() => Ok(true),
QueryNode::Any { .. } => Ok(true),
QueryNode::Node { kind, children } => {
let node = ast.get_node(node).unwrap();
let target_kind = ast
Expand Down Expand Up @@ -161,25 +167,28 @@ impl QueryListElem {
}
}
QueryListElem::SingleNode(sub_query) => {
if sub_query.matches_named_only() {
// Skip unnamed children, matching tree-sitter semantics
// where (_) only matches named nodes.
loop {
match remaining_children.next() {
Some(child) => {
let node = ast.get_node(child).unwrap();
if node.is_named() {
return sub_query.do_match(ast, child, matches);
}
// Skip unnamed child, continue to next
}
None => return Ok(false),
// Forward-scan semantics: advance through the iterator until
// we find a child that matches `sub_query`. When the sub-query
// is named-only, unnamed children are skipped without a match
// attempt, since they can never match. When a match attempt
// fails, restore the captures so that partial captures from a
// complex sub-query don't leak.
let skip_unnamed = sub_query.matches_named_only();
loop {
let Some(child) = remaining_children.next() else {
return Ok(false);
};
if skip_unnamed {
let node = ast.get_node(child).unwrap();
if !node.is_named() {
continue;
Comment thread
tausbn marked this conversation as resolved.
}
}
} else if let Some(child) = remaining_children.next() {
sub_query.do_match(ast, child, matches)
} else {
Ok(false)
let snapshot = matches.clone();
if sub_query.do_match(ast, child, matches)? {
return Ok(true);
}
*matches = snapshot;
}
}
}
Expand Down
20 changes: 11 additions & 9 deletions shared/yeast/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,10 @@ impl Schema {
}
}
// Import all node kind names, preserving tree-sitter's IDs.
// Track named and unnamed variants separately.
// For named kinds, use the canonical ID from id_for_node_kind(name, true)
// since some languages have multiple IDs for the same named kind.
// Track named and unnamed variants separately. For both named and
// unnamed kinds, use the canonical ID from id_for_node_kind, since
// some languages have multiple IDs for the same name (e.g., the
// reserved error token at ID 0 may share a name with a real token).
for id in 0..language.node_kind_count() as u16 {
if let Some(name) = language.node_kind_for_id(id) {
if !name.is_empty() {
Expand All @@ -75,12 +76,13 @@ impl Schema {
schema.kind_names.insert(canonical_id, name);
}
} else {
// For unnamed kinds, only insert if we don't already have one
// (some languages have multiple unnamed IDs for the same text)
schema
.unnamed_kind_ids
.entry(name.to_string())
.or_insert(id);
let canonical_id = language.id_for_node_kind(name, false);
if canonical_id != 0 && !schema.unnamed_kind_ids.contains_key(name) {
schema
.unnamed_kind_ids
.insert(name.to_string(), canonical_id);
schema.kind_names.insert(canonical_id, name);
}
}
// Always track the name for any ID we encounter
schema.kind_names.entry(id).or_insert(name);
Expand Down
Loading
Loading