Looking at this pest grammar through the VIBES lens, I'd assess the current state as <🔍🪢🧊>: it has some expressiveness (aliases like "filename"/"name") and a clean pipeline flow, but it still has runtime error potential (e.g., `@size == "hello"` parses but is meaningless).
Here's how to transform it toward <🔬🪢💠> for optimal LLM ergonomics:
1. Expressive Power (🔍→🔬): Add Natural Language Patterns
// Before: only the structured form was accepted, e.g. @filename == "test.txt"
// After: the same query may also be written as a natural-language phrase.
predicate = {
    // structured form: selector, operator, right-hand side
    (selector ~ op ~ rhs)
    // natural-language phrasing
  | natural_predicate
}
// Natural-language phrasings of a predicate. Alternatives are tried top to
// bottom (PEG ordered choice).
natural_predicate = {
    // descriptor-led forms:
    //   "files named test.txt"
    //   "files containing TODO"
    //   "files larger than 1MB"
    file_descriptor ~ (
        ("named" | "containing") ~ rhs
      | size_comparison ~ size_value
    )
    // value-led form: "test.txt files"
  | rhs ~ file_descriptor
    // format-led form: "PDF documents"
    // NOTE(review): if rhs can match a bare word such as PDF, the value-led
    // alternative above wins before this one is tried — confirm the ordering.
  | format_name ~ ("files" | "documents")
}
// Word lists referenced by natural_predicate.

// Nouns naming the items being queried ("files" is listed before "file" so
// the longer word is tried first).
file_descriptor = {
    "files"
  | "file"
  | "documents"
  | "items"
}

// Phrases introducing a size comparison.
size_comparison = {
    "larger than"
  | "smaller than"
  | "bigger than"
  | "over"
  | "under"
}

// Recognised format names.
format_name = {
    "PDF"
  | "JSON"
  | "XML"
  | "text"
  | "image"
  | "video"
}
2. Error Surface (🧊→💠): Type-Safe Operations
// Replace the single generic predicate with one alternative per value type,
// so each selector can only be paired with operators and values of its kind.
predicate = {
    string_predicate
  | numeric_predicate
  | type_predicate
  | content_predicate
}
// Comparison on a string-valued selector.
string_predicate = { string_selector ~ string_op ~ string_value }

// "@" followed by one of the string attribute names (long and short aliases).
string_selector = {
    "@" ~ (
        "filename" | "name"
      | "filepath" | "path"
      | "extension" | "ext"
    )
}

// String operators. Longer symbols come before their prefixes ("==" before
// "=", "~=" before "~") because PEG choice takes the first match.
string_op = {
    "==" | "="
  | "~=" | "~"
  | "matches"
  | "contains"
  | "starts with"
  | "ends with"
}

// Right-hand side of a string comparison.
string_value = { quoted_string | pattern }
// Comparison on a numeric (size) selector.
numeric_predicate = {
    // Range form: "between" needs two bounds, e.g. @size between 1MB and 5MB.
    // Tried first so the second bound is consumed instead of being left over.
    // NOTE(review): assumes "and" is not already claimed by a logical
    // operator inside numeric_value — confirm against the rest of the grammar.
    numeric_selector ~ "between" ~ numeric_value ~ "and" ~ numeric_value
    // Single-operand form, e.g. @size >= 1MB
  | numeric_selector ~ numeric_op ~ numeric_value
}

// "@" followed by a size attribute name.
numeric_selector = { "@" ~ ("filesize" | "size") }

// Numeric operators. Two-character symbols precede their one-character
// prefixes because PEG choice takes the first match. "between" is retained
// here for backward compatibility with the single-operand form.
numeric_op = { "==" | "=" | ">=" | "<=" | ">" | "<" | "between" }

// A size with a unit is tried before a bare number so "1MB" is not cut at "1".
numeric_value = { size_with_unit | number }

// Atomic: no implicit whitespace is allowed between the number and its unit.
size_with_unit = @{ number ~ size_unit }

// "bytes" is listed before "B" so the longer spelling is tried first.
size_unit = { "KB" | "MB" | "GB" | "bytes" | "B" }
// Test on the kind of filesystem entry, written "<selector> is <type>".
type_predicate = { type_selector ~ "is" ~ file_type }

// "@" followed by the type attribute name (long or short alias).
type_selector = { "@" ~ ("filetype" | "type") }

// Entry kinds that can be tested for.
file_type = {
    "directory"
  | "file"
  | "symlink"
  | "executable"
}

// With these typed rules, "@size == 'hello'" has no matching alternative in
// numeric_predicate, so it is rejected at parse time instead of at runtime.
3. Enhanced Expressiveness: Multiple Query Styles
// Entry point: exactly one query, in any of three styles, spanning the input.
// NOTE(review): expr is tried first; if it can match a bare word such as
// SELECT or find, the later alternatives are never reached — confirm.
program = {
    SOI
  ~ (expr | sql_style | function_style)
  ~ EOI
}

// SQL-style: SELECT * WHERE filename = 'test.txt' AND size > 1MB
sql_style = {
    "SELECT" ~ ("*" | selector_list) ~ "WHERE" ~ expr
}
// Function-style: find(name: "test.txt", size: >1MB)
function_style = { "find" ~ "(" ~ param_list ~ ")" }

// One or more parameters separated by commas.
param_list = {
    param ~ ("," ~ param)*
}

// A single `name: value` pair.
param = {
    param_name ~ ":" ~ param_value
}
4. Context Flow Enhancement: Query Composition
// Allow saving and reusing sub-queries.
// A program is a sequence of one or more statements, so that `let` bindings
// can be followed by the query that uses them (a single definition or a single
// query is still accepted). With only one statement allowed, a saved query
// could never be referenced.
// NOTE(review): statement separation relies on the grammar's implicit
// WHITESPACE rule covering newlines — confirm.
program = { SOI ~ (definition | query)+ ~ EOI }

// Bind a name to an expression: let <identifier> = <expr>
definition = {
    "let" ~ identifier ~ "=" ~ expr
}
// A query is an expression, a reference to a saved query, or an expression
// restricted to a saved query.
query = {
    // Combine queries: must come before plain `expr`. PEG choice is ordered,
    // so if `expr` were tried first it would succeed on the prefix and this
    // alternative would never be reached.
    expr ~ "in" ~ identifier
    // Plain expression
  | expr
    // Reference saved query
  | identifier
}
// Examples:
// let large_files = @size > 1MB
// let source_code = @ext in [py, js, rs]
// large_files && source_code
5. Smart Defaults and Inference
// Shorthand predicates: the selector is inferred from the shape of the value.
implicit_predicate = {
    // "test.txt"  is read as  @filename == "test.txt"
    quoted_string
    // ">1MB"      is read as  @size > 1MB
  | size_comparison_value
    // TODO        is read as  @contents ~ "TODO"
  | bare_pattern
}

// A "<" or ">" sign followed by a size with a unit.
size_comparison_value = {
    (">" | "<") ~ size_with_unit
}

// Atomic run of one or more `char`; implies a content search.
bare_pattern = @{ char+ }
The transformed grammar would achieve <🔬🪢💠>:
- 🔬: Multiple natural ways to express the same query
- 🪢: Clean pipeline structure maintained
- 💠: Type mismatches impossible at parse time
This makes the tool significantly more ergonomic for LLMs by allowing natural expression while preventing invalid constructions entirely.