diff --git a/gcc/rust/parse/rust-cfg-parser.cc b/gcc/rust/parse/rust-cfg-parser.cc
new file mode 100644
index 000000000000..0a62dd11ede9
--- /dev/null
+++ b/gcc/rust/parse/rust-cfg-parser.cc
@@ -0,0 +1,127 @@
+/* This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "rust-cfg-parser.h"
+#include "rust-lex.h"
+#include "rust-parse.h"
+#include "rust-session-manager.h"
+#include "selftest.h"
+
+namespace Rust {
+bool
+parse_cfg_option (std::string &input, std::string &key, std::string &value)
+{
+ key.clear ();
+ value.clear ();
+
+ auto lexer = Lexer (input);
+ auto parser = Parser<Lexer> (lexer);
+
+ auto token = parser.peek_current_token ();
+ if (token->get_id () != IDENTIFIER)
+ {
+ return false;
+ }
+
+ key = token->get_str ();
+
+ rust_assert (parser.skip_token (IDENTIFIER));
+ token = parser.peek_current_token ();
+
+ switch (token->get_id ())
+ {
+ case END_OF_FILE:
+ // we're done parsing, we had a valid key, return happily
+ return true;
+ case EQUAL:
+ // We have an equal sign: Skip the token and parse an identifier
+ {
+ rust_assert (parser.skip_token (EQUAL));
+
+ auto value_expr = parser.parse_literal_expr ();
+ // We had an equal sign but no value, error out
+ if (!value_expr)
+ return false;
+
+ if (value_expr->get_lit_type () != AST::Literal::LitType::STRING)
+ return false;
+
+ value = value_expr->get_literal ().as_string ();
+ return true;
+ }
+ default:
+ return false;
+ }
+}
+} // namespace Rust
+
+#if CHECKING_P
+
+namespace selftest {
+
+void
+rust_cfg_parser_test (void)
+{
+ std::string key;
+ std::string value;
+
+ auto input = std::string ("key_no_value");
+
+ ASSERT_TRUE (Rust::parse_cfg_option (input, key, value));
+ ASSERT_EQ (key, "key_no_value");
+ ASSERT_TRUE (value.empty ());
+
+ input = std::string ("k=\"v\"");
+
+ ASSERT_TRUE (Rust::parse_cfg_option (input, key, value));
+ ASSERT_EQ (key, "k");
+ ASSERT_EQ (value, "v");
+
+ // values should be between double quotes
+ input = std::string ("k=v");
+ ASSERT_FALSE (Rust::parse_cfg_option (input, key, value));
+
+ // No value is an error if there is an equal sign
+ input = std::string ("k=");
+ ASSERT_FALSE (Rust::parse_cfg_option (input, key, value));
+
+ // No key is an error
+ input = std::string ("=");
+ ASSERT_FALSE (Rust::parse_cfg_option (input, key, value));
+
+ input = std::string ("=value");
+ ASSERT_FALSE (Rust::parse_cfg_option (input, key, value));
+
+ // values that are not string literals are an error
+ input = std::string ("key=b\"a\"");
+ ASSERT_FALSE (Rust::parse_cfg_option (input, key, value));
+
+ input = std::string ("key='v'");
+ ASSERT_FALSE (Rust::parse_cfg_option (input, key, value));
+
+ input = std::string ("key=155");
+ ASSERT_FALSE (Rust::parse_cfg_option (input, key, value));
+
+ input = std::string ("key=3.14");
+ ASSERT_FALSE (Rust::parse_cfg_option (input, key, value));
+
+ // kebab case is not valid for an identifier
+ input = std::string ("key-no-value");
+ ASSERT_FALSE (Rust::parse_cfg_option (input, key, value));
+}
+} // namespace selftest
+
+#endif // CHECKING_P
diff --git a/gcc/rust/parse/rust-cfg-parser.h b/gcc/rust/parse/rust-cfg-parser.h
new file mode 100644
index 000000000000..0d64016f86dc
--- /dev/null
+++ b/gcc/rust/parse/rust-cfg-parser.h
@@ -0,0 +1,52 @@
+/* This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#ifndef RUST_CFG_PARSER_H
+#define RUST_CFG_PARSER_H
+
+#include "rust-system.h"
+
+namespace Rust {
+/**
+ * Parse a `key` or `key="value"` pair given to the `-frust-cfg` compiler
+ * option.
+ *
+ * The format is as follows:
+ *
+ * -frust-cfg=<cfg_input>
+ *
+ * cfg_input: identifier | identifier '=' '"' identifier '"'
+ *
+ * @param input User input given to the -frust-cfg option
+ * @param key String in which to store the parsed `key`.
+ * @param value String in which to store the parsed `value` if it exists
+ *
+ * @return false if the given input was invalid, true otherwise
+ */
+bool
+parse_cfg_option (std::string &input, std::string &key, std::string &value);
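+
+/* Example usage, mirroring the selftests in rust-cfg-parser.cc:
+ *
+ * std::string key;
+ * std::string value;
+ * std::string input ("k=\"v\"");
+ * if (Rust::parse_cfg_option (input, key, value))
+ * {
+ * // key == "k" and value == "v"; for a bare "key_no_value" input,
+ * // key is set and value stays empty.
+ * }
+ */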
+} // namespace Rust
+
+#if CHECKING_P
+
+namespace selftest {
+extern void
+rust_cfg_parser_test (void);
+} // namespace selftest
+
+#endif // CHECKING_P
+
+#endif // RUST_CFG_PARSER_H
diff --git a/gcc/rust/parse/rust-parse-impl.h b/gcc/rust/parse/rust-parse-impl.h
new file mode 100644
index 000000000000..45e0f6972c6a
--- /dev/null
+++ b/gcc/rust/parse/rust-parse-impl.h
@@ -0,0 +1,6904 @@
+// Copyright (C) 2020-2022 Free Software Foundation, Inc.
+
+// This file is part of GCC.
+
+// GCC is free software; you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation; either version 3, or (at your option) any later
+// version.
+
+// GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with GCC; see the file COPYING3. If not see
+// <http://www.gnu.org/licenses/>.
+
+/* Template implementation for Rust::Parser. Previously in rust-parse.cc (before
+ * Parser was template). Separated from rust-parse.h for readability. */
+
+/* DO NOT INCLUDE ANYWHERE - this is automatically included with rust-parse.h
+ * This is also the reason why there are no include guards. */
+
+#define INCLUDE_ALGORITHM
+#include "rust-diagnostics.h"
+#include "rust-make-unique.h"
+
+namespace Rust {
+// Left binding powers of operations.
+enum binding_powers
+{
+ // Highest priority
+ LBP_HIGHEST = 100,
+
+ LBP_PATH = 95,
+
+ LBP_METHOD_CALL = 90,
+
+ LBP_FIELD_EXPR = 85,
+
+ LBP_FUNCTION_CALL = 80,
+ LBP_ARRAY_REF = LBP_FUNCTION_CALL,
+
+ LBP_QUESTION_MARK = 75, // unary postfix - counts as left
+
+ LBP_UNARY_PLUS = 70, // Used only when the null denotation is +
+ LBP_UNARY_MINUS = LBP_UNARY_PLUS, // Used only when the null denotation is -
+ LBP_UNARY_ASTERISK = LBP_UNARY_PLUS, // deref operator - unary prefix
+ LBP_UNARY_EXCLAM = LBP_UNARY_PLUS,
+ LBP_UNARY_AMP = LBP_UNARY_PLUS,
+ LBP_UNARY_AMP_MUT = LBP_UNARY_PLUS,
+
+ LBP_AS = 65,
+
+ LBP_MUL = 60,
+ LBP_DIV = LBP_MUL,
+ LBP_MOD = LBP_MUL,
+
+ LBP_PLUS = 55,
+ LBP_MINUS = LBP_PLUS,
+
+ LBP_L_SHIFT = 50,
+ LBP_R_SHIFT = LBP_L_SHIFT,
+
+ LBP_AMP = 45,
+
+ LBP_CARET = 40,
+
+ LBP_PIPE = 35,
+
+ LBP_EQUAL = 30,
+ LBP_NOT_EQUAL = LBP_EQUAL,
+ LBP_SMALLER_THAN = LBP_EQUAL,
+ LBP_SMALLER_EQUAL = LBP_EQUAL,
+ LBP_GREATER_THAN = LBP_EQUAL,
+ LBP_GREATER_EQUAL = LBP_EQUAL,
+
+ LBP_LOGICAL_AND = 25,
+
+ LBP_LOGICAL_OR = 20,
+
+ LBP_DOT_DOT = 15,
+ LBP_DOT_DOT_EQ = LBP_DOT_DOT,
+
+ // TODO: note all these assig operators are RIGHT associative!
+ LBP_ASSIG = 10,
+ LBP_PLUS_ASSIG = LBP_ASSIG,
+ LBP_MINUS_ASSIG = LBP_ASSIG,
+ LBP_MULT_ASSIG = LBP_ASSIG,
+ LBP_DIV_ASSIG = LBP_ASSIG,
+ LBP_MOD_ASSIG = LBP_ASSIG,
+ LBP_AMP_ASSIG = LBP_ASSIG,
+ LBP_PIPE_ASSIG = LBP_ASSIG,
+ LBP_CARET_ASSIG = LBP_ASSIG,
+ LBP_L_SHIFT_ASSIG = LBP_ASSIG,
+ LBP_R_SHIFT_ASSIG = LBP_ASSIG,
+
+ // return, break, and closures as lowest priority?
+ LBP_RETURN = 5,
+ LBP_BREAK = LBP_RETURN,
+ LBP_CLOSURE = LBP_RETURN, // unary prefix operators
+
+#if 0
+ // rust precedences
+ // used for closures
+ PREC_CLOSURE = -40,
+ // used for break, continue, return, and yield
+ PREC_JUMP = -30,
+ // used for range (although weird comment in rustc about this)
+ PREC_RANGE = -10,
+ // used for binary operators mentioned below - also cast, colon (type),
+ // assign, assign_op
+ PREC_BINOP = FROM_ASSOC_OP,
+ // used for box, address_of, let, unary (again, weird comment on let)
+ PREC_PREFIX = 50,
+ // used for await, call, method call, field, index, try,
+ // inline asm, macro invocation
+ PREC_POSTFIX = 60,
+ // used for array, repeat, tuple, literal, path, paren, if,
+ // while, for, 'loop', match, block, try block, async, struct
+ PREC_PAREN = 99,
+ PREC_FORCE_PAREN = 100,
+#endif
+
+ // lowest priority
+ LBP_LOWEST = 0
+};
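+
+/* For example (illustrative only, using the values above): when Pratt
+ * parsing `a + b * c`, the `*` token has a higher left binding power
+ * (LBP_MUL = 60) than `+` (LBP_PLUS = 55), so the right-hand operand of
+ * `+` greedily consumes `b * c` and the expression groups as
+ * `a + (b * c)`; conversely `a * b + c` groups as `(a * b) + c`. */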
+
+/* Returns whether the token can start a type (i.e. there is a valid type
+ * beginning with the token). */
+inline bool
+can_tok_start_type (TokenId id)
+{
+ switch (id)
+ {
+ case EXCLAM:
+ case LEFT_SQUARE:
+ case LEFT_ANGLE:
+ case UNDERSCORE:
+ case ASTERISK:
+ case AMP:
+ case LIFETIME:
+ case IDENTIFIER:
+ case SUPER:
+ case SELF:
+ case SELF_ALIAS:
+ case CRATE:
+ case DOLLAR_SIGN:
+ case SCOPE_RESOLUTION:
+ case LEFT_PAREN:
+ case FOR:
+ case ASYNC:
+ case CONST:
+ case UNSAFE:
+ case EXTERN_TOK:
+ case FN_TOK:
+ case IMPL:
+ case DYN:
+ case QUESTION_MARK:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/* Returns whether the token id is (or is likely to be) a right angle bracket.
+ * i.e. '>', '>>', '>=' and '>>=' tokens. */
+inline bool
+is_right_angle_tok (TokenId id)
+{
+ switch (id)
+ {
+ case RIGHT_ANGLE:
+ case RIGHT_SHIFT:
+ case GREATER_OR_EQUAL:
+ case RIGHT_SHIFT_EQ:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/* HACK-y special handling for skipping a right angle token at the end of
+ * generic arguments.
+ * Currently, this replaces the "current token" with one that is identical
+ * except has the leading '>' removed (e.g. '>>' becomes '>'). This is bad
+ * for several reasons - it modifies the token stream to something that
+ * actually doesn't make syntactic sense, it may not work if the token
+ * has already been skipped, etc. It was done because it would not
+ * actually require inserting new items into the token stream (which I
+ * thought would take more work to not mess up) and because I wasn't sure
+ * if the "already seen right angle" flag in the parser would work
+ * correctly.
+ * Those two other approaches listed are in my opinion actually better
+ * long-term - insertion is probably best as it reflects syntactically
+ * what occurs. On the other hand, I need to do a code audit to make sure
+ * that insertion doesn't mess anything up. So that's a FIXME. */
+template <typename ManagedTokenSource>
+bool
+Parser<ManagedTokenSource>::skip_generics_right_angle ()
+{
+ /* OK, new great idea. Have a lexer method called
+ * "split_current_token(TokenType newLeft, TokenType newRight)", which is
+ * called here with whatever arguments are appropriate. That lexer method
+ * handles "replacing" the current token with the "newLeft" and "inserting"
+ * the next token with the "newRight" (and creating a location, etc. for it)
+ */
+
+ /* HACK: special handling for right shift '>>', greater or equal '>=', and
+ * right shift assig */
+ // '>>='
+ const_TokenPtr tok = lexer.peek_token ();
+ switch (tok->get_id ())
+ {
+ case RIGHT_ANGLE:
+ // this is good - skip token
+ lexer.skip_token ();
+ return true;
+ case RIGHT_SHIFT: {
+ // new implementation that should be better
+ lexer.split_current_token (RIGHT_ANGLE, RIGHT_ANGLE);
+ lexer.skip_token ();
+ return true;
+ }
+ case GREATER_OR_EQUAL: {
+ // new implementation that should be better
+ lexer.split_current_token (RIGHT_ANGLE, EQUAL);
+ lexer.skip_token ();
+ return true;
+ }
+ case RIGHT_SHIFT_EQ: {
+ // new implementation that should be better
+ lexer.split_current_token (RIGHT_ANGLE, GREATER_OR_EQUAL);
+ lexer.skip_token ();
+ return true;
+ }
+ default:
+ add_error (Error (tok->get_locus (),
+ "expected %<>%> at end of generic argument - found %qs",
+ tok->get_token_description ()));
+ return false;
+ }
+}
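+
+/* As a concrete illustration of the splitting above: in `Vec<Vec<i32>>` the
+ * trailing `>>` is lexed as a single RIGHT_SHIFT token. When the inner
+ * generic argument list calls this function, split_current_token turns that
+ * token into two RIGHT_ANGLE tokens; the first one is skipped here and the
+ * second is left for the outer argument list to consume. */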
+
+/* Gets left binding power for specified token.
+ * Not suitable for use at the moment or possibly ever because binding power
+ * cannot be purely determined from operator token with Rust grammar - e.g.
+ * method call and field access have
+ * different left binding powers but the same operator token. */
+template <typename ManagedTokenSource>
+int
+Parser<ManagedTokenSource>::left_binding_power (const_TokenPtr token)
+{
+ // HACK: called with "peek_token()", so lookahead is "peek_token(1)"
+ switch (token->get_id ())
+ {
+ /* TODO: issue here - distinguish between method calls and field access
+ * somehow? Also would have to distinguish between paths and function
+ * calls (:: operator), maybe more stuff. */
+ /* Current plan for tackling LBP - don't do it based on token, use
+ * lookahead. Or alternatively, only use Pratt parsing for OperatorExpr
+ * and handle other expressions without it. rustc only considers
+ * arithmetic, logical/relational, 'as',
+ * '?=', ranges, colons, and assignment to have operator precedence and
+ * associativity rules applicable. It then has
+ * a separate "ExprPrecedence" that also includes binary operators. */
+
+ // TODO: handle operator overloading - have a function replace the
+ // operator?
+
+ /*case DOT:
+ return LBP_DOT;*/
+
+ case SCOPE_RESOLUTION:
+ rust_debug (
+ "possible error - looked up LBP of scope resolution operator. should "
+ "be handled elsewhere.");
+ return LBP_PATH;
+
+ /* Resolved by lookahead HACK that should work with current code. If next
+ * token is identifier and token after that isn't parenthesised expression
+ * list, it is a field reference. */
+ case DOT:
+ if (lexer.peek_token (1)->get_id () == IDENTIFIER
+ && lexer.peek_token (2)->get_id () != LEFT_PAREN)
+ {
+ return LBP_FIELD_EXPR;
+ }
+ return LBP_METHOD_CALL;
+
+ case LEFT_PAREN:
+ return LBP_FUNCTION_CALL;
+
+ case LEFT_SQUARE:
+ return LBP_ARRAY_REF;
+
+ // postfix question mark (i.e. error propagation expression)
+ case QUESTION_MARK:
+ return LBP_QUESTION_MARK;
+
+ case AS:
+ return LBP_AS;
+
+ case ASTERISK:
+ return LBP_MUL;
+ case DIV:
+ return LBP_DIV;
+ case PERCENT:
+ return LBP_MOD;
+
+ case PLUS:
+ return LBP_PLUS;
+ case MINUS:
+ return LBP_MINUS;
+
+ case LEFT_SHIFT:
+ return LBP_L_SHIFT;
+ case RIGHT_SHIFT:
+ return LBP_R_SHIFT;
+
+ // binary & operator
+ case AMP:
+ return LBP_AMP;
+
+ // binary ^ operator
+ case CARET:
+ return LBP_CARET;
+
+ // binary | operator
+ case PIPE:
+ return LBP_PIPE;
+
+ case EQUAL_EQUAL:
+ return LBP_EQUAL;
+ case NOT_EQUAL:
+ return LBP_NOT_EQUAL;
+ case RIGHT_ANGLE:
+ return LBP_GREATER_THAN;
+ case GREATER_OR_EQUAL:
+ return LBP_GREATER_EQUAL;
+ case LEFT_ANGLE:
+ return LBP_SMALLER_THAN;
+ case LESS_OR_EQUAL:
+ return LBP_SMALLER_EQUAL;
+
+ case LOGICAL_AND:
+ return LBP_LOGICAL_AND;
+
+ case OR:
+ return LBP_LOGICAL_OR;
+
+ case DOT_DOT:
+ return LBP_DOT_DOT;
+
+ case DOT_DOT_EQ:
+ return LBP_DOT_DOT_EQ;
+
+ case EQUAL:
+ return LBP_ASSIG;
+ case PLUS_EQ:
+ return LBP_PLUS_ASSIG;
+ case MINUS_EQ:
+ return LBP_MINUS_ASSIG;
+ case ASTERISK_EQ:
+ return LBP_MULT_ASSIG;
+ case DIV_EQ:
+ return LBP_DIV_ASSIG;
+ case PERCENT_EQ:
+ return LBP_MOD_ASSIG;
+ case AMP_EQ:
+ return LBP_AMP_ASSIG;
+ case PIPE_EQ:
+ return LBP_PIPE_ASSIG;
+ case CARET_EQ:
+ return LBP_CARET_ASSIG;
+ case LEFT_SHIFT_EQ:
+ return LBP_L_SHIFT_ASSIG;
+ case RIGHT_SHIFT_EQ:
+ return LBP_R_SHIFT_ASSIG;
+
+ /* HACK: float literal due to lexer misidentifying a dot then an integer as
+ * a float */
+ case FLOAT_LITERAL:
+ return LBP_FIELD_EXPR;
+ // field expr is same as tuple expr in precedence, I imagine
+ // TODO: is this needed anymore? lexer shouldn't do that anymore
+
+ // anything that can't appear in an infix position is given lowest priority
+ default:
+ return LBP_LOWEST;
+ }
+}
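+
+/* Example of the DOT lookahead above: in `s.field + 1` the token after `.`
+ * is an identifier not followed by `(`, so DOT gets LBP_FIELD_EXPR; in
+ * `s.method ()` the identifier is followed by `(`, so DOT gets
+ * LBP_METHOD_CALL instead. */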
+
+// Returns true when current token is EOF.
+template <typename ManagedTokenSource>
+bool
+Parser<ManagedTokenSource>::done_end_of_file ()
+{
+ return lexer.peek_token ()->get_id () == END_OF_FILE;
+}
+
+// Parses a sequence of items within a module or the implicit top-level module
+// in a crate
+template <typename ManagedTokenSource>
+std::vector<std::unique_ptr<AST::Item>>
+Parser<ManagedTokenSource>::parse_items ()
+{
+ std::vector<std::unique_ptr<AST::Item>> items;
+
+ const_TokenPtr t = lexer.peek_token ();
+ while (t->get_id () != END_OF_FILE)
+ {
+ std::unique_ptr<AST::Item> item = parse_item (false);
+ if (item == nullptr)
+ {
+ Error error (lexer.peek_token ()->get_locus (),
+ "failed to parse item in crate");
+ add_error (std::move (error));
+
+ // TODO: should all items be cleared?
+ items = std::vector<std::unique_ptr<AST::Item>> ();
+ break;
+ }
+
+ items.push_back (std::move (item));
+
+ t = lexer.peek_token ();
+ }
+
+ return items;
+}
+
+// Parses a crate (compilation unit) - entry point
+template <typename ManagedTokenSource>
+std::unique_ptr<AST::Crate>
+Parser<ManagedTokenSource>::parse_crate ()
+{
+ // parse inner attributes
+ AST::AttrVec inner_attrs = parse_inner_attributes ();
+
+ // parse items
+ std::vector<std::unique_ptr<AST::Item>> items = parse_items ();
+
+ // emit all errors
+ for (const auto &error : error_table)
+ error.emit_error ();
+
+ return std::unique_ptr<AST::Crate> (
+ new AST::Crate (std::move (items), std::move (inner_attrs)));
+}
+
+// Parse a contiguous block of inner attributes.
+template <typename ManagedTokenSource>
+AST::AttrVec
+Parser<ManagedTokenSource>::parse_inner_attributes ()
+{
+ AST::AttrVec inner_attributes;
+
+ // only try to parse an inner attribute if it starts with "#!", not just "#"
+ while ((lexer.peek_token ()->get_id () == HASH
+ && lexer.peek_token (1)->get_id () == EXCLAM)
+ || lexer.peek_token ()->get_id () == INNER_DOC_COMMENT)
+ {
+ AST::Attribute inner_attr = parse_inner_attribute ();
+
+ /* Ensure only valid inner attributes are added to the inner_attributes
+ * list */
+ if (!inner_attr.is_empty ())
+ {
+ inner_attributes.push_back (std::move (inner_attr));
+ }
+ else
+ {
+ /* If no more valid inner attributes, break out of loop (only
+ * contiguous inner attributes parsed). */
+ break;
+ }
+ }
+
+ inner_attributes.shrink_to_fit ();
+ return inner_attributes;
+}
+
+// Parse an inner or outer doc comment into a doc attribute
+template <typename ManagedTokenSource>
+AST::Attribute
+Parser<ManagedTokenSource>::parse_doc_comment ()
+{
+ const_TokenPtr token = lexer.peek_token ();
+ Location locus = token->get_locus ();
+ AST::SimplePathSegment segment ("doc", locus);
+ std::vector<AST::SimplePathSegment> segments;
+ segments.push_back (std::move (segment));
+ AST::SimplePath attr_path (std::move (segments), false, locus);
+ AST::LiteralExpr lit_expr (token->get_str (), AST::Literal::STRING,
+ PrimitiveCoreType::CORETYPE_STR, {}, locus);
+ std::unique_ptr<AST::AttrInput> attr_input (
+ new AST::AttrInputLiteral (std::move (lit_expr)));
+ lexer.skip_token ();
+ return AST::Attribute (std::move (attr_path), std::move (attr_input), locus);
+}
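+
+/* In other words, a doc comment token is desugared here into the equivalent
+ * of `#[doc = "..."]` (or `#![doc = "..."]` for an inner doc comment): a
+ * simple path `doc` with a string-literal attribute input holding the
+ * comment text. */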
+
+// Parse a single inner attribute.
+template <typename ManagedTokenSource>
+AST::Attribute
+Parser<ManagedTokenSource>::parse_inner_attribute ()
+{
+ if (lexer.peek_token ()->get_id () == INNER_DOC_COMMENT)
+ return parse_doc_comment ();
+
+ if (lexer.peek_token ()->get_id () != HASH)
+ {
+ Error error (lexer.peek_token ()->get_locus (),
+ "BUG: token %<#%> is missing, but % "
+ "was invoked");
+ add_error (std::move (error));
+
+ return AST::Attribute::create_empty ();
+ }
+ lexer.skip_token ();
+
+ if (lexer.peek_token ()->get_id () != EXCLAM)
+ {
+ Error error (lexer.peek_token ()->get_locus (),
+ "expected % or %<[%> for inner attribute");
+ add_error (std::move (error));
+
+ return AST::Attribute::create_empty ();
+ }
+ lexer.skip_token ();
+
+ if (!skip_token (LEFT_SQUARE))
+ return AST::Attribute::create_empty ();
+
+ AST::Attribute actual_attribute = parse_attribute_body ();
+
+ if (!skip_token (RIGHT_SQUARE))
+ return AST::Attribute::create_empty ();
+
+ return actual_attribute;
+}
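+
+/* For reference: an inner attribute is written `#![allow(dead_code)]` and
+ * applies to the enclosing item (e.g. the crate or module), whereas an
+ * outer attribute `#[allow(dead_code)]` applies to the item that follows
+ * it; this function handles the former, parse_outer_attribute the latter. */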
+
+// Parses the body of an attribute (inner or outer).
+template <typename ManagedTokenSource>
+AST::Attribute
+Parser<ManagedTokenSource>::parse_attribute_body ()
+{
+ Location locus = lexer.peek_token ()->get_locus ();
+
+ AST::SimplePath attr_path = parse_simple_path ();
+ // ensure path is valid to parse attribute input
+ if (attr_path.is_empty ())
+ {
+ Error error (lexer.peek_token ()->get_locus (),
+ "empty simple path in attribute");
+ add_error (std::move (error));
+
+ // Skip past potential further info in attribute (i.e. attr_input)
+ skip_after_end_attribute ();
+ return AST::Attribute::create_empty ();
+ }
+
+ std::unique_ptr<AST::AttrInput> attr_input = parse_attr_input ();
+ // AttrInput is allowed to be null, so no checks here
+
+ return AST::Attribute (std::move (attr_path), std::move (attr_input), locus);
+}
+
+/* Determines whether token is a valid simple path segment. This does not
+ * include scope resolution operators. */
+inline bool
+is_simple_path_segment (TokenId id)
+{
+ switch (id)
+ {
+ case IDENTIFIER:
+ case SUPER:
+ case SELF:
+ case CRATE:
+ return true;
+ case DOLLAR_SIGN:
+ // assume that dollar sign leads to $crate
+ return true;
+ default:
+ return false;
+ }
+}
+
+// Parses a SimplePath AST node, if it exists. Does nothing otherwise.
+template <typename ManagedTokenSource>
+AST::SimplePath
+Parser<ManagedTokenSource>::parse_simple_path ()
+{
+ bool has_opening_scope_resolution = false;
+ Location locus = Linemap::unknown_location ();
+
+ // don't parse anything if not a path upfront
+ if (!is_simple_path_segment (lexer.peek_token ()->get_id ())
+ && !is_simple_path_segment (lexer.peek_token (1)->get_id ()))
+ return AST::SimplePath::create_empty ();
+
+ /* Checks for opening scope resolution (i.e. global scope fully-qualified
+ * path) */
+ if (lexer.peek_token ()->get_id () == SCOPE_RESOLUTION)
+ {
+ has_opening_scope_resolution = true;
+
+ locus = lexer.peek_token ()->get_locus ();
+
+ lexer.skip_token ();
+ }
+
+ // Parse single required simple path segment
+ AST::SimplePathSegment segment = parse_simple_path_segment ();
+
+ // get location if not gotten already
+ if (locus == Linemap::unknown_location ())
+ locus = segment.get_locus ();
+
+ std::vector<AST::SimplePathSegment> segments;
+
+ // Return empty vector if first, actually required segment is an error
+ if (segment.is_error ())
+ return AST::SimplePath::create_empty ();
+
+ segments.push_back (std::move (segment));
+
+ // Parse all other simple path segments
+ while (lexer.peek_token ()->get_id () == SCOPE_RESOLUTION)
+ {
+ // Skip scope resolution operator
+ lexer.skip_token ();
+
+ AST::SimplePathSegment new_segment = parse_simple_path_segment ();
+
+ // Return path as currently constructed if segment in error state.
+ if (new_segment.is_error ())
+ break;
+
+ segments.push_back (std::move (new_segment));
+ }
+
+ // DEBUG: check for any empty segments
+ for (const auto &seg : segments)
+ {
+ if (seg.is_error ())
+ {
+ rust_debug (
+ "when parsing simple path, somehow empty path segment was "
+ "not filtered out. Path begins with '%s'",
+ segments.at (0).as_string ().c_str ());
+ }
+ }
+
+ return AST::SimplePath (std::move (segments), has_opening_scope_resolution,
+ locus);
+ /* TODO: now that is_simple_path_segment exists, could probably start
+ * actually making errors upon parse failure of segments and whatever */
+}
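+
+/* Simple paths (paths without generic arguments) are the restricted paths
+ * used in attributes, use declarations and macro invocations - e.g.
+ * `crate::foo::bar`, or `::std::mem::swap`, where the leading `::` sets
+ * has_opening_scope_resolution. */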
+
+/* Parses a single SimplePathSegment (does not handle the scope resolution
+ * operators) */
+template <typename ManagedTokenSource>
+AST::SimplePathSegment
+Parser<ManagedTokenSource>::parse_simple_path_segment ()
+{
+ const_TokenPtr t = lexer.peek_token ();
+ switch (t->get_id ())
+ {
+ case IDENTIFIER:
+ lexer.skip_token ();
+
+ return AST::SimplePathSegment (t->get_str (), t->get_locus ());
+ case SUPER:
+ lexer.skip_token ();
+
+ return AST::SimplePathSegment ("super", t->get_locus ());
+ case SELF:
+ lexer.skip_token ();
+
+ return AST::SimplePathSegment ("self", t->get_locus ());
+ case CRATE:
+ lexer.skip_token ();
+
+ return AST::SimplePathSegment ("crate", t->get_locus ());
+ case DOLLAR_SIGN:
+ if (lexer.peek_token (1)->get_id () == CRATE)
+ {
+ lexer.skip_token (1);
+
+ return AST::SimplePathSegment ("$crate", t->get_locus ());
+ }
+ gcc_fallthrough ();
+ default:
+ // do nothing - this case only exists to silence a gcc warning
+ /* could put the rust_error_at call here, but since we may have fallen
+ * through from the failing $crate condition above, it is not completely
+ * obvious whether this actually is an error. */
+
+ // test prevent error
+ return AST::SimplePathSegment::create_error ();
+ }
+ gcc_unreachable ();
+ /*rust_error_at(
+ t->get_locus(), "invalid token '%s' in simple path segment",
+ t->get_token_description());*/
+ // this is not necessarily an error, e.g. end of path
+ // return AST::SimplePathSegment::create_error();
+}
+
+// Parses a PathIdentSegment - an identifier segment of a non-SimplePath path.
+template <typename ManagedTokenSource>
+AST::PathIdentSegment
+Parser<ManagedTokenSource>::parse_path_ident_segment ()
+{
+ const_TokenPtr t = lexer.peek_token ();
+ switch (t->get_id ())
+ {
+ case IDENTIFIER:
+ lexer.skip_token ();
+
+ return AST::PathIdentSegment (t->get_str (), t->get_locus ());
+ case SUPER:
+ lexer.skip_token ();
+
+ return AST::PathIdentSegment ("super", t->get_locus ());
+ case SELF:
+ lexer.skip_token ();
+
+ return AST::PathIdentSegment ("self", t->get_locus ());
+ case SELF_ALIAS:
+ lexer.skip_token ();
+
+ return AST::PathIdentSegment ("Self", t->get_locus ());
+ case CRATE:
+ lexer.skip_token ();
+
+ return AST::PathIdentSegment ("crate", t->get_locus ());
+ case DOLLAR_SIGN:
+ if (lexer.peek_token (1)->get_id () == CRATE)
+ {
+ lexer.skip_token (1);
+
+ return AST::PathIdentSegment ("$crate", t->get_locus ());
+ }
+ gcc_fallthrough ();
+ default:
+ /* do nothing - this case only exists to silence a gcc warning.
+ * We could put the error_at call here, but since we may have fallen
+ * through from the failing $crate condition above, it is not completely
+ * obvious whether this actually is an error. */
+
+ // test prevent error
+ return AST::PathIdentSegment::create_error ();
+ }
+ gcc_unreachable ();
+ // not necessarily an error
+}
+
+// Parses an AttrInput AST node (polymorphic, as AttrInput is abstract)
+template <typename ManagedTokenSource>
+std::unique_ptr<AST::AttrInput>
+Parser<ManagedTokenSource>::parse_attr_input ()
+{
+ const_TokenPtr t = lexer.peek_token ();
+ switch (t->get_id ())
+ {
+ case LEFT_PAREN:
+ case LEFT_SQUARE:
+ case LEFT_CURLY: {
+ // must be a delimited token tree, so parse that
+ std::unique_ptr<AST::AttrInput> input_tree (
+ new AST::DelimTokenTree (parse_delim_token_tree ()));
+
+ // TODO: potential checks on DelimTokenTree before returning
+
+ return input_tree;
+ }
+ case EQUAL: {
+ // = LiteralExpr
+ lexer.skip_token ();
+
+ t = lexer.peek_token ();
+
+ /* Ensure token is a "literal expression" (literally only a literal
+ * token of any type) */
+ if (!t->is_literal ())
+ {
+ Error error (
+ t->get_locus (),
+ "unknown token %qs in attribute body - literal expected",
+ t->get_token_description ());
+ add_error (std::move (error));
+
+ skip_after_end_attribute ();
+ return nullptr;
+ }
+
+ AST::Literal::LitType lit_type = AST::Literal::STRING;
+ // Crappy mapping of token type to literal type
+ switch (t->get_id ())
+ {
+ case INT_LITERAL:
+ lit_type = AST::Literal::INT;
+ break;
+ case FLOAT_LITERAL:
+ lit_type = AST::Literal::FLOAT;
+ break;
+ case CHAR_LITERAL:
+ lit_type = AST::Literal::CHAR;
+ break;
+ case BYTE_CHAR_LITERAL:
+ lit_type = AST::Literal::BYTE;
+ break;
+ case BYTE_STRING_LITERAL:
+ lit_type = AST::Literal::BYTE_STRING;
+ break;
+ case STRING_LITERAL:
+ default:
+ lit_type = AST::Literal::STRING;
+ break; // TODO: raw string? don't eliminate it from lexer?
+ }
+
+ // create actual LiteralExpr
+ AST::LiteralExpr lit_expr (t->get_str (), lit_type, t->get_type_hint (),
+ {}, t->get_locus ());
+ lexer.skip_token ();
+
+ std::unique_ptr<AST::AttrInputLiteral> attr_input_lit (
+ new AST::AttrInputLiteral (std::move (lit_expr)));
+
+ // do checks or whatever? none required, really
+
+ // FIXME: shouldn't a skip token be required here?
+
+ return attr_input_lit;
+ }
+ break;
+ case RIGHT_SQUARE:
+ // means AttrInput is missing, which is allowed
+ return nullptr;
+ default:
+ add_error (
+ Error (t->get_locus (),
+ "unknown token %qs in attribute body - attribute input or "
+ "none expected",
+ t->get_token_description ()));
+
+ skip_after_end_attribute ();
+ return nullptr;
+ }
+ gcc_unreachable ();
+ // TODO: find out how to stop gcc error on "no return value"
+}
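+
+/* Concretely: in `#[derive(Debug)]` the attribute input is the delimited
+ * token tree `(Debug)`, while in `#[path = "foo.rs"]` it is the literal
+ * expression `"foo.rs"` after the `=`; a bare `#[test]` has no attribute
+ * input at all, which is why nullptr is an accepted result here. */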
+
+/* Returns true if the token id matches the delimiter type. Note that this only
+ * operates for END delimiter tokens. */
+inline bool
+token_id_matches_delims (TokenId token_id, AST::DelimType delim_type)
+{
+ return ((token_id == RIGHT_PAREN && delim_type == AST::PARENS)
+ || (token_id == RIGHT_SQUARE && delim_type == AST::SQUARE)
+ || (token_id == RIGHT_CURLY && delim_type == AST::CURLY));
+}
+
+/* Returns true if the likely result of parsing the next few tokens is a path.
+ * Not guaranteed, though, especially in the case of syntax errors. */
+inline bool
+is_likely_path_next (TokenId next_token_id)
+{
+ switch (next_token_id)
+ {
+ case IDENTIFIER:
+ case SUPER:
+ case SELF:
+ case SELF_ALIAS:
+ case CRATE:
+ // maybe - maybe do extra check. But then requires another TokenId.
+ case DOLLAR_SIGN:
+ case SCOPE_RESOLUTION:
+ return true;
+ default:
+ return false;
+ }
+}
+
+// Parses a delimited token tree
+template <typename ManagedTokenSource>
+AST::DelimTokenTree
+Parser<ManagedTokenSource>::parse_delim_token_tree ()
+{
+ const_TokenPtr t = lexer.peek_token ();
+ lexer.skip_token ();
+ Location initial_loc = t->get_locus ();
+
+ // save delim type to ensure it is reused later
+ AST::DelimType delim_type = AST::PARENS;
+
+ // Map tokens to DelimType
+ switch (t->get_id ())
+ {
+ case LEFT_PAREN:
+ delim_type = AST::PARENS;
+ break;
+ case LEFT_SQUARE:
+ delim_type = AST::SQUARE;
+ break;
+ case LEFT_CURLY:
+ delim_type = AST::CURLY;
+ break;
+ default:
+ add_error (Error (t->get_locus (),
+ "unexpected token %qs - expecting delimiters (for a "
+ "delimited token tree)",
+ t->get_token_description ()));
+
+ return AST::DelimTokenTree::create_empty ();
+ }
+
+ // parse actual token tree vector - 0 or more
+ std::vector<std::unique_ptr<AST::TokenTree>> token_trees_in_tree;
+ auto delim_open
+ = std::unique_ptr<AST::Token> (new AST::Token (std::move (t)));
+ token_trees_in_tree.push_back (std::move (delim_open));
+
+ // repeat loop until finding the matching delimiter
+ t = lexer.peek_token ();
+ while (!token_id_matches_delims (t->get_id (), delim_type)
+ && t->get_id () != END_OF_FILE)
+ {
+ std::unique_ptr<AST::TokenTree> tok_tree = parse_token_tree ();
+
+ if (tok_tree == nullptr)
+ {
+ // TODO: is this error handling appropriate?
+ Error error (
+ t->get_locus (),
+ "failed to parse token tree in delimited token tree - found %qs",
+ t->get_token_description ());
+ add_error (std::move (error));
+
+ return AST::DelimTokenTree::create_empty ();
+ }
+
+ token_trees_in_tree.push_back (std::move (tok_tree));
+
+ // lexer.skip_token();
+ t = lexer.peek_token ();
+ }
+ auto delim_close
+ = std::unique_ptr<AST::Token> (new AST::Token (std::move (t)));
+ token_trees_in_tree.push_back (std::move (delim_close));
+
+ AST::DelimTokenTree token_tree (delim_type, std::move (token_trees_in_tree),
+ initial_loc);
+
+ // parse end delimiters
+ t = lexer.peek_token ();
+
+ if (token_id_matches_delims (t->get_id (), delim_type))
+ {
+ // found the closing delimiter matching the opening one, so skip it.
+ lexer.skip_token ();
+
+ // DEBUG
+ rust_debug ("finished parsing new delim token tree - peeked token is now "
+ "'%s' while t is '%s'",
+ lexer.peek_token ()->get_token_description (),
+ t->get_token_description ());
+
+ return token_tree;
+ }
+ else
+ {
+ // didn't find a closing delimiter matching the opening one, so produce error
+ Error error (t->get_locus (),
+ "unexpected token %qs - expecting closing delimiter %qs "
+ "(for a delimited token tree)",
+ t->get_token_description (),
+ (delim_type == AST::PARENS
+ ? ")"
+ : (delim_type == AST::SQUARE ? "]" : "}")));
+ add_error (std::move (error));
+
+ /* return empty token tree despite possibly parsing valid token tree -
+ * TODO is this a good idea? */
+ return AST::DelimTokenTree::create_empty ();
+ }
+}
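+
+/* For example, in the attribute `#[cfg(target_os = "linux")]` the body
+ * `(target_os = "linux")` is a delimited token tree with PARENS as its
+ * delimiter type; any nested delimiters inside it are handled recursively
+ * through parse_token_tree. */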
+
+/* Parses a TokenTree syntactical production. This is either a delimited token
+ * tree or a non-delimiter token. */
+template <typename ManagedTokenSource>
+std::unique_ptr<AST::TokenTree>
+Parser<ManagedTokenSource>::parse_token_tree ()
+{
+ const_TokenPtr t = lexer.peek_token ();
+
+ switch (t->get_id ())
+ {
+ case LEFT_PAREN:
+ case LEFT_SQUARE:
+ case LEFT_CURLY:
+ // Parse delimited token tree
+ // TODO: use move rather than copy constructor
+ return std::unique_ptr<AST::DelimTokenTree> (
+ new AST::DelimTokenTree (parse_delim_token_tree ()));
+ case RIGHT_PAREN:
+ case RIGHT_SQUARE:
+ case RIGHT_CURLY:
+ // error - should not be called when the current token is a closing delimiter
+ add_error (
+ Error (t->get_locus (),
+ "unexpected closing delimiter %qs - token tree requires "
+ "either paired delimiters or non-delimiter tokens",
+ t->get_token_description ()));
+
+ lexer.skip_token ();
+ return nullptr;
+ default:
+ // parse token itself as TokenTree
+ lexer.skip_token ();
+ return std::unique_ptr<AST::Token> (new AST::Token (std::move (t)));
+ }
+}
+
+// Parses a single item
+template <typename ManagedTokenSource>
+std::unique_ptr<AST::Item>
+Parser<ManagedTokenSource>::parse_item (bool called_from_statement)
+{
+ // has a "called_from_statement" parameter for better error message handling
+
+ // parse outer attributes for item
+ AST::AttrVec outer_attrs = parse_outer_attributes ();
+
+ // TODO: decide how to deal with VisItem vs MacroItem dichotomy
+ /* best current solution: catch all keywords that would imply a VisItem in a
+ * switch and have MacroItem as a last resort */
+
+ const_TokenPtr t = lexer.peek_token ();
+
+ switch (t->get_id ())
+ {
+ case END_OF_FILE:
+ // not necessarily an error, unless we just read outer
+ // attributes which need to be attached
+ if (!outer_attrs.empty ())
+ {
+ Rust::AST::Attribute attr = outer_attrs.back ();
+ Error error (attr.get_locus (),
+ "expected item after outer attribute or doc comment");
+ add_error (std::move (error));
+ }
+ return nullptr;
+ case PUB:
+ case MOD:
+ case EXTERN_TOK:
+ case USE:
+ case FN_TOK:
+ case TYPE:
+ case STRUCT_TOK:
+ case ENUM_TOK:
+ case CONST:
+ case STATIC_TOK:
+ case TRAIT:
+ case IMPL:
+ /* TODO: implement the union keyword properly - not done yet because of
+ * its context-dependence; the crappy hack used to parse a union is
+ * written below, separated from the good code. */
+ // case UNION:
+ case UNSAFE: // maybe - unsafe traits are a thing
+ // if any of these (should be all possible VisItem prefixes), parse a
+ // VisItem
+ return parse_vis_item (std::move (outer_attrs));
+ break;
+ case SUPER:
+ case SELF:
+ case CRATE:
+ case DOLLAR_SIGN:
+ // almost certainly macro invocation semi
+ return parse_macro_item (std::move (outer_attrs));
+ break;
+ // crappy hack to do union "keyword"
+ case IDENTIFIER:
+ // TODO: ensure std::string and literal comparison works
+ if (t->get_str () == "union"
+ && lexer.peek_token (1)->get_id () == IDENTIFIER)
+ {
+ return parse_vis_item (std::move (outer_attrs));
+ // or should this go straight to parsing union?
+ }
+ else if (t->get_str () == "macro_rules")
+ {
+ // macro_rules! macro item
+ return parse_macro_item (std::move (outer_attrs));
+ }
+ else if (lexer.peek_token (1)->get_id () == SCOPE_RESOLUTION
+ || lexer.peek_token (1)->get_id () == EXCLAM)
+ {
+ /* path (probably) or macro invocation, so probably a macro invocation
+ * semi */
+ return parse_macro_item (std::move (outer_attrs));
+ }
+ gcc_fallthrough ();
+ default:
+ // otherwise unrecognised
+ // return parse_macro_item(std::move(outer_attrs));
+ add_error (Error (t->get_locus (),
+ "unrecognised token %qs for start of %s",
+ t->get_token_description (),
+ called_from_statement ? "statement" : "item"));
+
+ // skip somewhere?
+ return nullptr;
+ break;
+ }
+}
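+
+/* Note on the `union` hack above: `union` is a contextual keyword in Rust,
+ * so `union MyUnion { f: u32 }` is only treated as an item when the next
+ * token is an identifier; otherwise `union` remains usable as an ordinary
+ * identifier (e.g. a variable or function named `union`). */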
+
+// Parses a contiguous block of outer attributes.
+template <typename ManagedTokenSource>
+AST::AttrVec
+Parser<ManagedTokenSource>::parse_outer_attributes ()
+{
+ AST::AttrVec outer_attributes;
+
+ while (lexer.peek_token ()->get_id ()
+ == HASH /* Can also be #!, which catches errors. */
+ || lexer.peek_token ()->get_id () == OUTER_DOC_COMMENT
+ || lexer.peek_token ()->get_id ()
+ == INNER_DOC_COMMENT) /* For error handling. */
+ {
+ AST::Attribute outer_attr = parse_outer_attribute ();
+
+ /* Ensure only valid outer attributes are added to the outer_attributes
+ * list */
+ if (!outer_attr.is_empty ())
+ {
+ outer_attributes.push_back (std::move (outer_attr));
+ }
+ else
+ {
+ /* If no more valid outer attributes, break out of loop (only
+ * contiguous outer attributes parsed). */
+ break;
+ }
+ }
+
+ outer_attributes.shrink_to_fit ();
+ return outer_attributes;
+
+ /* TODO: this shares basically all code with parse_inner_attributes except
+ * function call - find way of making it more modular? function pointer? */
+}
+
+// Parse a single outer attribute.
+template <typename ManagedTokenSource>
+AST::Attribute
+Parser<ManagedTokenSource>::parse_outer_attribute ()
+{
+ if (lexer.peek_token ()->get_id () == OUTER_DOC_COMMENT)
+ return parse_doc_comment ();
+
+ if (lexer.peek_token ()->get_id () == INNER_DOC_COMMENT)
+ {
+ Error error (
+ lexer.peek_token ()->get_locus (),
+ "inner doc (%/!%> or %*!%>) only allowed at start of item "
+ "and before any outer attribute or doc (%<#[%>, %//%> or %**%>)");
+ add_error (std::move (error));
+ lexer.skip_token ();
+ return AST::Attribute::create_empty ();
+ }
+
+ /* OuterAttribute -> '#' '[' Attr ']' */
+
+ if (lexer.peek_token ()->get_id () != HASH)
+ return AST::Attribute::create_empty ();
+
+ lexer.skip_token ();
+
+ TokenId id = lexer.peek_token ()->get_id ();
+ if (id != LEFT_SQUARE)
+ {
+ if (id == EXCLAM)
+ {
+ // this is inner attribute syntax, so throw error
+ // inner attributes were either already parsed or not allowed here.
+ Error error (
+ lexer.peek_token ()->get_locus (),
+ "token % found, indicating inner attribute definition. Inner "
+ "attributes are not possible at this location");
+ add_error (std::move (error));
+ }
+ return AST::Attribute::create_empty ();
+ }
+
+ lexer.skip_token ();
+
+ AST::Attribute actual_attribute = parse_attribute_body ();
+
+ if (lexer.peek_token ()->get_id () != RIGHT_SQUARE)
+ return AST::Attribute::create_empty ();
+
+ lexer.skip_token ();
+
+ return actual_attribute;
+}
+
+// Parses a VisItem (item that can have non-default visibility).
+template <typename ManagedTokenSource>
+std::unique_ptr<AST::VisItem>
+Parser<ManagedTokenSource>::parse_vis_item (AST::AttrVec outer_attrs)
+{
+ // parse visibility, which may or may not exist
+ AST::Visibility vis = parse_visibility ();
+
+ // select VisItem to create depending on keyword
+ const_TokenPtr t = lexer.peek_token ();
+
+ switch (t->get_id ())
+ {
+ case MOD:
+ return parse_module (std::move (vis), std::move (outer_attrs));
+ case EXTERN_TOK:
+ // lookahead to resolve syntactical production
+ t = lexer.peek_token (1);
+
+ switch (t->get_id ())
+ {
+ case CRATE:
+ return parse_extern_crate (std::move (vis), std::move (outer_attrs));
+ case FN_TOK: // extern function
+ return parse_function (std::move (vis), std::move (outer_attrs));
+ case LEFT_CURLY: // extern block
+ return parse_extern_block (std::move (vis), std::move (outer_attrs));
+ case STRING_LITERAL: // for specifying extern ABI
+ // could be extern block or extern function, so more lookahead
+ t = lexer.peek_token (2);
+
+ switch (t->get_id ())
+ {
+ case FN_TOK:
+ return parse_function (std::move (vis), std::move (outer_attrs));
+ case LEFT_CURLY:
+ return parse_extern_block (std::move (vis),
+ std::move (outer_attrs));
+ default:
+ add_error (
+ Error (t->get_locus (),
+ "unexpected token %qs in some sort of extern production",
+ t->get_token_description ()));
+
+ lexer.skip_token (2); // TODO: is this right thing to do?
+ return nullptr;
+ }
+ default:
+ add_error (
+ Error (t->get_locus (),
+ "unexpected token %qs in some sort of extern production",
+ t->get_token_description ()));
+
+ lexer.skip_token (1); // TODO: is this right thing to do?
+ return nullptr;
+ }
+ case USE:
+ return parse_use_decl (std::move (vis), std::move (outer_attrs));
+ case FN_TOK:
+ return parse_function (std::move (vis), std::move (outer_attrs));
+ case TYPE:
+ return parse_type_alias (std::move (vis), std::move (outer_attrs));
+ case STRUCT_TOK:
+ return parse_struct (std::move (vis), std::move (outer_attrs));
+ case ENUM_TOK:
+ return parse_enum (std::move (vis), std::move (outer_attrs));
+ // TODO: implement union keyword but not really because of
+ // context-dependence
+ // case UNION: crappy hack to do union "keyword"
+ case IDENTIFIER:
+ if (t->get_str () == "union"
+ && lexer.peek_token (1)->get_id () == IDENTIFIER)
+ {
+ return parse_union (std::move (vis), std::move (outer_attrs));
+ // or should item switch go straight to parsing union?
+ }
+ else
+ {
+ break;
+ }
+ case CONST:
+ // lookahead to resolve syntactical production
+ t = lexer.peek_token (1);
+
+ switch (t->get_id ())
+ {
+ case IDENTIFIER:
+ case UNDERSCORE:
+ return parse_const_item (std::move (vis), std::move (outer_attrs));
+ case UNSAFE:
+ case EXTERN_TOK:
+ case FN_TOK:
+ return parse_function (std::move (vis), std::move (outer_attrs));
+ default:
+ add_error (
+ Error (t->get_locus (),
+ "unexpected token %qs in some sort of const production",
+ t->get_token_description ()));
+
+ lexer.skip_token (1); // TODO: is this right thing to do?
+ return nullptr;
+ }
+ case STATIC_TOK:
+ return parse_static_item (std::move (vis), std::move (outer_attrs));
+ case TRAIT:
+ return parse_trait (std::move (vis), std::move (outer_attrs));
+ case IMPL:
+ return parse_impl (std::move (vis), std::move (outer_attrs));
+ case UNSAFE: // unsafe traits, unsafe functions, unsafe impls (trait impls),
+ // lookahead to resolve syntactical production
+ t = lexer.peek_token (1);
+
+ switch (t->get_id ())
+ {
+ case TRAIT:
+ return parse_trait (std::move (vis), std::move (outer_attrs));
+ case EXTERN_TOK:
+ case FN_TOK:
+ return parse_function (std::move (vis), std::move (outer_attrs));
+ case IMPL:
+ return parse_impl (std::move (vis), std::move (outer_attrs));
+ default:
+ add_error (
+ Error (t->get_locus (),
+ "unexpected token %qs in some sort of unsafe production",
+ t->get_token_description ()));
+
+ lexer.skip_token (1); // TODO: is this right thing to do?
+ return nullptr;
+ }
+ default:
+ // otherwise vis item clearly doesn't exist, which is not an error
+ // has a catch-all post-switch return to allow other breaks to occur
+ break;
+ }
+ return nullptr;
+}
+
+// Parses a MacroItem (either a MacroInvocationSemi or MacroRulesDefinition).
+template <typename ManagedTokenSource>
+std::unique_ptr<AST::MacroItem>
+Parser<ManagedTokenSource>::parse_macro_item (AST::AttrVec outer_attrs)
+{
+ const_TokenPtr t = lexer.peek_token ();
+
+ /* dodgy way of detecting macro due to weird context-dependence thing.
+ * probably can be improved */
+ // TODO: ensure that string compare works properly
+ if (t->get_id () == IDENTIFIER && t->get_str () == "macro_rules")
+ {
+ return parse_macro_rules_def (std::move (outer_attrs));
+ }
+ else
+ {
+ // DEBUG: TODO: remove
+ rust_debug (
+ "DEBUG - parse_macro_item called and token is not macro_rules");
+ if (t->get_id () == IDENTIFIER)
+ {
+ rust_debug ("just add to last error: token is not macro_rules and is "
+ "instead '%s'",
+ t->get_str ().c_str ());
+ }
+ else
+ {
+ rust_debug ("just add to last error: token is not macro_rules and is "
+ "not an identifier either - it is '%s'",
+ t->get_token_description ());
+ }
+
+ return parse_macro_invocation_semi (std::move (outer_attrs));
+ }
+}
+
+// Parses a macro rules definition (a "macro_rules!" syntax extension).
+template <typename ManagedTokenSource>
+std::unique_ptr<AST::MacroRulesDefinition>
+Parser<ManagedTokenSource>::parse_macro_rules_def (AST::AttrVec outer_attrs)
+{
+ // ensure that first token is identifier saying "macro_rules"
+ const_TokenPtr t = lexer.peek_token ();
+ if (t->get_id () != IDENTIFIER || t->get_str () != "macro_rules")
+ {
+ Error error (
+ t->get_locus (),
+ "macro rules definition does not start with %");
+ add_error (std::move (error));
+
+ // skip after somewhere?
+ return nullptr;
+ }
+ lexer.skip_token ();
+ Location macro_locus = t->get_locus ();
+
+ if (!skip_token (EXCLAM))
+ {
+ // skip after somewhere?
+ return nullptr;
+ }
+
+ // parse macro name
+ const_TokenPtr ident_tok = expect_token (IDENTIFIER);
+ if (ident_tok == nullptr)
+ {
+ return nullptr;
+ }
+ Identifier rule_name = ident_tok->get_str ();
+
+ // DEBUG
+ rust_debug ("in macro rules def, about to parse parens.");
+
+ // save delim type to ensure it is reused later
+ AST::DelimType delim_type = AST::PARENS;
+
+ // Map tokens to DelimType
+ t = lexer.peek_token ();
+ switch (t->get_id ())
+ {
+ case LEFT_PAREN:
+ delim_type = AST::PARENS;
+ break;
+ case LEFT_SQUARE:
+ delim_type = AST::SQUARE;
+ break;
+ case LEFT_CURLY:
+ delim_type = AST::CURLY;
+ break;
+ default:
+ add_error (Error (t->get_locus (),
+ "unexpected token %qs - expecting delimiters (for a "
+ "macro rules definition)",
+ t->get_token_description ()));
+
+ return nullptr;
+ }
+ lexer.skip_token ();
+
+ // parse actual macro rules
+ std::vector<AST::MacroRule> macro_rules;
+
+ // must be at least one macro rule, so parse it
+ AST::MacroRule initial_rule = parse_macro_rule ();
+ if (initial_rule.is_error ())
+ {
+ Error error (lexer.peek_token ()->get_locus (),
+ "required first macro rule in macro rules definition "
+ "could not be parsed");
+ add_error (std::move (error));
+
+ // skip after somewhere?
+ return nullptr;
+ }
+ macro_rules.push_back (std::move (initial_rule));
+
+ // DEBUG
+ rust_debug ("successfully pushed back initial macro rule");
+
+ t = lexer.peek_token ();
+ // parse macro rules
+ while (t->get_id () == SEMICOLON)
+ {
+ // skip semicolon
+ lexer.skip_token ();
+
+ // don't parse if end of macro rules
+ if (token_id_matches_delims (lexer.peek_token ()->get_id (), delim_type))
+ {
+ // DEBUG
+ rust_debug (
+ "broke out of parsing macro rules loop due to finding delim");
+
+ break;
+ }
+
+ // try to parse next rule
+ AST::MacroRule rule = parse_macro_rule ();
+ if (rule.is_error ())
+ {
+ Error error (lexer.peek_token ()->get_locus (),
+ "failed to parse macro rule in macro rules definition");
+ add_error (std::move (error));
+
+ return nullptr;
+ }
+
+ macro_rules.push_back (std::move (rule));
+
+ // DEBUG
+ rust_debug ("successfully pushed back another macro rule");
+
+ t = lexer.peek_token ();
+ }
+
+ // parse end delimiters
+ t = lexer.peek_token ();
+ if (token_id_matches_delims (t->get_id (), delim_type))
+ {
+ // found the closing delimiter matching the opening one, so skip it.
+ lexer.skip_token ();
+
+ if (delim_type != AST::CURLY)
+ {
+ // skip semicolon at end of non-curly macro definitions
+ if (!skip_token (SEMICOLON))
+ {
+ // as this is the end, allow recovery (probably) - may change
+ return std::unique_ptr<AST::MacroRulesDefinition> (
+ new AST::MacroRulesDefinition (
+ std::move (rule_name), delim_type, std::move (macro_rules),
+ std::move (outer_attrs), macro_locus));
+ }
+ }
+
+ return std::unique_ptr<AST::MacroRulesDefinition> (
+ new AST::MacroRulesDefinition (std::move (rule_name), delim_type,
+ std::move (macro_rules),
+ std::move (outer_attrs), macro_locus));
+ }
+ else
+ {
+ // didn't find a closing delimiter matching the opening one, so produce error
+ Error error (t->get_locus (),
+ "unexpected token %qs - expecting closing delimiter %qs "
+ "(for a macro rules definition)",
+ t->get_token_description (),
+ (delim_type == AST::PARENS
+ ? ")"
+ : (delim_type == AST::SQUARE ? "]" : "}")));
+ add_error (std::move (error));
+
+ /* return empty macro definition despite possibly parsing mostly valid one
+ * - TODO is this a good idea? */
+ return nullptr;
+ }
+}
+
+// Parses a semi-coloned (except for full block) macro invocation item.
+template <typename ManagedTokenSource>
+std::unique_ptr<AST::MacroInvocation>
+Parser<ManagedTokenSource>::parse_macro_invocation_semi (
+ AST::AttrVec outer_attrs)
+{
+ Location macro_locus = lexer.peek_token ()->get_locus ();
+ AST::SimplePath path = parse_simple_path ();
+
+ if (!skip_token (EXCLAM))
+ {
+ // skip after somewhere?
+ return nullptr;
+ }
+
+ // save delim type to ensure it is reused later
+ AST::DelimType delim_type = AST::PARENS;
+
+ // Map tokens to DelimType
+ const_TokenPtr t = lexer.peek_token ();
+ switch (t->get_id ())
+ {
+ case LEFT_PAREN:
+ delim_type = AST::PARENS;
+ break;
+ case LEFT_SQUARE:
+ delim_type = AST::SQUARE;
+ break;
+ case LEFT_CURLY:
+ delim_type = AST::CURLY;
+ break;
+ default:
+ add_error (Error (t->get_locus (),
+ "unexpected token %qs - expecting delimiters (for a "
+ "macro invocation semi body)",
+ t->get_token_description ()));
+
+ return nullptr;
+ }
+ Location tok_tree_locus = t->get_locus ();
+ lexer.skip_token ();
+
+ // parse actual token trees
+ std::vector<std::unique_ptr<AST::TokenTree>> token_trees;
+ auto delim_open
+ = std::unique_ptr<AST::Token> (new AST::Token (std::move (t)));
+ token_trees.push_back (std::move (delim_open));
+
+ t = lexer.peek_token ();
+ // parse token trees until the initial delimiter token is found again
+ while (!token_id_matches_delims (t->get_id (), delim_type))
+ {
+ std::unique_ptr tree = parse_token_tree ();
+
+ if (tree == nullptr)
+ {
+ Error error (t->get_locus (),
+ "failed to parse token tree for macro invocation semi "
+ "- found %qs",
+ t->get_token_description ());
+ add_error (std::move (error));
+
+ return nullptr;
+ }
+
+ token_trees.push_back (std::move (tree));
+
+ t = lexer.peek_token ();
+ }
+ auto delim_close
+ = std::unique_ptr<AST::Token> (new AST::Token (std::move (t)));
+ token_trees.push_back (std::move (delim_close));
+
+ AST::DelimTokenTree delim_tok_tree (delim_type, std::move (token_trees),
+ tok_tree_locus);
+ AST::MacroInvocData invoc_data (std::move (path), std::move (delim_tok_tree));
+
+ // parse end delimiters
+ t = lexer.peek_token ();
+ if (token_id_matches_delims (t->get_id (), delim_type))
+ {
+ // found the closing delimiter matching the opening one, so skip it.
+ lexer.skip_token ();
+
+ if (delim_type != AST::CURLY)
+ {
+ // skip semicolon at end of non-curly macro invocation semis
+ if (!skip_token (SEMICOLON))
+ {
+ // as this is the end, allow recovery (probably) - may change
+
+ return std::unique_ptr<AST::MacroInvocation> (
+ new AST::MacroInvocation (std::move (invoc_data),
+ std::move (outer_attrs), macro_locus,
+ true));
+ }
+ }
+
+ // DEBUG:
+ rust_debug ("skipped token is '%s', next token (current peek) is '%s'",
+ t->get_token_description (),
+ lexer.peek_token ()->get_token_description ());
+
+ return std::unique_ptr<AST::MacroInvocation> (
+ new AST::MacroInvocation (std::move (invoc_data),
+ std::move (outer_attrs), macro_locus, true));
+ }
+ else
+ {
+ // didn't find a closing delimiter matching the opening one, so produce error
+ Error error (t->get_locus (),
+ "unexpected token %qs - expecting closing delimiter %qs "
+ "(for a macro invocation semi)",
+ t->get_token_description (),
+ (delim_type == AST::PARENS
+ ? ")"
+ : (delim_type == AST::SQUARE ? "]" : "}")));
+ add_error (std::move (error));
+
+ /* return empty macro invocation despite possibly parsing mostly valid one
+ * - TODO is this a good idea? */
+ return nullptr;
+ }
+}
+
+// Parses a non-semicoloned macro invocation (i.e. as pattern or expression).
+template <typename ManagedTokenSource>
+std::unique_ptr<AST::MacroInvocation>
+Parser<ManagedTokenSource>::parse_macro_invocation (AST::AttrVec outer_attrs)
+{
+ // parse macro path
+ AST::SimplePath macro_path = parse_simple_path ();
+ if (macro_path.is_empty ())
+ {
+ Error error (lexer.peek_token ()->get_locus (),
+ "failed to parse macro invocation path");
+ add_error (std::move (error));
+
+ // skip?
+ return nullptr;
+ }
+
+ if (!skip_token (EXCLAM))
+ {
+ // skip after somewhere?
+ return nullptr;
+ }
+
+ // parse internal delim token tree
+ AST::DelimTokenTree delim_tok_tree = parse_delim_token_tree ();
+
+ Location macro_locus = macro_path.get_locus ();
+
+ return std::unique_ptr<AST::MacroInvocation> (
+ new AST::MacroInvocation (AST::MacroInvocData (std::move (macro_path),
+ std::move (delim_tok_tree)),
+ std::move (outer_attrs), macro_locus));
+}
+
+// Parses a macro rule definition - does not parse semicolons.
+template <typename ManagedTokenSource>
+AST::MacroRule
+Parser<ManagedTokenSource>::parse_macro_rule ()
+{
+ Location locus = lexer.peek_token ()->get_locus ();
+
+ // parse macro matcher
+ AST::MacroMatcher matcher = parse_macro_matcher ();
+
+ if (matcher.is_error ())
+ return AST::MacroRule::create_error (locus);
+
+ if (!skip_token (MATCH_ARROW))
+ {
+ // skip after somewhere?
+ return AST::MacroRule::create_error (locus);
+ }
+
+ // parse transcriber (this is just a delim token tree)
+ Location token_tree_loc = lexer.peek_token ()->get_locus ();
+ AST::MacroTranscriber transcriber (parse_delim_token_tree (), token_tree_loc);
+
+ return AST::MacroRule (std::move (matcher), std::move (transcriber), locus);
+}
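+
+/* A macro rule is the `matcher => transcriber` part of a macro definition.
+ * For instance, in
+ *
+ * macro_rules! square { ($x:expr) => { $x * $x }; }
+ *
+ * the rule parsed here is `($x:expr) => { $x * $x }`, with `($x:expr)` as
+ * the matcher and `{ $x * $x }` as the transcriber token tree. */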
+
+// Parses a macro matcher (part of a macro rule definition).
+template <typename ManagedTokenSource>
+AST::MacroMatcher
+Parser<ManagedTokenSource>::parse_macro_matcher ()
+{
+ // save delim type to ensure it is reused later
+ AST::DelimType delim_type = AST::PARENS;
+
+ // DEBUG
+ rust_debug ("begun parsing macro matcher");
+
+ // Map tokens to DelimType
+ const_TokenPtr t = lexer.peek_token ();
+ Location locus = t->get_locus ();
+ switch (t->get_id ())
+ {
+ case LEFT_PAREN:
+ delim_type = AST::PARENS;
+ break;
+ case LEFT_SQUARE:
+ delim_type = AST::SQUARE;
+ break;
+ case LEFT_CURLY:
+ delim_type = AST::CURLY;
+ break;
+ default:
+ add_error (Error (
+ t->get_locus (),
+ "unexpected token %qs - expecting delimiters (for a macro matcher)",
+ t->get_token_description ()));
+
+ return AST::MacroMatcher::create_error (t->get_locus ());
+ }
+ lexer.skip_token ();
+
+ // parse actual macro matches
+ std::vector<std::unique_ptr<AST::MacroMatch>> matches;
+ // Set of possible preceding macro matches to make sure follow-set
+ // restrictions are respected.
+ // TODO: Consider using std::reference_wrapper instead of raw pointers?
+ std::vector<const AST::MacroMatch *> last_matches;
+
+ t = lexer.peek_token ();
+ // parse token trees until the initial delimiter token is found again
+ while (!token_id_matches_delims (t->get_id (), delim_type))
+ {
+ std::unique_ptr<AST::MacroMatch> match = parse_macro_match ();
+
+ if (match == nullptr)
+ {
+ Error error (
+ t->get_locus (),
+ "failed to parse macro match for macro matcher - found %qs",
+ t->get_token_description ());
+ add_error (std::move (error));
+
+ return AST::MacroMatcher::create_error (t->get_locus ());
+ }
+
+ if (matches.size () > 0)
+ {
+ const auto *last_match = matches.back ().get ();
+
+ // We want to check if we are dealing with a zeroable repetition
+ bool zeroable = false;
+ if (last_match->get_macro_match_type ()
+ == AST::MacroMatch::MacroMatchType::Repetition)
+ {
+ auto repetition
+ = static_cast<const AST::MacroMatchRepetition *> (last_match);
+
+ if (repetition->get_op ()
+ != AST::MacroMatchRepetition::MacroRepOp::ONE_OR_MORE)
+ zeroable = true;
+ }
+
+ if (!zeroable)
+ last_matches.clear ();
+
+ last_matches.emplace_back (last_match);
+
+ for (auto last : last_matches)
+ if (!is_match_compatible (*last, *match))
+ return AST::MacroMatcher::create_error (
+ match->get_match_locus ());
+ }
+
+ matches.push_back (std::move (match));
+
+ // DEBUG
+ rust_debug ("pushed back a match in macro matcher");
+
+ t = lexer.peek_token ();
+ }
+
+ // parse end delimiters
+ t = lexer.peek_token ();
+ if (token_id_matches_delims (t->get_id (), delim_type))
+ {
+ // found the closing delimiter matching the opening one, so skip it.
+ lexer.skip_token ();
+
+ return AST::MacroMatcher (delim_type, std::move (matches), locus);
+ }
+ else
+ {
+ // didn't find a closing delimiter matching the opening one, so produce error
+ Error error (t->get_locus (),
+ "unexpected token %qs - expecting closing delimiter %qs "
+ "(for a macro matcher)",
+ t->get_token_description (),
+ (delim_type == AST::PARENS
+ ? ")"
+ : (delim_type == AST::SQUARE ? "]" : "}")));
+ add_error (std::move (error));
+
+ /* return error macro matcher despite possibly parsing mostly correct one?
+ * TODO is this the best idea? */
+ return AST::MacroMatcher::create_error (t->get_locus ());
+ }
+}
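+
+/* The last_matches bookkeeping above enforces Rust's follow-set rules: for
+ * instance, after an `$e:expr` fragment only `=>`, `,` or `;` may appear,
+ * so a matcher like `($e:expr $f:expr)` is rejected while
+ * `($e:expr, $f:expr)` is accepted. When the previous match is a repetition
+ * that can match zero times ($(...)* or $(...)?), the matches before the
+ * repetition are also checked, since the repetition may be absent. */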
+
+// Parses a macro match (syntax match inside a matcher in a macro rule).
+template <typename ManagedTokenSource>
+std::unique_ptr<AST::MacroMatch>
+Parser<ManagedTokenSource>::parse_macro_match ()
+{
+ // branch based on token available
+ const_TokenPtr t = lexer.peek_token ();
+ switch (t->get_id ())
+ {
+ case LEFT_PAREN:
+ case LEFT_SQUARE:
+ case LEFT_CURLY: {
+ // must be macro matcher as delimited
+ AST::MacroMatcher matcher = parse_macro_matcher ();
+ if (matcher.is_error ())
+ {
+ Error error (lexer.peek_token ()->get_locus (),
+ "failed to parse macro matcher in macro match");
+ add_error (std::move (error));
+
+ return nullptr;
+ }
+ return std::unique_ptr<AST::MacroMatcher> (
+ new AST::MacroMatcher (std::move (matcher)));
+ }
+ case DOLLAR_SIGN: {
+ // have to do more lookahead to determine if fragment or repetition
+ const_TokenPtr t2 = lexer.peek_token (1);
+ switch (t2->get_id ())
+ {
+ case ABSTRACT:
+ case AS:
+ case ASYNC:
+ case BECOME:
+ case BOX:
+ case BREAK:
+ case CONST:
+ case CONTINUE:
+ case CRATE:
+ case DO:
+ case DYN:
+ case ELSE:
+ case ENUM_TOK:
+ case EXTERN_TOK:
+ case FALSE_LITERAL:
+ case FINAL_TOK:
+ case FN_TOK:
+ case FOR:
+ case IF:
+ case IMPL:
+ case IN:
+ case LET:
+ case LOOP:
+ case MACRO:
+ case MATCH_TOK:
+ case MOD:
+ case MOVE:
+ case MUT:
+ case OVERRIDE_TOK:
+ case PRIV:
+ case PUB:
+ case REF:
+ case RETURN_TOK:
+ case SELF_ALIAS:
+ case SELF:
+ case STATIC_TOK:
+ case STRUCT_TOK:
+ case SUPER:
+ case TRAIT:
+ case TRUE_LITERAL:
+ case TRY:
+ case TYPE:
+ case TYPEOF:
+ case UNSAFE:
+ case UNSIZED:
+ case USE:
+ case VIRTUAL:
+ case WHERE:
+ case WHILE:
+ case YIELD:
+ case IDENTIFIER:
+ // macro fragment
+ return parse_macro_match_fragment ();
+ case LEFT_PAREN:
+ // macro repetition
+ return parse_macro_match_repetition ();
+ default:
+ // error: unrecognised
+ add_error (
+ Error (t2->get_locus (),
+ "unrecognised token combination %<$%s%> at start of "
+ "macro match - did you mean %<$identifier%> or %<$(%>?",
+ t2->get_token_description ()));
+
+ // skip somewhere?
+ return nullptr;
+ }
+ }
+ case RIGHT_PAREN:
+ case RIGHT_SQUARE:
+ case RIGHT_CURLY:
+ // not allowed
+ add_error (Error (
+ t->get_locus (),
+ "closing delimiters like %qs are not allowed at the start of a macro "
+ "match",
+ t->get_token_description ()));
+
+ // skip somewhere?
+ return nullptr;
+ default:
+ // just the token
+ lexer.skip_token ();
+ return std::unique_ptr<AST::Token> (new AST::Token (std::move (t)));
+ }
+}
+
+// Parses a fragment macro match.
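+/* Illustrative example (assumed typical input): a fragment match has the
+ * form `$name:ident` or `$e:expr` - dollar sign, identifier (or keyword),
+ * colon, then a fragment specifier. */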
+template <typename ManagedTokenSource>
+std::unique_ptr<AST::MacroMatchFragment>
+Parser<ManagedTokenSource>::parse_macro_match_fragment ()
+{
+ Location fragment_locus = lexer.peek_token ()->get_locus ();
+ skip_token (DOLLAR_SIGN);
+
+ Identifier ident = "";
+ auto identifier = lexer.peek_token ();
+ if (identifier->has_str ())
+ ident = identifier->get_str ();
+ else
+ ident = std::string (token_id_to_str (identifier->get_id ()));
+
+ if (ident.empty ())
+ {
+ Error error (lexer.peek_token ()->get_locus (),
+ "missing identifier in macro match fragment");
+ add_error (std::move (error));
+
+ return nullptr;
+ }
+ skip_token (identifier->get_id ());
+
+ if (!skip_token (COLON))
+ {
+ // skip after somewhere?
+ return nullptr;
+ }
+
+ // get MacroFragSpec for macro
+ const_TokenPtr t = expect_token (IDENTIFIER);
+ if (t == nullptr)
+ return nullptr;
+
+ AST::MacroFragSpec frag
+ = AST::MacroFragSpec::get_frag_spec_from_str (t->get_str ());
+ if (frag.is_error ())
+ {
+ Error error (t->get_locus (),
+ "invalid fragment specifier %qs in fragment macro match",
+ t->get_str ().c_str ());
+ add_error (std::move (error));
+
+ return nullptr;
+ }
+
+ return std::unique_ptr<AST::MacroMatchFragment> (
+ new AST::MacroMatchFragment (std::move (ident), frag, fragment_locus));
+}
+
+// Parses a repetition macro match.
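+/* Illustrative examples (not exhaustive): `$($e:expr),*`, `$($i:ident)+`
+ * and `$($t:ty);?` - parenthesised inner matches, an optional separator
+ * token, then one of the repetition operators `*`, `+` or `?`. */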
+template <typename ManagedTokenSource>
+std::unique_ptr<AST::MacroMatchRepetition>
+Parser<ManagedTokenSource>::parse_macro_match_repetition ()
+{
+ skip_token (DOLLAR_SIGN);
+ skip_token (LEFT_PAREN);
+
+ std::vector<std::unique_ptr<AST::MacroMatch>> matches;
+
+ // parse required first macro match
+ std::unique_ptr<AST::MacroMatch> initial_match = parse_macro_match ();
+ if (initial_match == nullptr)
+ {
+ Error error (
+ lexer.peek_token ()->get_locus (),
+ "could not parse required first macro match in macro match repetition");
+ add_error (std::move (error));
+
+ // skip after somewhere?
+ return nullptr;
+ }
+ matches.push_back (std::move (initial_match));
+
+ // parse optional later macro matches
+ const_TokenPtr t = lexer.peek_token ();
+ while (t->get_id () != RIGHT_PAREN)
+ {
+ std::unique_ptr<AST::MacroMatch> match = parse_macro_match ();
+
+ if (match == nullptr)
+ {
+ Error error (lexer.peek_token ()->get_locus (),
+ "failed to parse macro match in macro match repetition");
+ add_error (std::move (error));
+
+ return nullptr;
+ }
+
+ matches.push_back (std::move (match));
+
+ t = lexer.peek_token ();
+ }
+
+ if (!skip_token (RIGHT_PAREN))
+ {
+ // skip after somewhere?
+ return nullptr;
+ }
+
+ t = lexer.peek_token ();
+ // see if separator token exists
+ std::unique_ptr<AST::Token> separator = nullptr;
+ switch (t->get_id ())
+ {
+ // repetition operators
+ case ASTERISK:
+ case PLUS:
+ case QUESTION_MARK:
+ // delimiters
+ case LEFT_PAREN:
+ case LEFT_CURLY:
+ case LEFT_SQUARE:
+ case RIGHT_PAREN:
+ case RIGHT_CURLY:
+ case RIGHT_SQUARE:
+ // separator does not exist, so still null and don't skip token
+ break;
+ default:
+ // separator does exist
+ separator = std::unique_ptr<AST::Token> (new AST::Token (std::move (t)));
+ lexer.skip_token ();
+ break;
+ }
+
+ // parse repetition operator
+ t = lexer.peek_token ();
+ AST::MacroMatchRepetition::MacroRepOp op = AST::MacroMatchRepetition::NONE;
+ switch (t->get_id ())
+ {
+ case ASTERISK:
+ op = AST::MacroMatchRepetition::ANY;
+ lexer.skip_token ();
+ break;
+ case PLUS:
+ op = AST::MacroMatchRepetition::ONE_OR_MORE;
+ lexer.skip_token ();
+ break;
+ case QUESTION_MARK:
+ op = AST::MacroMatchRepetition::ZERO_OR_ONE;
+ lexer.skip_token ();
+ break;
+ default:
+ add_error (
+ Error (t->get_locus (),
+ "expected macro repetition operator (%<*%>, %<+%>, or %%>) in "
+ "macro match - found %qs",
+ t->get_token_description ()));
+
+ // skip after somewhere?
+ return nullptr;
+ }
+
+ return std::unique_ptr<AST::MacroMatchRepetition> (
+ new AST::MacroMatchRepetition (std::move (matches), op,
+ std::move (separator), t->get_locus ()));
+}
+
+/* Parses a visibility syntactical production (i.e. creating a non-default
+ * visibility) */
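+/* Illustrative examples (not exhaustive): `pub`, `pub(crate)`, `pub(self)`,
+ * `pub(super)` and `pub(in some::path)`; the absence of `pub` yields
+ * private visibility. */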
+template <typename ManagedTokenSource>
+AST::Visibility
+Parser<ManagedTokenSource>::parse_visibility ()
+{
+ // check for no visibility
+ if (lexer.peek_token ()->get_id () != PUB)
+ {
+ return AST::Visibility::create_private ();
+ }
+
+ lexer.skip_token ();
+
+ // create simple pub visibility if no parentheses
+ if (lexer.peek_token ()->get_id () != LEFT_PAREN)
+ {
+ return AST::Visibility::create_public ();
+ // or whatever
+ }
+
+ lexer.skip_token ();
+
+ const_TokenPtr t = lexer.peek_token ();
+ auto path_loc = t->get_locus ();
+
+ switch (t->get_id ())
+ {
+ case CRATE:
+ lexer.skip_token ();
+
+ skip_token (RIGHT_PAREN);
+
+ return AST::Visibility::create_crate (path_loc);
+ case SELF:
+ lexer.skip_token ();
+
+ skip_token (RIGHT_PAREN);
+
+ return AST::Visibility::create_self (path_loc);
+ case SUPER:
+ lexer.skip_token ();
+
+ skip_token (RIGHT_PAREN);
+
+ return AST::Visibility::create_super (path_loc);
+ case IN: {
+ lexer.skip_token ();
+
+ // parse the "in" path as well
+ AST::SimplePath path = parse_simple_path ();
+ if (path.is_empty ())
+ {
+ Error error (lexer.peek_token ()->get_locus (),
+ "missing path in pub(in path) visibility");
+ add_error (std::move (error));
+
+ // skip after somewhere?
+ return AST::Visibility::create_error ();
+ }
+
+ skip_token (RIGHT_PAREN);
+
+ return AST::Visibility::create_in_path (std::move (path));
+ }
+ default:
+ add_error (Error (t->get_locus (), "unexpected token %qs in visibility",
+ t->get_token_description ()));
+
+ lexer.skip_token ();
+ return AST::Visibility::create_error ();
+ }
+}
+
+// Parses a module - either a bodied module or a module defined in another file.
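+/* Illustrative examples (not exhaustive): `mod foo;` yields an unloaded
+ * (external) module, while `mod foo { ... }` yields a bodied module whose
+ * items are parsed here. */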
+template <typename ManagedTokenSource>
+std::unique_ptr<AST::Module>
+Parser<ManagedTokenSource>::parse_module (AST::Visibility vis,
+ AST::AttrVec outer_attrs)
+{
+ Location locus = lexer.peek_token ()->get_locus ();
+ skip_token (MOD);
+
+ const_TokenPtr module_name = expect_token (IDENTIFIER);
+ if (module_name == nullptr)
+ {
+ return nullptr;
+ }
+ Identifier name = module_name->get_str ();
+
+ const_TokenPtr t = lexer.peek_token ();
+
+ switch (t->get_id ())
+ {
+ case SEMICOLON:
+ lexer.skip_token ();
+
+ // Construct an external module
+ return std::unique_ptr<AST::Module> (
+ new AST::Module (std::move (name), std::move (vis),
+ std::move (outer_attrs), locus, lexer.get_filename (),
+ inline_module_stack));
+ case LEFT_CURLY: {
+ lexer.skip_token ();
+
+ // parse inner attributes
+ AST::AttrVec inner_attrs = parse_inner_attributes ();
+
+ std::string module_path_name
+ = extract_module_path (inner_attrs, outer_attrs, name);
+ InlineModuleStackScope scope (*this, std::move (module_path_name));
+
+ // parse items
+ std::vector<std::unique_ptr<AST::Item>> items;
+ const_TokenPtr tok = lexer.peek_token ();
+ while (tok->get_id () != RIGHT_CURLY)
+ {
+ std::unique_ptr<AST::Item> item = parse_item (false);
+ if (item == nullptr)
+ {
+ Error error (tok->get_locus (),
+ "failed to parse item in module");
+ add_error (std::move (error));
+
+ return nullptr;
+ }
+
+ items.push_back (std::move (item));
+
+ tok = lexer.peek_token ();
+ }
+
+ if (!skip_token (RIGHT_CURLY))
+ {
+ // skip somewhere?
+ return nullptr;
+ }
+
+ return std::unique_ptr<AST::Module> (
+ new AST::Module (std::move (name), locus, std::move (items),
+ std::move (vis), std::move (inner_attrs),
+ std::move (outer_attrs))); // module name?
+ }
+ default:
+ add_error (
+ Error (t->get_locus (),
+ "unexpected token %qs in module declaration/definition item",
+ t->get_token_description ()));
+
+ lexer.skip_token ();
+ return nullptr;
+ }
+}
+
+// Parses an extern crate declaration (dependency on external crate)
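+/* Illustrative examples (not exhaustive): `extern crate foo;`,
+ * `extern crate self;`, `extern crate foo as bar;` and
+ * `extern crate foo as _;`. */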
+template <typename ManagedTokenSource>
+std::unique_ptr<AST::ExternCrate>
+Parser<ManagedTokenSource>::parse_extern_crate (AST::Visibility vis,
+ AST::AttrVec outer_attrs)
+{
+ Location locus = lexer.peek_token ()->get_locus ();
+ if (!skip_token (EXTERN_TOK))
+ {
+ skip_after_semicolon ();
+ return nullptr;
+ }
+
+ if (!skip_token (CRATE))
+ {
+ skip_after_semicolon ();
+ return nullptr;
+ }
+
+ /* parse crate reference name - this has its own syntactical rule in reference
+ * but seems to not be used elsewhere, so I'm putting it here */
+ const_TokenPtr crate_name_tok = lexer.peek_token ();
+ std::string crate_name;
+
+ switch (crate_name_tok->get_id ())
+ {
+ case IDENTIFIER:
+ crate_name = crate_name_tok->get_str ();
+ lexer.skip_token ();
+ break;
+ case SELF:
+ crate_name = "self";
+ lexer.skip_token ();
+ break;
+ default:
+ add_error (
+ Error (crate_name_tok->get_locus (),
+ "expecting crate name (identifier or %), found %qs",
+ crate_name_tok->get_token_description ()));
+
+ skip_after_semicolon ();
+ return nullptr;
+ }
+
+ // don't parse as clause if it doesn't exist
+ if (lexer.peek_token ()->get_id () == SEMICOLON)
+ {
+ lexer.skip_token ();
+
+ return std::unique_ptr<AST::ExternCrate> (
+ new AST::ExternCrate (std::move (crate_name), std::move (vis),
+ std::move (outer_attrs), locus));
+ }
+
+ /* parse as clause - this also has its own syntactical rule in reference and
+ * also seems to not be used elsewhere, so including here again. */
+ if (!skip_token (AS))
+ {
+ skip_after_semicolon ();
+ return nullptr;
+ }
+
+ const_TokenPtr as_name_tok = lexer.peek_token ();
+ std::string as_name;
+
+ switch (as_name_tok->get_id ())
+ {
+ case IDENTIFIER:
+ as_name = as_name_tok->get_str ();
+ lexer.skip_token ();
+ break;
+ case UNDERSCORE:
+ as_name = "_";
+ lexer.skip_token ();
+ break;
+ default:
+ add_error (
+ Error (as_name_tok->get_locus (),
+ "expecting as clause name (identifier or %<_%>), found %qs",
+ as_name_tok->get_token_description ()));
+
+ skip_after_semicolon ();
+ return nullptr;
+ }
+
+ if (!skip_token (SEMICOLON))
+ {
+ skip_after_semicolon ();
+ return nullptr;
+ }
+
+ return std::unique_ptr<AST::ExternCrate> (
+ new AST::ExternCrate (std::move (crate_name), std::move (vis),
+ std::move (outer_attrs), locus, std::move (as_name)));
+}
+
+// Parses a use declaration.
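+/* Illustrative examples (not exhaustive): `use std::collections::HashMap;`,
+ * `use foo::bar as baz;` and `use foo::{bar, baz::*};`. */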
+template <typename ManagedTokenSource>
+std::unique_ptr<AST::UseDeclaration>
+Parser<ManagedTokenSource>::parse_use_decl (AST::Visibility vis,
+ AST::AttrVec outer_attrs)
+{
+ Location locus = lexer.peek_token ()->get_locus ();
+ if (!skip_token (USE))
+ {
+ skip_after_semicolon ();
+ return nullptr;
+ }
+
+ // parse use tree, which is required
+ std::unique_ptr<AST::UseTree> use_tree = parse_use_tree ();
+ if (use_tree == nullptr)
+ {
+ Error error (lexer.peek_token ()->get_locus (),
+ "could not parse use tree in use declaration");
+ add_error (std::move (error));
+
+ skip_after_semicolon ();
+ return nullptr;
+ }
+
+ if (!skip_token (SEMICOLON))
+ {
+ skip_after_semicolon ();
+ return nullptr;
+ }
+
+ return std::unique_ptr<AST::UseDeclaration> (
+ new AST::UseDeclaration (std::move (use_tree), std::move (vis),
+ std::move (outer_attrs), locus));
+}
+
+// Parses a use tree (which can be recursive and is actually a base class).
+template <typename ManagedTokenSource>
+std::unique_ptr<AST::UseTree>
+Parser<ManagedTokenSource>::parse_use_tree ()
+{
+ /* potential syntax definitions in attempt to get algorithm:
+ * Glob:
+ * <- SimplePath :: *
+ * <- :: *
+ * <- *
+ * Nested tree thing:
+ * <- SimplePath :: { COMPLICATED_INNER_TREE_THING }
+ * <- :: { COMPLICATED_INNER_TREE_THING }
+ * <- { COMPLICATED_INNER_TREE_THING }
+ * Rebind thing:
+ * <- SimplePath as IDENTIFIER
+ * <- SimplePath as _
+ * <- SimplePath
+ */
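+ /* Illustrative examples of each shape (not exhaustive): glob -
+ * `use a::b::*;` or `use ::*;`; nested tree - `use a::{b, c as d, e::*};`;
+ * rebind - `use a::b as c;`, `use a::b as _;`, or plain `use a::b;`. */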
+
+ /* current plan of attack: try to parse SimplePath first - if fails, one of
+ * top two then try parse :: - if fails, one of top two. Next is deciding
+ * character for top two. */
+
+ /* Thus, parsing smaller parts of use tree may require feeding into function
+ * via parameters (or could handle all in this single function because other
+ * use tree types aren't recognised as separate in the spec) */
+
+ // TODO: I think this function is too complex, probably should split it
+
+ Location locus = lexer.peek_token ()->get_locus ();
+
+ // bool has_path = false;
+ AST::SimplePath path = parse_simple_path ();
+
+ if (path.is_empty ())
+ {
+ // has no path, so must be glob or nested tree UseTree type
+
+ bool is_global = false;
+
+ // check for global scope resolution operator
+ if (lexer.peek_token ()->get_id () == SCOPE_RESOLUTION)
+ {
+ lexer.skip_token ();
+ is_global = true;
+ }
+
+ const_TokenPtr t = lexer.peek_token ();
+ switch (t->get_id ())
+ {
+ case ASTERISK:
+ // glob UseTree type
+ lexer.skip_token ();
+
+ if (is_global)
+ return std::unique_ptr<AST::UseTreeGlob> (
+ new AST::UseTreeGlob (AST::UseTreeGlob::GLOBAL,
+ AST::SimplePath::create_empty (), locus));
+ else
+ return std::unique_ptr<AST::UseTreeGlob> (
+ new AST::UseTreeGlob (AST::UseTreeGlob::NO_PATH,
+ AST::SimplePath::create_empty (), locus));
+ case LEFT_CURLY: {
+ // nested tree UseTree type
+ lexer.skip_token ();
+
+ std::vector<std::unique_ptr<AST::UseTree>> use_trees;
+
+ const_TokenPtr t = lexer.peek_token ();
+ while (t->get_id () != RIGHT_CURLY)
+ {
+ std::unique_ptr<AST::UseTree> use_tree = parse_use_tree ();
+ if (use_tree == nullptr)
+ {
+ break;
+ }
+
+ use_trees.push_back (std::move (use_tree));
+
+ if (lexer.peek_token ()->get_id () != COMMA)
+ break;
+
+ lexer.skip_token ();
+ t = lexer.peek_token ();
+ }
+
+ // skip end curly delimiter
+ if (!skip_token (RIGHT_CURLY))
+ {
+ // skip after somewhere?
+ return nullptr;
+ }
+
+ if (is_global)
+ return std::unique_ptr<AST::UseTreeList> (
+ new AST::UseTreeList (AST::UseTreeList::GLOBAL,
+ AST::SimplePath::create_empty (),
+ std::move (use_trees), locus));
+ else
+ return std::unique_ptr<AST::UseTreeList> (
+ new AST::UseTreeList (AST::UseTreeList::NO_PATH,
+ AST::SimplePath::create_empty (),
+ std::move (use_trees), locus));
+ }
+ case AS:
+ // this is not allowed
+ add_error (Error (
+ t->get_locus (),
+ "use declaration with rebind % requires a valid simple path - "
+ "none found"));
+
+ skip_after_semicolon ();
+ return nullptr;
+ default:
+ add_error (Error (t->get_locus (),
+ "unexpected token %qs in use tree with "
+ "no valid simple path (i.e. list"
+ " or glob use tree)",
+ t->get_token_description ()));
+
+ skip_after_semicolon ();
+ return nullptr;
+ }
+ }
+ else
+ {
+ /* Due to aforementioned implementation issues, the trailing :: token is
+ * consumed by the path, so it can not be used as a disambiguator.
+ * NOPE, not true anymore - TODO what are the consequences of this? */
+
+ const_TokenPtr t = lexer.peek_token ();
+ switch (t->get_id ())
+ {
+ case ASTERISK:
+ // glob UseTree type
+ lexer.skip_token ();
+
+ return std::unique_ptr<AST::UseTreeGlob> (
+ new AST::UseTreeGlob (AST::UseTreeGlob::PATH_PREFIXED,
+ std::move (path), locus));
+ case LEFT_CURLY: {
+ // nested tree UseTree type
+ lexer.skip_token ();
+
+ std::vector<std::unique_ptr<AST::UseTree>> use_trees;
+
+ // TODO: think of better control structure
+ const_TokenPtr t = lexer.peek_token ();
+ while (t->get_id () != RIGHT_CURLY)
+ {
+ std::unique_ptr<AST::UseTree> use_tree = parse_use_tree ();
+ if (use_tree == nullptr)
+ {
+ break;
+ }
+
+ use_trees.push_back (std::move (use_tree));
+
+ if (lexer.peek_token ()->get_id () != COMMA)
+ break;
+
+ lexer.skip_token ();
+ t = lexer.peek_token ();
+ }
+
+ // skip end curly delimiter
+ if (!skip_token (RIGHT_CURLY))
+ {
+ // skip after somewhere?
+ return nullptr;
+ }
+
+ return std::unique_ptr<AST::UseTreeList> (
+ new AST::UseTreeList (AST::UseTreeList::PATH_PREFIXED,
+ std::move (path), std::move (use_trees),
+ locus));
+ }
+ case AS: {
+ // rebind UseTree type
+ lexer.skip_token ();
+
+ const_TokenPtr t = lexer.peek_token ();
+ switch (t->get_id ())
+ {
+ case IDENTIFIER:
+ // skip lexer token
+ lexer.skip_token ();
+
+ return std::unique_ptr<AST::UseTreeRebind> (
+ new AST::UseTreeRebind (AST::UseTreeRebind::IDENTIFIER,
+ std::move (path), locus,
+ t->get_str ()));
+ case UNDERSCORE:
+ // skip lexer token
+ lexer.skip_token ();
+
+ return std::unique_ptr<AST::UseTreeRebind> (
+ new AST::UseTreeRebind (AST::UseTreeRebind::WILDCARD,
+ std::move (path), locus, "_"));
+ default:
+ add_error (Error (
+ t->get_locus (),
+ "unexpected token %qs in use tree with as clause - expected "
+ "identifier or %<_%>",
+ t->get_token_description ()));
+
+ skip_after_semicolon ();
+ return nullptr;
+ }
+ }
+ case SEMICOLON:
+ // rebind UseTree type without rebinding - path only
+
+ // don't skip semicolon - handled in parse_use_tree
+ // lexer.skip_token();
+
+ return std::unique_ptr<AST::UseTreeRebind> (
+ new AST::UseTreeRebind (AST::UseTreeRebind::NONE, std::move (path),
+ locus));
+ case COMMA:
+ case RIGHT_CURLY:
+ // this may occur in recursive calls - assume it is ok and ignore it
+ return std::unique_ptr<AST::UseTreeRebind> (
+ new AST::UseTreeRebind (AST::UseTreeRebind::NONE, std::move (path),
+ locus));
+ default:
+ add_error (Error (t->get_locus (),
+ "unexpected token %qs in use tree with valid path",
+ t->get_token_description ()));
+
+ // skip_after_semicolon();
+ return nullptr;
+ }
+ }
+}
+
+// Parses a function (not a method).
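+/* Illustrative example (not exhaustive):
+ * `pub const unsafe extern "C" fn foo<T: Copy>(x: T) -> i32 where T: Clone
+ * { ... }` - qualifiers, generic parameters, return type and where clause
+ * are all optional. */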
+template <typename ManagedTokenSource>
+std::unique_ptr<AST::Function>
+Parser<ManagedTokenSource>::parse_function (AST::Visibility vis,
+ AST::AttrVec outer_attrs)
+{
+ Location locus = lexer.peek_token ()->get_locus ();
+ // Get qualifiers for function if they exist
+ AST::FunctionQualifiers qualifiers = parse_function_qualifiers ();
+
+ skip_token (FN_TOK);
+
+ // Save function name token
+ const_TokenPtr function_name_tok = expect_token (IDENTIFIER);
+ if (function_name_tok == nullptr)
+ {
+ skip_after_next_block ();
+ return nullptr;
+ }
+ Identifier function_name = function_name_tok->get_str ();
+
+ // parse generic params - if exist
+ std::vector<std::unique_ptr<AST::GenericParam>> generic_params
+ = parse_generic_params_in_angles ();
+
+ if (!skip_token (LEFT_PAREN))
+ {
+ Error error (lexer.peek_token ()->get_locus (),
+ "function declaration missing opening parentheses before "
+ "parameter list");
+ add_error (std::move (error));
+
+ skip_after_next_block ();
+ return nullptr;
+ }
+
+ // parse function parameters (only if next token isn't right paren)
+ std::vector<AST::FunctionParam> function_params;
+ if (lexer.peek_token ()->get_id () != RIGHT_PAREN)
+ function_params
+ = parse_function_params ([] (TokenId id) { return id == RIGHT_PAREN; });
+
+ if (!skip_token (RIGHT_PAREN))
+ {
+ Error error (lexer.peek_token ()->get_locus (),
+ "function declaration missing closing parentheses after "
+ "parameter list");
+ add_error (std::move (error));
+
+ skip_after_next_block ();
+ return nullptr;
+ }
+
+ // parse function return type - if exists
+ std::unique_ptr<AST::Type> return_type = parse_function_return_type ();
+
+ // parse where clause - if exists
+ AST::WhereClause where_clause = parse_where_clause ();
+
+ // parse block expression
+ std::unique_ptr<AST::BlockExpr> block_expr = parse_block_expr ();
+
+ return std::unique_ptr<AST::Function> (
+ new AST::Function (std::move (function_name), std::move (qualifiers),
+ std::move (generic_params), std::move (function_params),
+ std::move (return_type), std::move (where_clause),
+ std::move (block_expr), std::move (vis),
+ std::move (outer_attrs), locus));
+}
+
+// Parses function or method qualifiers (i.e. const, unsafe, and extern).
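+/* Illustrative examples (not exhaustive): `const`, `async`, `unsafe`,
+ * `extern`, `extern "C"`, or a combination such as
+ * `const unsafe extern "C"`, in that order. */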
+template <typename ManagedTokenSource>
+AST::FunctionQualifiers
+Parser<ManagedTokenSource>::parse_function_qualifiers ()
+{
+ AsyncConstStatus const_status = NONE;
+ bool has_unsafe = false;
+ bool has_extern = false;
+ std::string abi;
+
+ // Check in order of const, unsafe, then extern
+ const_TokenPtr t = lexer.peek_token ();
+ Location locus = t->get_locus ();
+ switch (t->get_id ())
+ {
+ case CONST:
+ lexer.skip_token ();
+ const_status = CONST_FN;
+ break;
+ case ASYNC:
+ lexer.skip_token ();
+ const_status = ASYNC_FN;
+ break;
+ default:
+ // const status is still none
+ break;
+ }
+
+ if (lexer.peek_token ()->get_id () == UNSAFE)
+ {
+ lexer.skip_token ();
+ has_unsafe = true;
+ }
+
+ if (lexer.peek_token ()->get_id () == EXTERN_TOK)
+ {
+ lexer.skip_token ();
+ has_extern = true;
+
+ // detect optional abi name
+ const_TokenPtr next_tok = lexer.peek_token ();
+ if (next_tok->get_id () == STRING_LITERAL)
+ {
+ lexer.skip_token ();
+ abi = next_tok->get_str ();
+ }
+ }
+
+ return AST::FunctionQualifiers (locus, const_status, has_unsafe, has_extern,
+ std::move (abi));
+}
+
+// Parses generic (lifetime or type) params inside angle brackets (optional).
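+/* Illustrative example (not exhaustive): `<'a, T: Clone + 'a, const N:
+ * usize>`, including the surrounding angle brackets; an absent list yields
+ * an empty vector. */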
+template <typename ManagedTokenSource>
+std::vector<std::unique_ptr<AST::GenericParam>>
+Parser<ManagedTokenSource>::parse_generic_params_in_angles ()
+{
+ if (lexer.peek_token ()->get_id () != LEFT_ANGLE)
+ {
+ // seems to be no generic params, so exit with empty vector
+ return std::vector<std::unique_ptr<AST::GenericParam>> ();
+ }
+ lexer.skip_token ();
+
+ // DEBUG:
+ rust_debug ("skipped left angle in generic param");
+
+ std::vector<std::unique_ptr<AST::GenericParam>> generic_params
+ = parse_generic_params (is_right_angle_tok);
+
+ // DEBUG:
+ rust_debug ("finished parsing actual generic params (i.e. inside angles)");
+
+ if (!skip_generics_right_angle ())
+ {
+ // DEBUG
+ rust_debug ("failed to skip generics right angle - returning empty "
+ "generic params");
+
+ return std::vector<std::unique_ptr<AST::GenericParam>> ();
+ }
+
+ return generic_params;
+}
+
+template <typename ManagedTokenSource>
+template <typename EndTokenPred>
+std::unique_ptr<AST::GenericParam>
+Parser<ManagedTokenSource>::parse_generic_param (EndTokenPred is_end_token)
+{
+ auto token = lexer.peek_token ();
+ auto outer_attrs = parse_outer_attribute ();
+ std::unique_ptr param;
+
+ switch (token->get_id ())
+ {
+ case LIFETIME: {
+ auto lifetime = parse_lifetime ();
+ if (lifetime.is_error ())
+ {
+ rust_error_at (
+ token->get_locus (),
+ "failed to parse lifetime in generic parameter list");
+ return nullptr;
+ }
+
+ std::vector<AST::Lifetime> lifetime_bounds;
+ if (lexer.peek_token ()->get_id () == COLON)
+ {
+ lexer.skip_token ();
+ // parse required bounds
+ lifetime_bounds
+ = parse_lifetime_bounds ([is_end_token] (TokenId id) {
+ return is_end_token (id) || id == COMMA;
+ });
+ }
+
+ param = std::unique_ptr<AST::LifetimeParam> (new AST::LifetimeParam (
+ std::move (lifetime), std::move (lifetime_bounds),
+ std::move (outer_attrs), token->get_locus ()));
+ break;
+ }
+ case IDENTIFIER: {
+ auto type_ident = token->get_str ();
+ lexer.skip_token ();
+
+ std::vector<std::unique_ptr<AST::TypeParamBound>> type_param_bounds;
+ if (lexer.peek_token ()->get_id () == COLON)
+ {
+ lexer.skip_token ();
+
+ // parse optional type param bounds
+ type_param_bounds = parse_type_param_bounds ();
+ }
+
+ std::unique_ptr<AST::Type> type = nullptr;
+ if (lexer.peek_token ()->get_id () == EQUAL)
+ {
+ lexer.skip_token ();
+
+ // parse required type
+ type = parse_type ();
+ if (!type)
+ {
+ rust_error_at (
+ lexer.peek_token ()->get_locus (),
+ "failed to parse type in type param in generic params");
+ return nullptr;
+ }
+ }
+
+ param = std::unique_ptr<AST::TypeParam> (
+ new AST::TypeParam (std::move (type_ident), token->get_locus (),
+ std::move (type_param_bounds), std::move (type),
+ std::move (outer_attrs)));
+ break;
+ }
+ case CONST: {
+ lexer.skip_token ();
+
+ auto name_token = expect_token (IDENTIFIER);
+
+ if (!name_token || !expect_token (COLON))
+ return nullptr;
+
+ auto type = parse_type ();
+ if (!type)
+ return nullptr;
+
+ // optional default value
+ auto default_expr = AST::GenericArg::create_error ();
+ if (lexer.peek_token ()->get_id () == EQUAL)
+ {
+ lexer.skip_token ();
+ auto tok = lexer.peek_token ();
+ default_expr = parse_generic_arg ();
+
+ if (default_expr.is_error ())
+ rust_error_at (tok->get_locus (),
+ "invalid token for start of default value for "
+ "const generic parameter: expected %, "
+ "% or %, got %qs",
+ token_id_to_str (tok->get_id ()));
+
+ // At this point, we *know* that we are parsing a const
+ // expression
+ if (default_expr.get_kind () == AST::GenericArg::Kind::Either)
+ default_expr = default_expr.disambiguate_to_const ();
+ }
+
+ param = std::unique_ptr<AST::ConstGenericParam> (
+ new AST::ConstGenericParam (name_token->get_str (), std::move (type),
+ default_expr, std::move (outer_attrs),
+ token->get_locus ()));
+
+ break;
+ }
+ default:
+ // FIXME: Can we clean this last call with a method call?
+ rust_error_at (token->get_locus (),
+ "unexpected token when parsing generic parameters: %qs",
+ token->get_str ().c_str ());
+ return nullptr;
+ }
+
+ return param;
+}
+
+/* Parse generic (lifetime or type) params NOT INSIDE ANGLE BRACKETS!!! Almost
+ * always parse_generic_params_in_angles is what is wanted. */
+template <typename ManagedTokenSource>
+template <typename EndTokenPred>
+std::vector<std::unique_ptr<AST::GenericParam>>
+Parser<ManagedTokenSource>::parse_generic_params (EndTokenPred is_end_token)
+{
+ std::vector<std::unique_ptr<AST::GenericParam>> generic_params;
+
+ /* can't parse lifetime and type params separately due to lookahead issues
+ * thus, parse them all here */
+
+ /* HACK: used to retain attribute data if a lifetime param is tentatively
+ * parsed but it turns out to be type param */
+ AST::Attribute parsed_outer_attr = AST::Attribute::create_empty ();
+
+ // Did we parse a generic type param yet
+ auto type_seen = false;
+ // Did the user write a lifetime parameter after a type one
+ auto order_error = false;
+
+ // parse lifetime params
+ while (!is_end_token (lexer.peek_token ()->get_id ()))
+ {
+ auto param = parse_generic_param (is_end_token);
+ if (param)
+ {
+ // TODO: Handle `Const` here as well if necessary
+ if (param->get_kind () == AST::GenericParam::Kind::Type)
+ type_seen = true;
+ else if (param->get_kind () == AST::GenericParam::Kind::Lifetime
+ && type_seen)
+ order_error = true;
+
+ generic_params.emplace_back (std::move (param));
+ maybe_skip_token (COMMA);
+ }
+ }
+
+ // FIXME: Add reordering hint
+ if (order_error)
+ rust_error_at (generic_params.front ()->get_locus (),
+ "invalid order for generic parameters: lifetimes should "
+ "always come before types");
+
+ generic_params.shrink_to_fit ();
+ return generic_params;
+}
+
+/* Parses lifetime generic parameters (pointers). Will also consume any
+ * trailing comma. No extra checks for end token. */
+template <typename ManagedTokenSource>
+std::vector<std::unique_ptr<AST::LifetimeParam>>
+Parser<ManagedTokenSource>::parse_lifetime_params ()
+{
+ std::vector<std::unique_ptr<AST::LifetimeParam>> lifetime_params;
+
+ while (lexer.peek_token ()->get_id () != END_OF_FILE)
+ {
+ AST::LifetimeParam lifetime_param = parse_lifetime_param ();
+
+ if (lifetime_param.is_error ())
+ {
+ // can't treat this as an error, as it's the only way out when there is a
+ // trailing comma
+ break;
+ }
+
+ lifetime_params.push_back (std::unique_ptr<AST::LifetimeParam> (
+ new AST::LifetimeParam (std::move (lifetime_param))));
+
+ if (lexer.peek_token ()->get_id () != COMMA)
+ break;
+
+ // skip commas, including trailing commas
+ lexer.skip_token ();
+ }
+
+ lifetime_params.shrink_to_fit ();
+
+ return lifetime_params;
+}
+
+/* Parses lifetime generic parameters (pointers). Will also consume any
+ * trailing comma. Has extra is_end_token predicate checking. */
+template <typename ManagedTokenSource>
+template <typename EndTokenPred>
+std::vector<std::unique_ptr<AST::LifetimeParam>>
+Parser<ManagedTokenSource>::parse_lifetime_params (EndTokenPred is_end_token)
+{
+ std::vector<std::unique_ptr<AST::LifetimeParam>> lifetime_params;
+
+ // if end_token is not specified, it defaults to EOF, so should work fine
+ while (!is_end_token (lexer.peek_token ()->get_id ()))
+ {
+ AST::LifetimeParam lifetime_param = parse_lifetime_param ();
+
+ if (lifetime_param.is_error ())
+ {
+ /* TODO: is it worth throwing away all lifetime params just because
+ * one failed? */
+ Error error (lexer.peek_token ()->get_locus (),
+ "failed to parse lifetime param in lifetime params");
+ add_error (std::move (error));
+
+ return {};
+ }
+
+ lifetime_params.push_back (std::unique_ptr<AST::LifetimeParam> (
+ new AST::LifetimeParam (std::move (lifetime_param))));
+
+ if (lexer.peek_token ()->get_id () != COMMA)
+ break;
+
+ // skip commas, including trailing commas
+ lexer.skip_token ();
+ }
+
+ lifetime_params.shrink_to_fit ();
+
+ return lifetime_params;
+}
+
+/* Parses lifetime generic parameters (objects). Will also consume any
+ * trailing comma. No extra checks for end token.
+ * TODO: is this best solution? implements most of the same algorithm. */
+template <typename ManagedTokenSource>
+std::vector<AST::LifetimeParam>
+Parser<ManagedTokenSource>::parse_lifetime_params_objs ()
+{
+ std::vector<AST::LifetimeParam> lifetime_params;
+
+ // bad control structure as end token cannot be guaranteed
+ while (true)
+ {
+ AST::LifetimeParam lifetime_param = parse_lifetime_param ();
+
+ if (lifetime_param.is_error ())
+ {
+ // not an error, as this is the only way to exit when there is a
+ // trailing comma
+ break;
+ }
+
+ lifetime_params.push_back (std::move (lifetime_param));
+
+ if (lexer.peek_token ()->get_id () != COMMA)
+ break;
+
+ // skip commas, including trailing commas
+ lexer.skip_token ();
+ }
+
+ lifetime_params.shrink_to_fit ();
+
+ return lifetime_params;
+}
+
+/* Parses lifetime generic parameters (objects). Will also consume any
+ * trailing comma. Has extra is_end_token predicate checking.
+ * TODO: is this best solution? implements most of the same algorithm. */
+template <typename ManagedTokenSource>
+template <typename EndTokenPred>
+std::vector<AST::LifetimeParam>
+Parser<ManagedTokenSource>::parse_lifetime_params_objs (
+ EndTokenPred is_end_token)
+{
+ std::vector<AST::LifetimeParam> lifetime_params;
+
+ while (!is_end_token (lexer.peek_token ()->get_id ()))
+ {
+ AST::LifetimeParam lifetime_param = parse_lifetime_param ();
+
+ if (lifetime_param.is_error ())
+ {
+ /* TODO: is it worth throwing away all lifetime params just because
+ * one failed? */
+ Error error (lexer.peek_token ()->get_locus (),
+ "failed to parse lifetime param in lifetime params");
+ add_error (std::move (error));
+
+ return {};
+ }
+
+ lifetime_params.push_back (std::move (lifetime_param));
+
+ if (lexer.peek_token ()->get_id () != COMMA)
+ break;
+
+ // skip commas, including trailing commas
+ lexer.skip_token ();
+ }
+
+ lifetime_params.shrink_to_fit ();
+
+ return lifetime_params;
+}
+
+/* Parses a sequence of a certain grammar rule in object form (not pointer or
+ * smart pointer), delimited by commas and ending when 'is_end_token' is
+ * satisfied (templated). Will also consume any trailing comma.
+ * FIXME: this cannot be used due to member function pointer problems (i.e.
+ * parsing_function cannot be specified properly) */
+template <typename ManagedTokenSource>
+template <typename ParseFunction, typename EndTokenPred>
+auto
+Parser<ManagedTokenSource>::parse_non_ptr_sequence (
+ ParseFunction parsing_function, EndTokenPred is_end_token,
+ std::string error_msg) -> std::vector<decltype (parsing_function ())>
+{
+ std::vector<decltype (parsing_function ())> params;
+
+ while (!is_end_token (lexer.peek_token ()->get_id ()))
+ {
+ auto param = parsing_function ();
+
+ if (param.is_error ())
+ {
+ // TODO: is it worth throwing away all params just because one
+ // failed?
+ Error error (lexer.peek_token ()->get_locus (),
+ std::move (error_msg));
+ add_error (std::move (error));
+
+ return {};
+ }
+
+ params.push_back (std::move (param));
+
+ if (lexer.peek_token ()->get_id () != COMMA)
+ break;
+
+ // skip commas, including trailing commas
+ lexer.skip_token ();
+ }
+
+ params.shrink_to_fit ();
+
+ return params;
+}
+
+/* Parses a single lifetime generic parameter (not including comma). */
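+/* Illustrative examples (not exhaustive): `'a`, or with bounds
+ * `'a: 'b + 'static`, optionally preceded by an outer attribute. */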
+template <typename ManagedTokenSource>
+AST::LifetimeParam
+Parser<ManagedTokenSource>::parse_lifetime_param ()
+{
+ // parse outer attribute, which is optional and may not exist
+ AST::Attribute outer_attr = parse_outer_attribute ();
+
+ // save lifetime token - required
+ const_TokenPtr lifetime_tok = lexer.peek_token ();
+ if (lifetime_tok->get_id () != LIFETIME)
+ {
+ // if lifetime is missing, must not be a lifetime param, so return null
+ return AST::LifetimeParam::create_error ();
+ }
+ lexer.skip_token ();
+ /* TODO: does this always create a named lifetime? or can a different type
+ * be made? */
+ AST::Lifetime lifetime (AST::Lifetime::NAMED, lifetime_tok->get_str (),
+ lifetime_tok->get_locus ());
+
+ // parse lifetime bounds, if they exist
+ std::vector<AST::Lifetime> lifetime_bounds;
+ if (lexer.peek_token ()->get_id () == COLON)
+ {
+ // parse lifetime bounds
+ lifetime_bounds = parse_lifetime_bounds ();
+ // TODO: have end token passed in?
+ }
+
+ return AST::LifetimeParam (std::move (lifetime), std::move (lifetime_bounds),
+ std::move (outer_attr),
+ lifetime_tok->get_locus ());
+}
+
+// Parses type generic parameters. Will also consume any trailing comma.
+template <typename ManagedTokenSource>
+std::vector<std::unique_ptr<AST::TypeParam>>
+Parser<ManagedTokenSource>::parse_type_params ()
+{
+ std::vector<std::unique_ptr<AST::TypeParam>> type_params;
+
+ // infinite loop with break on failure as no info on ending token
+ while (true)
+ {
+ std::unique_ptr<AST::TypeParam> type_param = parse_type_param ();
+
+ if (type_param == nullptr)
+ {
+ // break if fails to parse
+ break;
+ }
+
+ type_params.push_back (std::move (type_param));
+
+ if (lexer.peek_token ()->get_id () != COMMA)
+ break;
+
+ // skip commas, including trailing commas
+ lexer.skip_token ();
+ }
+
+ type_params.shrink_to_fit ();
+ return type_params;
+}
+
+// Parses type generic parameters. Will also consume any trailing comma.
+template <typename ManagedTokenSource>
+template <typename EndTokenPred>
+std::vector<std::unique_ptr<AST::TypeParam>>
+Parser<ManagedTokenSource>::parse_type_params (EndTokenPred is_end_token)
+{
+ std::vector<std::unique_ptr<AST::TypeParam>> type_params;
+
+ while (!is_end_token (lexer.peek_token ()->get_id ()))
+ {
+ std::unique_ptr<AST::TypeParam> type_param = parse_type_param ();
+
+ if (type_param == nullptr)
+ {
+ Error error (lexer.peek_token ()->get_locus (),
+ "failed to parse type param in type params");
+ add_error (std::move (error));
+
+ return {};
+ }
+
+ type_params.push_back (std::move (type_param));
+
+ if (lexer.peek_token ()->get_id () != COMMA)
+ break;
+
+ // skip commas, including trailing commas
+ lexer.skip_token ();
+ }
+
+ type_params.shrink_to_fit ();
+ return type_params;
+ /* TODO: this shares most code with parse_lifetime_params - good place to
+ * use template (i.e. parse_non_ptr_sequence if doable) */
+}
+
+/* Parses a single type (generic) parameter, not including commas. May change
+ * to return value. */
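+/* Illustrative examples (not exhaustive): `T`, `T: Clone`,
+ * `T: Clone + Default`, or with a default `T: Clone = i32`. */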
+template <typename ManagedTokenSource>
+std::unique_ptr<AST::TypeParam>
+Parser<ManagedTokenSource>::parse_type_param ()
+{
+ // parse outer attribute, which is optional and may not exist
+ AST::Attribute outer_attr = parse_outer_attribute ();
+
+ const_TokenPtr identifier_tok = lexer.peek_token ();
+ if (identifier_tok->get_id () != IDENTIFIER)
+ {
+ // return null as type param can't exist without this required
+ // identifier
+ return nullptr;
+ }
+ // TODO: create identifier from identifier token
+ Identifier ident = identifier_tok->get_str ();
+ lexer.skip_token ();
+
+ // parse type param bounds (if they exist)
+ std::vector<std::unique_ptr<AST::TypeParamBound>> type_param_bounds;
+ if (lexer.peek_token ()->get_id () == COLON)
+ {
+ lexer.skip_token ();
+
+ // parse type param bounds, which may or may not exist
+ type_param_bounds = parse_type_param_bounds ();
+ }
+
+ // parse type (if it exists)
+ std::unique_ptr<AST::Type> type = nullptr;
+ if (lexer.peek_token ()->get_id () == EQUAL)
+ {
+ lexer.skip_token ();
+
+ // parse type (now required)
+ type = parse_type ();
+ if (type == nullptr)
+ {
+ Error error (lexer.peek_token ()->get_locus (),
+ "failed to parse type in type param");
+ add_error (std::move (error));
+
+ return nullptr;
+ }
+ }
+
+ return std::unique_ptr<AST::TypeParam> (
+ new AST::TypeParam (std::move (ident), identifier_tok->get_locus (),
+ std::move (type_param_bounds), std::move (type),
+ std::move (outer_attr)));
+}
+
+/* Parses regular (i.e. non-generic) parameters in functions or methods. Also
+ * has end token handling. */
+template <typename ManagedTokenSource>
+template <typename EndTokenPred>
+std::vector<AST::FunctionParam>
+Parser<ManagedTokenSource>::parse_function_params (EndTokenPred is_end_token)
+{
+ std::vector<AST::FunctionParam> params;
+
+ if (is_end_token (lexer.peek_token ()->get_id ()))
+ return params;
+
+ AST::FunctionParam initial_param = parse_function_param ();
+
+ // Return empty parameter list if no parameter there
+ if (initial_param.is_error ())
+ {
+ // TODO: is this an error?
+ return params;
+ }
+
+ params.push_back (std::move (initial_param));
+
+ // maybe think of a better control structure here - do-while with an initial
+ // error state? basically, loop through parameter list until can't find any
+ // more params
+ const_TokenPtr t = lexer.peek_token ();
+ while (t->get_id () == COMMA)
+ {
+ // skip comma if applies
+ lexer.skip_token ();
+
+ // TODO: strictly speaking, shouldn't there be no trailing comma?
+ if (is_end_token (lexer.peek_token ()->get_id ()))
+ break;
+
+ // now, as right paren would break, function param is required
+ AST::FunctionParam param = parse_function_param ();
+ if (param.is_error ())
+ {
+ Error error (lexer.peek_token ()->get_locus (),
+ "failed to parse function param (in function params)");
+ add_error (std::move (error));
+
+ // skip somewhere?
+ return std::vector<AST::FunctionParam> ();
+ }
+
+ params.push_back (std::move (param));
+
+ t = lexer.peek_token ();
+ }
+
+ params.shrink_to_fit ();
+ return params;
+}
+
+/* Parses a single regular (i.e. non-generic) parameter in a function or
+ * method, i.e. the "name: type" bit. Also handles it not existing. */
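+/* Illustrative examples (not exhaustive): `x: i32`, `mut buf: Vec<u8>`,
+ * `(a, b): (i32, u32)` - a pattern, a colon, then a type. */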
+template <typename ManagedTokenSource>
+AST::FunctionParam
+Parser<ManagedTokenSource>::parse_function_param ()
+{
+ // parse outer attributes if they exist
+ AST::AttrVec outer_attrs = parse_outer_attributes ();
+
+ // TODO: should saved location be at start of outer attributes or pattern?
+ Location locus = lexer.peek_token ()->get_locus ();
+ std::unique_ptr<AST::Pattern> param_pattern = parse_pattern ();
+
+ // create error function param if it doesn't exist
+ if (param_pattern == nullptr)
+ {
+ // skip after something
+ return AST::FunctionParam::create_error ();
+ }
+
+ if (!skip_token (COLON))
+ {
+ // skip after something
+ return AST::FunctionParam::create_error ();
+ }
+
+ std::unique_ptr<AST::Type> param_type = parse_type ();
+ if (param_type == nullptr)
+ {
+ // skip?
+ return AST::FunctionParam::create_error ();
+ }
+
+ return AST::FunctionParam (std::move (param_pattern), std::move (param_type),
+ std::move (outer_attrs), locus);
+}
+
+/* Parses a function or method return type syntactical construction. Also
+ * handles a function return type not existing. */
+template <typename ManagedTokenSource>
+std::unique_ptr<AST::Type>
+Parser<ManagedTokenSource>::parse_function_return_type ()
+{
+ if (lexer.peek_token ()->get_id () != RETURN_TYPE)
+ return nullptr;
+
+ // skip return type, as it now obviously exists
+ lexer.skip_token ();
+
+ std::unique_ptr<AST::Type> type = parse_type ();
+
+ return type;
+}
+
+/* Parses a "where clause" (in a function, struct, method, etc.). Also handles
+ * a where clause not existing, in which it will return
+ * WhereClause::create_empty(), which can be checked via
+ * WhereClause::is_empty(). */
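+/* Illustrative example (not exhaustive): `where T: Clone + Default, 'a: 'b`,
+ * terminated in practice by `{`, `;` or `=`. */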
+template <typename ManagedTokenSource>
+AST::WhereClause
+Parser<ManagedTokenSource>::parse_where_clause ()
+{
+ const_TokenPtr where_tok = lexer.peek_token ();
+ if (where_tok->get_id () != WHERE)
+ {
+ // where clause doesn't exist, so create empty one
+ return AST::WhereClause::create_empty ();
+ }
+
+ lexer.skip_token ();
+
+ /* parse where clause items - this is not a separate rule in the reference
+ * so won't be here */
+ std::vector<std::unique_ptr<AST::WhereClauseItem>> where_clause_items;
+
+ /* HACK: where clauses end with a right curly or semicolon or equals in all
+ * uses currently */
+ const_TokenPtr t = lexer.peek_token ();
+ while (t->get_id () != LEFT_CURLY && t->get_id () != SEMICOLON
+ && t->get_id () != EQUAL)
+ {
+ std::unique_ptr<AST::WhereClauseItem> where_clause_item
+ = parse_where_clause_item ();
+
+ if (where_clause_item == nullptr)
+ {
+ Error error (t->get_locus (), "failed to parse where clause item");
+ add_error (std::move (error));
+
+ return AST::WhereClause::create_empty ();
+ }
+
+ where_clause_items.push_back (std::move (where_clause_item));
+
+ // also skip comma if it exists
+ if (lexer.peek_token ()->get_id () != COMMA)
+ break;
+
+ lexer.skip_token ();
+ t = lexer.peek_token ();
+ }
+
+ where_clause_items.shrink_to_fit ();
+ return AST::WhereClause (std::move (where_clause_items));
+}
+
+/* Parses a where clause item (lifetime or type bound). Does not parse any
+ * commas. */
+template <typename ManagedTokenSource>
+std::unique_ptr<AST::WhereClauseItem>
+Parser<ManagedTokenSource>::parse_where_clause_item ()
+{
+ // crude way of determining lifetime or type bound - just test for a
+ // lifetime token
+ const_TokenPtr t = lexer.peek_token ();
+
+ if (t->get_id () == LIFETIME)
+ return parse_lifetime_where_clause_item ();
+ else
+ return parse_type_bound_where_clause_item ();
+}
+
+// Parses a lifetime where clause item.
+template <typename ManagedTokenSource>
+std::unique_ptr<AST::LifetimeWhereClauseItem>
+Parser<ManagedTokenSource>::parse_lifetime_where_clause_item ()
+{
+ AST::Lifetime lifetime = parse_lifetime ();
+ if (lifetime.is_error ())
+ {
+ // TODO: error here?
+ return nullptr;
+ }
+
+ if (!skip_token (COLON))
+ {
+ // TODO: skip after somewhere
+ return nullptr;
+ }
+
+ std::vector<AST::Lifetime>