Report a bug
If you spot a problem with this page, click here to create a Bugzilla issue.
Improve this page
Quickly fork, edit online, and submit a pull request for this page. Requires a signed-in GitHub account. This works well for small changes. If you'd like to make larger changes you may want to consider using a local clone.

dmd.lexer

Implements the lexical analyzer, which converts source code into lexical tokens.

Specification: Lexical

Authors: Walter Bright

Source: lexer.d

struct CompileEnv;
Values to use for various magic identifiers
uint versionNumber;
__VERSION__
const(char)[] date;
__DATE__
const(char)[] time;
__TIME__
const(char)[] vendor;
__VENDOR__
const(char)[] timestamp;
__TIMESTAMP__
bool previewIn;
in means [ref] scope const, accepts rvalues
bool ddocOutput;
collect embedded documentation comments
bool masm;
use MASM inline asm syntax
IdentifierCharLookup cCharLookupTable;
C identifier table (set to the lexer by the C parser)
IdentifierCharLookup dCharLookupTable;
D identifier table
class Lexer;
Examples:
Unittest
fprintf(stderr, "Lexer.unittest %d\n", __LINE__);

ErrorSink errorSink = new ErrorSinkStderr;

void test(T)(string sequence, T expected, bool Ccompile = false)
{
    auto p = cast(const(char)*)sequence.ptr;
    dchar c2;
    Lexer lexer = new Lexer(errorSink);
    assert(expected == lexer.escapeSequence(Loc.initial, p, Ccompile, c2));
    assert(p == sequence.ptr + sequence.length);
}

test(`'`, '\'');
test(`"`, '"');
test(`?`, '?');
test(`\`, '\\');
test(`0`, '\0');
test(`a`, '\a');
test(`b`, '\b');
test(`f`, '\f');
test(`n`, '\n');
test(`r`, '\r');
test(`t`, '\t');
test(`v`, '\v');

test(`x00`, 0x00);
test(`xff`, 0xff);
test(`xFF`, 0xff);
test(`xa7`, 0xa7);
test(`x3c`, 0x3c);
test(`xe2`, 0xe2);

test(`1`, '\1');
test(`42`, '\42');
test(`357`, '\357');

test(`u1234`, '\u1234');
test(`uf0e4`, '\uf0e4');

test(`U0001f603`, '\U0001f603');

test(`"`, '"');
test(`&lt;`, '<');
test(`&gt;`, '>');
IdentifierCharLookup charLookup;
Character table for identifiers
bool Ccompile;
true if compiling ImportC
ubyte boolsize;
size of a C Bool, default 1
ubyte shortsize;
size of a C short, default 2
ubyte intsize;
size of a C int, default 4
ubyte longsize;
size of C long, 4 or 8
ubyte long_longsize;
size of a C long long, default 8
ubyte long_doublesize;
size of C long double, 8 or D real.sizeof
ubyte wchar_tsize;
size of C wchar_t, 2 or 4
ErrorSink eSink;
send error messages through this interface
CompileEnv compileEnv;
environment
nothrow scope this(const(char)* filename, const(char)* base, size_t begoffset, size_t endoffset, bool doDocComment, bool commentToken, ErrorSink errorSink, const CompileEnv* compileEnv);
Creates a Lexer for the source code base[begoffset..endoffset+1]. The last character, base[endoffset], must be null (0) or EOF (0x1A).
Parameters:
const(char)* filename used for error messages
const(char)* base source code, must be terminated by a null (0) or EOF (0x1A) character
size_t begoffset starting offset into base[]
size_t endoffset the last offset to read into base[]
bool doDocComment handle documentation comments
bool commentToken comments become TOK.comment's
ErrorSink errorSink where error messages go, must not be null
CompileEnv* compileEnv version, vendor, date, time, etc.
nothrow this(const(char)* filename, const(char)* base, size_t begoffset, size_t endoffset, bool doDocComment, bool commentToken, bool whitespaceToken, ErrorSink errorSink, const CompileEnv* compileEnv = null);
Alternative entry point for DMDLIB, adds whitespaceToken
nothrow scope @safe this(ErrorSink errorSink);
Used for unittests for a mock Lexer
final nothrow void resetDefineLines(const(char)[] slice);
Reset lexer to lex #define's
final nothrow void nextDefineLine();
Set up for next #define line. p should be at start of next line.
final const pure nothrow @nogc @property @safe bool empty();
Range interface
pure nothrow @safe Token* allocateToken();
Returns:
a newly allocated Token.
final nothrow TOK peekNext();
Look ahead at next token's value.
final nothrow TOK peekNext2();
Look 2 tokens ahead at value.
final nothrow void scan(Token* t);
Turn next token in buffer into a token.
Parameters:
Token* t the token to set the resulting Token to
final nothrow Token* peekPastParen(Token* tk);
tk is on the opening (. Look ahead and return token that is past the closing ).
final nothrow TOK hexStringConstant(Token* t);
Lex hex strings: x"0A ae 34FE BD"
nothrow bool parseSpecialTokenSequence();
Parse special token sequence:
Returns:
true if the special token sequence was handled
final nothrow void poundLine(ref Token tok, bool linemarker);
Parse line/file preprocessor directive: #line linnum [filespec] Allow __LINE__ for linnum, and __FILE__ for filespec. Accept linemarker format:

linnum [filespec] {flags}

There can be zero or more flags, which are one of the digits 1..4, and must be in ascending order. The flags are ignored.
Parameters:
Token tok token we're on, which is linnum of linemarker
bool linemarker true if line marker format and lexer is on linnum
final nothrow void skipToNextLine(OutBuffer* defines = null);
Scan forward to start of next line.
Parameters:
OutBuffer* defines send characters to defines
static pure nothrow const(char)* combineComments(const(char)[] c1, const(char)[] c2, bool newParagraph);
Combine two document comments into one, separated by an extra newline if newParagraph is true.
nothrow void printRestOfTokens();
Print the tokens from the current token to the end, while not advancing the parser forward. Useful for debugging.