Drastically simplified the Input class. Most of the heavy lifting is now done by the built-in yy_scan_string and yy_scan_bytes functions. Comment handling will be done by the lexer.

git-svn-id: https://angleproject.googlecode.com/svn/trunk@1051 736b8ea6-26fd-11df-bfd4-992fa37f6226
author: alokp@chromium.org <alokp@chromium.org@736b8ea6-26fd-11df-bfd4-992fa37f6226> 2012-04-24 23:07:34 +0000
committer: alokp@chromium.org <alokp@chromium.org@736b8ea6-26fd-11df-bfd4-992fa37f6226> 2012-04-24 23:07:34 +0000
commit: 28182485765b0fb9cfc86abd0649f500ccb050c7 (patch)
tree: 883e8b6efba22a5ced4164ecf020d2b057b16067
parent: 23ff36a03d80588f25062fc625019a190ad9e621 (diff)
download: angle_dx11-28182485765b0fb9cfc86abd0649f500ccb050c7.tar.gz
7 files changed, 107 insertions, 252 deletions
diff --git a/src/compiler/preprocessor/new/Input.cpp b/src/compiler/preprocessor/new/Input.cpp
index 94f5b842..a080ef52 100644
--- a/src/compiler/preprocessor/new/Input.cpp
+++ b/src/compiler/preprocessor/new/Input.cpp
@@ -6,158 +6,11 @@
 
 #include "Input.h"
 
-#include <cassert>
-#include <cstdio>
-
 namespace pp
 {
 
-Input::Input(int count, const char* const string[], const int length[])
-    : mCount(count),
-      mString(string),
-      mLength(length),
-      mIndex(-1),
-      mSize(0),
-      mError(kErrorNone),
-      mState(kStateInitial)
-{
-    assert(mCount >= 0);
-    switchToNextString();
-}
-
-bool Input::eof() const
-{
-    assert(mIndex <= mCount);
-    return mIndex == mCount;
-}
-
-int Input::read(char* buf, int bufSize)
-{
-    int nread = 0;
-    int startIndex = mIndex;
-    // Keep reading until the buffer is full or the current string is exhausted.
-    while ((mIndex == startIndex) && (nread < bufSize))
-    {
-        int c = getChar();
-        if (c == EOF)
-        {
-            if (mState == kStateBlockComment)
-                mError = kErrorUnexpectedEOF;
-            break;
-        }
-
-        switch (mState)
-        {
-          case kStateInitial:
-            if (c == '/')
-            {
-                // Potentially a comment.
-                switch (peekChar())
-                {
-                  case '/':
-                    getChar();  // Eat '/'.
-                    mState = kStateLineComment;
-                    break;
-                  case '*':
-                    getChar();  // Eat '*'.
-                    mState = kStateBlockComment;
-                    break;
-                  default:
-                    // Not a comment.
-                    buf[nread++] = c;
-                    break;
-                }
-            } else
-            {
-                buf[nread++] = c;
-            }
-            break;
-
-          case kStateLineComment:
-            if (c == '\n')
-            {
-                buf[nread++] = c;
-                mState = kStateInitial;
-            }
-            break;
-
-          case kStateBlockComment:
-            if (c == '*' && (peekChar() == '/'))
-            {
-                getChar();   // Eat '/'.
-                buf[nread++] = ' ';  // Replace comment with whitespace.
-                mState = kStateInitial;
-            } else if (c == '\n')
-            {
-                // Line breaks are never skipped.
-                buf[nread++] = c;
-            }
-            break;
-
-          default:
-            assert(false);
-            break;
-        }
-    }
-
-    return nread;
-}
-
-int Input::getChar()
-{
-    if (eof()) return EOF;
-
-    const char* str = mString[mIndex];
-    int c = str[mSize++];
-
-    // Switch to next string if the current one is fully read.
-    int length = stringLength(mIndex);
-    // We never read from empty string.
-    assert(length != 0);
-    if (((length < 0) && (str[mSize] == '\0')) ||
-        ((length > 0) && (mSize == length)))
-        switchToNextString();
-
-    return c;
-}
-
-int Input::peekChar()
-{
-    // Save the current read position.
-    int index = mIndex;
-    int size = mSize;
-    int c = getChar();
-
-    // Restore read position.
-    mIndex = index;
-    mSize = size;
-    return c;
-}
-
-void Input::switchToNextString()
-{
-    assert(mIndex < mCount);
-
-    mSize = 0;
-    do
-    {
-        ++mIndex;
-    } while (!eof() && isStringEmpty(mIndex));
-}
-
-bool Input::isStringEmpty(int index)
-{
-    assert(index < mCount);
-
-    const char* str = mString[mIndex];
-    int length = stringLength(mIndex);
-    return (length == 0) || ((length < 0) && (str[0] == '\0'));
-}
-
-int Input::stringLength(int index)
+Input::Input() : count(0), string(0), length(0), index(0), buffer(0)
 {
-    assert(index < mCount);
-    return mLength ? mLength[index] : -1;
 }
 
 }  // namespace pp
diff --git a/src/compiler/preprocessor/new/Input.h b/src/compiler/preprocessor/new/Input.h
index 5a1b5d1a..478a9217 100644
--- a/src/compiler/preprocessor/new/Input.h
+++ b/src/compiler/preprocessor/new/Input.h
@@ -10,63 +10,19 @@
 namespace pp
 {
 
-// Reads the given set of strings into input buffer.
-// Strips comments.
-class Input
+// Holds lexer input.
+struct Input
 {
-  public:
-    Input(int count, const char* const string[], const int length[]);
-
-    enum Error
-    {
-        kErrorNone,
-        kErrorUnexpectedEOF
-    };
-    Error error() const { return mError; }
-
-    // Returns the index of string currently being scanned.
-    int stringIndex() const { return mIndex; }
-    // Returns true if EOF has reached.
-    bool eof() const;
-
-    // Reads up to bufSize characters into buf.
-    // Returns the number of characters read.
-    // It replaces each comment by a whitespace. It reads only one string
-    // at a time so that the lexer has opportunity to update the string number
-    // for meaningful diagnostic messages.
-    int read(char* buf, int bufSize);
-
-private:
-    enum State
-    {
-        kStateInitial,
-        kStateLineComment,
-        kStateBlockComment
-    };
-
-    int getChar();
-    int peekChar();
-    // Switches input buffer to the next non-empty string.
-    // This is called when the current string is fully read.
-    void switchToNextString();
-    // Returns true if the given string is empty.
-    bool isStringEmpty(int index);
-    // Return the length of the given string.
-    // Returns a negative value for null-terminated strings.
-    int stringLength(int index);
-
     // Input.
-    int mCount;
-    const char* const* mString;
-    const int* mLength;
+    int count;
+    const char* const* string;
+    const int* length;
 
     // Current read position.
-    int mIndex;   // Index of string currently being scanned.
-    int mSize;    // Size of string already scanned.
+    int index;  // Index of string currently being scanned.
+    void* buffer;  // Current buffer handle.
 
-    // Current error and state.
-    Error mError;
-    State mState;
+    Input();
 };
 
 }  // namespace pp
diff --git a/src/compiler/preprocessor/new/Lexer.cpp b/src/compiler/preprocessor/new/Lexer.cpp
index ca00cdb3..00a8d3a3 100644
--- a/src/compiler/preprocessor/new/Lexer.cpp
+++ b/src/compiler/preprocessor/new/Lexer.cpp
@@ -8,27 +8,30 @@
 
 #include <cassert>
 
-#include "Input.h"
-
 namespace pp
 {
 
-Lexer::Lexer() : mHandle(0), mLeadingSpace(false)
+Lexer::Lexer() : mHandle(0)
 {
 }
 
 Lexer::~Lexer()
 {
     destroyLexer();
+
+    // Make sure the lexer and associated buffer are deleted.
+    assert(mHandle == 0);
+    assert(mInput.buffer == 0);
 }
 
 bool Lexer::init(int count, const char* const string[], const int length[])
 {
-    assert((count >= 0) && (string));
-    if ((count < 0) || (!string))
-        return false;
+    assert((count >= 0) && string);
+
+    mInput.count = count;
+    mInput.string = string;
+    mInput.length = length;
 
-    mInput.reset(new Input(count, string, length));
     return initLexer();
 }
 
diff --git a/src/compiler/preprocessor/new/Lexer.h b/src/compiler/preprocessor/new/Lexer.h
index 33c66e69..3f705dfa 100644
--- a/src/compiler/preprocessor/new/Lexer.h
+++ b/src/compiler/preprocessor/new/Lexer.h
@@ -7,14 +7,12 @@
 #ifndef COMPILER_PREPROCESSOR_LEXER_H_
 #define COMPILER_PREPROCESSOR_LEXER_H_
 
-#include <memory>
-
+#include "Input.h"
 #include "pp_utils.h"
 
 namespace pp
 {
 
-class Input;
 struct Token;
 
 class Lexer
@@ -33,8 +31,7 @@ class Lexer
     void destroyLexer();
 
     void* mHandle;  // Lexer handle.
-    bool mLeadingSpace;
-    std::auto_ptr<Input> mInput;  // Input buffer.
+    Input mInput;  // Input buffer.
 };
 
 }  // namespace pp
diff --git a/src/compiler/preprocessor/new/pp.l b/src/compiler/preprocessor/new/pp.l
index b7a251b4..9e47e609 100644
--- a/src/compiler/preprocessor/new/pp.l
+++ b/src/compiler/preprocessor/new/pp.l
@@ -36,13 +36,12 @@ typedef pp::Token::Location YYLTYPE;
         yylloc->string = 0;      \
     } while(0);
 
-#define YY_INPUT(buf, result, maxSize) \
-    result = readInput(buf, maxSize, yyscanner);
-
-static int readInput(char* buf, int maxSize, yyscan_t scanner);
+// Suppress the default implementation of YY_INPUT which generated
+// compiler warnings.
+#define YY_INPUT
 %}
 
-%option noyywrap nounput never-interactive
+%option nounput never-interactive
 %option reentrant bison-bridge bison-locations
 %option prefix="pp"
 %option extra-type="pp::Input*"
@@ -116,7 +115,7 @@ FRACTIONAL_CONSTANT  ({DIGIT}*"."{DIGIT}+)|({DIGIT}+".")
 }
 
 . {
-    yylval->push_back(yytext[0]);
+    yylval->assign(yytext, yyleng);
     return pp::Token::INVALID_CHARACTER;
 }
 
@@ -124,41 +123,59 @@ FRACTIONAL_CONSTANT  ({DIGIT}*"."{DIGIT}+)|({DIGIT}+".")
 
 %%
 
-int readInput(char* buf, int maxSize, yyscan_t scanner)
+int ppwrap(yyscan_t scanner)
 {
-    int nread = YY_NULL;
     pp::Input* input = yyget_extra(scanner);
-    while (!input->eof() &&
-           (input->error() == pp::Input::kErrorNone) &&
-           (nread == YY_NULL))
+
+    // Delete the current buffer before switching to the next one.
+    YY_BUFFER_STATE buffer = static_cast<YY_BUFFER_STATE>(input->buffer);
+    if (buffer != NULL)
     {
-        nread = input->read(buf, maxSize);
+        yy_delete_buffer(buffer, scanner);
+        input->buffer = NULL;
     }
-    return nread;
+
+    int index = std::min(input->index + 1, input->count);
+    if (index == input->count)
+        return 1;  // EOF reached.
+
+    int length = input->length ? input->length[index] : -1;
+    if (length < 0)  // NULL terminated string.
+        buffer = yy_scan_string(input->string[index], scanner);
+    else
+        buffer = yy_scan_bytes(input->string[index], length, scanner);
+
+    // TODO(alokp): Increment token location.
+    input->index = index;
+    input->buffer = buffer;
+    return 0;
 }
 
 namespace pp {
 
 int Lexer::lex(Token* token)
 {
+    bool leadingSpace = false;
     token->type = yylex(&token->value, &token->location, mHandle);
     while (token->type == ' ')
     {
-        mLeadingSpace = true;
+        leadingSpace = true;
         token->type = yylex(&token->value, &token->location, mHandle);
     }
-    token->setHasLeadingSpace(mLeadingSpace);
-    mLeadingSpace = false;
+    token->setHasLeadingSpace(leadingSpace);
 
     return token->type;
 }
 
 bool Lexer::initLexer()
 {
-    if ((mHandle == NULL) && yylex_init_extra(mInput.get(), &mHandle))
+    if ((mHandle == NULL) && yylex_init_extra(&mInput, &mHandle))
         return false;
 
-    yyrestart(0, mHandle);
+    // Setup first scan string.
+    mInput.index = -1;
+    ppwrap(mHandle);
+
     return true;
 }
 
@@ -167,6 +184,13 @@ void Lexer::destroyLexer()
     if (mHandle == NULL)
         return;
 
+    YY_BUFFER_STATE buffer = static_cast<YY_BUFFER_STATE>(mInput.buffer);
+    if (buffer != NULL)
+    {
+        yy_delete_buffer(buffer, mHandle);
+        mInput.buffer = NULL;
+    }
+ 
     yylex_destroy(mHandle);
     mHandle = NULL;
 }
diff --git a/src/compiler/preprocessor/new/pp_lex.cpp b/src/compiler/preprocessor/new/pp_lex.cpp
index 8debc69a..7b9f1d6f 100644
--- a/src/compiler/preprocessor/new/pp_lex.cpp
+++ b/src/compiler/preprocessor/new/pp_lex.cpp
@@ -334,9 +334,6 @@ void ppfree (void * ,yyscan_t yyscanner );
 
 /* Begin user sect3 */
 
-#define ppwrap(n) 1
-#define YY_SKIP_YYWRAP
-
 typedef unsigned char YY_CHAR;
 
 typedef int yy_state_type;
@@ -520,10 +517,9 @@ typedef pp::Token::Location YYLTYPE;
         yylloc->string = 0;      \
     } while(0);
 
-#define YY_INPUT(buf, result, maxSize) \
-    result = readInput(buf, maxSize, yyscanner);
-
-static int readInput(char* buf, int maxSize, yyscan_t scanner);
+// Suppress the default implementation of YY_INPUT which generated
+// compiler warnings.
+#define YY_INPUT
 
 #define INITIAL 0
 
@@ -970,7 +966,7 @@ YY_RULE_SETUP
 case 30:
 YY_RULE_SETUP
 {
-    yylval->push_back(yytext[0]);
+    yylval->assign(yytext, yyleng);
     return pp::Token::INVALID_CHARACTER;
 }
 	YY_BREAK
@@ -2116,41 +2112,59 @@ void ppfree (void * ptr , yyscan_t yyscanner)
 
 #define YYTABLES_NAME "yytables"
 
-int readInput(char* buf, int maxSize, yyscan_t scanner)
+int ppwrap(yyscan_t scanner)
 {
-    int nread = YY_NULL;
     pp::Input* input = ppget_extra(scanner);
-    while (!input->eof() &&
-           (input->error() == pp::Input::kErrorNone) &&
-           (nread == YY_NULL))
+
+    // Delete the current buffer before switching to the next one.
+    YY_BUFFER_STATE buffer = static_cast<YY_BUFFER_STATE>(input->buffer);
+    if (buffer != NULL)
     {
-        nread = input->read(buf, maxSize);
+        pp_delete_buffer(buffer,scanner);
+        input->buffer = NULL;
     }
-    return nread;
+
+    int index = std::min(input->index + 1, input->count);
+    if (index == input->count)
+        return 1;  // EOF reached.
+
+    int length = input->length ? input->length[index] : -1;
+    if (length < 0)  // NULL terminated string.
+        buffer = pp_scan_string(input->string[index],scanner);
+    else
+        buffer = pp_scan_bytes(input->string[index],length,scanner);
+
+    // TODO(alokp): Increment token location.
+    input->index = index;
+    input->buffer = buffer;
+    return 0;
 }
 
 namespace pp {
 
 int Lexer::lex(Token* token)
 {
+    bool leadingSpace = false;
     token->type = pplex(&token->value,&token->location,mHandle);
     while (token->type == ' ')
     {
-        mLeadingSpace = true;
+        leadingSpace = true;
         token->type = pplex(&token->value,&token->location,mHandle);
     }
-    token->setHasLeadingSpace(mLeadingSpace);
-    mLeadingSpace = false;
+    token->setHasLeadingSpace(leadingSpace);
 
     return token->type;
 }
 
 bool Lexer::initLexer()
 {
-    if ((mHandle == NULL) && pplex_init_extra(mInput.get(),&mHandle))
+    if ((mHandle == NULL) && pplex_init_extra(&mInput,&mHandle))
         return false;
 
-    pprestart(0,mHandle);
+    // Setup first scan string.
+    mInput.index = -1;
+    ppwrap(mHandle);
+
     return true;
 }
 
@@ -2159,6 +2173,13 @@ void Lexer::destroyLexer()
     if (mHandle == NULL)
         return;
 
+    YY_BUFFER_STATE buffer = static_cast<YY_BUFFER_STATE>(mInput.buffer);
+    if (buffer != NULL)
+    {
+        pp_delete_buffer(buffer,mHandle);
+        mInput.buffer = NULL;
+    }
+ 
     pplex_destroy(mHandle);
     mHandle = NULL;
 }
diff --git a/tests/preprocessor_tests/char_test.cpp b/tests/preprocessor_tests/char_test.cpp
index fa23d511..b54cbf76 100644
--- a/tests/preprocessor_tests/char_test.cpp
+++ b/tests/preprocessor_tests/char_test.cpp
@@ -5,6 +5,7 @@
 //
 
 #include <algorithm>
+#include <climits>
 
 #include "gtest/gtest.h"
 #include "Preprocessor.h"
@@ -101,7 +102,7 @@ TEST_P(CharTest, Identified)
 
 // Note +1 for the max-value in range. It is there because the max-value
 // not included in the range.
-INSTANTIATE_TEST_CASE_P(AllCharacters, CharTest,
-                        testing::Range(-127, 127 + 1));
+INSTANTIATE_TEST_CASE_P(All, CharTest,
+                        testing::Range(CHAR_MIN, CHAR_MAX + 1));
 
 #endif  // GTEST_HAS_PARAM_TEST
author	alokp@chromium.org <alokp@chromium.org@736b8ea6-26fd-11df-bfd4-992fa37f6226>	2012-04-24 23:07:34 +0000
committer	alokp@chromium.org <alokp@chromium.org@736b8ea6-26fd-11df-bfd4-992fa37f6226>	2012-04-24 23:07:34 +0000
commit	28182485765b0fb9cfc86abd0649f500ccb050c7 (patch)
tree	883e8b6efba22a5ced4164ecf020d2b057b16067
parent	23ff36a03d80588f25062fc625019a190ad9e621 (diff)
download	angle_dx11-28182485765b0fb9cfc86abd0649f500ccb050c7.tar.gz