// Copyright 2018 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "manifest_chunk_parser.h"

#include <assert.h>

#include <algorithm>
#include <tuple>

#include "lexer.h"
#include "metrics.h"
#include "thread_pool.h"
#include "util.h"

namespace manifest_chunk {

class ChunkParser {
  const LoadedFile& file_;
  const bool experimentalEnvvar_;
  Lexer lexer_;
  const char* chunk_end_ = nullptr;
  std::vector<ParserItem>* out_ = nullptr;
  Clump* current_clump_ = nullptr;

  Clump* MakeClump() {
    if (current_clump_)
      return current_clump_;
    current_clump_ = new Clump { file_ };
    out_->push_back(current_clump_);
    return current_clump_;
  }

  void OutItem(ParserItem item) {
    current_clump_ = nullptr;
    out_->push_back(std::move(item));
  }

  bool OutError(const std::string& err) {
    OutItem(new Error { err });
    return false;
  }

  bool LexerError(const std::string& message);
  bool ExpectToken(Lexer::Token expected);
  bool ParseLet(StringPiece* key, StringPiece* value);
  bool ParseFileInclude(bool new_scope);
  bool ParsePool();
  bool ParseDefault();
  bool ParseBinding();
  bool ParseRule();
  bool ParseEdge();

 public:
  ChunkParser(const LoadedFile& file, StringPiece chunk_content,
              std::vector<ParserItem>* out, bool experimentalEnvvar)
      : file_(file),
        experimentalEnvvar_(experimentalEnvvar),
        lexer_(file.filename(), file.content(), chunk_content.data()),
        chunk_end_(chunk_content.data() + chunk_content.size()),
        out_(out) {}

  bool ParseChunk();
};

bool ChunkParser::LexerError(const std::string& message) {
  std::string err;
  lexer_.Error(message, &err);
  return OutError(err);
}

bool ChunkParser::ExpectToken(Lexer::Token expected) {
  Lexer::Token token = lexer_.ReadToken();
  if (token != expected) {
    std::string message = std::string("expected ") + Lexer::TokenName(expected);
    message += std::string(", got ") + Lexer::TokenName(token);
    message += Lexer::TokenErrorHint(expected);
    return LexerError(message);
  }
  return true;
}

bool ChunkParser::ParseLet(StringPiece* key, StringPiece* value) {
  if (!lexer_.ReadIdent(key))
    return LexerError("expected variable name");
  if (!ExpectToken(Lexer::EQUALS))
    return false;
  std::string err;
  if (!lexer_.ReadBindingValue(value, &err))
    return OutError(err);
  return true;
}

bool ChunkParser::ParseFileInclude(bool new_scope) {
  Include* include = new Include();
  include->new_scope_ = new_scope;
  std::string err;
  if (!lexer_.ReadPath(&include->path_, &err))
    return OutError(err);
  include->diag_pos_ = lexer_.GetLastTokenOffset();
  if (!ExpectToken(Lexer::NEWLINE))
    return false;

  // If new_scope == false, it would be possible to just ignore the next token.
  // If a Lexer::INDENT somehow was the next token, it would fail with
  // 'ninja: build.ninja:NNN: unexpected indent'. This might be a slightly more
  // helpful message.
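  // For illustration only (a sketch of the intent, not a grammar definition;
  // the exact tokens accepted are defined by the lexer's INDENT, CHDIR, and
  // env handling below), the indented block accepted after a 'subninja' line
  // looks roughly like:
  //
  //   subninja subdir/build.ninja
  //     env SOME_VAR = some value     (only with experimentalEnvvar_)
  //     chdir = subdir
  //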
  if (lexer_.PeekToken(Lexer::INDENT)) {
    if (!new_scope)
      return LexerError("indent after 'include' line is invalid.");
    for (;;) {
      if (lexer_.PeekToken(Lexer::CHDIR)) {
        break;
      }
      if (experimentalEnvvar_) {
        // Accept one or more "env foo = bar\n"
        StringPiece envkey;
        if (lexer_.ReadIdent(&envkey)) {
          if (envkey.compare("env")) {
            return LexerError("unexpected \"" + envkey.AsString() +
                              "\". Only 'chdir =' allowed here.");
          }
          StringPiece key;
          StringPiece val;
          if (!lexer_.ReadIdent(&key)) {
            return LexerError(
                "expected \"env VAR = value1 value2 value3\", only got \"env\"");
          }
          if (!key.compare("chdir")) {
            return LexerError("reserved word chdir: \"env chdir =\"");
          }
          if (!ExpectToken(Lexer::EQUALS)) {
            return false;  // ExpectToken() has already set Lexer error
          }
          std::string err;
          if (!lexer_.ReadBindingValue(&val, &err)) {
            return OutError(err);
          }
          const char* ws = " \t\r\n";
          std::string valstr = val.AsString();
          valstr.erase(valstr.find_last_not_of(ws) + 1);
          valstr.erase(0, valstr.find_first_not_of(ws));
          auto p = include->envvar.emplace(key.AsString(), valstr);
          if (!p.second) {
            return LexerError("duplicate env var");
          }
          if (!lexer_.PeekToken(Lexer::INDENT)) {
            return LexerError("got \"env " + key.AsString() + " = " + valstr +
                              "\" but missing chdir.");
          }
          continue;  // Successfully parsed an env statement.
        }
      }
      return LexerError("only 'chdir =' is allowed after 'subninja' line.");
    }
    if (!ExpectToken(Lexer::EQUALS))
      return LexerError("only 'chdir =' is allowed after 'subninja' line.");
    if (!lexer_.ReadPath(&include->chdir_, &err))
      return OutError(err);

    // Disallow '..' and '.' relative paths
    include->chdir_plus_slash_ = include->chdir_.str_.AsString();
    auto first_sep = include->chdir_plus_slash_.find('/');
    if (first_sep == std::string::npos) {
      first_sep = include->chdir_plus_slash_.size();
    }
#ifdef _WIN32
    auto first_win_sep = include->chdir_plus_slash_.find('\\');
    if (first_win_sep != std::string::npos && first_win_sep < first_sep) {
      first_sep = first_win_sep;
    }
#endif
    if ((first_sep == 1 && include->chdir_plus_slash_[0] == '.') ||
        (first_sep == 2 && !include->chdir_plus_slash_.compare(0, 2, "..")))
      return LexerError("invalid use of '.' or '..' in chdir");
    // The trailing '/' is added in manifest_parser.cc.
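    // In other words, the check above rejects values such as "chdir = ." or
    // "chdir = ../other" while allowing ordinary subdirectory paths like
    // "chdir = sub" or "chdir = sub/dir". Only the leading path component is
    // inspected here.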
    OutItem(include);
    return true;
  }

  OutItem(include);
  return true;
}

bool ChunkParser::ParsePool() {
  Pool* pool = new Pool();

  StringPiece name;
  if (!lexer_.ReadIdent(&name))
    return LexerError("expected pool name");
  pool->name_ = name;

  if (!ExpectToken(Lexer::NEWLINE))
    return false;
  pool->parse_state_.pool_name_diag_pos = lexer_.GetLastTokenOffset();

  while (lexer_.PeekIndent()) {
    StringPiece key;
    StringPiece value;
    if (!ParseLet(&key, &value))
      return false;
    if (key == "depth") {
      pool->parse_state_.depth = value;
      pool->parse_state_.depth_diag_pos = lexer_.GetLastTokenOffset();
    } else {
      return LexerError("unexpected variable '" + key.AsString() + "'");
    }
  }

  if (pool->parse_state_.depth.empty())
    return LexerError("expected 'depth =' line");

  Clump* clump = MakeClump();
  pool->pos_ = clump->AllocNextPos();
  clump->pools_.push_back(pool);
  return true;
}

bool ChunkParser::ParseDefault() {
  bool first = true;
  while (true) {
    LexedPath path;
    std::string err;
    if (!lexer_.ReadPath(&path, &err))
      return OutError(err);
    if (path.str_.empty()) {
      if (first)
        return LexerError("expected target name");
      else
        break;
    }
    first = false;

    DefaultTarget* target = new DefaultTarget();
    target->parsed_path_ = std::move(path);
    target->diag_pos_ = lexer_.GetLastTokenOffset();

    Clump* clump = MakeClump();
    target->pos_ = clump->AllocNextPos();
    clump->default_targets_.push_back(target);
  }
  return ExpectToken(Lexer::NEWLINE);
}

static const HashedStrView kNinjaRequiredVersion { "ninja_required_version" };

bool ChunkParser::ParseBinding() {
  Binding* binding = new Binding();
  lexer_.UnreadToken();

  StringPiece name;
  if (!ParseLet(&name, &binding->parsed_value_))
    return false;
  binding->name_ = name;

  // Record a ninja_required_version binding specially. We want to do this
  // version check ASAP in case we encounter unexpected syntax. We can't do the
  // check immediately because the version string is evaluated and might need
  // bindings from an earlier chunk. We could probably do this version check as
  // part of ordinary scope setup while processing a Clump, but keeping it
  // separate guarantees that the version check is done early enough.
  if (binding->name_ == kNinjaRequiredVersion) {
    OutItem(new RequiredVersion { binding->parsed_value_ });
  }

  Clump* clump = MakeClump();
  binding->pos_ = clump->AllocNextPos();
  clump->bindings_.push_back(binding);
  return true;
}

static const HashedStrView kRspFile { "rspfile" };
static const HashedStrView kRspFileContent { "rspfile_content" };
static const HashedStrView kCommand { "command" };

bool ChunkParser::ParseRule() {
  Rule* rule = new Rule();

  StringPiece rule_name;
  if (!lexer_.ReadIdent(&rule_name))
    return LexerError("expected rule name");
  rule->name_ = rule_name;

  if (!ExpectToken(Lexer::NEWLINE))
    return false;
  rule->parse_state_.rule_name_diag_pos = lexer_.GetLastTokenOffset();

  while (lexer_.PeekIndent()) {
    StringPiece key;
    StringPiece value;
    if (!ParseLet(&key, &value))
      return false;

    if (!Rule::IsReservedBinding(key)) {
      // Die on other keyvals for now; revisit if we want to add a scope here.
      // If we allow arbitrary key values here, we'll need to revisit how cycle
      // detection works when evaluating a rule variable.
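      // (Reserved bindings are the ones ninja itself consumes when running a
      // rule, e.g. command, description, depfile, deps, generator, pool,
      // restat, rspfile, and rspfile_content; see Rule::IsReservedBinding for
      // the authoritative list.)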
return LexerError("unexpected variable '" + key.AsString() + "'"); } rule->bindings_.emplace_back(std::piecewise_construct, std::tuple(key), std::tuple(value.data(), value.size())); } if (static_cast(rule->GetBinding(kRspFile)) != static_cast(rule->GetBinding(kRspFileContent))) { return LexerError("rspfile and rspfile_content need to be both specified"); } if (rule->GetBinding(kCommand) == nullptr) return LexerError("expected 'command =' line"); Clump* clump = MakeClump(); rule->pos_ = clump->AllocNextPos(); clump->rules_.push_back(rule); return true; } bool ChunkParser::ParseEdge() { Edge* edge = new Edge(); auto parse_path_list = [this, edge](Edge::DeferredPathList::Type type, int& count) -> bool { const char* start_pos = lexer_.GetPos(); while (true) { LexedPath path; std::string err; if (!lexer_.ReadPath(&path, &err)) return OutError(err); if (path.str_.empty()) { // In the initial parsing pass, the manifest's bindings aren't ready // yet, so paths can't be evaluated. Rather than store the path itself // (wasting memory), store just enough information to parse the path // lists again once bindings are ready. edge->parse_state_.deferred_path_lists.push_back({ start_pos, type, count, }); return true; } count++; } }; if (!parse_path_list(Edge::DeferredPathList::OUTPUT, edge->explicit_outs_)) return false; // Add all implicit outs, counting how many as we go. if (lexer_.PeekToken(Lexer::PIPE)) { if (!parse_path_list(Edge::DeferredPathList::OUTPUT, edge->implicit_outs_)) return false; } if (edge->explicit_outs_ + edge->implicit_outs_ == 0) return LexerError("expected path"); if (!ExpectToken(Lexer::COLON)) return false; StringPiece rule_name; if (!lexer_.ReadIdent(&rule_name)) return LexerError("expected build command name"); edge->parse_state_.rule_name = rule_name; edge->parse_state_.rule_name_diag_pos = lexer_.GetLastTokenOffset(); if (!parse_path_list(Edge::DeferredPathList::INPUT, edge->explicit_deps_)) return false; // Add all implicit deps, counting how many as we go. if (lexer_.PeekToken(Lexer::PIPE)) { if (!parse_path_list(Edge::DeferredPathList::INPUT, edge->implicit_deps_)) return false; } // Add all order-only deps, counting how many as we go. if (lexer_.PeekToken(Lexer::PIPE2)) { if (!parse_path_list(Edge::DeferredPathList::INPUT, edge->order_only_deps_)) return false; } // Add all validation deps, counting how many as we go. if (lexer_.PeekToken(Lexer::PIPEAT)) { if (!parse_path_list(Edge::DeferredPathList::VALIDATION, edge->validation_deps_)) return false; } if (!ExpectToken(Lexer::NEWLINE)) return false; while (lexer_.PeekIndent()) { StringPiece key; StringPiece val; if (!ParseLet(&key, &val)) return false; std::tuple val_ctor_params(val.data(), val.size()); edge->unevaled_bindings_.emplace_back(std::piecewise_construct, std::tuple(key), val_ctor_params); } edge->parse_state_.final_diag_pos = lexer_.GetLastTokenOffset(); Clump* clump = MakeClump(); edge->pos_ = clump->AllocNextPos(); // Can't edge->onPosResolvedToScope(clump->pos_.scope.scope) right here, since // clump->pos_.scope.scope is not set until DfsParser::HandleClump(). 
clump->edges_.push_back(edge); clump->edge_output_count_ += edge->explicit_outs_; return true; } bool ChunkParser::ParseChunk() { while (true) { if (lexer_.GetPos() >= chunk_end_) { assert(lexer_.GetPos() == chunk_end_ && "lexer scanned beyond the end of a manifest chunk"); return true; } Lexer::Token token = lexer_.ReadToken(); bool success = true; switch (token) { case Lexer::INCLUDE: success = ParseFileInclude(false); break; case Lexer::SUBNINJA: success = ParseFileInclude(true); break; case Lexer::POOL: success = ParsePool(); break; case Lexer::DEFAULT: success = ParseDefault(); break; case Lexer::IDENT: success = ParseBinding(); break; case Lexer::RULE: success = ParseRule(); break; case Lexer::BUILD: success = ParseEdge(); break; case Lexer::NEWLINE: break; case Lexer::ERROR: return LexerError(lexer_.DescribeLastError()); case Lexer::TNUL: return LexerError("unexpected NUL byte"); case Lexer::TEOF: assert(false && "EOF should have been detected before reading a token"); break; default: return LexerError(std::string("unexpected ") + Lexer::TokenName(token)); } if (!success) return false; } return false; // not reached } void ParseChunk(const LoadedFile& file, StringPiece chunk_content, std::vector* out, bool experimentalEnvvar) { ChunkParser parser(file, chunk_content, out, experimentalEnvvar); if (!parser.ParseChunk()) { assert(!out->empty()); assert(out->back().kind == ParserItem::kError); assert(!out->back().u.error->msg_.empty()); } } std::vector SplitManifestIntoChunks(StringPiece input) { METRIC_RECORD(".ninja load : split manifest"); // The lexer requires that a NUL character appear after the file's content. // Make the NUL implicit for the rest of the manifest parsing. // // In general, parsing a manifest chunk examines the terminating text after // the chunk's explicit end. For example, suppose we have two consecutive // chunks: // - "build foo: gen\n pool = mypool\n" // - "build bar: gen\n" // The parser for the "foo" chunk will examine the start of "build" from the // "bar" chunk when it looks for another indented edge binding. assert(!input.empty() && input.back() == '\0' && "input lacks a NUL terminator"); input.remove_suffix(1); // For efficiency, avoid splitting small files. kChunkMinSize can be lowered // to 0 for testing. const size_t kChunkMinSize = 1024 * 1024; const size_t chunk_count = GetOptimalThreadPoolJobCount(); const size_t chunk_size = std::max(kChunkMinSize, input.size() / chunk_count + 1); std::vector result; result.reserve(chunk_count); size_t start = 0; while (start < input.size()) { size_t next = std::min(start + chunk_size, input.size()); next = AdvanceToNextManifestChunk(input, next); result.push_back(input.substr(start, next - start)); start = next; } return result; } } // namespace manifest_chunk