// (C) Copyright 2017, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <allheaders.h>
#include <string> // for std::string

#include "include_gunit.h"

#include <tesseract/baseapi.h>
#include <tesseract/osdetect.h>
#include "colfind.h"
#include "log.h" // for LOG
#include "mutableiterator.h"
#include "pageres.h"
#include "tesseractclass.h"
#include "textlineprojection.h"

namespace tesseract {

// Lowest projection score at which a textline counts as a STRONG_CHAIN.
// NOTE: This duplicates the value used in textlineprojection.cc; change both
// together.
constexpr int kMinStrongTextValue = 6;

// The fixture for testing Tesseract.
class TextlineProjectionTest : public testing::Test {
protected:
  std::string OutputNameToPath(const std::string &name) {
    file::MakeTmpdir();
    return file::JoinPath(FLAGS_test_tmpdir, name);
  }

  TextlineProjectionTest() {
    src_pix_ = nullptr;
    bin_pix_ = nullptr;
    finder_ = nullptr;
    denorm_ = nullptr;
    projection_ = nullptr;
  }
  ~TextlineProjectionTest() override {
    src_pix_.destroy();
    bin_pix_.destroy();
    delete finder_;
  }

  void SetImage(const char *filename) {
    src_pix_.destroy();
    src_pix_ = pixRead(file::JoinPath(TESTING_DIR, filename).c_str());
    api_.Init(TESSDATA_DIR, "eng", tesseract::OEM_TESSERACT_ONLY);
    api_.SetPageSegMode(tesseract::PSM_AUTO_OSD);
    api_.SetImage(src_pix_);
  }

  // Ugly hacked-together function sets up projection_ and denorm_ by setting
  // up for auto pagelayout, setting up a ColumnFinder, running it, and
  // using accessors to get at the internal denorm and projection.
  // If the coordinates have been rotated, the denorm should match
  // correctly and transform coordinates back to the projection.
  // We throw away all the blocks, blobs etc, and test the projection with
  // the resultiterator from a separate BaseAPI run.
  void SetupProjection() {
    tesseract::TessdataManager mgr;
    auto osd_tess = std::make_unique<Tesseract>();
    OSResults osr;
    EXPECT_EQ(osd_tess->init_tesseract(TESSDATA_DIR, "", "osd", tesseract::OEM_TESSERACT_ONLY,
                                       nullptr, 0, nullptr, nullptr, false, &mgr),
              0);
    tesseract_ = std::make_unique<Tesseract>();
    EXPECT_EQ(tesseract_->init_tesseract(TESSDATA_DIR, "", "eng", tesseract::OEM_TESSERACT_ONLY,
                                         nullptr, 0, nullptr, nullptr, false, &mgr),
              0);
    bin_pix_ = api_.GetThresholdedImage();
    *tesseract_->mutable_pix_binary() = bin_pix_.clone();
    osd_tess->set_source_resolution(api_.tesseract()->source_resolution());
    tesseract_->set_source_resolution(api_.tesseract()->source_resolution());
    int width = pixGetWidth(bin_pix_);
    int height = pixGetHeight(bin_pix_);
    // First make a single block covering the whole image.
    auto *block = new BLOCK("", true, 0, 0, 0, 0, width, height);
    block->set_right_to_left(false);
    BLOCK_LIST src_blocks;
    BLOCK_IT block_it(&src_blocks);
    block_it.add_to_end(block);
    Image photomask_pix = nullptr;
    // The blocks made by the ColumnFinder. Moved to blocks before return.
    BLOCK_LIST found_blocks;
    TO_BLOCK_LIST temp_blocks;
    finder_ =
        tesseract_->SetupPageSegAndDetectOrientation(tesseract::PSM_AUTO_OSD, &src_blocks, osd_tess.get(),
                                                     &osr, &temp_blocks, &photomask_pix, nullptr);
    TO_BLOCK_IT to_block_it(&temp_blocks);
    TO_BLOCK *to_block = to_block_it.data();
    denorm_ = finder_->denorm();
    TO_BLOCK_LIST to_blocks;
    BLOBNBOX_LIST diacritic_blobs;
    EXPECT_GE(finder_->FindBlocks(tesseract::PSM_AUTO, nullptr, 1, to_block, photomask_pix, nullptr,
                                  nullptr, nullptr, &found_blocks, &diacritic_blobs, &to_blocks),
              0);
    projection_ = finder_->projection();
    photomask_pix.destroy();
  }

  // Helper evaluates the given box, expects the result to be greater_than
  // or !greater_than the target_value and provides diagnostics if not.
  void EvaluateBox(const TBOX &box, bool greater_or_equal, int target_value, const char *text,
                   const char *message) {
    int value = projection_->EvaluateBox(box, denorm_, false);
    if (greater_or_equal != (value > target_value)) {
      LOG(INFO) << "EvaluateBox too " << (greater_or_equal ? "low" : "high")
        << ":" << value << " vs " << target_value << " for " << message << " word '" << text << "' at:";
      box.print();
      value = projection_->EvaluateBox(box, denorm_, true);
    } else {
      LOG(INFO) << "EvaluateBox OK(" << value << ") for " << message << " word '" << text << "'";
    }
    if (greater_or_equal) {
      EXPECT_GE(value, target_value);
    } else {
      EXPECT_LT(value, target_value);
    }
  }

  // Helper evaluates the DistanceOfBoxFromBox function by expecting that
  // box should be nearer to true_box than false_box.
  void EvaluateDistance(const TBOX &box, const TBOX &true_box, const TBOX &false_box,
                        const char *text, const char *message) {
    int true_dist = projection_->DistanceOfBoxFromBox(box, true_box, true, denorm_, false);
    int false_dist = projection_->DistanceOfBoxFromBox(box, false_box, true, denorm_, false);
    if (false_dist <= true_dist) {
      LOG(INFO) << "Distance wrong:" << false_dist << " vs " << true_dist
        << " for " << message << " word '" << text << "' at:";
      true_box.print();
      projection_->DistanceOfBoxFromBox(box, true_box, true, denorm_, true);
      projection_->DistanceOfBoxFromBox(box, false_box, true, denorm_, true);
    } else {
      LOG(INFO) << "Distance OK(" << false_dist << " vs " << true_dist
        << ") for " << message << " word '" << text << "'";
    }
  }

  // Tests the projection on the word boxes of the given image.
  // line_height is the cap + descender size of the text.
  void VerifyBoxes(const char *imagefile, int line_height) {
    SetImage(imagefile);
    api_.Recognize(nullptr);
    SetupProjection();
    MutableIterator *it = api_.GetMutableIterator();
    do {
      char *text = it->GetUTF8Text(tesseract::RIL_WORD);
      const PAGE_RES_IT *pr_it = it->PageResIt();
      WERD_RES *word = pr_it->word();
      // The word_box refers to the internal, possibly rotated, coords.
      TBOX word_box = word->word->bounding_box();
      bool small_word = word_box.height() * 1.5 < line_height;
      bool tall_word = word_box.height() * 1.125 > line_height;
      // We pad small and tall words differently because ascenders and
      // descenders affect the position and size of the upper/lower boxes.
      int padding;
      if (small_word) {
        padding = word_box.height();
      } else if (tall_word) {
        padding = word_box.height() / 3;
      } else {
        padding = word_box.height() / 2;
      }
      // Test that the word box gets a good score.
      EvaluateBox(word_box, true, kMinStrongTextValue, text, "Real Word");

      // Now test a displaced box, both above and below the word.
      TBOX upper_box(word_box);
      upper_box.set_bottom(word_box.top());
      upper_box.set_top(word_box.top() + padding);
      EvaluateBox(upper_box, false, kMinStrongTextValue, text, "Upper Word");
      EvaluateBox(upper_box, true, -1, text, "Upper Word not vertical");
      TBOX lower_box = word_box;
      lower_box.set_top(word_box.bottom());
      lower_box.set_bottom(word_box.bottom() - padding);
      if (tall_word) {
        lower_box.move(ICOORD(0, padding / 2));
      }
      EvaluateBox(lower_box, false, kMinStrongTextValue, text, "Lower Word");
      EvaluateBox(lower_box, true, -1, text, "Lower Word not vertical");

      // Since some words have no text below and some words have no text above
      // check that at least one of the boxes satisfies BoxOutOfTextline.
      bool upper_or_lower_out_of_textline =
          projection_->BoxOutOfHTextline(upper_box, denorm_, false) ||
          projection_->BoxOutOfHTextline(lower_box, denorm_, false);
      if (!upper_or_lower_out_of_textline) {
        projection_->BoxOutOfHTextline(upper_box, denorm_, true);
        projection_->BoxOutOfHTextline(lower_box, denorm_, true);
      }
      EXPECT_TRUE(upper_or_lower_out_of_textline);

      // Now test DistanceOfBoxFromBox by faking a challenger word, and asking
      // that each pad box be nearer to its true textline than the
      // challenger. Due to the tight spacing of latin text, getting
      // the right position and size of these test boxes is quite fiddly.
      padding = line_height / 4;
      upper_box.set_top(upper_box.bottom() + padding);
      TBOX target_box(word_box);
      if (!small_word) {
        upper_box.move(ICOORD(0, -padding * 3 / 2));
      }
      target_box.set_top(upper_box.bottom());
      TBOX upper_challenger(upper_box);
      upper_challenger.set_bottom(upper_box.top());
      upper_challenger.set_top(upper_box.top() + word_box.height());
      EvaluateDistance(upper_box, target_box, upper_challenger, text, "Upper Word");
      if (tall_word) {
        lower_box.move(ICOORD(0, padding / 2));
      }
      lower_box.set_bottom(lower_box.top() - padding);
      target_box = word_box;
      target_box.set_bottom(lower_box.top());
      TBOX lower_challenger(lower_box);
      lower_challenger.set_top(lower_box.bottom());
      lower_challenger.set_bottom(lower_box.bottom() - word_box.height());
      EvaluateDistance(lower_box, target_box, lower_challenger, text, "Lower Word");

      delete[] text;
    } while (it->Next(tesseract::RIL_WORD));
    delete it;
  }

  Image src_pix_;
  Image bin_pix_;
  BLOCK_LIST blocks_;
  std::string ocr_text_;
  tesseract::TessBaseAPI api_;
  std::unique_ptr<Tesseract> tesseract_;
  ColumnFinder *finder_;
  const DENORM *denorm_;
  const TextlineProjection *projection_;
};

// Runs the word-box checks on the upright test page.
TEST_F(TextlineProjectionTest, Unrotated) {
  const char *const image_name = "phototest.tif";
  // Cap + descender size of the text in this image, in pixels.
  const int text_line_height = 31;
  VerifyBoxes(image_name, text_line_height);
}

// Runs the same word-box checks as the Unrotated test on the rotated test
// page.
TEST_F(TextlineProjectionTest, Rotated) {
  const char *const image_name = "phototestrot.tif";
  // Cap + descender size of the text in this image, in pixels.
  const int text_line_height = 31;
  VerifyBoxes(image_name, text_line_height);
}

} // namespace tesseract
