As of v2017R1, the text position objects returned when searching for text in a PDF document with Qoppa's PDF library jPDFProcess (using the PDFPage.findText() method) are given in crop box coordinates (i.e., in view or display coordinates, as when the page is displayed on the screen). For most PDF documents, the media box and the crop box are the same. But when a document is cropped, part of each page can be “hidden”, and some search results might not be visible on the displayed page. Here is some sample Java code that determines whether the text occurrences found are visible (within the crop box).
// Open the document PDFDocument inDoc = new PDFDocument ("c:/test_cropped.pdf", null); // Loop through the pages, searching for text for (int pageIx = 0; pageIx < inDoc.getPageCount(); ++pageIx) { // Get Current Page PDFPage page = inDoc.getPage (pageIx); // Search for the text in a page Vector<TextPosition> searchResults = page.findText("MyText", false, false); System.out.println ("Page " + pageIx + " - Found " + searchResults.size() + " instances"); // output page crop box Rectangle2D cropBox = page.getCropBox(); System.out.println("Crop Box " + cropBox); if (searchResults.size () > 0) { for (int count = 0; count < searchResults.size(); ++count) { // Get the position of the text TextPosition textPos = (TextPosition)searchResults.get (count); System.out.println("Search text bounds"); // the text position in given in cropbox / view coordinates // determine if the text result is visible (within the cropbox) // by looping through the point2d which are in crop box coordinates // and making sure they are within the cropbox bounds boolean textVisible = true; for(int i = 0; i < textPos.getQuadrilateral().length; i++) { Point2D point = textPos.getQuadrilateral()[i]; System.out.println(point); // determine if the point is in the crop box boolean xin = (0.0 <= point.getX()) && (point.getX() <= cropBox.getWidth()); boolean yin = (0.0 <= point.getY()) && (point.getY() <= cropBox.getHeight()); textVisible = textVisible && xin && yin; } System.out.println("Search text is visible (within cropbox) " + textVisible); } } |