Some test text!

Loading...
Guides
Text search

Search for text in a PDF in Java

This guide shows how to search for text in a PDF using a regular expression and then apply a link annotation to the highlighted result.

In this example we add a link annotation, but any other type of annotation can be applied here instead, such as redaction annotations in a search-and-redact workflow.
// Searches `doc` for credit-card-style numbers with a regular expression and
// places a link annotation over every highlighted match.
// `filename` is expected to be supplied by the surrounding code.
PDFDoc doc = new PDFDoc(filename);
TextSearch txt_search = new TextSearch();

// Whole-word regular-expression search. e_highlight is needed so each result
// carries the highlight quads used below. With e_page_stop set, run() can also
// return at page boundaries (e_page), which is why the results are consumed in
// a loop rather than with a single run() call.
int mode = TextSearch.e_whole_word | TextSearch.e_page_stop
    | TextSearch.e_reg_expression | TextSearch.e_highlight;

//use regular expression to find credit card number
String pattern = "\\d{4}-\\d{4}-\\d{4}-\\d{4}"; //or "(\\d{4}-){3}\\d{4}"

//call Begin() method to initialize the text search.
// The pattern and mode are passed here directly: begin() is what initializes
// the search, so it must receive the real pattern rather than an empty string.
// NOTE(review): -1, -1 are kept from the original start/end page arguments;
// presumably they select the whole document - confirm against the SDK docs.
txt_search.begin(doc, pattern, mode, -1, -1);

boolean searching = true;
while (searching) {
  TextSearchResult result = txt_search.run();

  if (result.getCode() == TextSearchResult.e_found) {
    //add a link annotation based on the location of the found instance
    Highlights hlts = result.getHighlights();
    hlts.begin(doc);
    while (hlts.hasNext()) {
      Page cur_page = doc.getPage(hlts.getCurrentPageNumber());
      // Flat coordinate array: 8 values (4 x/y pairs) per quad.
      double[] q = hlts.getCurrentQuads();
      int quad_count = q.length / 8;
      for (int i = 0; i < quad_count; ++i) {
        //assume each quad is an axis-aligned rectangle
        // Bounding box of the quad: even offsets are x, odd offsets are y.
        int offset = 8 * i;
        double x1 = Math.min(Math.min(Math.min(q[offset + 0], q[offset + 2]), q[offset + 4]), q[offset + 6]);
        double x2 = Math.max(Math.max(Math.max(q[offset + 0], q[offset + 2]), q[offset + 4]), q[offset + 6]);
        double y1 = Math.min(Math.min(Math.min(q[offset + 1], q[offset + 3]), q[offset + 5]), q[offset + 7]);
        double y2 = Math.max(Math.max(Math.max(q[offset + 1], q[offset + 3]), q[offset + 5]), q[offset + 7]);
        // Fully qualified name: Java cannot resolve the partial package
        // reference `annots.Link`.
        com.pdftron.pdf.annots.Link hyper_link = com.pdftron.pdf.annots.Link.create(
            doc, new Rect(x1, y1, x2, y2), Action.createURI(doc, "http://www.pdftron.com"));
        cur_page.annotPushBack(hyper_link);
      }
      hlts.next();
    }
  } else if (result.getCode() == TextSearchResult.e_done) {
    // Nothing left to search.
    searching = false;
  }
  // Any other code (e.g. e_page from e_page_stop) just means "keep going".
}

Search PDF files for text
Full code sample which shows how to use TextSearch to search text on PDF pages using regular expressions.

Get the answers you need: Support