< iOS samples

TextSearchTest - Swift

This sample shows how to use TextSearch to search for text on PDF pages using regular expressions. The TextSearch utility class builds on the functionality available in TextExtractor to simplify the most common search operations.

// Copyright (c) 2001-2017 by PDFTron Systems Inc. All Rights Reserved.
// Consult legal.txt regarding legal and license information.

import PDFNet
import Foundation

// This sample illustrates the basic text search capabilities of PDFNet.

/// Runs the PDFNet text-search sample against the bundled "credit card numbers.pdf".
///
/// The search is driven as a small state machine (`step`):
///  * step 0 – find the whole-word match for "joHn sMiTh";
///  * step 1 – switch to a regular expression to find his credit card number and
///             print the page of every highlight;
///  * step 2 – look for an AMEX-style card number;
///  * step 3 – search upwards for the owner's name, attach a URI link annotation
///             over each highlighted quad, save the document and stop.
///
/// - Returns: `0` on success, `1` if the input PDF is missing from the bundle or
///   PDFNet raised an exception.
func runTextSearchTest() -> Int {
    return autoreleasepool { () -> Int in
        var ret: Int = 0

        // Without the bundled resource there is nothing to search; fail early
        // instead of handing a nil path to PDFNet.
        guard let input_path = Bundle.main.path(forResource: "credit card numbers", ofType: "pdf") else {
            print("Unable to locate \"credit card numbers.pdf\" in the main bundle.")
            return 1
        }

        do {
            try PTPDFNet.catchException {
                let doc: PTPDFDoc! = PTPDFDoc(filepath: input_path)
                let txt_search: PTTextSearch! = PTTextSearch()
                var mode = e_ptwhole_word.rawValue | e_ptpage_stop.rawValue
                var pattern = "joHn sMiTh"

                // Call begin() to initialize the text search.
                txt_search.begin(doc, pattern: pattern, mode: mode, start_page: -1, end_page: -1)

                var step: Int = 0

                // Call run() iteratively to find all matching instances.
                searchLoop: while true {
                    // A nil result means there is nothing further to inspect.
                    guard let result = txt_search.run() else {
                        break searchLoop
                    }

                    if result.isFound() {
                        let match = result.getMatch() ?? ""

                        if step == 0 {
                            // Step 0: found "John Smith".
                            // Note that 'ambient_string' and 'hlts' are not written to here,
                            // as 'e_ambient_string' and 'e_highlight' are not set.
                            print("\(match)'s credit card number is: ")

                            // Now switch to regular expressions to find John's credit card number.
                            mode = txt_search.getMode()
                            mode |= e_ptreg_expression.rawValue | e_pthighlight.rawValue
                            txt_search.setMode(mode)
                            pattern = "\\d{4}-\\d{4}-\\d{4}-\\d{4}"
                            // or "(\\d{4}-){3}\\d{4}"
                            txt_search.setPattern(pattern)
                            step += 1
                        } else if step == 1 {
                            // Step 1: found John's credit card number.
                            print("  \(match)")

                            // 'hlts' is written to here because 'e_highlight' has been set;
                            // output the highlight info of the credit card number.
                            let hlts: PTHighlights = result.getHighlights()
                            hlts.begin(doc)
                            while hlts.hasNext() {
                                print("The current highlight is from page: \(hlts.getCurrentPageNumber())")
                                // Advance the iterator, otherwise this loop never terminates.
                                hlts.next()
                            }

                            // See if there is an AMEX card number.
                            pattern = "\\d{4}-\\d{6}-\\d{5}"
                            txt_search.setPattern(pattern)
                            step += 1
                        } else if step == 2 {
                            // Step 2: found an AMEX card number.
                            print("There is an AMEX card number: \(match)")

                            // Change mode to find the owner of the credit card; supposedly,
                            // the owner's name precedes the number, so search upwards.
                            mode = txt_search.getMode()
                            mode |= e_ptsearch_up.rawValue
                            txt_search.setMode(mode)
                            pattern = "[A-z]++ [A-z]++"
                            txt_search.setPattern(pattern)
                            step += 1
                        } else if step == 3 {
                            // Step 3: found the owner's name of the AMEX card.
                            print("Is the owner's name: \(match)?")

                            // Add a link annotation based on the location of the found instance.
                            let hlts: PTHighlights = result.getHighlights()
                            hlts.begin(doc)
                            while hlts.hasNext() {
                                let cur_page: PTPage = doc.getPage(UInt32(hlts.getCurrentPageNumber()))
                                let quads: PTVectorQuadPoint = hlts.getCurrentQuads()
                                for i in 0..<quads.size() {
                                    // Assume each quad is an axis-aligned rectangle and take
                                    // the bounding box of its four corners.
                                    let q: PTQuadPoint = quads.get(Int32(i))
                                    let x1: Double = min(q.getP1().getX(), q.getP2().getX(), q.getP3().getX(), q.getP4().getX())
                                    let x2: Double = max(q.getP1().getX(), q.getP2().getX(), q.getP3().getX(), q.getP4().getX())
                                    let y1: Double = min(q.getP1().getY(), q.getP2().getY(), q.getP3().getY(), q.getP4().getY())
                                    let y2: Double = max(q.getP1().getY(), q.getP2().getY(), q.getP3().getY(), q.getP4().getY())
                                    let rect = PTPDFRect(x1: x1, y1: y1, x2: x2, y2: y2)
                                    let action = PTAction.createURI(doc.getSDFDoc(), uri: "http://www.pdftron.com")
                                    let hyper_link = PTLink.create(withAction: doc.getSDFDoc(), pos: rect, action: action)
                                    // Attach the link to the page; otherwise it is never saved.
                                    cur_page.annotPushBack(hyper_link)
                                }
                                hlts.next()
                            }

                            let documentsDir = NSSearchPathForDirectoriesInDomains(.documentDirectory, .userDomainMask, true)[0]
                            let outputPath = URL(fileURLWithPath: documentsDir).appendingPathComponent("credit card numbers_linked.pdf").path
                            doc.save(toFile: outputPath, flags: e_ptlinearized.rawValue)

                            // All four steps are done; stop searching.
                            break searchLoop
                        }
                    } else if result.isPageEnd() {
                        // You can update your UI here if needed.
                    } else {
                        // Neither a match nor a page boundary: the search is finished.
                        break searchLoop
                    }
                }
            }
        } catch let e as NSError {
            // Report the failure instead of swallowing it silently.
            print("Caught PDFNet exception: \(e)")
            ret = 1
        }

        return ret
    }
}