< iOS samples

ElementReaderAdvTest - Swift

The sample shows how to use some of the more advanced PDFNet features. The sample code illustrates how to extract text, paths, and images. The sample also shows how to do color conversion, image normalization, and how to process changes in the graphics state.

//---------------------------------------------------------------------------------------
// Copyright (c) 2001-2017 by PDFTron Systems Inc. All Rights Reserved.
// Consult legal.txt regarding legal and license information.
//---------------------------------------------------------------------------------------

import PDFNet
import Foundation

/// Prints a textual description of a path element and walks the graphics-state
/// changes the reader recorded for it.
///
/// The path's operators are decoded into an SVG-like string (`M`, `L`, `C`,
/// rectangle expanded to `M … L … L … L … Z`, and `Close Path`), followed by
/// notes on whether the path is stroked/filled and whether a pattern color
/// space is involved. If a changed fill color refers to a non-shading pattern,
/// the pattern's own content is processed recursively via `ProcessElements`.
///
/// - Parameters:
///   - reader: The element reader that produced `path`; used for the
///     graphics-state change list and for descending into patterns.
///   - path: The path element to describe.
func ProcessPath(reader: PTElementReader, path: PTElement) {
    if path.isClippingPath() {
        print("This is a clipping path")
    }

    let pathData: PTPathData = path.getPathData()
    let points: NSMutableArray = pathData.getPoints()
    let operators: Data = pathData.getOperators()

    // Coordinates are consumed strictly in order; every operator takes a fixed
    // number of them from the points array.
    var pointIndex = 0
    func nextCoordinate() -> Double {
        defer { pointIndex += 1 }
        return points[pointIndex] as! Double
    }

    // Use path.GetCTM() if you are interested in CTM (current transformation matrix).

    var str = " Path Data Points := \""

    decoding: for op in operators {
        switch PTPathSegmentType(rawValue: UInt32(op)) {
        case e_ptmoveto:
            let x = nextCoordinate()
            let y = nextCoordinate()
            str += String(format: "M%.5g %.5g", x, y)
        case e_ptlineto:
            let x = nextCoordinate()
            let y = nextCoordinate()
            str += String(format: " L%.5g %.5g", x, y)
        case e_ptcubicto:
            let x1 = nextCoordinate()
            let y1 = nextCoordinate()
            let x2 = nextCoordinate()
            let y2 = nextCoordinate()
            let x3 = nextCoordinate()
            let y3 = nextCoordinate()
            str += String(format: " C%.5g %.5g %.5g %.5g %.5g %.5g", x1, y1, x2, y2, x3, y3)
        case e_ptrect:
            let x = nextCoordinate()
            let y = nextCoordinate()
            let w = nextCoordinate()
            let h = nextCoordinate()
            // Expand the rectangle (origin + size) into its four corners.
            str += String(format: "M%.5g %.5g L%.5g %.5g L%.5g %.5g L%.5g %.5g Z",
                          x, y, x + w, y, x + w, y + h, x, y + h)
        case e_ptclosepath:
            str += " Close Path"
        default:
            // An operator we do not know consumes an unknown number of
            // coordinates, so everything after it would be read misaligned.
            // Flag it in debug builds and stop decoding in release builds.
            assertionFailure("Unexpected path segment operator \(op)")
            break decoding
        }
    }

    str += "\" "

    let gs: PTGState = path.getGState()

    // Set Path State 0 (stroke, fill, fill-rule) -----------------------------------
    // Each note goes on its own line so consecutive notes do not run together.
    var notes: [String] = []

    if path.isStroked() {
        notes.append("Stroke path")
        if gs.getStrokeColorSpace().getType() == e_ptpattern {
            notes.append("Path has associated pattern")
        }
        // Otherwise get the stroke color (you can use PDFNet color conversion facilities):
        // let rgb: PTColorPt = gs.getStrokeColorSpace().convert2RGB(gs.getStrokeColor())
    }

    if path.isFilled() {
        notes.append("Fill path")
        if gs.getFillColorSpace().getType() == e_ptpattern {
            notes.append("Path has associated pattern")
        }
        // Otherwise:
        // let rgb: PTColorPt = gs.getFillColorSpace().convert2RGB(gs.getFillColor())
    }

    // Process any changes in graphics state  ---------------------------------

    let gs_itr: PTGSChangesIterator = reader.getChangesIterator()

    while gs_itr.hasNext() {
        switch PTGStateAttribute(rawValue: UInt32(gs_itr.current())) {
        case e_pttransform:
            // Get transform matrix for this element. Unlike path.GetCTM()
            // that return full transformation matrix gs.GetTransform() return
            // only the transformation matrix that was installed for this element.
            //
            // gs.getTransform()
            break
        case e_ptline_width:
            // gs.getLineWidth()
            break
        case e_ptline_cap:
            // gs.getLineCap()
            break
        case e_ptline_join:
            // gs.getLineJoin()
            break
        case e_ptflatness:
            break
        case e_ptmiter_limit:
            // gs.GetmiterLimit()
            break
        case e_ptdash_pattern:
            // let dashes: NSMutableArray = gs.getDashes()
            // gs.getPhase()
            break
        case e_ptfill_color:
            if gs.getFillColorSpace().getType() == e_ptpattern && gs.getFillPattern().getType() != e_ptshading {
                // Process the pattern data: descend into the pattern's content
                // stream and return to the enclosing stream afterwards.
                reader.patternBegin(true, reset_ctm_tfm: false)
                ProcessElements(reader: reader)
                reader.end()
            }
        default:
            break
        }
        gs_itr.next()
    }
    reader.clearChangeList()

    // Printed last, so output from nested pattern content appears before the
    // description of the path that referenced it.
    print(str + notes.joined(separator: "\n"))
}

/// Processes one text block, consuming elements from `page_reader` until the
/// matching text-end element is reached.
///
/// For every text object the font name is printed. Type 3 fonts are handled by
/// descending into each glyph's content stream via `ProcessElements`; for all
/// other fonts the characters with codes in the range 32...255 are collected
/// and printed as one line.
///
/// - Parameter page_reader: The reader positioned just after a text-begin
///   element. If the reader runs out of elements before a text-end element is
///   seen, the function simply returns.
func ProcessText(page_reader: PTElementReader) {
    // Begin text element
    print("Begin Text Block:")

    while let element = page_reader.next() {
        switch element.getType() {
        case e_pttext_end:
            // Finish the text block
            print("End Text Block.")
            return
        case e_pttext_obj:
            let gs: PTGState = element.getGState()

            let cs_fill: PTColorSpace = gs.getFillColorSpace()
            let fill: PTColorPt = gs.getFillColor()

            let _: PTColorPt = cs_fill.convert2RGB(fill) // outColor

            let _: PTColorSpace = gs.getStrokeColorSpace()  // cs_stroke
            let _: PTColorPt = gs.getStrokeColor() // stroke

            let font: PTFont = gs.getFont()

            // Fall back to a placeholder instead of crashing on a missing name.
            let fontName: String = font.getName() ?? "(unnamed)"
            print("Font Name: \(fontName)\n")

            // font.IsFixedWidth();
            // font.IsSerif();
            // font.IsSymbolic();
            // font.IsItalic();
            // ...

            // double font_size = gs.GetFontSize();
            // double word_spacing = gs.GetWordSpacing();
            // double char_spacing = gs.GetCharSpacing();
            // const UString* txt = element.GetTextString();

            if font.getType() == e_ptType3 {
                // Type 3 font: every glyph is its own content stream, so
                // process each one like a nested page.
                let itr: PTCharIterator = element.getCharIterator()
                while itr.hasNext() {
                    page_reader.type3FontBegin(itr.current(), resource_dict: nil)
                    ProcessElements(reader: page_reader)
                    page_reader.end()
                    itr.next()
                }
            }
            else {
                // To get the exact character positioning information you need to
                // concatenate the current text matrix with the CTM and then multiply
                // the relative positioning coordinates with the resulting matrix.
                //
                // The combined matrix is built once per text object. PTMatrix2D is a
                // reference type, so concatenating inside the character loop would
                // keep folding the CTM into the same matrix for every character.
                let mtx: PTMatrix2D = element.getTextMatrix()
                let ctm: PTMatrix2D = element.getCTM()
                mtx.concat(ctm.getM_a(), b: ctm.getM_b(), c: ctm.getM_c(), d: ctm.getM_d(), h: ctm.getM_h(), v: ctm.getM_v())

                var str = ""
                let itr: PTCharIterator = element.getCharIterator()
                while itr.hasNext() {
                    let charCode: UInt32 = itr.current().getChar_code()
                    // Keep only codes in the printable single-byte range and
                    // append the character itself rather than its numeric code.
                    if charCode >= 32 && charCode <= 255, let scalar = UnicodeScalar(charCode) {
                        str.append(Character(scalar))
                    }

                    // Character positioning information, relative to the text run.
                    let x: Double = itr.current().getX()
                    let y: Double = itr.current().getY()
                    mtx.mult(PTPDFPoint(px: x, py: y))

                    // Get glyph path...
                    //vector<UChar> oprs;
                    //vector<double> glyph_data;
                    //font.GetGlyphPath(char_code, oprs, glyph_data, false, 0);
                    itr.next()
                }
                print("\(str)")
            }
        default:
            break
        }
    }
}

/// Prints the dimensions of an image element and reads its pixel data
/// converted to RGB.
///
/// The image is passed through the `PTImage2RGB` filter and
/// `width * height * 3` bytes are requested from it; the data itself is
/// discarded, since this sample only demonstrates the extraction.
///
/// - Parameter image: The image element to inspect.
func ProcessImage(image: PTElement) {
    let _: Bool = image.isImageMask()  // image_mask
    let _: Bool = image.isImageInterpolate()  // interpolate

    // Widen to Int before multiplying so that a large image cannot overflow a
    // narrower integer type (Swift traps on arithmetic overflow).
    let width = Int(image.getImageWidth())
    let height = Int(image.getImageHeight())
    let out_data_sz = width * height * 3

    print("Image:  width=\"\(width)\" height=\"\(height)\"")

    //let mtx: PTMatrix2D = image.getCTM()    // image matrix (page positioning info)
    // You can use GetImageData to read the raw (decoded) image data
    //image.getBitsPerComponent()
    //image.getImageData()    // get raw image data
    // .... or use Image2RGB filter that converts every image to RGB format,
    // This should save you time since you don't need to deal with color conversions,
    // image up-sampling, decoding etc.

    let img_conv = PTImage2RGB(image_element: image)    // Extract and convert image to RGB 8-bpc format
    let reader: PTFilterReader = PTFilterReader(filter: img_conv)

    // A buffer used to keep image data.
    let _: Data = reader.read(UInt(out_data_sz)) // image_data_out
    //  &image_data_out.front() contains RGB image data.

    // Note that you don't need to read a whole image at a time. Alternatively
    // you can read a chunk at a time by repeatedly calling reader.Read(buf, buf_sz)
    // until the function returns 0.
}

/// Drains `reader`, dispatching each element to the matching handler.
///
/// Paths, text blocks and images go to `ProcessPath`, `ProcessText` and
/// `ProcessImage`. Form XObjects are entered with `formBegin()`, processed
/// recursively, and left again with `end()`. Every other element type is
/// ignored.
///
/// - Parameter reader: The element reader to consume until `next()` yields
///   no further element.
func ProcessElements(reader: PTElementReader) {
    while let current = reader.next() {
        let kind = current.getType()

        if kind == e_ptpath {
            // Path data
            ProcessPath(reader: reader, path: current)
        } else if kind == e_pttext_begin {
            // Text block; ProcessText consumes elements up to the text end
            ProcessText(page_reader: reader)
        } else if kind == e_ptform {
            // Form XObject: descend into its content, then come back
            reader.formBegin()
            ProcessElements(reader: reader)
            reader.end()
        } else if kind == e_ptimage {
            // Image
            ProcessImage(image: current)
        }
    }
}

/// Runs the sample: opens the bundled "newsletter.pdf" and prints element
/// information for every page, starting at page 1.
///
/// - Returns: `0` when the run completes, `1` when an error surfaces from the
///   `PTPDFNet.catchException` block (the error is printed).
func runElementReaderAdvTest() -> Int {
    return autoreleasepool {
        var status: Int = 0

        do {
            try PTPDFNet.catchException {
                // Banner
                print("__________________________________________________")
                print("Extract page element information from all ")
                print("pages in the document.")

                let inputPath = Bundle.main.path(forResource: "newsletter", ofType: "pdf")
                let document: PTPDFDoc = PTPDFDoc(filepath: inputPath)
                document.initSecurityHandler()

                // One reader is reused for all pages.
                let elementReader: PTElementReader = PTElementReader()

                let pages: PTPageIterator = document.getPageIterator(1)
                while pages.hasNext() {
                    print("Page \(pages.current().getIndex())----------------------------------------")
                    elementReader.begin(pages.current())
                    ProcessElements(reader: elementReader)
                    elementReader.end()
                    pages.next()
                }

                print("Done.")
            }
        } catch let failure as NSError {
            print("\(failure)")
            status = 1
        }

        return status
    }
}