//--------------------------------------------------------------------------------------- // Copyright (c) 2001-2008 by PDFTron Systems Inc. All Rights Reserved. // Consult legal.txt regarding legal and license information. //--------------------------------------------------------------------------------------- import java.util.Map; import java.util.TreeMap; import pdftron.Common.PDFNetException; import pdftron.PDF.Struct.*; import pdftron.PDF.*; import pdftron.SDF.Obj; //--------------------------------------------------------------------------------------- // This sample explores the structure and content of a tagged PDF document and dumps // the structure information to the console window. // // In tagged PDF documents StructTree acts as a central repository for information // related to a PDF document's logical structure. The tree consists of StructElement-s // and ContentItem-s which are leaf nodes of the structure tree. // // The sample can be extended to access and extract the marked-content elements such // as text and images. //--------------------------------------------------------------------------------------- public class LogicalStructureTest { static void PrintIndent(int indent) { System.out.println(); for (int i=0; i MCIDPageMap; //typedef map MCIDDocMap; // Used in code snippet 3. static void ProcessElements2(ElementReader reader, Map mcid_page_map) throws PDFNetException { Element element; while ((element = reader.next())!=null) // Read page contents { // In this sample we process only text, but the code can be extended // to handle paths, images, or any other Element type. int mcid = element.getStructMCID(); Integer key_mcid=new Integer(mcid); if (mcid>= 0 && element.getType() == Element.e_text) { String val = element.getTextString(); if (mcid_page_map.containsKey(key_mcid)) mcid_page_map.put(key_mcid, ((String)(mcid_page_map.get(key_mcid))+ val)) ; else mcid_page_map.put(key_mcid, val); } } } // Used in code snippet 3. static void ProcessStructElement2(SElement element, Map mcid_doc_map, int indent) throws PDFNetException { if (!element.isValid()) { return; } // Print out the type and title info, if any. PrintIndent(indent); System.out.print("<" + element.getType()); if (element.hasTitle()) { System.out.print(" title=\""+ element.getTitle() + "\""); } System.out.print(">"); int num = element.getNumKids(); for (int i=0; i"); } /** * @param args */ public static void main(String[] args) { PDFNet.initialize(); PDFNet.setResourcesPath("../../../resources"); // Relative path to the folder containing test files. String input_path = "../../TestFiles/"; // string output_path = "../../TestFiles/Output/"; try // Extract logical structure from a PDF document { PDFDoc doc=new PDFDoc((input_path + "tagged.pdf")); doc.initSecurityHandler(); System.out.println("____________________________________________________________"); System.out.println("Sample 1 - Traverse logical structure tree..."); { STree tree = doc.getStructTree(); if (tree.isValid()) { System.out.println("Document has a StructTree root."); for (int i=0; i