//---------------------------------------------------------------------------------------
// Copyright (c) 2001-2012 by PDFTron Systems Inc. All Rights Reserved.
// Consult legal.txt regarding legal and license information.
//---------------------------------------------------------------------------------------
using System;
using System.Collections;

using pdftron;
using pdftron.Common;
using pdftron.Filters;
using pdftron.SDF;
using pdftron.PDF;
using pdftron.PDF.Struct;

namespace LogicalStructureTestCS
{
    //---------------------------------------------------------------------------------------
    // This sample explores the structure and content of a tagged PDF document and dumps
    // the structure information to the console window.
    //
    // In tagged PDF documents StructTree acts as a central repository for information
    // related to a PDF document's logical structure. The tree consists of StructElement-s
    // and ContentItem-s which are leaf nodes of the structure tree.
    //
    // The sample can be extended to access and extract the marked-content elements such
    // as text and images.
    //---------------------------------------------------------------------------------------
    class LogicalStructureTest
    {
        static void PrintIndent(int indent)
        {
            Console.WriteLine();
            // NOTE(review): this file is damaged at this point. Everything between the
            // "i" on the next line and "MCIDPageMap;" is missing (it looks like the text
            // between a '<' and a later '>' was stripped). The rest of this loop, the end
            // of PrintIndent and any definitions that followed it cannot be recovered from
            // this file. The surviving tokens are kept verbatim and will not compile;
            // restore this span from the original sample.
            for (int i=0; i MCIDPageMap;
            //typedef map MCIDDocMap;

        /// <summary>
        /// Reads every element from <paramref name="reader"/> and accumulates the text of
        /// marked-content text elements into <paramref name="mcid_page_map"/>, keyed by
        /// MCID. Used in code snippet 3.
        /// </summary>
        /// <param name="reader">Source of elements; read until Next() returns null.</param>
        /// <param name="mcid_page_map">
        /// Table updated in place: key is the MCID (int), value is the concatenated text
        /// (string) of all text elements seen so far with that MCID.
        /// </param>
        static void ProcessElements2(ElementReader reader, Hashtable mcid_page_map)
        {
            Element element;
            while ((element = reader.Next()) != null) // Read page contents
            {
                // In this sample we process only text, but the code can be extended
                // to handle paths, images, or any other Element type.
                int mcid = element.GetStructMCID();
                if (mcid < 0 || element.GetType() != Element.Type.e_text)
                {
                    continue;
                }

                String val = element.GetTextString();

                // The Hashtable indexer returns null for a missing key, so a single read
                // tells us whether to start a new entry or append to an existing one.
                // NOTE(review): assumes GetTextString() does not return null - confirm.
                object existing = mcid_page_map[mcid];
                mcid_page_map[mcid] = (existing == null) ? val : (String)existing + val;
            }
        }

        // Used in code snippet 3.
static void ProcessStructElement2(SElement element, Hashtable mcid_doc_map, int indent) { if (!element.IsValid()) { return; } // Print out the type and title info, if any. PrintIndent(indent); Console.Write("<" + element.GetType()); if (element.HasTitle()) { Console.Write(" title=\""+ element.GetTitle() + "\""); } Console.Write(">"); int num = element.GetNumKids(); for (int i=0; i"); } /// /// The main entry point for the application. /// static void Main(string[] args) { PDFNet.Initialize(); // Relative path to the folder containing test files. string input_path = "../../../../TestFiles/"; string output_path = "../../../../TestFiles/Output/"; try // Extract logical structure from a PDF document { PDFDoc doc = new PDFDoc(input_path + "tagged.pdf"); doc.InitSecurityHandler(); bool example1 = true; bool example2 = true; bool example3 = true; if (example1) { Console.WriteLine("____________________________________________________________"); Console.WriteLine("Sample 1 - Traverse logical structure tree..."); STree tree = doc.GetStructTree(); if (tree.IsValid()) { Console.WriteLine("Document has a StructTree root."); for (int i=0; i