Monday, 22 September 2014

Read PDF File Data Using Coordinates with iTextSharp in C# and VB.Net

Using C#


/// <summary>
/// Extracts the text located inside a rectangular region of a single PDF page
/// and returns it split into lines.
/// </summary>
/// <param name="filepath">Path of the PDF file handed to <c>PdfReader</c>.</param>
/// <param name="pageno">Page number passed to <c>PdfTextExtractor.GetTextFromPage</c> (presumably 1-based; confirm against the iTextSharp docs).</param>
/// <param name="cordinate1">First argument of the <c>iTextSharp.text.Rectangle</c> constructor (presumably lower-left x; confirm).</param>
/// <param name="coordinate2">Second argument of the rectangle constructor (presumably lower-left y; confirm).</param>
/// <param name="coordinate3">Third argument of the rectangle constructor (presumably upper-right x; confirm).</param>
/// <param name="coordinate4">Fourth argument of the rectangle constructor (presumably upper-right y; confirm).</param>
/// <returns>
/// The extracted text split on '\n', or <c>null</c> when extraction throws.
/// Exceptions raised while opening the file are not caught here because the
/// reader is created before the try block.
/// </returns>
public string[] ReadPdfFiles(string filepath, int pageno, int cordinate1, int coordinate2, int coordinate3, int coordinate4)
{
    PdfReader reader = new PdfReader(filepath);
    try
    {
        // Only text rendered inside this rectangle passes the filter.
        iTextSharp.text.Rectangle region = new iTextSharp.text.Rectangle(cordinate1, coordinate2, coordinate3, coordinate4);
        RenderFilter[] filters = new RenderFilter[] { new RegionTextRenderFilter(region) };
        ITextExtractionStrategy strategy = new FilteredTextRenderListener(new LocationTextExtractionStrategy(), filters);

        string pageText = PdfTextExtractor.GetTextFromPage(reader, pageno, strategy);
        return pageText.Split('\n');
    }
    catch (Exception)
    {
        // Best-effort contract kept for existing callers: failure is reported as null.
        // The reader is closed once, by the finally block below.
        return null;
    }
    finally
    {
        reader.Close();
    }
}


Using VB.Net


''' <summary>
''' Extracts the text located inside a rectangular region of a single PDF page
''' and returns it split into lines.
''' </summary>
''' <param name="filepath">Path of the PDF file handed to PdfReader.</param>
''' <param name="pageno">Page number passed to PdfTextExtractor.GetTextFromPage (presumably 1-based; confirm against the iTextSharp docs).</param>
''' <param name="cordinate1">First argument of the iTextSharp.text.Rectangle constructor (presumably lower-left x; confirm).</param>
''' <param name="coordinate2">Second argument of the rectangle constructor (presumably lower-left y; confirm).</param>
''' <param name="coordinate3">Third argument of the rectangle constructor (presumably upper-right x; confirm).</param>
''' <param name="coordinate4">Fourth argument of the rectangle constructor (presumably upper-right y; confirm).</param>
''' <returns>
''' The extracted text split on line-feed characters, or Nothing when extraction throws.
''' Exceptions raised while opening the file are not caught here because the
''' reader is created before the Try block.
''' </returns>
Public Function ReadPdfFiles(filepath As String, pageno As Integer, cordinate1 As Integer, coordinate2 As Integer, coordinate3 As Integer, coordinate4 As Integer) As String()

	Dim reader As New PdfReader(filepath)

	Try

		' Only text rendered inside this rectangle passes the filter.
		Dim region As New iTextSharp.text.Rectangle(cordinate1, coordinate2, coordinate3, coordinate4)

		Dim filters As RenderFilter() = New RenderFilter() {New RegionTextRenderFilter(region)}

		Dim strategy As ITextExtractionStrategy = New FilteredTextRenderListener(New LocationTextExtractionStrategy(), filters)

		Dim pageText As String = PdfTextExtractor.GetTextFromPage(reader, pageno, strategy)

		Return pageText.Split(ControlChars.Lf)

	Catch

		' Best-effort contract kept for existing callers: failure is reported as Nothing.
		' The reader is closed once, by the Finally block below.
		Return Nothing

	Finally

		reader.Close()

	End Try

End Function

1 comment:

  1. The first and easiest way to vb.net read pdf text to expression web is to use the RasterEdge page http://www.rasteredge.com/how-to/vb-net-imaging/pdf-html5-feature-annotate/.

    The vb.net add comments to pdf reader is not static, so you'll need to create an instance of the class.

    ReplyDelete