This program shows the complete steps to extract the content and metadata of an MS Excel (.xlsx) file using the Apache Tika OOXMLParser.
Sample File

Complete Example
import java.io.File; import java.io.FileInputStream; import java.io.IOException; import org.apache.tika.exception.TikaException; import org.apache.tika.metadata.Metadata; import org.apache.tika.parser.ParseContext; import org.apache.tika.parser.microsoft.ooxml.OOXMLParser; import org.apache.tika.sax.BodyContentHandler; import org.xml.sax.SAXException; public class TikaMSExcelParserExample { public static void main(final String[] args) throws IOException, TikaException, SAXException { // detecting the file type BodyContentHandler handler = new BodyContentHandler(); Metadata metadata = new Metadata(); FileInputStream inputstream = new FileInputStream(new File("C:\\Users\\Saurabh Gupta\\Desktop\\TIKA\\TIKA-MS-EXCEL.xlsx")); ParseContext pcontext = new ParseContext(); // OOXml parser OOXMLParser msofficeparser = new OOXMLParser(); msofficeparser.parse(inputstream, handler, metadata, pcontext); System.out.println("Contents of the excel document:" + handler.toString()); System.out.println("Metadata of the excel document:"); String[] metadataNames = metadata.names(); for (String name : metadataNames) { System.out.println(name + ": " + metadata.get(name)); } } }
Output
Contents of the excel document:Sheet1
First Name Last Name DOB
Saurabh Gupta 10-Dec-85
Gaurav Kumar 12-May-86
Rahul Roi 12-Jun-10
Raghvendra Rana 5-Jan-95
Tanaya Jain 13-Mar-85
Metadata of the excel document:
date: 2019-11-23T00:25:08Z
extended-properties:AppVersion: 15.0300
meta:creation-date: 2006-09-16T00:00:00Z
extended-properties:Application: Microsoft Excel
extended-properties:Company:
Creation-Date: 2006-09-16T00:00:00Z
dcterms:created: 2006-09-16T00:00:00Z
custom:WorkbookGuid: e742a774-13a6-49b2-8ba3-1b6118163781
dcterms:modified: 2019-11-23T00:25:08Z
Last-Modified: 2019-11-23T00:25:08Z
Last-Save-Date: 2019-11-23T00:25:08Z
Application-Version: 15.0300
protected: false
meta:save-date: 2019-11-23T00:25:08Z
Application-Name: Microsoft Excel
modified: 2019-11-23T00:25:08Z
publisher:
Content-Type: application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
dc:publisher:
You must log in to post a comment.