# ExcelToXML

Source code of `ExcelToXML.java`:

package com.jbn;

import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.w3c.dom.Document;
import org.w3c.dom.Element;

/**
 * Converts one sheet of an Excel workbook ({@code .xls} or {@code .xlsx}) into a flat XML file.
 *
 * <p>The first row of the sheet supplies the element names; every following row becomes one
 * {@code <record>} element under a {@code <records>} root.
 */
public class ExcelToXML {
    /** Pattern for date-formatted numeric cells. */
    private static final String DATE_PATTERN = "yyyy/MM/dd";

    /**
     * Pattern for plain numeric cells: whole numbers print without a fraction ("42"), while up
     * to ten fraction digits are preserved instead of being rounded away.
     */
    private static final String NUMBER_PATTERN = "0.##########";

    /**
     * Reads a sheet into a list of rows, each row being a list of cell texts.
     *
     * <p>The column count is taken from the first row; every returned row has exactly that many
     * entries, missing cells being read as empty strings. Values of the first row are
     * lower-cased. Rows that do not exist in the sheet are skipped.
     *
     * @param filePath  path of the workbook; a name ending in "xlsx" (case-insensitive) is opened
     *                  as OOXML, anything else as a legacy {@code .xls} workbook
     * @param sheetName name of the sheet to read
     * @return the rows of the sheet, header row first
     * @throws IOException              if the file cannot be opened or read
     * @throws IllegalArgumentException if the sheet does not exist or has no first (header) row
     */
    public List<List<String>> readExcelToList(String filePath, String sheetName) throws IOException {
        // Locale.ROOT keeps the extension check and header casing independent of the JVM locale.
        boolean isXlsx = filePath.toLowerCase(Locale.ROOT).endsWith("xlsx");

        // Both resources are closed on every path, including the exceptional ones.
        try (FileInputStream fileInputStream = new FileInputStream(filePath);
                Workbook workbook = isXlsx ? new XSSFWorkbook(fileInputStream)
                        : new HSSFWorkbook(fileInputStream)) {

            Sheet sheet = workbook.getSheet(sheetName);
            if (sheet == null) {
                throw new IllegalArgumentException("Sheet not found: " + sheetName);
            }

            Row headerRow = sheet.getRow(0);
            if (headerRow == null) {
                throw new IllegalArgumentException("Sheet has no header row: " + sheetName);
            }

            int colNum = headerRow.getLastCellNum();
            int lastRowIndex = sheet.getLastRowNum();

            List<List<String>> list = new ArrayList<>();
            for (int i = 0; i <= lastRowIndex; i++) {
                Row row = sheet.getRow(i);
                if (row == null) {
                    continue;
                }

                List<String> listRow = new ArrayList<>();
                for (int j = 0; j < colNum; j++) {
                    Cell cell = row.getCell(j, Row.MissingCellPolicy.CREATE_NULL_AS_BLANK);
                    String cellValue = formatCellValue(cell);
                    // Header texts become XML tag names later; normalise their case here.
                    listRow.add(i == 0 ? cellValue.toLowerCase(Locale.ROOT) : cellValue);
                }
                list.add(listRow);
            }
            return list;
        }
    }

    /**
     * Writes rows to an XML file: {@code list.get(0)} supplies the element names and every later
     * row becomes a {@code <record>} whose children hold that row's values.
     *
     * <p>An empty list, or a list holding only the header row, produces a document with an empty
     * {@code <records>} root. Values without a matching header entry are written as
     * {@code columnN} elements (N being the 1-based column position).
     *
     * @param list     rows to write, header row first
     * @param filePath destination file; it is created or overwritten
     * @throws ParserConfigurationException if no DOM document builder can be created
     * @throws IOException                  if the file cannot be written or the document cannot
     *                                      be serialized
     * @throws IllegalArgumentException     if {@code list} is {@code null}
     */
    public void writeListToXML(List<List<String>> list, String filePath)
            throws ParserConfigurationException, IOException {
        if (list == null) {
            throw new IllegalArgumentException("list must not be null");
        }

        DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
        DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
        Document doc = docBuilder.newDocument();

        doc.setXmlStandalone(true);
        Element rootElement = doc.createElement("records");
        doc.appendChild(rootElement);

        if (!list.isEmpty()) {
            // Sanitize each header once rather than once per cell.
            List<String> tags = new ArrayList<>();
            for (String header : list.get(0)) {
                tags.add(sanitizeTagName(header));
            }

            for (int i = 1; i < list.size(); i++) {
                Element recordElement = doc.createElement("record");
                rootElement.appendChild(recordElement);

                List<String> values = list.get(i);
                for (int j = 0; j < values.size(); j++) {
                    // A row wider than the header still gets a valid, predictable tag.
                    String tag = j < tags.size() ? tags.get(j) : "column" + (j + 1);
                    Element element = doc.createElement(tag);
                    element.setTextContent(values.get(j));
                    recordElement.appendChild(element);
                }
            }
        }

        try (FileOutputStream output = new FileOutputStream(filePath)) {
            writeXml(doc, output);
        }
    }

    /**
     * Turns arbitrary header text into a name accepted by {@code Document.createElement}.
     *
     * <p>Every character outside {@code [A-Za-z0-9_]} is replaced by an underscore. Because an
     * XML name may be neither empty nor start with a digit, such results get a leading
     * underscore.
     */
    private static String sanitizeTagName(String tagName) {
        String cleaned = tagName == null ? "" : tagName.replaceAll("\\W", "_");
        if (cleaned.isEmpty() || Character.isDigit(cleaned.charAt(0))) {
            return "_" + cleaned;
        }
        return cleaned;
    }

    /**
     * Renders a cell as text.
     *
     * <p>Strings are trimmed, booleans print as "true"/"false", numeric cells are rendered by
     * {@link #formatNumericCell(Cell)}, error cells print as "Error" and blank or {@code null}
     * cells as the empty string. Formula cells are rendered from their cached result.
     */
    private static String formatCellValue(Cell cell) {
        if (cell == null) {
            return "";
        }

        switch (cell.getCellType()) {
        case STRING:
            return cell.getStringCellValue().trim();
        case BOOLEAN:
            return String.valueOf(cell.getBooleanCellValue());
        case NUMERIC:
            return formatNumericCell(cell);
        case FORMULA:
            // Use the value stored with the formula instead of dropping the cell.
            switch (cell.getCachedFormulaResultType()) {
            case STRING:
                return cell.getStringCellValue().trim();
            case BOOLEAN:
                return String.valueOf(cell.getBooleanCellValue());
            case NUMERIC:
                return formatNumericCell(cell);
            case ERROR:
                return "Error";
            default:
                return "";
            }
        case ERROR:
            return "Error";
        case BLANK:
        default:
            return "";
        }
    }

    /**
     * Renders a numeric cell: date-formatted cells as {@code yyyy/MM/dd}, all others as a plain
     * decimal number with '.' as the decimal separator.
     *
     * <p>The formatters are created per call because {@link SimpleDateFormat} and
     * {@link DecimalFormat} are not thread-safe.
     */
    private static String formatNumericCell(Cell cell) {
        if (DateUtil.isCellDateFormatted(cell)) {
            return new SimpleDateFormat(DATE_PATTERN).format(cell.getDateCellValue());
        }
        DecimalFormat numberFormat =
                new DecimalFormat(NUMBER_PATTERN, DecimalFormatSymbols.getInstance(Locale.ROOT));
        return numberFormat.format(cell.getNumericCellValue());
    }

    /**
     * Serializes the document to the stream as indented, standalone UTF-8 XML.
     *
     * @throws IOException if the transformer cannot be created or the transformation fails; the
     *                     original {@link TransformerException} is kept as the cause
     */
    private static void writeXml(Document doc, OutputStream output) throws IOException {
        try {
            TransformerFactory transformerFactory = TransformerFactory.newInstance();
            Transformer transformer = transformerFactory.newTransformer();
            transformer.setOutputProperty(OutputKeys.INDENT, "yes");
            transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
            transformer.setOutputProperty(OutputKeys.STANDALONE, "yes");

            transformer.transform(new DOMSource(doc), new StreamResult(output));
        } catch (TransformerException e) {
            // Report the failure to the caller instead of leaving a truncated file unnoticed.
            throw new IOException("Failed to write XML document", e);
        }
    }

    /**
     * Command-line entry point.
     *
     * <p>Optional arguments: {@code [inputWorkbook [sheetName [outputXml]]]}. Arguments that are
     * omitted fall back to the sample paths and the sheet name "records".
     */
    public static void main(String[] args) {
        String inputPath = args.length > 0 ? args[0]
                : "D:\\study\\java\\excel-to-xml\\src\\main\\resources\\test.xlsx";
        String sheetName = args.length > 1 ? args[1] : "records";
        String outputPath = args.length > 2 ? args[2]
                : "D:\\study\\java\\excel-to-xml\\src\\main\\resources\\test.xml";

        try {
            ExcelToXML excelToXML = new ExcelToXML();
            List<List<String>> list = excelToXML.readExcelToList(inputPath, sheetName);
            excelToXML.writeListToXML(list, outputPath);
        } catch (Exception e) {
            // Top-level boundary: report the failure and signal it through the exit status.
            e.printStackTrace();
            System.exit(1);
        }
    }
}

# Reference

- Apache POI Quick Guide
- Baeldung: Apache POI
- Apache POI: Read and Write Excel File

Edited on