1 package org.opentrafficsim.editor;
2
import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.regex.Pattern;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.xml.sax.SAXException;
17
18 /**
19 * Utility class to read XSD or XML from URI. There are also methods to obtain certain information from a node.
20 * <p>
21 * Copyright (c) 2023-2024 Delft University of Technology, PO Box 5, 2600 AA, Delft, the Netherlands. All rights reserved. <br>
22 * BSD-style license. See <a href="https://opentrafficsim.org/docs/license.html">OpenTrafficSim License</a>.
23 * </p>
24 * @author <a href="https://github.com/wjschakel">Wouter Schakel</a>
25 */
26 public final class DocumentReader
27 {
28
29 /**
30 * Private constructor.
31 */
32 private DocumentReader()
33 {
34
35 }
36
37 /**
38 * Opens an XSD or XML file.
39 * @param file file.
40 * @return document, i.e. the root of the XSD file.
41 * @throws SAXException exception
42 * @throws IOException exception
43 * @throws ParserConfigurationException exception
44 */
45 public static Document open(final URI file) throws SAXException, IOException, ParserConfigurationException
46 {
47 DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
48 // dbf.setXIncludeAware(true);
49 dbf.setIgnoringComments(true);
50 dbf.setIgnoringElementContentWhitespace(true);
51 DocumentBuilder db = dbf.newDocumentBuilder();
52 Document doc = db.parse(new File(file));
53 return doc;
54 }
55
56 /**
57 * Returns the attribute of a node. This is short for:
58 *
59 * <pre>
60 * Optional.ofNullable(node.hasAttributes() && node.getAttributes().getNamedItem(name) != null
61 * ? node.getAttributes().getNamedItem(name).getNodeValue() : null);
62 * </pre>
63 *
64 * @param node node.
65 * @param name attribute name.
66 * @return value of the attribute in the node.
67 */
68 public static Optional<String> getAttribute(final Node node, final String name)
69 {
70 return Optional.ofNullable(node.hasAttributes() && node.getAttributes().getNamedItem(name) != null
71 ? node.getAttributes().getNamedItem(name).getNodeValue() : null);
72 }
73
74 /**
75 * Returns a child node of specified type. It should be a type of which there may be only one.
76 * @param node node
77 * @param type child type, e.g. xsd:complexType.
78 * @return child node of specified type, empty if no such child.
79 */
80 public static Optional<Node> getChild(final Node node, final String type)
81 {
82 if (node.hasChildNodes())
83 {
84 for (int childIndex = 0; childIndex < node.getChildNodes().getLength(); childIndex++)
85 {
86 Node child = node.getChildNodes().item(childIndex);
87 if (child.getNodeName().equals(type))
88 {
89 return Optional.of(child);
90 }
91 }
92 }
93 return Optional.empty();
94 }
95
96 /**
97 * Returns child nodes of specified type.
98 * @param node node
99 * @param type child type, e.g. xsd:field.
100 * @return child nodes of specified type, empty {@code List} of no such child.
101 */
102 public static List<Node> getChildren(final Node node, final String type)
103 {
104 ArrayList<Node> children = new ArrayList<>();
105 if (node.hasChildNodes())
106 {
107 for (int childIndex = 0; childIndex < node.getChildNodes().getLength(); childIndex++)
108 {
109 Node child = node.getChildNodes().item(childIndex);
110 if (child.getNodeName().equals(type))
111 {
112 children.add(child);
113 }
114 }
115 }
116 return children;
117 }
118
119 /**
120 * Remove HTML tags from string.
121 * @param string input string
122 * @return string with HTML tags removed, or {@code null} if the input is {@code null}
123 */
124 public static String filterHtml(final String string)
125 {
126 return string == null ? null : string.replaceAll("\\<[^>]*>", "");
127 }
128
129 /**
130 * Types of annotation elements the {@code DocumentReader} can read. This is a combination of the element name (e.g.
131 * {@code xsd:appinfo}) and the source {@code name}.
132 *
133 * <pre>
134 * <xsd:sequence>
135 * <xsd:annotation>
136 * <xsd:appinfo source="name">annotates the sequence</xsd:appinfo>
137 * </xsd:annotation>
138 * </xsd:sequence>
139 * </pre>
140 */
141 public enum NodeAnnotation
142 {
143 /** Element xsd:documentation. */
144 DESCRIPTION("xsd:documentation", "description"),
145
146 /** Element xsd:appinfo. */
147 APPINFO_NAME("xsd:appinfo", "name"),
148
149 /** Element xsd:appinfo. */
150 APPINFO_PATTERN("xsd:appinfo", "pattern");
151
152 /** Element name. */
153 private final String elementName;
154
155 /** Source. */
156 private final String source;
157
158 /**
159 * Constructor.
160 * @param elementName element name
161 * @param source source name
162 */
163 NodeAnnotation(final String elementName, final String source)
164 {
165 this.elementName = elementName;
166 this.source = source;
167 }
168
169 /**
170 * Returns an annotation value. These are defined as below, for either xsd:appinfo or xsd:documentation. All space-like
171 * characters are replaced by blanks, and consecutive blanks are removed.
172 *
173 * <pre>
174 * <xsd:sequence>
175 * <xsd:annotation>
176 * <xsd:appinfo source="name">annotates the sequence</xsd:appinfo>
177 * </xsd:annotation>
178 * </xsd:sequence>
179 * </pre>
180 *
181 * @param node node, either xsd:element or xsd:attribute.
182 * @return annotation value, empty if not found.
183 */
184 public Optional<String> get(final Node node)
185 {
186 for (Node child : DocumentReader.getChildren(node, "xsd:annotation"))
187 {
188 for (Node annotation : DocumentReader.getChildren(child, this.elementName))
189 {
190 Optional<String> appInfoSource = DocumentReader.getAttribute(annotation, "source");
191 if (appInfoSource.isPresent() && appInfoSource.get().equals(this.source))
192 {
193 StringBuilder str = new StringBuilder();
194 for (int appIndex = 0; appIndex < annotation.getChildNodes().getLength(); appIndex++)
195 {
196 Node appInfo = annotation.getChildNodes().item(appIndex);
197 if (appInfo.getNodeName().equals("#text"))
198 {
199 str.append(appInfo.getNodeValue());
200 }
201 }
202 // tabs, line break, etc. to blanks, then remove consecutive blanks, then trailing/leading blanks
203 return Optional.of(str.toString().replaceAll("\\s", " ").replaceAll("\\s{2,}", " ").trim());
204 }
205 }
206 }
207 return Optional.empty();
208 }
209 }
210
211 }