public class XMLUtils
extends java.lang.Object
Modifier and Type | Class and Description |
---|---|
static class |
XMLUtils.XMLTag |
Modifier and Type | Method and Description |
---|---|
static java.lang.String |
escapeAttributeXML(java.lang.String in)
Returns a String in which some XML special characters have been
escaped.
|
static java.lang.String |
escapeElementXML(java.lang.String in)
Returns a String in which some of the XML special characters have been
escaped: just the ones that need escaping in an element content.
|
static java.lang.String |
escapeTextAroundXMLTags(java.lang.String s) |
static java.lang.String |
escapeXML(java.lang.String in)
Returns a String in which all the XML special characters have been
escaped.
|
static int |
findSpace(java.lang.String haystack,
int begin)
Returns either the first space or the first nbsp
|
static java.util.List<org.w3c.dom.Element> |
getTagElementsFromFile(java.io.File f,
java.lang.String tag)
Returns the elements in the given file with the given tag.
|
static java.util.List<Triple<java.lang.String,org.w3c.dom.Element,java.lang.String>> |
getTagElementTriplesFromFile(java.io.File f,
java.lang.String tag)
Returns the elements in the given file with the given tag associated with
the text content of the two previous siblings and two next siblings.
|
static java.util.List<Triple<java.lang.String,org.w3c.dom.Element,java.lang.String>> |
getTagElementTriplesFromFileNumBounded(java.io.File f,
java.lang.String tag,
int num)
Returns the elements in the given file with the given tag associated with
the text content of the previous and next siblings up to max numIncludedSiblings.
|
static java.util.List<Triple<java.lang.String,org.w3c.dom.Element,java.lang.String>> |
getTagElementTriplesFromFileNumBoundedSAXException(java.io.File f,
java.lang.String tag,
int numIncludedSiblings)
Returns the elements in the given file with the given tag associated with
the text content of the previous and next siblings up to max numIncludedSiblings.
|
static java.util.List<Triple<java.lang.String,org.w3c.dom.Element,java.lang.String>> |
getTagElementTriplesFromFileSAXException(java.io.File f,
java.lang.String tag)
Returns the elements in the given file with the given tag associated with
the text content of the two previous siblings and two next siblings.
|
static java.util.List<java.lang.String> |
getTextContentFromTagsFromFile(java.io.File f,
java.lang.String tag)
Returns the text content of all nodes in the given file with the given tag.
|
static javax.xml.parsers.DocumentBuilder |
getValidatingXmlParser(java.io.File schemaFile)
Returns a validating XML parser given an XSD (not DTD!).
|
static javax.xml.parsers.DocumentBuilder |
getXmlParser()
Returns a non-validating XML parser.
|
static boolean |
isBreaking(java.lang.String tag) |
static boolean |
isBreaking(XMLUtils.XMLTag tag) |
static void |
main(java.lang.String[] args)
Tests a few methods.
|
static XMLUtils.XMLTag |
parseTag(java.lang.String tagString) |
static XMLUtils.XMLTag |
readAndParseTag(java.io.Reader r) |
static org.w3c.dom.Document |
readDocumentFromFile(java.lang.String filename) |
static org.w3c.dom.Document |
readDocumentFromString(java.lang.String s) |
static java.lang.String |
readTag(java.io.Reader r)
Reads all text of the XML tag and returns it as a String.
|
static java.lang.String |
readUntilTag(java.io.Reader r)
Reads all text up to next XML tag and returns it as a String.
|
static javax.xml.parsers.DocumentBuilderFactory |
safeDocumentBuilderFactory() |
static java.lang.String |
stripTags(java.io.Reader r,
java.util.List<java.lang.Integer> mapBack,
boolean markLineBreaks) |
static java.lang.String |
unescapeStringForXML(java.lang.String s) |
public static javax.xml.parsers.DocumentBuilderFactory safeDocumentBuilderFactory()
public static java.util.List<java.lang.String> getTextContentFromTagsFromFile(java.io.File f, java.lang.String tag)
public static java.util.List<org.w3c.dom.Element> getTagElementsFromFile(java.io.File f, java.lang.String tag)
public static java.util.List<Triple<java.lang.String,org.w3c.dom.Element,java.lang.String>> getTagElementTriplesFromFile(java.io.File f, java.lang.String tag)
Triple<String, Element, String>
Targeted elements surrounded
by the text content of the two previous siblings and two next siblings.
public static java.util.List<Triple<java.lang.String,org.w3c.dom.Element,java.lang.String>> getTagElementTriplesFromFileNumBounded(java.io.File f, java.lang.String tag, int num)
Triple<String, Element, String>
Targeted elements surrounded
by the text content of the previous and next siblings, up to a maximum of num.
public static java.util.List<Triple<java.lang.String,org.w3c.dom.Element,java.lang.String>> getTagElementTriplesFromFileSAXException(java.io.File f, java.lang.String tag) throws org.xml.sax.SAXException
Triple<String, Element, String>
Targeted elements surrounded
by the text content of the two previous siblings and two next siblings.
org.xml.sax.SAXException
- if tag doesn't exist in the file.
public static java.util.List<Triple<java.lang.String,org.w3c.dom.Element,java.lang.String>> getTagElementTriplesFromFileNumBoundedSAXException(java.io.File f, java.lang.String tag, int numIncludedSiblings) throws org.xml.sax.SAXException
Triple<String, Element, String>
Targeted elements surrounded
by the text content of the previous and next siblings, up to a maximum of numIncludedSiblings.
org.xml.sax.SAXException
- if tag doesn't exist in the file.
public static javax.xml.parsers.DocumentBuilder getXmlParser()
public static javax.xml.parsers.DocumentBuilder getValidatingXmlParser(java.io.File schemaFile)
schemaFile
- File with XML schema
public static java.lang.String stripTags(java.io.Reader r, java.util.List<java.lang.Integer> mapBack, boolean markLineBreaks)
r
- the reader to read the XML/HTML from
mapBack
- a List of Integers mapping the positions in the result buffer
to positions in the original Reader, will be cleared on receipt
public static boolean isBreaking(java.lang.String tag)
public static boolean isBreaking(XMLUtils.XMLTag tag)
public static java.lang.String readUntilTag(java.io.Reader r) throws java.io.IOException
java.io.IOException
public static XMLUtils.XMLTag readAndParseTag(java.io.Reader r) throws java.io.IOException
java.io.IOException
public static java.lang.String unescapeStringForXML(java.lang.String s)
public static java.lang.String escapeXML(java.lang.String in)
in
- The String to escape
public static java.lang.String escapeElementXML(java.lang.String in)
in
- The String to escape
public static java.lang.String escapeAttributeXML(java.lang.String in)
in
- The String to escape
public static java.lang.String escapeTextAroundXMLTags(java.lang.String s)
public static int findSpace(java.lang.String haystack, int begin)
public static java.lang.String readTag(java.io.Reader r) throws java.io.IOException
r
- The reader to read from<TXT>
java.io.IOException
public static XMLUtils.XMLTag parseTag(java.lang.String tagString)
public static org.w3c.dom.Document readDocumentFromFile(java.lang.String filename) throws javax.xml.parsers.ParserConfigurationException, org.xml.sax.SAXException
javax.xml.parsers.ParserConfigurationException
org.xml.sax.SAXException
public static org.w3c.dom.Document readDocumentFromString(java.lang.String s) throws javax.xml.parsers.ParserConfigurationException, org.xml.sax.SAXException
javax.xml.parsers.ParserConfigurationException
org.xml.sax.SAXException
public static void main(java.lang.String[] args) throws java.lang.Exception
java.lang.Exception