diff --git a/src/XMLDocument.h b/src/XMLDocument.h index 36e8c280c..b2e7d115b 100644 --- a/src/XMLDocument.h +++ b/src/XMLDocument.h @@ -34,6 +34,7 @@ #include +#include #include namespace digidoc { @@ -308,9 +309,13 @@ struct XMLDocument: public unique_free_d, public XMLNode d = {}; } - XMLDocument(const std::string &path, const XMLName &n = {}) noexcept - : XMLDocument(path.empty() ? nullptr : xmlParseFile(path.c_str()), n) - {} + XMLDocument(const std::string &path, const XMLName &n = {}) + { + if(path.empty()) + return; + if(std::ifstream f{path}) + *this = openStream(f, n); + } template static XMLDocument open(F &&f, const XMLName &name = {}, bool hugeFile = false) @@ -326,19 +331,42 @@ struct XMLDocument: public unique_free_d, public XMLNode } }, nullptr, &f, XML_CHAR_ENCODING_NONE)); ctxt->options |= XML_PARSE_NOENT|XML_PARSE_DTDLOAD|XML_PARSE_DTDATTR|XML_PARSE_NONET|XML_PARSE_NODICT; + if(hugeFile) + ctxt->options |= XML_PARSE_HUGE; #if LIBXML_VERSION >= 21300 ctxt->options |= XML_PARSE_NO_XXE; #else ctxt->loadsubset |= XML_DETECT_IDS|XML_COMPLETE_ATTRS; -#endif - if(hugeFile) - { - ctxt->options |= XML_PARSE_HUGE; -#if LIBXML_VERSION < 21300 - if(ctxt->sax) - ctxt->sax->entityDecl = nullptr; -#endif + if(ctxt->sax) { + ctxt->sax->entityDecl = [](void *ctx, const xmlChar *name, int type, + const xmlChar *publicId, const xmlChar *systemId, + xmlChar *content) noexcept { + auto *ctxt = static_cast(ctx); + auto *doc = ctxt->myDoc; + if(!doc) return; + if((ctxt->options & XML_PARSE_HUGE) && + (type == XML_INTERNAL_GENERAL_ENTITY || type == XML_INTERNAL_PARAMETER_ENTITY)) { + ctxt->wellFormed = 0; + xmlStopParser(ctxt); + return; + } + if(type == XML_EXTERNAL_GENERAL_PARSED_ENTITY) + xmlAddDocEntity(doc, name, XML_INTERNAL_GENERAL_ENTITY, nullptr, nullptr, (const xmlChar*)""); + else + xmlAddDocEntity(doc, name, type, publicId, systemId, content); + }; + ctxt->sax->resolveEntity = [](void*, const xmlChar*, const xmlChar*) noexcept -> xmlParserInputPtr { + return nullptr; + }; + ctxt->sax->getEntity = [](void *ctx, const xmlChar *name) noexcept -> xmlEntityPtr { + auto *ctxt = static_cast(ctx); + auto *ent = xmlGetDocEntity(ctxt->myDoc, name); + if(!ent && ctxt->myDoc) + ent = xmlAddDocEntity(ctxt->myDoc, name, XML_INTERNAL_GENERAL_ENTITY, nullptr, nullptr, (const xmlChar*)""); + return ent; + }; } +#endif auto result = xmlParseDocument(ctxt.get()); if(result != 0 || !ctxt->wellFormed) { diff --git a/src/XmlConf.cpp b/src/XmlConf.cpp index c3c31fcff..ec27e4e90 100644 --- a/src/XmlConf.cpp +++ b/src/XmlConf.cpp @@ -124,7 +124,9 @@ XmlConf::Private::Private(Conf *self, const string &path, string schema) auto XmlConf::Private::loadDoc(const string &path) const { LIBXML_TEST_VERSION - auto doc = XMLDocument(path, {"configuration"}); + XMLDocument doc; + try { doc = XMLDocument(path, {"configuration"}); } + catch(const Exception &) {} if(!doc) { WARN("Failed to parse configuration: %s", path.c_str()); diff --git a/test/data/xxe-sentinel-dtd.dtd b/test/data/xxe-sentinel-dtd.dtd new file mode 100644 index 000000000..e821c9ebc --- /dev/null +++ b/test/data/xxe-sentinel-dtd.dtd @@ -0,0 +1 @@ + diff --git a/test/data/xxe-sentinel.txt b/test/data/xxe-sentinel.txt new file mode 100644 index 000000000..6ffa85cd3 --- /dev/null +++ b/test/data/xxe-sentinel.txt @@ -0,0 +1 @@ +XXESENTINEL diff --git a/test/libdigidocpp_boost.cpp b/test/libdigidocpp_boost.cpp index d449ef5e4..2d295b10c 100644 --- a/test/libdigidocpp_boost.cpp +++ b/test/libdigidocpp_boost.cpp @@ -605,8 +605,8 @@ BOOST_AUTO_TEST_SUITE_END() BOOST_AUTO_TEST_SUITE(XMLTestSuite) BOOST_AUTO_TEST_CASE(XMLBomb) { - BOOST_CHECK_EQUAL(XMLDocument("xml-bomb-attr.xml"), false); - BOOST_CHECK_EQUAL(XMLDocument("xml-bomb-cont.xml"), false); + BOOST_CHECK_THROW(XMLDocument("xml-bomb-attr.xml"), Exception); + BOOST_CHECK_THROW(XMLDocument("xml-bomb-cont.xml"), Exception); if(std::fstream f{"xml-bomb-attr.xml"}) BOOST_CHECK_THROW(XMLDocument::openStream(f), Exception); if(std::fstream f{"xml-bomb-cont.xml"}) @@ -616,4 +616,70 @@ BOOST_AUTO_TEST_CASE(XMLBomb) if(std::fstream f{"xml-bomb-cont.xml"}) BOOST_CHECK_THROW(XMLDocument::openStream(f, {}, true), Exception); } +BOOST_AUTO_TEST_CASE(XMLXXE) +{ + BOOST_REQUIRE(fs::exists("xxe-sentinel.txt")); + BOOST_REQUIRE(fs::exists("xxe-sentinel-dtd.dtd")); + + // Absolute file:// URIs so the entity loader can resolve them even from a + // stream context (which has no base URL). Without protection these files + // would be loaded and XXESENTINEL would appear in the document; with + // protection they are blocked before any file access occurs. + auto fileUri = [](const char *name) { + auto s = fs::absolute(name).generic_string(); + return s.front() == '/' ? "file://" + s : "file:///" + s; + }; + const auto sentinelUri = fileUri("xxe-sentinel.txt"); + const auto dtdUri = fileUri("xxe-sentinel-dtd.dtd"); + + // Each payload would leak XXESENTINEL into if the relevant XXE + // vector is not blocked. + const std::string payloads[] = { + // General entity via local file + "" + "]>" + "&xxe;", + // General entity via network (blocked by XML_PARSE_NONET) + "" + "]>" + "&xxe;", + // External DTD subset that defines &sentinel; as XXESENTINEL + "" + "" + "&sentinel;", + // Parameter entity that injects the DTD to define &sentinel; + "" + "" + "%sentinel;" + "]>&sentinel;", + }; + + size_t payloadIndex = 0; + for(const auto &payload : payloads) { + for(bool huge : {false, true}) { + std::istringstream is{std::string(payload)}; + auto doc = XMLDocument::openStream(is, {}, huge); + xmlChar *raw = xmlNodeGetContent(doc.d); + std::string_view text = raw ? (const char*)raw : ""; + BOOST_CHECK(!text.contains("XXESENTINEL")); + xmlFree(raw); + } + + const auto path = (fs::temp_directory_path() / + ("libdigidocpp-xxe-payload-" + std::to_string(payloadIndex++) + ".xml")).string(); + std::ofstream out{path, std::ofstream::binary}; + BOOST_REQUIRE(out.is_open()); + out << payload; + out.close(); + BOOST_REQUIRE(out.good()); + + auto doc = XMLDocument(path); + xmlChar *raw = xmlNodeGetContent(doc.d); + std::string_view text = raw ? (const char*)raw : ""; + BOOST_CHECK(!text.contains("XXESENTINEL")); + xmlFree(raw); + fs::remove(path); + } +} BOOST_AUTO_TEST_SUITE_END()