From 9b61313581e117d8e6a2b5bfb2cd6f41b7474076 Mon Sep 17 00:00:00 2001 From: Herwig Hochleitner Date: Sun, 25 Nov 2018 15:33:56 +0100 Subject: [PATCH 01/23] integrate sax-js lib --- sax-js/build-updated.sh | 4 + sax-js/package.json | 9 + sax-js/sax.js | 1 + sax-js/webpack.config.js | 24 + .../clojure/clojure/data/xml/js/push.cljs | 77 + src/main/clojurescript/deps.cljs | 5 + .../resources/clojure/data/xml/sax.externs.js | 116 + src/main/resources/clojure/data/xml/sax.js | 4092 +++++++++++++++++ .../resources/clojure/data/xml/sax.min.js | 8 + 9 files changed, 4336 insertions(+) create mode 100755 sax-js/build-updated.sh create mode 100644 sax-js/package.json create mode 100644 sax-js/sax.js create mode 100644 sax-js/webpack.config.js create mode 100644 src/main/clojure/clojure/data/xml/js/push.cljs create mode 100644 src/main/clojurescript/deps.cljs create mode 100644 src/main/resources/clojure/data/xml/sax.externs.js create mode 100644 src/main/resources/clojure/data/xml/sax.js create mode 100644 src/main/resources/clojure/data/xml/sax.min.js diff --git a/sax-js/build-updated.sh b/sax-js/build-updated.sh new file mode 100755 index 0000000..95f3837 --- /dev/null +++ b/sax-js/build-updated.sh @@ -0,0 +1,4 @@ +#!/bin/sh + +npm install --save-dev +./node_modules/.bin/webpack diff --git a/sax-js/package.json b/sax-js/package.json new file mode 100644 index 0000000..1776e39 --- /dev/null +++ b/sax-js/package.json @@ -0,0 +1,9 @@ +{ + "dependencies": { + "sax": "1.2.4" + }, + "devDependencies": { + "webpack": "4.26.0", + "webpack-cli": "3.1.2" + } +} diff --git a/sax-js/sax.js b/sax-js/sax.js new file mode 100644 index 0000000..9f72f89 --- /dev/null +++ b/sax-js/sax.js @@ -0,0 +1 @@ +module.exports = require("sax"); diff --git a/sax-js/webpack.config.js b/sax-js/webpack.config.js new file mode 100644 index 0000000..af37b32 --- /dev/null +++ b/sax-js/webpack.config.js @@ -0,0 +1,24 @@ +var buildPath = require("path").resolve("../src/main/resources/clojure/data/xml"); +module.exports = [{ + "mode": "none", + // "devtool": "source-map", + "entry": "./sax.js", + "node": false, + "output": { + "path": buildPath, + "filename": "sax.js", + "library": "CLOJURE_DATA_XML_SAX", + "libraryTarget": "var" + } +}, { + "mode": "production", + // "devtool": "source-map", + "entry": "./sax.js", + "node": false, + "output": { + "path": buildPath, + "filename": "sax.min.js", + "library": "CLOJURE_DATA_XML_SAX", + "libraryTarget": "var" + } +}]; diff --git a/src/main/clojure/clojure/data/xml/js/push.cljs b/src/main/clojure/clojure/data/xml/js/push.cljs new file mode 100644 index 0000000..6773010 --- /dev/null +++ b/src/main/clojure/clojure/data/xml/js/push.cljs @@ -0,0 +1,77 @@ +(ns clojure.data.xml.js.push + (:require + [clojure.data.xml.sax-js :as sax])) + +(defn parser [rfn init-state {:keys [strict trim normalize + lowercase xmlns position + strict-entities] + :or {strict true + trim false + normalize false + lowercase true + position true + strict-entities false + xmlns true}}] + (let [p (sax/parser strict #js {"trim" trim + "normalize" normalize + "lowercase" lowercase + "xmlns" xmlns + "position" position + "strictEntities" strict-entities}) + s (volatile! init-state)] + ;; OPEN TAG + (set! (.-onopentag p) + #(vswap! s rfn {:type :start + :name (.-name %) + :attributes (.-attributes %)})) + + ;; CLOSE TAG + (set! (.-onclosetag p) + #(vswap! s rfn {:type :end + :name %})) + + ;; GET TEXT + (set! (.-ontext p) + #(vswap! s rfn {:type :chars + :str %})) + + ;; CDATA HANDLING + (set! (.-oncdata p) + #(vswap! s rfn {:type :cdata + :str %})) + + ;; COMMENTS + (set! (.-oncomment p) + #(vswap! s rfn {:type :comment + :str %})) + + ;; END PARSING + (set! (.-onend p) + #(vswap! s rfn)) + + ;; ERROR + (set! (.-onerror p) + #(do + (vswap! s rfn {:type :error + :error %}) + (vswap! s rfn))) + + (fn + ([] + (.close p) + @s) + ([source-part] + (.write p source-part))))) + +(comment + + (let [p (parser conj [] {})] + (p "") + (p)) + + ) + diff --git a/src/main/clojurescript/deps.cljs b/src/main/clojurescript/deps.cljs new file mode 100644 index 0000000..cab2fd7 --- /dev/null +++ b/src/main/clojurescript/deps.cljs @@ -0,0 +1,5 @@ +{:foreign-libs [{:file "clojure/data/xml/sax.js" + :file-min "clojure/data/xml/sax.min.js" + :provides ["clojure.data.xml.sax-js"] + :global-exports {clojure.data.xml.sax-js CLOJURE_DATA_XML_SAX}}] + :externs ["clojure/data/xml/sax.externs.js"]} diff --git a/src/main/resources/clojure/data/xml/sax.externs.js b/src/main/resources/clojure/data/xml/sax.externs.js new file mode 100644 index 0000000..06a7150 --- /dev/null +++ b/src/main/resources/clojure/data/xml/sax.externs.js @@ -0,0 +1,116 @@ +/** + * @const + */ +var sax = function() {}; + +/** + * @constructor + * @dict + */ +sax._Node = function() {}; + +/** + * @constructor + */ +sax.SAXParser = function() {}; + +/** + * @constructor + */ +sax.SAXStream = function() {}; + +/** + * @this {null} + * @return {sax.SAXStream} + */ +sax.createStream = function() {}; + +/** + * @this {null} + * @return {sax.SAXParser} + */ +sax.parser = function() {}; + +/** + * @this {sax.SAXParser} + * @return {null} + */ +sax.SAXParser.prototype.write = function() {}; + +/** + * @this {sax.SAXParser} + * @return {null} + */ +sax.SAXParser.prototype.close = function() {}; + +/** + * @this {sax.SAXParser} + * @return {null} + */ +sax.SAXParser.prototype.flush = function() {}; + +/** + * @this {sax.SAXParser} + * @return {null} + */ +sax.SAXParser.prototype.resume = function() {}; + +/** + * @this {sax.SAXParser} + * @return {null} + */ +sax.SAXParser.prototype.end = function() {}; + +/** + * @this {sax.SAXParser} + */ +sax.SAXParser.prototype.onopentag = function() {}; + +/** + * @this {sax.SAXParser} + * @param {sax._Node} + * @type {function()} + */ +sax.SAXParser.prototype.onclosetag = function() {}; + +/** + * @this {sax.SAXParser} + * @param {sax._Node} + * @type {function()} + */ +sax.SAXParser.prototype.ontext = function() {}; + +/** + * @this {sax.SAXParser} + * @param {sax._Node} + * @type {function()} + */ +sax.SAXParser.prototype.onopencdata = function() {}; + +/** + * @this {sax.SAXParser} + * @param {sax._Node} + * @type {function()} + */ +sax.SAXParser.prototype.oncdata = function() {}; + +/** + * @this {sax.SAXParser} + * @param {sax._Node} + * @type {function()} + */ +sax.SAXParser.prototype.onclosecdata = function() {}; + +/** + * @this {sax.SAXParser} + * @param {sax._Node} + * @type {function()} + */ +sax.SAXParser.prototype.onend = function() {}; + +/** + * @this {sax.SAXParser} + * @param {sax._Node} + * @type {function()} + */ +sax.SAXParser.prototype.onerror = function() {}; diff --git a/src/main/resources/clojure/data/xml/sax.js b/src/main/resources/clojure/data/xml/sax.js new file mode 100644 index 0000000..56a894e --- /dev/null +++ b/src/main/resources/clojure/data/xml/sax.js @@ -0,0 +1,4092 @@ +var CLOJURE_DATA_XML_SAX = +/******/ (function(modules) { // webpackBootstrap +/******/ // The module cache +/******/ var installedModules = {}; +/******/ +/******/ // The require function +/******/ function __webpack_require__(moduleId) { +/******/ +/******/ // Check if module is in cache +/******/ if(installedModules[moduleId]) { +/******/ return installedModules[moduleId].exports; +/******/ } +/******/ // Create a new module (and put it into the cache) +/******/ var module = installedModules[moduleId] = { +/******/ i: moduleId, +/******/ l: false, +/******/ exports: {} +/******/ }; +/******/ +/******/ // Execute the module function +/******/ modules[moduleId].call(module.exports, module, module.exports, __webpack_require__); +/******/ +/******/ // Flag the module as loaded +/******/ module.l = true; +/******/ +/******/ // Return the exports of the module +/******/ return module.exports; +/******/ } +/******/ +/******/ +/******/ // expose the modules object (__webpack_modules__) +/******/ __webpack_require__.m = modules; +/******/ +/******/ // expose the module cache +/******/ __webpack_require__.c = installedModules; +/******/ +/******/ // define getter function for harmony exports +/******/ __webpack_require__.d = function(exports, name, getter) { +/******/ if(!__webpack_require__.o(exports, name)) { +/******/ Object.defineProperty(exports, name, { enumerable: true, get: getter }); +/******/ } +/******/ }; +/******/ +/******/ // define __esModule on exports +/******/ __webpack_require__.r = function(exports) { +/******/ if(typeof Symbol !== 'undefined' && Symbol.toStringTag) { +/******/ Object.defineProperty(exports, Symbol.toStringTag, { value: 'Module' }); +/******/ } +/******/ Object.defineProperty(exports, '__esModule', { value: true }); +/******/ }; +/******/ +/******/ // create a fake namespace object +/******/ // mode & 1: value is a module id, require it +/******/ // mode & 2: merge all properties of value into the ns +/******/ // mode & 4: return value when already ns object +/******/ // mode & 8|1: behave like require +/******/ __webpack_require__.t = function(value, mode) { +/******/ if(mode & 1) value = __webpack_require__(value); +/******/ if(mode & 8) return value; +/******/ if((mode & 4) && typeof value === 'object' && value && value.__esModule) return value; +/******/ var ns = Object.create(null); +/******/ __webpack_require__.r(ns); +/******/ Object.defineProperty(ns, 'default', { enumerable: true, value: value }); +/******/ if(mode & 2 && typeof value != 'string') for(var key in value) __webpack_require__.d(ns, key, function(key) { return value[key]; }.bind(null, key)); +/******/ return ns; +/******/ }; +/******/ +/******/ // getDefaultExport function for compatibility with non-harmony modules +/******/ __webpack_require__.n = function(module) { +/******/ var getter = module && module.__esModule ? +/******/ function getDefault() { return module['default']; } : +/******/ function getModuleExports() { return module; }; +/******/ __webpack_require__.d(getter, 'a', getter); +/******/ return getter; +/******/ }; +/******/ +/******/ // Object.prototype.hasOwnProperty.call +/******/ __webpack_require__.o = function(object, property) { return Object.prototype.hasOwnProperty.call(object, property); }; +/******/ +/******/ // __webpack_public_path__ +/******/ __webpack_require__.p = ""; +/******/ +/******/ +/******/ // Load entry module and return exports +/******/ return __webpack_require__(__webpack_require__.s = 0); +/******/ }) +/************************************************************************/ +/******/ ([ +/* 0 */ +/***/ (function(module, exports, __webpack_require__) { + +module.exports = __webpack_require__(1); + + +/***/ }), +/* 1 */ +/***/ (function(module, exports, __webpack_require__) { + +;(function (sax) { // wrapper for non-node envs + sax.parser = function (strict, opt) { return new SAXParser(strict, opt) } + sax.SAXParser = SAXParser + sax.SAXStream = SAXStream + sax.createStream = createStream + + // When we pass the MAX_BUFFER_LENGTH position, start checking for buffer overruns. + // When we check, schedule the next check for MAX_BUFFER_LENGTH - (max(buffer lengths)), + // since that's the earliest that a buffer overrun could occur. This way, checks are + // as rare as required, but as often as necessary to ensure never crossing this bound. + // Furthermore, buffers are only tested at most once per write(), so passing a very + // large string into write() might have undesirable effects, but this is manageable by + // the caller, so it is assumed to be safe. Thus, a call to write() may, in the extreme + // edge case, result in creating at most one complete copy of the string passed in. + // Set to Infinity to have unlimited buffers. + sax.MAX_BUFFER_LENGTH = 64 * 1024 + + var buffers = [ + 'comment', 'sgmlDecl', 'textNode', 'tagName', 'doctype', + 'procInstName', 'procInstBody', 'entity', 'attribName', + 'attribValue', 'cdata', 'script' + ] + + sax.EVENTS = [ + 'text', + 'processinginstruction', + 'sgmldeclaration', + 'doctype', + 'comment', + 'opentagstart', + 'attribute', + 'opentag', + 'closetag', + 'opencdata', + 'cdata', + 'closecdata', + 'error', + 'end', + 'ready', + 'script', + 'opennamespace', + 'closenamespace' + ] + + function SAXParser (strict, opt) { + if (!(this instanceof SAXParser)) { + return new SAXParser(strict, opt) + } + + var parser = this + clearBuffers(parser) + parser.q = parser.c = '' + parser.bufferCheckPosition = sax.MAX_BUFFER_LENGTH + parser.opt = opt || {} + parser.opt.lowercase = parser.opt.lowercase || parser.opt.lowercasetags + parser.looseCase = parser.opt.lowercase ? 'toLowerCase' : 'toUpperCase' + parser.tags = [] + parser.closed = parser.closedRoot = parser.sawRoot = false + parser.tag = parser.error = null + parser.strict = !!strict + parser.noscript = !!(strict || parser.opt.noscript) + parser.state = S.BEGIN + parser.strictEntities = parser.opt.strictEntities + parser.ENTITIES = parser.strictEntities ? Object.create(sax.XML_ENTITIES) : Object.create(sax.ENTITIES) + parser.attribList = [] + + // namespaces form a prototype chain. + // it always points at the current tag, + // which protos to its parent tag. + if (parser.opt.xmlns) { + parser.ns = Object.create(rootNS) + } + + // mostly just for error reporting + parser.trackPosition = parser.opt.position !== false + if (parser.trackPosition) { + parser.position = parser.line = parser.column = 0 + } + emit(parser, 'onready') + } + + if (!Object.create) { + Object.create = function (o) { + function F () {} + F.prototype = o + var newf = new F() + return newf + } + } + + if (!Object.keys) { + Object.keys = function (o) { + var a = [] + for (var i in o) if (o.hasOwnProperty(i)) a.push(i) + return a + } + } + + function checkBufferLength (parser) { + var maxAllowed = Math.max(sax.MAX_BUFFER_LENGTH, 10) + var maxActual = 0 + for (var i = 0, l = buffers.length; i < l; i++) { + var len = parser[buffers[i]].length + if (len > maxAllowed) { + // Text/cdata nodes can get big, and since they're buffered, + // we can get here under normal conditions. + // Avoid issues by emitting the text node now, + // so at least it won't get any bigger. + switch (buffers[i]) { + case 'textNode': + closeText(parser) + break + + case 'cdata': + emitNode(parser, 'oncdata', parser.cdata) + parser.cdata = '' + break + + case 'script': + emitNode(parser, 'onscript', parser.script) + parser.script = '' + break + + default: + error(parser, 'Max buffer length exceeded: ' + buffers[i]) + } + } + maxActual = Math.max(maxActual, len) + } + // schedule the next check for the earliest possible buffer overrun. + var m = sax.MAX_BUFFER_LENGTH - maxActual + parser.bufferCheckPosition = m + parser.position + } + + function clearBuffers (parser) { + for (var i = 0, l = buffers.length; i < l; i++) { + parser[buffers[i]] = '' + } + } + + function flushBuffers (parser) { + closeText(parser) + if (parser.cdata !== '') { + emitNode(parser, 'oncdata', parser.cdata) + parser.cdata = '' + } + if (parser.script !== '') { + emitNode(parser, 'onscript', parser.script) + parser.script = '' + } + } + + SAXParser.prototype = { + end: function () { end(this) }, + write: write, + resume: function () { this.error = null; return this }, + close: function () { return this.write(null) }, + flush: function () { flushBuffers(this) } + } + + var Stream + try { + Stream = __webpack_require__(!(function webpackMissingModule() { var e = new Error("Cannot find module 'stream'"); e.code = 'MODULE_NOT_FOUND'; throw e; }())).Stream + } catch (ex) { + Stream = function () {} + } + + var streamWraps = sax.EVENTS.filter(function (ev) { + return ev !== 'error' && ev !== 'end' + }) + + function createStream (strict, opt) { + return new SAXStream(strict, opt) + } + + function SAXStream (strict, opt) { + if (!(this instanceof SAXStream)) { + return new SAXStream(strict, opt) + } + + Stream.apply(this) + + this._parser = new SAXParser(strict, opt) + this.writable = true + this.readable = true + + var me = this + + this._parser.onend = function () { + me.emit('end') + } + + this._parser.onerror = function (er) { + me.emit('error', er) + + // if didn't throw, then means error was handled. + // go ahead and clear error, so we can write again. + me._parser.error = null + } + + this._decoder = null + + streamWraps.forEach(function (ev) { + Object.defineProperty(me, 'on' + ev, { + get: function () { + return me._parser['on' + ev] + }, + set: function (h) { + if (!h) { + me.removeAllListeners(ev) + me._parser['on' + ev] = h + return h + } + me.on(ev, h) + }, + enumerable: true, + configurable: false + }) + }) + } + + SAXStream.prototype = Object.create(Stream.prototype, { + constructor: { + value: SAXStream + } + }) + + SAXStream.prototype.write = function (data) { + if (typeof Buffer === 'function' && + typeof Buffer.isBuffer === 'function' && + Buffer.isBuffer(data)) { + if (!this._decoder) { + var SD = __webpack_require__(2).StringDecoder + this._decoder = new SD('utf8') + } + data = this._decoder.write(data) + } + + this._parser.write(data.toString()) + this.emit('data', data) + return true + } + + SAXStream.prototype.end = function (chunk) { + if (chunk && chunk.length) { + this.write(chunk) + } + this._parser.end() + return true + } + + SAXStream.prototype.on = function (ev, handler) { + var me = this + if (!me._parser['on' + ev] && streamWraps.indexOf(ev) !== -1) { + me._parser['on' + ev] = function () { + var args = arguments.length === 1 ? [arguments[0]] : Array.apply(null, arguments) + args.splice(0, 0, ev) + me.emit.apply(me, args) + } + } + + return Stream.prototype.on.call(me, ev, handler) + } + + // this really needs to be replaced with character classes. + // XML allows all manner of ridiculous numbers and digits. + var CDATA = '[CDATA[' + var DOCTYPE = 'DOCTYPE' + var XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace' + var XMLNS_NAMESPACE = 'http://www.w3.org/2000/xmlns/' + var rootNS = { xml: XML_NAMESPACE, xmlns: XMLNS_NAMESPACE } + + // http://www.w3.org/TR/REC-xml/#NT-NameStartChar + // This implementation works on strings, a single character at a time + // as such, it cannot ever support astral-plane characters (10000-EFFFF) + // without a significant breaking change to either this parser, or the + // JavaScript language. Implementation of an emoji-capable xml parser + // is left as an exercise for the reader. + var nameStart = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/ + + var nameBody = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040.\d-]/ + + var entityStart = /[#:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/ + var entityBody = /[#:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040.\d-]/ + + function isWhitespace (c) { + return c === ' ' || c === '\n' || c === '\r' || c === '\t' + } + + function isQuote (c) { + return c === '"' || c === '\'' + } + + function isAttribEnd (c) { + return c === '>' || isWhitespace(c) + } + + function isMatch (regex, c) { + return regex.test(c) + } + + function notMatch (regex, c) { + return !isMatch(regex, c) + } + + var S = 0 + sax.STATE = { + BEGIN: S++, // leading byte order mark or whitespace + BEGIN_WHITESPACE: S++, // leading whitespace + TEXT: S++, // general stuff + TEXT_ENTITY: S++, // & and such. + OPEN_WAKA: S++, // < + SGML_DECL: S++, // + SCRIPT: S++, //