1 
  2 ////////////////////////////////////////////////////////////////////////////////
  3 /**
  4  * @fileOverview Copyright (C) 2009 www.webis.de<br/>
  5  * Package creation, needs to be placed in global scope first order.
  6  * @author Alexander Kuemmel alexander.kuemmel@uni-weimar.de
  7  * @author Christian Fricke christian.fricke@uni-weimar.de
  8  */
  9 
 10 if(typeof(de) === "undefined"){ de = {}; }
 11 
 12 if(typeof(de.aitools) === "undefined"){ de.aitools = {}; }
 13 
 14 if(typeof(de.aitools.js) === "undefined"){
 15   /**
 16    * @static
 17    * @namespace de.aitools.js contains the aitools javascript implementations
 18    */
 19   de.aitools.js = {};
 20 }
 21 
 22 // PackageCreate.js
 23 ////////////////////////////////////////////////////////////////////////////////
 24 
 25 ////////////////////////////////////////////////////////////////////////////////
 26 // Decomposition.js
 27 // Copyright (C) 2009 www.webis.de
 28 
 29 /**
 30  * Contains some Delimiter classes to be used in decomposition.
 31  * @see de.aitools.js.decompose
 32  * @author Christof Braeutigam christof.braeutigam@uni-weimar.de
 33  * @static
 34  */
 35 de.aitools.js.Delimiter = {
 36   // INFO: Auch Kombinationen wie ?!? werden mit dem [...]+ als ein Delimiter
 37   //       erkannt, das koennte machmal nicht im Sinne des Erfinders sein.
 38   //       Den Backspace gibt es als escape sequence (\b) aber nicht als regex,
 39   //       dort ist \b eine Wortgrenze.
 40   CommonDelimiter             : " <>|!\"$%&/()=?`#'+-*~@}][{\t\n\r\f\b:;.,\"",
 41   CommonDelimiterRe           : /[\]\[ <>|!\"$%&\/()=?`#'+\-*~@}{\t\n\r\f:;.,]+/,
 42   WordDelimiter               : " !?.,;:\"",
 43   WordDelimiterRe             : /[ !?.,;:\"]+/,
 44   SectionDelimiter            : " !?.,;:",
 45   SectionDelimiterRe          : /[ !?.,;:]+/,
 46   SentenceDelimiter           : "!?.;:",
 47   SentenceDelimiterRe         : /[!?.;:]+/,
 48   NumberDelimiter             : " .!?\t\n\r",
 49   NumberDelimiterRe           : /[ .!?\t\n\r]+/,
 50   AdditionalNumberDelimiter   : "%$",
 51   AdditionalNumberDelimiterRe : /[%$]+/,
 52   NGramDemlimiterAll          : " -.,;<>|!\"$%&\/()=?`#'+*~@}][{\t\n\r\f\b_",
 53   NGramDemlimiterAllRe        : /[ \-.,;<>|!\"$%&\/()=?`#'+*~@}\]\[{\t\n\r\f_]+/,
 54   Digits                      : "0123456789",
 55   DigitsRe                    : /[0123456789]+/,
 56   Blank                       : " ",
 57   BlankRe                     : / +/,
 58   PeriodComma                 : ".,",
 59   PeriodCommare               : /[.,]+/
 60 };
 61 
 62 /**
 63  * Decomposes the given text with respect to the given delimiters.
 64  * @see de.aitools.js.Delimiter
 65  * @param {string} text Text to decompose.
 66  * @param {regex} delimiters The delimiters given as JS-regex.
 67  * @returns {array} The decomposed text as an array of strings.
 68  * @author Christof Braeutigam christof.braeutigam@uni-weimar.de
 69  * @static
 70  */
 71 de.aitools.js.decompose = function (text, delimiters) {
 72   if (delimiters === undefined) {
 73     delimiters = de.aitools.js.Delimiter.CommonDelimiterRe;
 74   }
 75   return text.split(delimiters);
 76 };
 77 
 78 // Decomposition.js
 79 ////////////////////////////////////////////////////////////////////////////////
 80