nregex.cpp

00001 // LICENSE: (Please see the file COPYING for details)
00002 //
00003 // NUS - Nemesis Utilities System: A C++ application development framework 
00004 // Copyright (C) 2006, 2008 Otavio Rodolfo Piske
00005 //
00006 //  This file is part of NUS
00007 //
00008 //  This library is free software; you can redistribute it and/or
00009 //  modify it under the terms of the GNU Lesser General Public
00010 //  License as published by the Free Software Foundation version 2.1
00011 //  of the License.
00012 //
00013 //  This library is distributed in the hope that it will be useful,
00014 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
00015 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00016 //  Lesser General Public License for more details.
00017 //
00018 //  You should have received a copy of the GNU Lesser General Public
00019 //  License along with this library; if not, write to the Free Software
00020 //  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
00021 //
00022 #include "nregex.h"
00023 
00024 const nint32 NRegex::NO_MATCH = -1;
00025 const nint32 NRegex::MATCH_COUNT_MAX = 2048;
00026 
00027 NRegex::NRegex(const NString &reg, const NString &str)
00028       : m_reg(),
00029       m_str(str),
00030       m_offsets()
00031 {
00032       int reg_ret = 0;
00033 
00034       reg_ret = regcomp(&m_reg , reg.toChar(), REG_EXTENDED | REG_NOSUB);
00035       if (reg_ret != 0) {
00036             throw NException(getError(reg_ret), NException::BASE);
00037       }
00038 }
00039 
00040 NString NRegex::getError(int retval) {
00041       NString ret;      
00042 
00043       switch (retval) { 
00044             case REG_BADBR: 
00045                   ret = "[REG_BADBR] Invalid use of back reference operator";
00046                   break;
00047             case REG_BADPAT:
00048                   ret = "[REG_BADPAT] Invalid use of pattern operators such as group or list";
00049                   break;
00050             case REG_BADRPT:
00051                         ret = "Invalid  use  of  repetition operators such as "
00052                   "using `*' as the first character.";
00053                   break;
00054             case REG_EBRACE:
00055                   ret = "[REG_EBRACE] Un-matched brace interval operators.";
00056                   break;
00057             case REG_EBRACK:
00058                   ret = "[REG_EBRACK] Un-matched bracket list operators.";
00059                   break;
00060             case REG_ECOLLATE:
00061                   ret = "[REG_ECOLLATE] Invalid collating element.";
00062                   break;
00063             case REG_ECTYPE:
00064                   ret = "[REG_ECTYPE] Unknown character class name.";
00065                   break;
00066 #ifndef __APPLE__
00067             case REG_EEND:
00068                   ret = "[REG_EEND] Non specific error. This is not "
00069                   "defined by POSIX.2.";
00070                   break;
00071 #endif // __APPLE__
00072             case REG_EESCAPE:
00073                   ret = "[REG_EESCAPE] Trailing backslash.";
00074                   break;
00075             case REG_EPAREN:
00076                   ret = "[REG_EPAREN] Un-matched parenthesis group operators.";
00077                   break;
00078             case REG_ERANGE:
00079                   ret = "[REG_ERANGE] Invalid use of the range operator, "
00080                   "eg. the ending point  of  the range occurs prior to "
00081                   "the starting point.";
00082                   break;
00083 #ifndef __APPLE__
00084             case REG_ESIZE:
00085                   ret = "[REG_ESIZE] Compiled  regular  expression  "
00086                   "requires  a pattern buffer larger than 64Kb.  This is "
00087                   "not defined by POSIX.2.";
00088                   break;
00089 #endif // __APPLE__
00090             case REG_ESPACE:
00091                   ret = "[REG_ESPACE] The regex routines ran out of memory.";
00092                   break;
00093             case REG_ESUBREG:
00094                   ret = "[REG_ESUBREG] Invalid back reference to a subexpression";
00095                   break;
00096       }
00097 
00098       
00099       ret = ret + ": " + getDetailedError(retval);
00100       return ret;
00101 }
00102 
00103 NString NRegex::getDetailedError(int retval) {
00104       size_t len;
00105       char *buffer = NULL;
00106       NString ret;
00107 
00108       len = regerror (retval, &m_reg, NULL, 0);
00109       buffer = (char *) malloc (len);     
00110       
00111       if (!buffer) {
00112             //NWarning::print() << "Not enough memory for detailed error";
00113             
00114             return ret;
00115       }
00116 
00117       memset(buffer, 0, len);
00118       regerror (retval, &m_reg, buffer, len);
00119       ret.set(buffer);  
00120       free(buffer);
00121       
00122       return ret;
00123 }
00124 
00125 
00126 bool NRegex::match() { 
00127       int ret = 0;
00128       int i = 0;
00129       
00130       ret = regexec(&m_reg, m_str.toChar(), 0, NULL, 0);
00131 
00132       if (ret == 0) {
00133             return true;
00134       }
00135       else { 
00136             if (ret = REG_NOMATCH) {
00137                   return false;
00138             }
00139       }
00140 
00141       throw NException(getError(ret), NException::BASE);
00142 }
00143 
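// TODO: pos() below is disabled because it is broken. Known problems to fix
// before re-enabling it:
//   - the constructor compiles the pattern with REG_NOSUB, so regexec() does
//     not fill in the regmatch_t offsets this function relies on;
//   - `if (ret = REG_NOMATCH)` assigns instead of comparing, so the throw
//     after it can never be reached;
//   - off.start and off.end are updated with `+=` on an expression that
//     already contains off.start, so the offset is counted twice;
//   - a debugging printf() is still inside the loop.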
00145 /*
00146 NList<NRegex::MatchOffset> NRegex::pos() {
00147       int ret = 0;
00148       int i = 0;
00149       int start_pos = 0;
00150       regmatch_t match;
00151       MatchOffset off;
00152       const char *tmp_str = m_str.toChar();
00153 
00154       memset(&off, 0, sizeof(off));
00155       memset(&match, 0, sizeof(match));
00156       do {
00157             ret = regexec(&m_reg, tmp_str + off.start, 1, &match, REG_NOTBOL);
00158             
00159             if (ret != 0) { 
00160                   if (ret = REG_NOMATCH) {
00161                         break;
00162                   }
00163             
00164                   throw NException(getError(ret), NException::BASE);
00165             }
00166 
00167             printf("aaaaa %i | %i\n", match.rm_so, match.rm_eo);
00168 
00169       
00170             off.start += off.start + match.rm_so;
00171             off.end += off.start + match.rm_eo;
00172 
00173             m_offsets.append(off);
00174             i++;
00175       } while (ret == 0);
00176       
00177       return m_offsets;
00178 }
00179 */

Generated on Wed Mar 5 23:10:35 2008 for NemesisUtilitiesSystem by  doxygen 1.5.4