xs_regex.h (3928B)
1 /* copyright (c) 2022 - 2025 grunfink et al. / MIT license */ 2 3 #ifndef _XS_REGEX_H 4 5 #define _XS_REGEX_H 6 7 int xs_regex_match(const char *str, const char *rx); 8 xs_list *xs_regex_split_n(const char *str, const char *rx, int count); 9 #define xs_regex_split(str, rx) xs_regex_split_n(str, rx, XS_ALL) 10 xs_list *xs_regex_select_n(const char *str, const char *rx, int count); 11 #define xs_regex_select(str, rx) xs_regex_select_n(str, rx, XS_ALL) 12 xs_list *xs_regex_replace_in(xs_str *str, const char *rx, const char *rep, int count); 13 #define xs_regex_replace_i(str, rx, rep) xs_regex_replace_in(str, rx, rep, XS_ALL) 14 #define xs_regex_replace_n(str, rx, rep, count) xs_regex_replace_in(xs_dup(str), rx, rep, count) 15 #define xs_regex_replace(str, rx, rep) xs_regex_replace_in(xs_dup(str), rx, rep, XS_ALL) 16 17 #ifdef XS_IMPLEMENTATION 18 19 #ifdef __TINYC__ 20 /* fix a compilation error in tcc */ 21 #define _REGEX_NELTS(n) 22 #endif 23 24 #include <regex.h> 25 26 xs_list *xs_regex_split_n(const char *str, const char *rx, int count) 27 /* splits str using regex as a separator, at most count times. 28 Always returns a list: 29 len == 0: regcomp error 30 len == 1: full string (no matches) 31 len == odd: first part [ separator / next part ]... 32 */ 33 { 34 regex_t re; 35 regmatch_t rm; 36 int offset = 0; 37 xs_list *list = xs_list_new(); 38 const char *p = str; 39 40 if (regcomp(&re, rx, REG_EXTENDED)) 41 return list; 42 43 while (count > 0 && !regexec(&re, (p = str + offset), 1, &rm, offset > 0 ? REG_NOTBOL : 0)) { 44 /* add first the leading part of the string */ 45 if (xs_is_string(p)) 46 list = xs_list_append_nstr(list, p, rm.rm_so); 47 else 48 list = xs_list_append(list, ""); 49 50 if (xs_is_string(p + rm.rm_so)) 51 list = xs_list_append_nstr(list, p + rm.rm_so, rm.rm_eo - rm.rm_so); 52 else 53 list = xs_list_append(list, ""); 54 55 /* move forward */ 56 offset += rm.rm_eo; 57 58 count--; 59 } 60 61 /* add the rest of the string */ 62 list = xs_list_append(list, p); 63 64 regfree(&re); 65 66 return list; 67 } 68 69 70 xs_list *xs_regex_select_n(const char *str, const char *rx, int count) 71 /* selects all matches and return them as a list */ 72 { 73 xs_list *list = xs_list_new(); 74 xs *split = NULL; 75 const xs_val *v; 76 int n = 0; 77 78 /* split */ 79 split = xs_regex_split_n(str, rx, count); 80 81 /* now iterate to get only the 'separators' (odd ones) */ 82 xs_list_foreach(split, v) { 83 if (n & 0x1) 84 list = xs_list_append(list, v); 85 86 n++; 87 } 88 89 return list; 90 } 91 92 93 xs_list *xs_regex_replace_in(xs_str *str, const char *rx, const char *rep, int count) 94 /* replaces all matches with the rep string. If it contains unescaped &, 95 they are replaced with the match */ 96 { 97 xs_str_bld b = { 0 }; 98 xs *split = xs_regex_split_n(str, rx, count); 99 const xs_val *v; 100 int n = 0; 101 int pholder = !!strchr(rep, '&'); 102 103 xs_list_foreach(split, v) { 104 if (n & 0x1) { 105 if (pholder) { 106 /* rep has a placeholder; process char by char */ 107 const char *p = rep; 108 109 while (*p) { 110 if (*p == '&') 111 xs_str_bld_cat(&b, v); 112 else { 113 if (*p == '\\') 114 p++; 115 116 if (!*p) 117 break; 118 119 xs_str_bld_cat(&b, (char[2]){ *p }); 120 } 121 122 p++; 123 } 124 } 125 else 126 xs_str_bld_cat(&b, rep); 127 } 128 else 129 xs_str_bld_cat(&b, v); 130 131 n++; 132 } 133 134 xs_free(str); 135 136 return b.data; 137 } 138 139 140 int xs_regex_match(const char *str, const char *rx) 141 /* returns if str matches the regex at least once */ 142 { 143 xs *l = xs_regex_select_n(str, rx, 1); 144 145 return xs_list_len(l) == 1; 146 } 147 148 149 #endif /* XS_IMPLEMENTATION */ 150 151 #endif /* XS_REGEX_H */