#include "header.h"
Go to the source code of this file.
Defines | |
| #define | unless(C) if(!(C)) |
| #define | CREATE_SIZE 1 |
Functions | |
| symbol * | create_s (void) |
| void | lose_s (symbol *p) |
| int | skip_utf8 (const symbol *p, int c, int lb, int l, int n) |
| static int | get_utf8 (const symbol *p, int c, int l, int *slot) |
| static int | get_b_utf8 (const symbol *p, int c, int lb, int *slot) |
| int | in_grouping_U (struct SN_env *z, const unsigned char *s, int min, int max, int repeat) |
| int | in_grouping_b_U (struct SN_env *z, const unsigned char *s, int min, int max, int repeat) |
| int | out_grouping_U (struct SN_env *z, const unsigned char *s, int min, int max, int repeat) |
| int | out_grouping_b_U (struct SN_env *z, const unsigned char *s, int min, int max, int repeat) |
| int | in_grouping (struct SN_env *z, const unsigned char *s, int min, int max, int repeat) |
| int | in_grouping_b (struct SN_env *z, const unsigned char *s, int min, int max, int repeat) |
| int | out_grouping (struct SN_env *z, const unsigned char *s, int min, int max, int repeat) |
| int | out_grouping_b (struct SN_env *z, const unsigned char *s, int min, int max, int repeat) |
| int | eq_s (struct SN_env *z, int s_size, const symbol *s) |
| int | eq_s_b (struct SN_env *z, int s_size, const symbol *s) |
| int | eq_v (struct SN_env *z, const symbol *p) |
| int | eq_v_b (struct SN_env *z, const symbol *p) |
| int | find_among (struct SN_env *z, const struct among *v, int v_size) |
| int | find_among_b (struct SN_env *z, const struct among *v, int v_size) |
| static symbol * | increase_size (symbol *p, int n) |
| int | replace_s (struct SN_env *z, int c_bra, int c_ket, int s_size, const symbol *s, int *adjptr) |
| static int | slice_check (struct SN_env *z) |
| int | slice_from_s (struct SN_env *z, int s_size, const symbol *s) |
| int | slice_from_v (struct SN_env *z, const symbol *p) |
| int | slice_del (struct SN_env *z) |
| int | insert_s (struct SN_env *z, int bra, int ket, int s_size, const symbol *s) |
| int | insert_v (struct SN_env *z, int bra, int ket, const symbol *p) |
| symbol * | slice_to (struct SN_env *z, symbol *p) |
| symbol * | assign_to (struct SN_env *z, symbol *p) |
| #define CREATE_SIZE 1 |
Definition at line 5 of file utilities.c.
Referenced by create_s().
| #define unless | ( | C | ) | if(!(C)) |
Definition at line 3 of file utilities.c.
Referenced by in_grouping_b_U(), in_grouping_U(), out_grouping(), out_grouping_b(), out_grouping_b_U(), out_grouping_U(), and replace_s().
Definition at line 441 of file utilities.c.
References CAPACITY, increase_size(), SN_env::l, memmove, NULL, SN_env::p, and SET_SIZE.
| symbol* create_s | ( | void | ) |
Definition at line 7 of file utilities.c.
References CAPACITY, CREATE_SIZE, HEAD, malloc, NULL, SN_env::p, and SET_SIZE.
Referenced by replace_s(), and SN_create_env().
Definition at line 189 of file utilities.c.
References SN_env::c, SN_env::l, memcmp(), and SN_env::p.
Referenced by eq_v(), porter_ISO_8859_1_stem(), porter_UTF_8_stem(), r_is_reserved_word(), r_postlude(), and r_prelude().
Definition at line 194 of file utilities.c.
References SN_env::c, SN_env::lb, memcmp(), and SN_env::p.
Referenced by eq_v_b(), french_ISO_8859_1_stem(), french_UTF_8_stem(), portuguese_ISO_8859_1_stem(), portuguese_UTF_8_stem(), r_adjectival(), r_append_U_to_stems_ending_with_d_or_g(), r_attached_pronoun(), r_case_ending(), r_check_vowel_harmony(), r_e_ending(), r_en_ending(), r_main_suffix(), r_mark_ki(), r_mark_suffix_with_optional_n_consonant(), r_mark_suffix_with_optional_s_consonant(), r_mark_suffix_with_optional_y_consonant(), r_mark_yken(), r_other_endings(), r_other_suffix(), r_perfective_gerund(), r_possessive(), r_residual_form(), r_residual_suffix(), r_standard_suffix(), r_step_0(), r_Step_1c(), r_Step_2(), r_Step_4(), r_Step_5(), r_Step_5a(), r_Step_5b(), r_t_plural(), r_tidy(), r_tidy_up(), r_un_accent(), r_verb(), r_verb_suffix(), r_VI(), r_vowel_suffix(), r_y_verb_suffix(), russian_KOI8_R_stem(), and russian_UTF_8_stem().
Definition at line 203 of file utilities.c.
References eq_s_b(), and SIZE.
Referenced by r_tidy(), and r_undouble().
Definition at line 207 of file utilities.c.
References SN_env::c, among::function, i, SN_env::l, SN_env::p, among::result, among::s, among::s_size, and among::substring_i.
Referenced by r_exception1(), r_mark_regions(), r_postlude(), and r_prelude().
{
/* Forward lookup in an among table.
   Binary-searches the v_size entries of v for a string w->s that matches the
   text starting at the cursor z->c without reading past the limit z->l.
   When an entry is accepted, z->c is set just past its string and its
   result field is returned; 0 is returned once the substring_i chain is
   exhausted without an accepted entry.
   NOTE(review): the binary search presumably requires v to be sorted by s --
   confirm against whatever generates the tables. */
/* i and j bracket the part of the table still under consideration. */
int i = 0;
int j = v_size;
int c = z->c; int l = z->l;
/* q addresses the text at the cursor. */
symbol * q = z->p + c;
const struct among * w;
/* Number of leading characters already known to match at the i end and at
   the j end of the bracket; the comparison of v[k] restarts from the smaller
   of the two instead of from 0. */
int common_i = 0;
int common_j = 0;
int first_key_inspected = 0;
while(1) {
/* k is the midpoint of the current bracket. */
int k = i + ((j - i) >> 1);
int diff = 0;
int common = common_i < common_j ? common_i : common_j; /* smaller */
w = v + k;
{
/* Compare the text with w->s from offset common onwards. Reaching the
   limit l before w->s is used up is treated as "text sorts lower". */
int i2; for (i2 = common; i2 < w->s_size; i2++) {
if (c + common == l) { diff = -1; break; }
diff = q[common] - w->s[i2];
if (diff != 0) break;
common++;
}
}
/* Text lower than v[k]: pull the upper end down; otherwise raise the
   lower end, remembering how many characters matched on that side. */
if (diff < 0) { j = k; common_j = common; }
else { i = k; common_i = common; }
if (j - i <= 1) {
if (i > 0) break; /* v->s has been inspected */
if (j == i) break; /* only one item in v */
/* - but now we need to go round once more to get
v->s inspected. This looks messy, but is actually
the optimal approach. */
if (first_key_inspected) break;
first_key_inspected = 1;
}
}
/* Starting from entry i, follow the substring_i links until an entry is
   accepted or the chain ends. */
while(1) {
w = v + i;
/* common_i >= s_size means every character of w->s was matched. */
if (common_i >= w->s_size) {
z->c = c + w->s_size;
/* No routine attached to the entry: accept it as it stands. */
if (w->function == 0) return w->result;
{
/* Otherwise the attached routine decides; the cursor is assigned again
   after the call, so the value left by the routine is overwritten. */
int res = w->function(z);
z->c = c + w->s_size;
if (res) return w->result;
}
}
/* Move on to the linked entry; a negative index ends the chain. */
i = w->substring_i;
if (i < 0) return 0;
}
}
Definition at line 267 of file utilities.c.
References SN_env::c, among::function, i, SN_env::lb, SN_env::p, among::result, among::s, among::s_size, and among::substring_i.
Referenced by r_adjectival(), r_adjective(), r_attached_pronoun(), r_case(), r_case_ending(), r_case_other(), r_case_special(), r_combo_suffix(), r_consonant_pair(), r_derivational(), r_double(), r_exception2(), r_factive(), r_i_plural(), r_i_verb_suffix(), r_instrum(), r_LONG(), r_main_suffix(), r_mark_cAsInA(), r_mark_DA(), r_mark_DAn(), r_mark_DUr(), r_mark_lAr(), r_mark_lArI(), r_mark_nA(), r_mark_ncA(), r_mark_ndA(), r_mark_ndAn(), r_mark_nU(), r_mark_nUn(), r_mark_nUz(), r_mark_possessives(), r_mark_sUn(), r_mark_sUnUz(), r_mark_yA(), r_mark_yDU(), r_mark_ylA(), r_mark_ymUs_(), r_mark_ysA(), r_mark_yUm(), r_mark_yUz(), r_noun(), r_other_endings(), r_other_suffix(), r_owned(), r_particle_etc(), r_perfective_gerund(), r_plur_owner(), r_plural(), r_possessive(), r_post_process_last_consonants(), r_reflexive(), r_residual_form(), r_residual_suffix(), r_sing_owner(), r_standard_suffix(), r_step_0(), r_Step_1a(), r_Step_1b(), r_Step_2(), r_Step_3(), r_Step_4(), r_Step_5(), r_t_plural(), r_tidy_up(), r_un_double(), r_undouble(), r_v_ending(), r_verb(), r_verb_suffix(), r_vowel_suffix(), and r_y_verb_suffix().
{
/* Backward lookup in an among table.
   Binary-searches the v_size entries of v for a string w->s that matches the
   text ending at the cursor z->c, comparing from the last character of w->s
   towards its first and never reading below the backward limit z->lb.
   When an entry is accepted, z->c is set just before the matched text and
   the entry's result field is returned; 0 is returned once the substring_i
   chain is exhausted without an accepted entry.
   NOTE(review): the binary search presumably requires v to be sorted on the
   reversed strings -- confirm against whatever generates the tables. */
/* i and j bracket the part of the table still under consideration. */
int i = 0;
int j = v_size;
int c = z->c; int lb = z->lb;
/* q addresses the character immediately before the cursor. */
symbol * q = z->p + c - 1;
const struct among * w;
/* Number of trailing characters already known to match at the i end and at
   the j end of the bracket; comparison of v[k] restarts from the smaller. */
int common_i = 0;
int common_j = 0;
int first_key_inspected = 0;
while(1) {
/* k is the midpoint of the current bracket. */
int k = i + ((j - i) >> 1);
int diff = 0;
int common = common_i < common_j ? common_i : common_j;
w = v + k;
{
/* Walk w->s from its end (skipping the common characters already
   matched) while walking the text backwards from the cursor. Reaching
   the limit lb before w->s is used up is treated as "text sorts lower". */
int i2; for (i2 = w->s_size - 1 - common; i2 >= 0; i2--) {
if (c - common == lb) { diff = -1; break; }
diff = q[- common] - w->s[i2];
if (diff != 0) break;
common++;
}
}
/* Text lower than v[k]: pull the upper end down; otherwise raise the
   lower end, remembering how many characters matched on that side. */
if (diff < 0) { j = k; common_j = common; }
else { i = k; common_i = common; }
if (j - i <= 1) {
/* i > 0: the entry at i has already been compared. */
if (i > 0) break;
/* j == i: there was only one entry to look at. */
if (j == i) break;
/* Otherwise allow exactly one more pass so that v[0] is compared too. */
if (first_key_inspected) break;
first_key_inspected = 1;
}
}
/* Starting from entry i, follow the substring_i links until an entry is
   accepted or the chain ends. */
while(1) {
w = v + i;
/* common_i >= s_size means every character of w->s was matched. */
if (common_i >= w->s_size) {
z->c = c - w->s_size;
/* No routine attached to the entry: accept it as it stands. */
if (w->function == 0) return w->result;
{
/* Otherwise the attached routine decides; the cursor is assigned again
   after the call, so the value left by the routine is overwritten. */
int res = w->function(z);
z->c = c - w->s_size;
if (res) return w->result;
}
}
/* Move on to the linked entry; a negative index ends the chain. */
i = w->substring_i;
if (i < 0) return 0;
}
}
| static int get_b_utf8 | ( | const symbol * | p, | |
| int | c, | |||
| int | lb, | |||
| int * | slot | |||
| ) | [static] |
Definition at line 77 of file utilities.c.
Referenced by in_grouping_b_U(), and out_grouping_b_U().
{
    /* Decode the UTF-8 character that ends immediately before offset c in p,
       reading backwards and never below the limit lb.
       The decoded code point is stored in *slot and the number of bytes it
       occupies (1, 2 or 3) is returned; 0 is returned, and *slot is left
       untouched, when there is nothing before c (c <= lb).
       Sequences longer than three bytes are not handled here.
       NOTE(review): the comparisons assume symbol is an unsigned 8-bit type
       -- confirm in header.h. */
    int b0, b1;
    if (c <= lb) return 0;
    b0 = p[--c];
    if (b0 < 0x80 || c == lb) {   /* 1000 0000: plain ASCII, or out of room */
        * slot = b0; return 1;
    }
    b1 = p[--c];
    if (b1 >= 0xC0 || c == lb) {   /* 1100 0000: b1 is the lead byte */
        * slot = (b1 & 0x1F) << 6 | (b0 & 0x3F); return 2;
    }
    /* b1 was a continuation byte, so the lead byte is the one before it.
       c > lb holds here, which makes p[c - 1] a valid read. (Reading p[c]
       would fetch b1 again and yield the wrong top four bits, e.g.
       E1 88 B4 would decode to 0x8234 instead of U+1234.) */
    * slot = (p[c - 1] & 0xF) << 12 | (b1 & 0x3F) << 6 | (b0 & 0x3F); return 3;
}
| static int get_utf8 | ( | const symbol * | p, | |
| int | c, | |||
| int | l, | |||
| int * | slot | |||
| ) | [static] |
Definition at line 63 of file utilities.c.
Referenced by in_grouping_U(), and out_grouping_U().
| int in_grouping | ( | struct SN_env * | z, | |
| const unsigned char * | s, | |||
| int | min, | |||
| int | max, | |||
| int | repeat | |||
| ) |
Definition at line 141 of file utilities.c.
References SN_env::c, SN_env::l, and SN_env::p.
Referenced by porter_ISO_8859_1_stem(), r_mark_regions(), and r_prelude().
| int in_grouping_b | ( | struct SN_env * | z, | |
| const unsigned char * | s, | |||
| int | min, | |||
| int | max, | |||
| int | repeat | |||
| ) |
Definition at line 153 of file utilities.c.
References SN_env::c, SN_env::lb, and SN_env::p.
Referenced by r_case_ending(), r_main_suffix(), r_particle_etc(), r_shortv(), r_standard_suffix(), r_Step_2(), r_t_plural(), r_tidy(), r_VI(), and r_vowel_suffix().
| int in_grouping_b_U | ( | struct SN_env * | z, | |
| const unsigned char * | s, | |||
| int | min, | |||
| int | max, | |||
| int | repeat | |||
| ) |
Definition at line 103 of file utilities.c.
References SN_env::c, get_b_utf8(), SN_env::lb, SN_env::p, and unless.
Referenced by r_case_ending(), r_main_suffix(), r_mark_sU(), r_mark_suffix_with_optional_n_consonant(), r_mark_suffix_with_optional_s_consonant(), r_mark_suffix_with_optional_U_vowel(), r_mark_suffix_with_optional_y_consonant(), r_mark_yU(), r_particle_etc(), r_shortv(), r_standard_suffix(), r_Step_2(), r_t_plural(), r_tidy(), r_VI(), and r_vowel_suffix().
| int in_grouping_U | ( | struct SN_env * | z, | |
| const unsigned char * | s, | |||
| int | min, | |||
| int | max, | |||
| int | repeat | |||
| ) |
Definition at line 91 of file utilities.c.
References SN_env::c, get_utf8(), SN_env::l, SN_env::p, and unless.
Referenced by porter_UTF_8_stem(), r_mark_regions(), and r_prelude().
Definition at line 324 of file utilities.c.
References CAPACITY, HEAD, lose_s(), NULL, and realloc.
Referenced by assign_to(), replace_s(), and slice_to().
Definition at line 405 of file utilities.c.
References SN_env::bra, SN_env::ket, and replace_s().
Referenced by r_append_U_to_stems_ending_with_d_or_g(), and r_Step_1b().
Definition at line 414 of file utilities.c.
References SN_env::bra, SN_env::ket, replace_s(), and SIZE.
| void lose_s | ( | symbol * | p | ) |
Definition at line 17 of file utilities.c.
References free, HEAD, and NULL.
Referenced by increase_size(), slice_to(), and SN_close_env().
| int out_grouping | ( | struct SN_env * | z, | |
| const unsigned char * | s, | |||
| int | min, | |||
| int | max, | |||
| int | repeat | |||
| ) |
Definition at line 165 of file utilities.c.
References SN_env::c, SN_env::l, SN_env::p, and unless.
Referenced by porter_ISO_8859_1_stem(), and r_mark_regions().
| int out_grouping_b | ( | struct SN_env * | z, | |
| const unsigned char * | s, | |||
| int | min, | |||
| int | max, | |||
| int | repeat | |||
| ) |
Definition at line 177 of file utilities.c.
References SN_env::c, SN_env::lb, SN_env::p, and unless.
Referenced by r_case_ending(), r_e_ending(), r_en_ending(), r_i_verb_suffix(), r_main_suffix(), r_residual_suffix(), r_shortv(), r_standard_suffix(), r_Step_1a(), r_Step_1b(), r_Step_1c(), r_tidy(), r_un_accent(), r_undouble(), and r_verb_suffix().
| int out_grouping_b_U | ( | struct SN_env * | z, | |
| const unsigned char * | s, | |||
| int | min, | |||
| int | max, | |||
| int | repeat | |||
| ) |
Definition at line 127 of file utilities.c.
References SN_env::c, get_b_utf8(), SN_env::lb, SN_env::p, and unless.
Referenced by r_append_U_to_stems_ending_with_d_or_g(), r_case_ending(), r_check_vowel_harmony(), r_e_ending(), r_en_ending(), r_i_verb_suffix(), r_main_suffix(), r_mark_suffix_with_optional_U_vowel(), r_residual_suffix(), r_shortv(), r_standard_suffix(), r_Step_1a(), r_Step_1b(), r_Step_1c(), r_tidy(), r_un_accent(), r_undouble(), and r_verb_suffix().
| int out_grouping_U | ( | struct SN_env * | z, | |
| const unsigned char * | s, | |||
| int | min, | |||
| int | max, | |||
| int | repeat | |||
| ) |
Definition at line 115 of file utilities.c.
References SN_env::c, get_utf8(), SN_env::l, SN_env::p, and unless.
Referenced by porter_UTF_8_stem(), r_mark_regions(), and r_more_than_one_syllable_word().
| int replace_s | ( | struct SN_env * | z, | |
| int | c_bra, | |||
| int | c_ket, | |||
| int | s_size, | |||
| const symbol * | s, | |||
| int * | adjptr | |||
| ) |
Definition at line 343 of file utilities.c.
References SN_env::c, CAPACITY, create_s(), increase_size(), SN_env::l, memmove, NULL, SN_env::p, SET_SIZE, SIZE, and unless.
Referenced by insert_s(), insert_v(), slice_from_s(), and SN_set_current().
{
    /* Replace the region [c_bra, c_ket) of z->p with the s_size symbols at s.
       The buffer is created on demand and enlarged when the new length
       exceeds its capacity. The stored size, the limit z->l and the cursor
       z->c are adjusted for the change in length. If adjptr is non-NULL the
       length change is written through it. Returns 0 on success and -1 when
       the buffer could not be created or enlarged. */
    int delta;      /* new length minus old length */
    int old_len;

    if (z->p == NULL) {
        z->p = create_s();
        if (z->p == NULL) return -1;
    }

    delta = s_size - (c_ket - c_bra);
    old_len = SIZE(z->p);

    if (delta != 0) {
        const int new_len = delta + old_len;

        if (new_len > CAPACITY(z->p)) {
            z->p = increase_size(z->p, new_len);
            if (z->p == NULL) return -1;
        }

        /* Shift the tail (everything from c_ket on) to its new position. */
        memmove(z->p + c_ket + delta,
                z->p + c_ket,
                (old_len - c_ket) * sizeof(symbol));
        SET_SIZE(z->p, new_len);
        z->l += delta;

        /* A cursor at or after the region moves with the tail; a cursor
           strictly inside the region is pulled back to its start. */
        if (z->c >= c_ket) {
            z->c += delta;
        } else if (z->c > c_bra) {
            z->c = c_bra;
        }
    }

    /* Copy the replacement in, unless there is nothing to copy. */
    if (s_size != 0) {
        memmove(z->p + c_bra, s, s_size * sizeof(symbol));
    }

    if (adjptr != NULL) {
        *adjptr = delta;
    }
    return 0;
}
| int skip_utf8 | ( | const symbol * | p, | |
| int | c, | |||
| int | lb, | |||
| int | l, | |||
| int | n | |||
| ) |
Definition at line 30 of file utilities.c.
Referenced by english_UTF_8_stem(), porter_UTF_8_stem(), r_case_ending(), r_consonant_pair(), r_is_reserved_word(), r_mark_regions(), r_mark_suffix_with_optional_n_consonant(), r_mark_suffix_with_optional_s_consonant(), r_mark_suffix_with_optional_U_vowel(), r_mark_suffix_with_optional_y_consonant(), r_postlude(), r_prelude(), r_standard_suffix(), r_Step_1a(), r_Step_1b(), r_tidy(), r_un_double(), and r_undouble().
{
    /* Move offset c across n UTF-8 characters of p: forwards for n > 0
       (never beyond l), backwards for n < 0 (never below lb). Returns the
       new offset, or -1 when a limit is reached before the requested number
       of characters has been skipped. n == 0 returns c unchanged. */
    int octet;

    /* Forward steps; this loop is skipped entirely when n <= 0. */
    while (n > 0) {
        if (c >= l) return -1;
        octet = p[c++];
        if (octet >= 0xC0) {   /* 1100 0000: lead byte of a longer sequence */
            /* Swallow the bytes of the form 10------ that follow. */
            for (; c < l; c++) {
                octet = p[c];
                if (octet < 0x80 || octet >= 0xC0) break;
            }
        }
        n--;
    }

    /* Backward steps; this loop is skipped entirely when n >= 0. */
    while (n < 0) {
        if (c <= lb) return -1;
        octet = p[--c];
        if (octet >= 0x80) {   /* 1000 0000: not plain ASCII */
            /* Back up until a lead byte is found or lb is reached. */
            for (; c > lb; c--) {
                octet = p[c];
                if (octet >= 0xC0) break;   /* 1100 0000 */
            }
        }
        n++;
    }

    return c;
}
| static int slice_check | ( | struct SN_env * | z | ) | [static] |
Definition at line 375 of file utilities.c.
References SN_env::bra, debug, SN_env::ket, SN_env::l, NULL, SN_env::p, and SIZE.
Referenced by slice_from_s(), and slice_to().
| int slice_del | ( | struct SN_env * | z | ) |
Definition at line 401 of file utilities.c.
References slice_from_s().
Referenced by portuguese_ISO_8859_1_stem(), portuguese_UTF_8_stem(), r_adjectival(), r_adjective(), r_attached_pronoun(), r_case(), r_case_ending(), r_case_other(), r_consonant_pair(), r_derivational(), r_e_ending(), r_en_ending(), r_factive(), r_i_plural(), r_i_verb_suffix(), r_instrum(), r_main_suffix(), r_noun(), r_other_endings(), r_other_suffix(), r_owned(), r_particle_etc(), r_perfective_gerund(), r_plur_owner(), r_plural(), r_possessive(), r_prelude(), r_reflexive(), r_residual_form(), r_residual_suffix(), r_sing_owner(), r_standard_suffix(), r_stem_nominal_verb_suffixes(), r_stem_noun_suffixes(), r_stem_suffix_chain_before_ki(), r_step_0(), r_Step_1a(), r_Step_1b(), r_Step_2(), r_Step_3(), r_Step_4(), r_Step_5(), r_Step_5a(), r_Step_5b(), r_t_plural(), r_tidy(), r_tidy_up(), r_un_double(), r_undouble(), r_verb(), r_verb_suffix(), r_vowel_suffix(), r_y_verb_suffix(), russian_KOI8_R_stem(), and russian_UTF_8_stem().
{
    /* Delegate to slice_from_s with a zero-length replacement (size 0 and a
       null string pointer) and pass its status straight back. */
    int status = slice_from_s(z, 0, 0);
    return status;
}
Definition at line 392 of file utilities.c.
References SN_env::bra, SN_env::ket, NULL, replace_s(), and slice_check().
Referenced by french_ISO_8859_1_stem(), french_UTF_8_stem(), porter_ISO_8859_1_stem(), porter_UTF_8_stem(), r_attached_pronoun(), r_case_other(), r_case_special(), r_combo_suffix(), r_exception1(), r_main_suffix(), r_other_suffix(), r_owned(), r_plur_owner(), r_plural(), r_possessive(), r_post_process_last_consonants(), r_postlude(), r_prelude(), r_residual_form(), r_residual_suffix(), r_sing_owner(), r_standard_suffix(), r_step_0(), r_Step_1a(), r_Step_1b(), r_Step_1c(), r_Step_2(), r_Step_3(), r_un_accent(), r_v_ending(), slice_del(), and slice_from_v().
Definition at line 397 of file utilities.c.
References SIZE, and slice_from_s().
{
    /* Delegate to slice_from_s, using the stored size of p as the length of
       the replacement, and pass its status straight back. */
    int length = SIZE(p);
    return slice_from_s(z, length, p);
}
Definition at line 423 of file utilities.c.
References SN_env::bra, CAPACITY, increase_size(), SN_env::ket, lose_s(), memmove, NULL, SN_env::p, SET_SIZE, and slice_check().
Referenced by r_tidy(), and r_undouble().
1.7.1