#include <limits.h>
#include "api.h"
Go to the source code of this file.
Data Structures | |
struct | among |
Defines | |
#define | MAXINT INT_MAX |
#define | MININT INT_MIN |
#define | HEAD 2*sizeof(int) |
#define | SIZE(p) ((int *)(p))[-1] |
#define | SET_SIZE(p, n) ((int *)(p))[-1] = n |
#define | CAPACITY(p) ((int *)(p))[-2] |
Functions | |
symbol * | create_s (void) |
void | lose_s (symbol *p) |
int | skip_utf8 (const symbol *p, int c, int lb, int l, int n) |
int | in_grouping_U (struct SN_env *z, const unsigned char *s, int min, int max, int repeat) |
int | in_grouping_b_U (struct SN_env *z, const unsigned char *s, int min, int max, int repeat) |
int | out_grouping_U (struct SN_env *z, const unsigned char *s, int min, int max, int repeat) |
int | out_grouping_b_U (struct SN_env *z, const unsigned char *s, int min, int max, int repeat) |
int | in_grouping (struct SN_env *z, const unsigned char *s, int min, int max, int repeat) |
int | in_grouping_b (struct SN_env *z, const unsigned char *s, int min, int max, int repeat) |
int | out_grouping (struct SN_env *z, const unsigned char *s, int min, int max, int repeat) |
int | out_grouping_b (struct SN_env *z, const unsigned char *s, int min, int max, int repeat) |
int | eq_s (struct SN_env *z, int s_size, const symbol *s) |
int | eq_s_b (struct SN_env *z, int s_size, const symbol *s) |
int | eq_v (struct SN_env *z, const symbol *p) |
int | eq_v_b (struct SN_env *z, const symbol *p) |
int | find_among (struct SN_env *z, const struct among *v, int v_size) |
int | find_among_b (struct SN_env *z, const struct among *v, int v_size) |
int | replace_s (struct SN_env *z, int c_bra, int c_ket, int s_size, const symbol *s, int *adjustment) |
int | slice_from_s (struct SN_env *z, int s_size, const symbol *s) |
int | slice_from_v (struct SN_env *z, const symbol *p) |
int | slice_del (struct SN_env *z) |
int | insert_s (struct SN_env *z, int bra, int ket, int s_size, const symbol *s) |
int | insert_v (struct SN_env *z, int bra, int ket, const symbol *p) |
symbol * | slice_to (struct SN_env *z, symbol *p) |
symbol * | assign_to (struct SN_env *z, symbol *p) |
void | debug (struct SN_env *z, int number, int line_count) |
#define CAPACITY | ( | p | ) | ((int *)(p))[-2] |
Definition at line 13 of file header.h.
Referenced by assign_to(), create_s(), increase_size(), replace_s(), and slice_to().
#define HEAD 2*sizeof(int) |
Definition at line 9 of file header.h.
Referenced by create_s(), increase_size(), and lose_s().
#define SET_SIZE | ( | p, | ||
n | ||||
) | ((int *)(p))[-1] = n |
Definition at line 12 of file header.h.
Referenced by assign_to(), create_s(), replace_s(), and slice_to().
#define SIZE | ( | p | ) | ((int *)(p))[-1] |
Definition at line 11 of file header.h.
Referenced by eq_v(), eq_v_b(), insert_v(), replace_s(), slice_check(), and slice_from_v().
symbol * assign_to (struct SN_env *z, symbol *p)
Definition at line 441 of file utilities.c.
References CAPACITY, increase_size(), SN_env::l, memmove, NULL, SN_env::p, and SET_SIZE.
symbol* create_s | ( | void | ) |
Definition at line 7 of file utilities.c.
References CAPACITY, CREATE_SIZE, HEAD, malloc, NULL, SN_env::p, and SET_SIZE.
Referenced by replace_s(), and SN_create_env().
void debug | ( | struct SN_env * | z, | |
int | number, | |||
int | line_count | |||
) |
int eq_s (struct SN_env *z, int s_size, const symbol *s)
Definition at line 189 of file utilities.c.
References SN_env::c, SN_env::l, memcmp(), and SN_env::p.
Referenced by eq_v(), porter_ISO_8859_1_stem(), porter_UTF_8_stem(), r_is_reserved_word(), r_postlude(), and r_prelude().
int eq_s_b (struct SN_env *z, int s_size, const symbol *s)
Definition at line 194 of file utilities.c.
References SN_env::c, SN_env::lb, memcmp(), and SN_env::p.
Referenced by eq_v_b(), french_ISO_8859_1_stem(), french_UTF_8_stem(), portuguese_ISO_8859_1_stem(), portuguese_UTF_8_stem(), r_adjectival(), r_append_U_to_stems_ending_with_d_or_g(), r_attached_pronoun(), r_case_ending(), r_check_vowel_harmony(), r_e_ending(), r_en_ending(), r_main_suffix(), r_mark_ki(), r_mark_suffix_with_optional_n_consonant(), r_mark_suffix_with_optional_s_consonant(), r_mark_suffix_with_optional_y_consonant(), r_mark_yken(), r_other_endings(), r_other_suffix(), r_perfective_gerund(), r_possessive(), r_residual_form(), r_residual_suffix(), r_standard_suffix(), r_step_0(), r_Step_1c(), r_Step_2(), r_Step_4(), r_Step_5(), r_Step_5a(), r_Step_5b(), r_t_plural(), r_tidy(), r_tidy_up(), r_un_accent(), r_verb(), r_verb_suffix(), r_VI(), r_vowel_suffix(), r_y_verb_suffix(), russian_KOI8_R_stem(), and russian_UTF_8_stem().
int eq_v_b (struct SN_env *z, const symbol *p)
Definition at line 203 of file utilities.c.
References eq_s_b(), and SIZE.
Referenced by r_tidy(), and r_undouble().
int find_among (struct SN_env *z, const struct among *v, int v_size)
Definition at line 207 of file utilities.c.
References SN_env::c, among::function, i, SN_env::l, SN_env::p, among::result, among::s, among::s_size, and among::substring_i.
Referenced by r_exception1(), r_mark_regions(), r_postlude(), and r_prelude().
{
    /*
     * Body of find_among(): forward lookup of the text at the cursor in the
     * table v (v_size entries).
     *
     * Phase 1 narrows [lo, hi) by repeated halving, comparing the symbols at
     * z->p + z->c (never reading at or beyond z->l) with each probed entry's
     * string s.  matched_lo / matched_hi remember how many leading symbols
     * are already known to agree with the entries at the two ends, so each
     * probe resumes from the smaller of the two instead of from zero.
     * NOTE(review): this presumably relies on v being sorted by s - confirm
     * against the table generator.
     *
     * Phase 2 starts at v[lo] and follows the substring_i links.  An entry is
     * accepted when all s_size of its symbols were matched and it either has
     * no function or its function returns non-zero; the cursor z->c is then
     * left just past the matched text and the entry's result is returned.
     * Returns 0 when the chain ends (substring_i < 0) without acceptance.
     */
    const int start = z->c;
    const int limit = z->l;
    symbol * text = z->p + start;
    const struct among * entry;
    int lo = 0;
    int hi = v_size;
    int matched_lo = 0;
    int matched_hi = 0;
    int rechecked_first = 0;

    for (;;) {
        int mid = lo + ((hi - lo) >> 1);
        int matched = (matched_lo < matched_hi) ? matched_lo : matched_hi;
        int diff = 0;
        int pos;

        entry = v + mid;
        for (pos = matched; pos < entry->s_size; pos++) {
            if (start + matched == limit) {
                /* Ran out of input: treat the text as sorting before s. */
                diff = -1;
                break;
            }
            diff = text[matched] - entry->s[pos];
            if (diff != 0) break;
            matched++;
        }

        if (diff < 0) {
            hi = mid;
            matched_hi = matched;
        } else {
            lo = mid;
            matched_lo = matched;
        }

        if (hi - lo > 1) continue;  /* range not yet narrowed to one entry */
        if (lo > 0) break;          /* v[lo] has already been compared */
        if (hi == lo) break;        /* nothing left to compare */
        /* lo is still 0 and v[0] may not have been compared yet, so allow
           exactly one more pass to make sure it is. */
        if (rechecked_first) break;
        rechecked_first = 1;
    }

    for (;;) {
        entry = v + lo;
        if (matched_lo >= entry->s_size) {
            z->c = start + entry->s_size;
            if (entry->function == 0) return entry->result;
            {
                int accepted = entry->function(z);
                /* Reset the cursor after the call, whatever it returned. */
                z->c = start + entry->s_size;
                if (accepted) return entry->result;
            }
        }
        lo = entry->substring_i;
        if (lo < 0) return 0;
    }
}
int find_among_b (struct SN_env *z, const struct among *v, int v_size)
Definition at line 267 of file utilities.c.
References SN_env::c, among::function, i, SN_env::lb, SN_env::p, among::result, among::s, among::s_size, and among::substring_i.
Referenced by r_adjectival(), r_adjective(), r_attached_pronoun(), r_case(), r_case_ending(), r_case_other(), r_case_special(), r_combo_suffix(), r_consonant_pair(), r_derivational(), r_double(), r_exception2(), r_factive(), r_i_plural(), r_i_verb_suffix(), r_instrum(), r_LONG(), r_main_suffix(), r_mark_cAsInA(), r_mark_DA(), r_mark_DAn(), r_mark_DUr(), r_mark_lAr(), r_mark_lArI(), r_mark_nA(), r_mark_ncA(), r_mark_ndA(), r_mark_ndAn(), r_mark_nU(), r_mark_nUn(), r_mark_nUz(), r_mark_possessives(), r_mark_sUn(), r_mark_sUnUz(), r_mark_yA(), r_mark_yDU(), r_mark_ylA(), r_mark_ymUs_(), r_mark_ysA(), r_mark_yUm(), r_mark_yUz(), r_noun(), r_other_endings(), r_other_suffix(), r_owned(), r_particle_etc(), r_perfective_gerund(), r_plur_owner(), r_plural(), r_possessive(), r_post_process_last_consonants(), r_reflexive(), r_residual_form(), r_residual_suffix(), r_sing_owner(), r_standard_suffix(), r_step_0(), r_Step_1a(), r_Step_1b(), r_Step_2(), r_Step_3(), r_Step_4(), r_Step_5(), r_t_plural(), r_tidy_up(), r_un_double(), r_undouble(), r_v_ending(), r_verb(), r_verb_suffix(), r_vowel_suffix(), and r_y_verb_suffix().
{
    /*
     * Body of find_among_b(): backward counterpart of find_among().
     *
     * The text is read leftwards starting at the symbol just before the
     * cursor (z->p + z->c - 1) and never below z->lb; each probed entry's
     * string s is compared from its last symbol towards its first.
     *
     * Phase 1 narrows [lo, hi) by repeated halving, reusing the number of
     * trailing symbols already known to match at either end of the range.
     * NOTE(review): this presumably relies on v being sorted by reversed s -
     * confirm against the table generator.
     *
     * Phase 2 starts at v[lo] and follows the substring_i links.  An entry is
     * accepted when all s_size of its symbols were matched and it either has
     * no function or its function returns non-zero; the cursor z->c is then
     * left at the start of the matched text and the entry's result is
     * returned.  Returns 0 when the chain ends (substring_i < 0).
     */
    const int start = z->c;
    const int limit = z->lb;
    symbol * text = z->p + start - 1;
    const struct among * entry;
    int lo = 0;
    int hi = v_size;
    int matched_lo = 0;
    int matched_hi = 0;
    int rechecked_first = 0;

    for (;;) {
        int mid = lo + ((hi - lo) >> 1);
        int matched = (matched_lo < matched_hi) ? matched_lo : matched_hi;
        int diff = 0;
        int pos;

        entry = v + mid;
        for (pos = entry->s_size - 1 - matched; pos >= 0; pos--) {
            if (start - matched == limit) {
                /* Reached the backward limit: text sorts before s. */
                diff = -1;
                break;
            }
            diff = text[- matched] - entry->s[pos];
            if (diff != 0) break;
            matched++;
        }

        if (diff < 0) {
            hi = mid;
            matched_hi = matched;
        } else {
            lo = mid;
            matched_lo = matched;
        }

        if (hi - lo > 1) continue;  /* range not yet narrowed to one entry */
        if (lo > 0) break;          /* v[lo] has already been compared */
        if (hi == lo) break;        /* nothing left to compare */
        /* lo is still 0 and v[0] may not have been compared yet, so allow
           exactly one more pass to make sure it is. */
        if (rechecked_first) break;
        rechecked_first = 1;
    }

    for (;;) {
        entry = v + lo;
        if (matched_lo >= entry->s_size) {
            z->c = start - entry->s_size;
            if (entry->function == 0) return entry->result;
            {
                int accepted = entry->function(z);
                /* Reset the cursor after the call, whatever it returned. */
                z->c = start - entry->s_size;
                if (accepted) return entry->result;
            }
        }
        lo = entry->substring_i;
        if (lo < 0) return 0;
    }
}
int in_grouping | ( | struct SN_env * | z, | |
const unsigned char * | s, | |||
int | min, | |||
int | max, | |||
int | repeat | |||
) |
Definition at line 141 of file utilities.c.
References SN_env::c, SN_env::l, and SN_env::p.
Referenced by porter_ISO_8859_1_stem(), r_mark_regions(), and r_prelude().
int in_grouping_b | ( | struct SN_env * | z, | |
const unsigned char * | s, | |||
int | min, | |||
int | max, | |||
int | repeat | |||
) |
Definition at line 153 of file utilities.c.
References SN_env::c, SN_env::lb, and SN_env::p.
Referenced by r_case_ending(), r_main_suffix(), r_particle_etc(), r_shortv(), r_standard_suffix(), r_Step_2(), r_t_plural(), r_tidy(), r_VI(), and r_vowel_suffix().
int in_grouping_b_U | ( | struct SN_env * | z, | |
const unsigned char * | s, | |||
int | min, | |||
int | max, | |||
int | repeat | |||
) |
Definition at line 103 of file utilities.c.
References SN_env::c, get_b_utf8(), SN_env::lb, SN_env::p, and unless.
Referenced by r_case_ending(), r_main_suffix(), r_mark_sU(), r_mark_suffix_with_optional_n_consonant(), r_mark_suffix_with_optional_s_consonant(), r_mark_suffix_with_optional_U_vowel(), r_mark_suffix_with_optional_y_consonant(), r_mark_yU(), r_particle_etc(), r_shortv(), r_standard_suffix(), r_Step_2(), r_t_plural(), r_tidy(), r_VI(), and r_vowel_suffix().
int in_grouping_U | ( | struct SN_env * | z, | |
const unsigned char * | s, | |||
int | min, | |||
int | max, | |||
int | repeat | |||
) |
Definition at line 91 of file utilities.c.
References SN_env::c, get_utf8(), SN_env::l, SN_env::p, and unless.
Referenced by porter_UTF_8_stem(), r_mark_regions(), and r_prelude().
int insert_s (struct SN_env *z, int bra, int ket, int s_size, const symbol *s)
Definition at line 405 of file utilities.c.
References SN_env::bra, SN_env::ket, and replace_s().
Referenced by r_append_U_to_stems_ending_with_d_or_g(), and r_Step_1b().
int insert_v (struct SN_env *z, int bra, int ket, const symbol *p)
Definition at line 414 of file utilities.c.
References SN_env::bra, SN_env::ket, replace_s(), and SIZE.
void lose_s | ( | symbol * | p | ) |
Definition at line 17 of file utilities.c.
References free, HEAD, and NULL.
Referenced by increase_size(), slice_to(), and SN_close_env().
int out_grouping | ( | struct SN_env * | z, | |
const unsigned char * | s, | |||
int | min, | |||
int | max, | |||
int | repeat | |||
) |
Definition at line 165 of file utilities.c.
References SN_env::c, SN_env::l, SN_env::p, and unless.
Referenced by porter_ISO_8859_1_stem(), and r_mark_regions().
int out_grouping_b | ( | struct SN_env * | z, | |
const unsigned char * | s, | |||
int | min, | |||
int | max, | |||
int | repeat | |||
) |
Definition at line 177 of file utilities.c.
References SN_env::c, SN_env::lb, SN_env::p, and unless.
Referenced by r_case_ending(), r_e_ending(), r_en_ending(), r_i_verb_suffix(), r_main_suffix(), r_residual_suffix(), r_shortv(), r_standard_suffix(), r_Step_1a(), r_Step_1b(), r_Step_1c(), r_tidy(), r_un_accent(), r_undouble(), and r_verb_suffix().
int out_grouping_b_U | ( | struct SN_env * | z, | |
const unsigned char * | s, | |||
int | min, | |||
int | max, | |||
int | repeat | |||
) |
Definition at line 127 of file utilities.c.
References SN_env::c, get_b_utf8(), SN_env::lb, SN_env::p, and unless.
Referenced by r_append_U_to_stems_ending_with_d_or_g(), r_case_ending(), r_check_vowel_harmony(), r_e_ending(), r_en_ending(), r_i_verb_suffix(), r_main_suffix(), r_mark_suffix_with_optional_U_vowel(), r_residual_suffix(), r_shortv(), r_standard_suffix(), r_Step_1a(), r_Step_1b(), r_Step_1c(), r_tidy(), r_un_accent(), r_undouble(), and r_verb_suffix().
int out_grouping_U | ( | struct SN_env * | z, | |
const unsigned char * | s, | |||
int | min, | |||
int | max, | |||
int | repeat | |||
) |
Definition at line 115 of file utilities.c.
References SN_env::c, get_utf8(), SN_env::l, SN_env::p, and unless.
Referenced by porter_UTF_8_stem(), r_mark_regions(), and r_more_than_one_syllable_word().
int replace_s | ( | struct SN_env * | z, | |
int | c_bra, | |||
int | c_ket, | |||
int | s_size, | |||
const symbol * | s, | |||
int * | adjustment | |||
) |
Definition at line 343 of file utilities.c.
References SN_env::c, CAPACITY, create_s(), increase_size(), SN_env::l, memmove, NULL, SN_env::p, SET_SIZE, SIZE, and unless.
Referenced by insert_s(), insert_v(), slice_from_s(), and SN_set_current().
{
    /*
     * Body of replace_s(): overwrite the symbols in [c_bra, c_ket) of z->p
     * with the s_size symbols at s, growing or shrinking the buffer contents
     * as required.
     *
     * Returns 0 on success, or -1 when create_s() or increase_size() hands
     * back NULL.  On success the change in length is stored through adjptr
     * when that pointer is non-NULL.
     *
     * NOTE(review): adjptr is not declared in this body; presumably it is the
     * definition's name for the final `int *` parameter that the prototype
     * lists as `adjustment` - confirm against utilities.c.
     */
    int delta;      /* new length minus old length of the replaced span */
    int old_len;

    if (z->p == NULL) {
        z->p = create_s();
        if (z->p == NULL) return -1;
    }

    delta = s_size - (c_ket - c_bra);
    old_len = SIZE(z->p);

    if (delta != 0) {
        int new_len = delta + old_len;

        if (new_len > CAPACITY(z->p)) {
            z->p = increase_size(z->p, new_len);
            if (z->p == NULL) return -1;
        }

        /* Shift everything from c_ket onwards so that the gap between c_bra
           and the shifted tail is exactly s_size symbols wide. */
        memmove(z->p + c_ket + delta,
                z->p + c_ket,
                (old_len - c_ket) * sizeof(symbol));
        SET_SIZE(z->p, new_len);
        z->l += delta;

        /* A cursor at or after the replaced span moves with the tail; a
           cursor strictly inside the span is pulled back to its start. */
        if (z->c >= c_ket) {
            z->c += delta;
        } else if (z->c > c_bra) {
            z->c = c_bra;
        }
    }

    unless (s_size == 0) memmove(z->p + c_bra, s, s_size * sizeof(symbol));

    if (adjptr != NULL) {
        *adjptr = delta;
    }
    return 0;
}
int skip_utf8 | ( | const symbol * | p, | |
int | c, | |||
int | lb, | |||
int | l, | |||
int | n | |||
) |
Definition at line 30 of file utilities.c.
Referenced by english_UTF_8_stem(), porter_UTF_8_stem(), r_case_ending(), r_consonant_pair(), r_is_reserved_word(), r_mark_regions(), r_mark_suffix_with_optional_n_consonant(), r_mark_suffix_with_optional_s_consonant(), r_mark_suffix_with_optional_U_vowel(), r_mark_suffix_with_optional_y_consonant(), r_postlude(), r_prelude(), r_standard_suffix(), r_Step_1a(), r_Step_1b(), r_tidy(), r_un_double(), and r_undouble().
{
    /*
     * Body of skip_utf8(): move position c across |n| UTF-8 encoded
     * characters in p - forwards when n >= 0, backwards when n < 0 - and
     * return the new position.  Returns -1 if the forward limit l or the
     * backward limit lb is reached before |n| characters have been skipped.
     *
     * Byte classes used below: values >= 0xC0 start a multi-byte sequence,
     * values in 0x80..0xBF are continuation bytes (10------).
     * NOTE(review): the comparisons assume `symbol` is an unsigned byte
     * type - confirm in api.h.
     */
    int byte;

    if (n < 0) {
        while (n < 0) {
            if (c <= lb) return -1;
            byte = p[--c];
            if (byte >= 0x80) {
                /* Inside a multi-byte sequence: back up until its lead byte
                   (>= 0xC0) is under the cursor, or lb is reached. */
                for (; c > lb; c--) {
                    byte = p[c];
                    if (byte >= 0xC0) break;
                }
            }
            n++;
        }
        return c;
    }

    while (n > 0) {
        if (c >= l) return -1;
        byte = p[c++];
        if (byte >= 0xC0) {
            /* Lead byte consumed: also step over its continuation bytes. */
            for (; c < l; c++) {
                byte = p[c];
                if (byte < 0x80 || byte >= 0xC0) break;
            }
        }
        n--;
    }
    return c;
}
int slice_del | ( | struct SN_env * | z | ) |
Definition at line 401 of file utilities.c.
References slice_from_s().
Referenced by portuguese_ISO_8859_1_stem(), portuguese_UTF_8_stem(), r_adjectival(), r_adjective(), r_attached_pronoun(), r_case(), r_case_ending(), r_case_other(), r_consonant_pair(), r_derivational(), r_e_ending(), r_en_ending(), r_factive(), r_i_plural(), r_i_verb_suffix(), r_instrum(), r_main_suffix(), r_noun(), r_other_endings(), r_other_suffix(), r_owned(), r_particle_etc(), r_perfective_gerund(), r_plur_owner(), r_plural(), r_possessive(), r_prelude(), r_reflexive(), r_residual_form(), r_residual_suffix(), r_sing_owner(), r_standard_suffix(), r_stem_nominal_verb_suffixes(), r_stem_noun_suffixes(), r_stem_suffix_chain_before_ki(), r_step_0(), r_Step_1a(), r_Step_1b(), r_Step_2(), r_Step_3(), r_Step_4(), r_Step_5(), r_Step_5a(), r_Step_5b(), r_t_plural(), r_tidy(), r_tidy_up(), r_un_double(), r_undouble(), r_verb(), r_verb_suffix(), r_vowel_suffix(), r_y_verb_suffix(), russian_KOI8_R_stem(), and russian_UTF_8_stem().
{
    /* Body of slice_del(): delegates to slice_from_s() with a replacement
       size of 0 and a null string pointer, and passes its result through
       unchanged. */
    int status = slice_from_s(z, 0, 0);
    return status;
}
int slice_from_s (struct SN_env *z, int s_size, const symbol *s)
Definition at line 392 of file utilities.c.
References SN_env::bra, SN_env::ket, NULL, replace_s(), and slice_check().
Referenced by french_ISO_8859_1_stem(), french_UTF_8_stem(), porter_ISO_8859_1_stem(), porter_UTF_8_stem(), r_attached_pronoun(), r_case_other(), r_case_special(), r_combo_suffix(), r_exception1(), r_main_suffix(), r_other_suffix(), r_owned(), r_plur_owner(), r_plural(), r_possessive(), r_post_process_last_consonants(), r_postlude(), r_prelude(), r_residual_form(), r_residual_suffix(), r_sing_owner(), r_standard_suffix(), r_step_0(), r_Step_1a(), r_Step_1b(), r_Step_1c(), r_Step_2(), r_Step_3(), r_un_accent(), r_v_ending(), slice_del(), and slice_from_v().
int slice_from_v (struct SN_env *z, const symbol *p)
Definition at line 397 of file utilities.c.
References SIZE, and slice_from_s().
{
    /* Body of slice_from_v(): same as slice_from_s(), with the replacement
       length read from the size field stored in front of p (SIZE macro). */
    int length = SIZE(p);
    return slice_from_s(z, length, p);
}
symbol * slice_to (struct SN_env *z, symbol *p)
Definition at line 423 of file utilities.c.
References SN_env::bra, CAPACITY, increase_size(), SN_env::ket, lose_s(), memmove, NULL, SN_env::p, SET_SIZE, and slice_check().
Referenced by r_tidy(), and r_undouble().