Header And Logo

PostgreSQL
| The world's most advanced open source database.

Data Structures | Defines | Typedefs | Functions

spgtextproc.c File Reference

#include "postgres.h"
#include "access/spgist.h"
#include "catalog/pg_type.h"
#include "mb/pg_wchar.h"
#include "utils/builtins.h"
#include "utils/datum.h"
#include "utils/pg_locale.h"
Include dependency graph for spgtextproc.c:

Go to the source code of this file.

Data Structures

struct  spgNodePtr

Defines

#define SPGIST_MAX_PREFIX_LENGTH   Max((int) (BLCKSZ - 256 * 16 - 100), 32)

Typedefs

typedef struct spgNodePtr spgNodePtr

Functions

Datum spg_text_config (PG_FUNCTION_ARGS)
static Datum formTextDatum (const char *data, int datalen)
static int commonPrefix (const char *a, const char *b, int lena, int lenb)
static bool searchChar (Datum *nodeLabels, int nNodes, uint8 c, int *i)
Datum spg_text_choose (PG_FUNCTION_ARGS)
static int cmpNodePtr (const void *a, const void *b)
Datum spg_text_picksplit (PG_FUNCTION_ARGS)
Datum spg_text_inner_consistent (PG_FUNCTION_ARGS)
Datum spg_text_leaf_consistent (PG_FUNCTION_ARGS)

Define Documentation

#define SPGIST_MAX_PREFIX_LENGTH   Max((int) (BLCKSZ - 256 * 16 - 100), 32)

Definition at line 41 of file spgtextproc.c.

Referenced by spg_text_picksplit().


Typedef Documentation

typedef struct spgNodePtr spgNodePtr

Function Documentation

static int cmpNodePtr ( const void *  a,
const void *  b 
) [static]

Definition at line 260 of file spgtextproc.c.

References spgNodePtr::c.

Referenced by spg_text_picksplit().

{
    const spgNodePtr *aa = (const spgNodePtr *) a;
    const spgNodePtr *bb = (const spgNodePtr *) b;

    if (aa->c == bb->c)
        return 0;
    else if (aa->c > bb->c)
        return 1;
    else
        return -1;
}

static int commonPrefix ( const char *  a,
const char *  b,
int  lena,
int  lenb 
) [static]

Definition at line 95 of file spgtextproc.c.

References i.

Referenced by spg_text_choose(), and spg_text_picksplit().

{
    int         i = 0;

    while (i < lena && i < lenb && *a == *b)
    {
        a++;
        b++;
        i++;
    }

    return i;
}

static Datum formTextDatum ( const char *  data,
int  datalen 
) [static]

Definition at line 70 of file spgtextproc.c.

References palloc(), PointerGetDatum, SET_VARSIZE, SET_VARSIZE_SHORT, VARATT_SHORT_MAX, VARHDRSZ, and VARHDRSZ_SHORT.

Referenced by spg_text_choose(), and spg_text_picksplit().

{
    char       *p;

    p = (char *) palloc(datalen + VARHDRSZ);

    if (datalen + VARHDRSZ_SHORT <= VARATT_SHORT_MAX)
    {
        SET_VARSIZE_SHORT(p, datalen + VARHDRSZ_SHORT);
        if (datalen)
            memcpy(p + VARHDRSZ_SHORT, data, datalen);
    }
    else
    {
        SET_VARSIZE(p, datalen + VARHDRSZ);
        memcpy(p + VARHDRSZ, data, datalen);
    }

    return PointerGetDatum(p);
}

static bool searchChar ( Datum nodeLabels,
int  nNodes,
uint8  c,
int *  i 
) [static]

Definition at line 115 of file spgtextproc.c.

References DatumGetUInt8.

Referenced by spg_text_choose().

{
    int         StopLow = 0,
                StopHigh = nNodes;

    while (StopLow < StopHigh)
    {
        int         StopMiddle = (StopLow + StopHigh) >> 1;
        uint8       middle = DatumGetUInt8(nodeLabels[StopMiddle]);

        if (c < middle)
            StopHigh = StopMiddle;
        else if (c > middle)
            StopLow = StopMiddle + 1;
        else
        {
            *i = StopMiddle;
            return true;
        }
    }

    *i = StopHigh;
    return false;
}

Datum spg_text_choose ( PG_FUNCTION_ARGS   ) 

Definition at line 141 of file spgtextproc.c.

References spgChooseOut::addNode, spgChooseIn::allTheSame, commonPrefix(), spgChooseIn::datum, DatumGetTextPP, formTextDatum(), spgChooseIn::hasPrefix, i, spgChooseIn::level, spgChooseOut::matchNode, spgChooseIn::nNodes, spgChooseIn::nodeLabels, NULL, PG_GETARG_POINTER, PG_RETURN_VOID, spgChooseIn::prefixDatum, spgChooseOut::result, spgChooseOut::resultType, searchChar(), spgChooseOut::splitTuple, UInt8GetDatum, VARDATA_ANY, and VARSIZE_ANY_EXHDR.

{
    spgChooseIn *in = (spgChooseIn *) PG_GETARG_POINTER(0);
    spgChooseOut *out = (spgChooseOut *) PG_GETARG_POINTER(1);
    text       *inText = DatumGetTextPP(in->datum);
    char       *inStr = VARDATA_ANY(inText);
    int         inSize = VARSIZE_ANY_EXHDR(inText);
    uint8       nodeChar = '\0';
    int         i = 0;
    int         commonLen = 0;

    /* Check for prefix match, set nodeChar to first byte after prefix */
    if (in->hasPrefix)
    {
        text       *prefixText = DatumGetTextPP(in->prefixDatum);
        char       *prefixStr = VARDATA_ANY(prefixText);
        int         prefixSize = VARSIZE_ANY_EXHDR(prefixText);

        commonLen = commonPrefix(inStr + in->level,
                                 prefixStr,
                                 inSize - in->level,
                                 prefixSize);

        if (commonLen == prefixSize)
        {
            if (inSize - in->level > commonLen)
                nodeChar = *(uint8 *) (inStr + in->level + commonLen);
            else
                nodeChar = '\0';
        }
        else
        {
            /* Must split tuple because incoming value doesn't match prefix */
            out->resultType = spgSplitTuple;

            if (commonLen == 0)
            {
                out->result.splitTuple.prefixHasPrefix = false;
            }
            else
            {
                out->result.splitTuple.prefixHasPrefix = true;
                out->result.splitTuple.prefixPrefixDatum =
                    formTextDatum(prefixStr, commonLen);
            }
            out->result.splitTuple.nodeLabel =
                UInt8GetDatum(*(prefixStr + commonLen));

            if (prefixSize - commonLen == 1)
            {
                out->result.splitTuple.postfixHasPrefix = false;
            }
            else
            {
                out->result.splitTuple.postfixHasPrefix = true;
                out->result.splitTuple.postfixPrefixDatum =
                    formTextDatum(prefixStr + commonLen + 1,
                                  prefixSize - commonLen - 1);
            }

            PG_RETURN_VOID();
        }
    }
    else if (inSize > in->level)
    {
        nodeChar = *(uint8 *) (inStr + in->level);
    }
    else
    {
        nodeChar = '\0';
    }

    /* Look up nodeChar in the node label array */
    if (searchChar(in->nodeLabels, in->nNodes, nodeChar, &i))
    {
        /*
         * Descend to existing node.  (If in->allTheSame, the core code will
         * ignore our nodeN specification here, but that's OK.  We still have
         * to provide the correct levelAdd and restDatum values, and those are
         * the same regardless of which node gets chosen by core.)
         */
        out->resultType = spgMatchNode;
        out->result.matchNode.nodeN = i;
        out->result.matchNode.levelAdd = commonLen + 1;
        if (inSize - in->level - commonLen - 1 > 0)
            out->result.matchNode.restDatum =
                formTextDatum(inStr + in->level + commonLen + 1,
                              inSize - in->level - commonLen - 1);
        else
            out->result.matchNode.restDatum =
                formTextDatum(NULL, 0);
    }
    else if (in->allTheSame)
    {
        /*
         * Can't use AddNode action, so split the tuple.  The upper tuple has
         * the same prefix as before and uses an empty node label for the
         * lower tuple.  The lower tuple has no prefix and the same node
         * labels as the original tuple.
         */
        out->resultType = spgSplitTuple;
        out->result.splitTuple.prefixHasPrefix = in->hasPrefix;
        out->result.splitTuple.prefixPrefixDatum = in->prefixDatum;
        out->result.splitTuple.nodeLabel = UInt8GetDatum('\0');
        out->result.splitTuple.postfixHasPrefix = false;
    }
    else
    {
        /* Add a node for the not-previously-seen nodeChar value */
        out->resultType = spgAddNode;
        out->result.addNode.nodeLabel = UInt8GetDatum(nodeChar);
        out->result.addNode.nodeN = i;
    }

    PG_RETURN_VOID();
}

Datum spg_text_config ( PG_FUNCTION_ARGS   ) 

Definition at line 53 of file spgtextproc.c.

References spgConfigOut::canReturnData, spgConfigOut::labelType, spgConfigOut::longValuesOK, PG_GETARG_POINTER, PG_RETURN_VOID, and spgConfigOut::prefixType.

{
    /* spgConfigIn *cfgin = (spgConfigIn *) PG_GETARG_POINTER(0); */
    spgConfigOut *cfg = (spgConfigOut *) PG_GETARG_POINTER(1);

    cfg->prefixType = TEXTOID;
    cfg->labelType = CHAROID;
    cfg->canReturnData = true;
    cfg->longValuesOK = true;   /* suffixing will shorten long values */
    PG_RETURN_VOID();
}

Datum spg_text_inner_consistent ( PG_FUNCTION_ARGS   ) 

Definition at line 367 of file spgtextproc.c.

References Assert, BTEqualStrategyNumber, BTGreaterEqualStrategyNumber, BTGreaterStrategyNumber, BTLessEqualStrategyNumber, BTLessStrategyNumber, datumCopy(), DatumGetPointer, DatumGetTextPP, DatumGetUInt8, elog, ERROR, spgInnerConsistentIn::hasPrefix, i, lc_collate_is_c(), spgInnerConsistentIn::level, spgInnerConsistentOut::levelAdds, memcmp(), Min, spgInnerConsistentIn::nkeys, spgInnerConsistentOut::nNodes, spgInnerConsistentIn::nNodes, spgInnerConsistentIn::nodeLabels, spgInnerConsistentOut::nodeNumbers, NULL, palloc(), PG_GET_COLLATION, PG_GETARG_POINTER, PG_RETURN_VOID, PointerGetDatum, spgInnerConsistentIn::prefixDatum, spgInnerConsistentIn::reconstructedValue, spgInnerConsistentOut::reconstructedValues, spgInnerConsistentIn::scankeys, SET_VARSIZE, ScanKeyData::sk_argument, ScanKeyData::sk_strategy, VARDATA, VARDATA_ANY, VARHDRSZ, and VARSIZE_ANY_EXHDR.

{
    spgInnerConsistentIn *in = (spgInnerConsistentIn *) PG_GETARG_POINTER(0);
    spgInnerConsistentOut *out = (spgInnerConsistentOut *) PG_GETARG_POINTER(1);
    bool        collate_is_c = lc_collate_is_c(PG_GET_COLLATION());
    text       *reconstrText = NULL;
    int         maxReconstrLen = 0;
    text       *prefixText = NULL;
    int         prefixSize = 0;
    int         i;

    /*
     * Reconstruct values represented at this tuple, including parent data,
     * prefix of this tuple if any, and the node label if any.  in->level
     * should be the length of the previously reconstructed value, and the
     * number of bytes added here is prefixSize or prefixSize + 1.
     *
     * Note: we assume that in->reconstructedValue isn't toasted and doesn't
     * have a short varlena header.  This is okay because it must have been
     * created by a previous invocation of this routine, and we always emit
     * long-format reconstructed values.
     */
    Assert(in->level == 0 ? DatumGetPointer(in->reconstructedValue) == NULL :
    VARSIZE_ANY_EXHDR(DatumGetPointer(in->reconstructedValue)) == in->level);

    maxReconstrLen = in->level + 1;
    if (in->hasPrefix)
    {
        prefixText = DatumGetTextPP(in->prefixDatum);
        prefixSize = VARSIZE_ANY_EXHDR(prefixText);
        maxReconstrLen += prefixSize;
    }

    reconstrText = palloc(VARHDRSZ + maxReconstrLen);
    SET_VARSIZE(reconstrText, VARHDRSZ + maxReconstrLen);

    if (in->level)
        memcpy(VARDATA(reconstrText),
               VARDATA(DatumGetPointer(in->reconstructedValue)),
               in->level);
    if (prefixSize)
        memcpy(((char *) VARDATA(reconstrText)) + in->level,
               VARDATA_ANY(prefixText),
               prefixSize);
    /* last byte of reconstrText will be filled in below */

    /*
     * Scan the child nodes.  For each one, complete the reconstructed value
     * and see if it's consistent with the query.  If so, emit an entry into
     * the output arrays.
     */
    out->nodeNumbers = (int *) palloc(sizeof(int) * in->nNodes);
    out->levelAdds = (int *) palloc(sizeof(int) * in->nNodes);
    out->reconstructedValues = (Datum *) palloc(sizeof(Datum) * in->nNodes);
    out->nNodes = 0;

    for (i = 0; i < in->nNodes; i++)
    {
        uint8       nodeChar = DatumGetUInt8(in->nodeLabels[i]);
        int         thisLen;
        bool        res = true;
        int         j;

        /* If nodeChar is zero, don't include it in data */
        if (nodeChar == '\0')
            thisLen = maxReconstrLen - 1;
        else
        {
            ((char *) VARDATA(reconstrText))[maxReconstrLen - 1] = nodeChar;
            thisLen = maxReconstrLen;
        }

        for (j = 0; j < in->nkeys; j++)
        {
            StrategyNumber strategy = in->scankeys[j].sk_strategy;
            text       *inText;
            int         inSize;
            int         r;

            /*
             * If it's a collation-aware operator, but the collation is C, we
             * can treat it as non-collation-aware.  With non-C collation we
             * need to traverse whole tree :-( so there's no point in making
             * any check here.
             */
            if (strategy > 10)
            {
                if (collate_is_c)
                    strategy -= 10;
                else
                    continue;
            }

            inText = DatumGetTextPP(in->scankeys[j].sk_argument);
            inSize = VARSIZE_ANY_EXHDR(inText);

            r = memcmp(VARDATA(reconstrText), VARDATA_ANY(inText),
                       Min(inSize, thisLen));

            switch (strategy)
            {
                case BTLessStrategyNumber:
                case BTLessEqualStrategyNumber:
                    if (r > 0)
                        res = false;
                    break;
                case BTEqualStrategyNumber:
                    if (r != 0 || inSize < thisLen)
                        res = false;
                    break;
                case BTGreaterEqualStrategyNumber:
                case BTGreaterStrategyNumber:
                    if (r < 0)
                        res = false;
                    break;
                default:
                    elog(ERROR, "unrecognized strategy number: %d",
                         in->scankeys[j].sk_strategy);
                    break;
            }

            if (!res)
                break;          /* no need to consider remaining conditions */
        }

        if (res)
        {
            out->nodeNumbers[out->nNodes] = i;
            out->levelAdds[out->nNodes] = thisLen - in->level;
            SET_VARSIZE(reconstrText, VARHDRSZ + thisLen);
            out->reconstructedValues[out->nNodes] =
                datumCopy(PointerGetDatum(reconstrText), false, -1);
            out->nNodes++;
        }
    }

    PG_RETURN_VOID();
}

Datum spg_text_leaf_consistent ( PG_FUNCTION_ARGS   ) 

Definition at line 507 of file spgtextproc.c.

References Assert, BTEqualStrategyNumber, BTGreaterEqualStrategyNumber, BTGreaterStrategyNumber, BTLessEqualStrategyNumber, BTLessStrategyNumber, DatumGetPointer, DatumGetTextP, DatumGetTextPP, elog, ERROR, spgLeafConsistentIn::leafDatum, spgLeafConsistentOut::leafValue, spgLeafConsistentIn::level, memcmp(), Min, spgLeafConsistentIn::nkeys, NULL, palloc(), PG_GET_COLLATION, PG_GETARG_POINTER, PG_RETURN_BOOL, pg_verifymbstr(), PointerGetDatum, spgLeafConsistentOut::recheck, spgLeafConsistentIn::reconstructedValue, spgLeafConsistentIn::scankeys, SET_VARSIZE, ScanKeyData::sk_argument, ScanKeyData::sk_strategy, VARDATA, VARDATA_ANY, VARHDRSZ, VARSIZE_ANY_EXHDR, and varstr_cmp().

{
    spgLeafConsistentIn *in = (spgLeafConsistentIn *) PG_GETARG_POINTER(0);
    spgLeafConsistentOut *out = (spgLeafConsistentOut *) PG_GETARG_POINTER(1);
    int         level = in->level;
    text       *leafValue,
               *reconstrValue = NULL;
    char       *fullValue;
    int         fullLen;
    bool        res;
    int         j;

    /* all tests are exact */
    out->recheck = false;

    leafValue = DatumGetTextPP(in->leafDatum);

    if (DatumGetPointer(in->reconstructedValue))
        reconstrValue = DatumGetTextP(in->reconstructedValue);

    Assert(level == 0 ? reconstrValue == NULL :
           VARSIZE_ANY_EXHDR(reconstrValue) == level);

    /* Reconstruct the full string represented by this leaf tuple */
    fullLen = level + VARSIZE_ANY_EXHDR(leafValue);
    if (VARSIZE_ANY_EXHDR(leafValue) == 0 && level > 0)
    {
        fullValue = VARDATA(reconstrValue);
        out->leafValue = PointerGetDatum(reconstrValue);
    }
    else
    {
        text       *fullText = palloc(VARHDRSZ + fullLen);

        SET_VARSIZE(fullText, VARHDRSZ + fullLen);
        fullValue = VARDATA(fullText);
        if (level)
            memcpy(fullValue, VARDATA(reconstrValue), level);
        if (VARSIZE_ANY_EXHDR(leafValue) > 0)
            memcpy(fullValue + level, VARDATA_ANY(leafValue),
                   VARSIZE_ANY_EXHDR(leafValue));
        out->leafValue = PointerGetDatum(fullText);
    }

    /* Perform the required comparison(s) */
    res = true;
    for (j = 0; j < in->nkeys; j++)
    {
        StrategyNumber strategy = in->scankeys[j].sk_strategy;
        text       *query = DatumGetTextPP(in->scankeys[j].sk_argument);
        int         queryLen = VARSIZE_ANY_EXHDR(query);
        int         r;

        if (strategy > 10)
        {
            /* Collation-aware comparison */
            strategy -= 10;

            /* If asserts enabled, verify encoding of reconstructed string */
            Assert(pg_verifymbstr(fullValue, fullLen, false));

            r = varstr_cmp(fullValue, Min(queryLen, fullLen),
                           VARDATA_ANY(query), Min(queryLen, fullLen),
                           PG_GET_COLLATION());
        }
        else
        {
            /* Non-collation-aware comparison */
            r = memcmp(fullValue, VARDATA_ANY(query), Min(queryLen, fullLen));
        }

        if (r == 0)
        {
            if (queryLen > fullLen)
                r = -1;
            else if (queryLen < fullLen)
                r = 1;
        }

        switch (strategy)
        {
            case BTLessStrategyNumber:
                res = (r < 0);
                break;
            case BTLessEqualStrategyNumber:
                res = (r <= 0);
                break;
            case BTEqualStrategyNumber:
                res = (r == 0);
                break;
            case BTGreaterEqualStrategyNumber:
                res = (r >= 0);
                break;
            case BTGreaterStrategyNumber:
                res = (r > 0);
                break;
            default:
                elog(ERROR, "unrecognized strategy number: %d",
                     in->scankeys[j].sk_strategy);
                res = false;
                break;
        }

        if (!res)
            break;              /* no need to consider remaining conditions */
    }

    PG_RETURN_BOOL(res);
}

Datum spg_text_picksplit ( PG_FUNCTION_ARGS   ) 

Definition at line 274 of file spgtextproc.c.

References spgNodePtr::c, cmpNodePtr(), commonPrefix(), spgNodePtr::d, DatumGetTextPP, spgPickSplitIn::datums, formTextDatum(), spgPickSplitOut::hasPrefix, spgNodePtr::i, i, spgPickSplitOut::leafTupleDatums, spgPickSplitOut::mapTuplesToNodes, Min, spgPickSplitOut::nNodes, spgPickSplitOut::nodeLabels, spgPickSplitIn::nTuples, NULL, palloc(), PG_GETARG_POINTER, PG_RETURN_VOID, spgPickSplitOut::prefixDatum, qsort, SPGIST_MAX_PREFIX_LENGTH, UInt8GetDatum, VARDATA_ANY, and VARSIZE_ANY_EXHDR.

{
    spgPickSplitIn *in = (spgPickSplitIn *) PG_GETARG_POINTER(0);
    spgPickSplitOut *out = (spgPickSplitOut *) PG_GETARG_POINTER(1);
    text       *text0 = DatumGetTextPP(in->datums[0]);
    int         i,
                commonLen;
    spgNodePtr *nodes;

    /* Identify longest common prefix, if any */
    commonLen = VARSIZE_ANY_EXHDR(text0);
    for (i = 1; i < in->nTuples && commonLen > 0; i++)
    {
        text       *texti = DatumGetTextPP(in->datums[i]);
        int         tmp = commonPrefix(VARDATA_ANY(text0),
                                       VARDATA_ANY(texti),
                                       VARSIZE_ANY_EXHDR(text0),
                                       VARSIZE_ANY_EXHDR(texti));

        if (tmp < commonLen)
            commonLen = tmp;
    }

    /*
     * Limit the prefix length, if necessary, to ensure that the resulting
     * inner tuple will fit on a page.
     */
    commonLen = Min(commonLen, SPGIST_MAX_PREFIX_LENGTH);

    /* Set node prefix to be that string, if it's not empty */
    if (commonLen == 0)
    {
        out->hasPrefix = false;
    }
    else
    {
        out->hasPrefix = true;
        out->prefixDatum = formTextDatum(VARDATA_ANY(text0), commonLen);
    }

    /* Extract the node label (first non-common byte) from each value */
    nodes = (spgNodePtr *) palloc(sizeof(spgNodePtr) * in->nTuples);

    for (i = 0; i < in->nTuples; i++)
    {
        text       *texti = DatumGetTextPP(in->datums[i]);

        if (commonLen < VARSIZE_ANY_EXHDR(texti))
            nodes[i].c = *(uint8 *) (VARDATA_ANY(texti) + commonLen);
        else
            nodes[i].c = '\0';  /* use \0 if string is all common */
        nodes[i].i = i;
        nodes[i].d = in->datums[i];
    }

    /*
     * Sort by label bytes so that we can group the values into nodes.  This
     * also ensures that the nodes are ordered by label value, allowing the
     * use of binary search in searchChar.
     */
    qsort(nodes, in->nTuples, sizeof(*nodes), cmpNodePtr);

    /* And emit results */
    out->nNodes = 0;
    out->nodeLabels = (Datum *) palloc(sizeof(Datum) * in->nTuples);
    out->mapTuplesToNodes = (int *) palloc(sizeof(int) * in->nTuples);
    out->leafTupleDatums = (Datum *) palloc(sizeof(Datum) * in->nTuples);

    for (i = 0; i < in->nTuples; i++)
    {
        text       *texti = DatumGetTextPP(nodes[i].d);
        Datum       leafD;

        if (i == 0 || nodes[i].c != nodes[i - 1].c)
        {
            out->nodeLabels[out->nNodes] = UInt8GetDatum(nodes[i].c);
            out->nNodes++;
        }

        if (commonLen < VARSIZE_ANY_EXHDR(texti))
            leafD = formTextDatum(VARDATA_ANY(texti) + commonLen + 1,
                                  VARSIZE_ANY_EXHDR(texti) - commonLen - 1);
        else
            leafD = formTextDatum(NULL, 0);

        out->leafTupleDatums[nodes[i].i] = leafD;
        out->mapTuplesToNodes[nodes[i].i] = out->nNodes - 1;
    }

    PG_RETURN_VOID();
}