Skip to content

Commit 8095dc0

Browse files
authored
Vendor snowball package (#1116)
* vendor snowball
1 parent a5bf92c commit 8095dc0

19 files changed

+2069
-61
lines changed

hackage-server.cabal

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,12 @@ extra-source-files:
6363
tests/unpack-checks/LANGUAGE-GHC-9.2/Main.hs
6464
tests/unpack-checks/LANGUAGE-GHC-9.2/Setup.hs
6565
tests/unpack-checks/LANGUAGE-GHC-9.2/LANGUAGE-GHC.cabal
66+
libstemmer_c/src_c/stem_ISO_8859_1_english.h
67+
libstemmer_c/include/libstemmer.h
68+
libstemmer_c/runtime/api.h
69+
libstemmer_c/runtime/header.h
70+
libstemmer_c/LICENSE
71+
src/Distribution/Server/Util/NLP/LICENSE
6672

6773
source-repository head
6874
type: git
@@ -359,6 +365,7 @@ library lib-server
359365
Distribution.Server.Features.StaticFiles
360366
Distribution.Server.Features.ServerIntrospect
361367
Distribution.Server.Features.Sitemap
368+
Distribution.Server.Util.NLP.Snowball
362369

363370
if flag(debug)
364371
cpp-options: -DDEBUG
@@ -418,8 +425,12 @@ library lib-server
418425
, xss-sanitize ^>= 0.3.6
419426

420427
if !flag(minimal)
421-
build-depends: snowball ^>= 1.0
422-
, tokenize ^>= 0.3
428+
build-depends: tokenize ^>= 0.3
429+
430+
c-sources: libstemmer_c/src_c/stem_ISO_8859_1_english.c
431+
libstemmer_c/runtime/api.c
432+
libstemmer_c/runtime/utilities.c
433+
libstemmer_c/libstemmer/libstemmer.c
423434

424435
if flag(cabal-parsers)
425436
build-depends: cabal-parsers ^>= 0

libstemmer_c/LICENSE

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
Copyright (c) 2002, Richard Boulton
2+
All rights reserved.
3+
4+
Redistribution and use in source and binary forms, with or without
5+
modification, are permitted provided that the following conditions are met:
6+
7+
* Redistributions of source code must retain the above copyright notice,
8+
this list of conditions and the following disclaimer.
9+
10+
* Redistributions in binary form must reproduce the above copyright notice,
11+
this list of conditions and the following disclaimer in the documentation
12+
and/or other materials provided with the distribution.
13+
14+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17+
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
18+
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
19+
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20+
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21+
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22+
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23+
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24+
POSSIBILITY OF SUCH DAMAGE.

libstemmer_c/include/libstemmer.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
2+
/*
 * Public interface of the vendored English (ISO-8859-1) Snowball stemmer.
 *
 * Typical use:
 *   struct SN_env * env = english_ISO_8859_1_stemmer_new();
 *   const sb_symbol * stem = english_ISO_8859_1_stemmer_stem(env, word, len);
 *   int stem_len = english_ISO_8859_1_stemmer_length(env);
 *   english_ISO_8859_1_stemmer_delete(env);
 */
#ifndef LIBSTEMMER_ENGLISH_ISO_8859_1_H
#define LIBSTEMMER_ENGLISH_ISO_8859_1_H

/* Make header file work when included from C++ */
#ifdef __cplusplus
extern "C" {
#endif

/* One input/output character of the stemmer. */
typedef unsigned char sb_symbol;

/* Opaque to users of this header; the layout lives in runtime/api.h. */
struct SN_env;

/*
 * Create a stemmer environment.
 * Returns NULL if the environment could not be created. A non-NULL result
 * must eventually be passed to english_ISO_8859_1_stemmer_delete().
 */
struct SN_env * english_ISO_8859_1_stemmer_new(void);

/* Release an environment. Passing NULL is allowed and does nothing. */
void english_ISO_8859_1_stemmer_delete(struct SN_env * sn_env);

/*
 * Stem the `size` symbols starting at `word`.
 * Returns a pointer to the zero-terminated stem held inside sn_env, or NULL
 * on failure. The buffer belongs to sn_env: do not free it, and copy it out
 * before the environment is used again or deleted.
 */
const sb_symbol * english_ISO_8859_1_stemmer_stem(struct SN_env * sn_env, const sb_symbol * word, int size);

/* Length, in symbols, of the content currently held by sn_env. */
int english_ISO_8859_1_stemmer_length(struct SN_env * sn_env);

#ifdef __cplusplus
}
#endif

#endif /* LIBSTEMMER_ENGLISH_ISO_8859_1_H */
20+

libstemmer_c/libstemmer/libstemmer.c

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
2+
#include <stdlib.h>
3+
#include <string.h>
4+
#include "../include/libstemmer.h"
5+
#include "../runtime/api.h"
6+
#include "../src_c/stem_ISO_8859_1_english.h"
7+
8+
/*
 * Allocate a fresh English (ISO-8859-1) stemmer environment.
 *
 * Returns whatever english_ISO_8859_1_create_env() returns: the new
 * environment, or NULL when creation failed. A non-NULL result should be
 * released with english_ISO_8859_1_stemmer_delete().
 */
extern struct SN_env *
english_ISO_8859_1_stemmer_new(void)
{
    /* No cleanup is needed on failure: a NULL environment owns nothing, and
       english_ISO_8859_1_stemmer_delete(NULL) is a no-op anyway. */
    return english_ISO_8859_1_create_env();
}
20+
21+
/*
 * Release a stemmer environment via english_ISO_8859_1_close_env().
 * A NULL sn_env is accepted and ignored.
 */
void
english_ISO_8859_1_stemmer_delete(struct SN_env * sn_env)
{
    if (sn_env != NULL)
    {
        english_ISO_8859_1_close_env(sn_env);
    }
}
27+
28+
const sb_symbol *
29+
english_ISO_8859_1_stemmer_stem(struct SN_env * sn_env, const sb_symbol * word, int size)
30+
{
31+
int ret;
32+
if (SN_set_current(sn_env, size, (const symbol *)(word)))
33+
{
34+
sn_env->l = 0;
35+
return NULL;
36+
}
37+
ret = english_ISO_8859_1_stem(sn_env);
38+
if (ret < 0) return NULL;
39+
sn_env->p[sn_env->l] = 0;
40+
return (const sb_symbol *)(sn_env->p);
41+
}
42+
43+
int
44+
english_ISO_8859_1_stemmer_length(struct SN_env * sn_env)
45+
{
46+
return sn_env->l;
47+
}

libstemmer_c/runtime/api.c

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
2+
#include <stdlib.h> /* for calloc, free */
3+
#include "header.h"
4+
5+
extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size)
6+
{
7+
struct SN_env * z = (struct SN_env *) calloc(1, sizeof(struct SN_env));
8+
if (z == NULL) return NULL;
9+
z->p = create_s();
10+
if (z->p == NULL) goto error;
11+
if (S_size)
12+
{
13+
int i;
14+
z->S = (symbol * *) calloc(S_size, sizeof(symbol *));
15+
if (z->S == NULL) goto error;
16+
17+
for (i = 0; i < S_size; i++)
18+
{
19+
z->S[i] = create_s();
20+
if (z->S[i] == NULL) goto error;
21+
}
22+
}
23+
24+
if (I_size)
25+
{
26+
z->I = (int *) calloc(I_size, sizeof(int));
27+
if (z->I == NULL) goto error;
28+
}
29+
30+
if (B_size)
31+
{
32+
z->B = (unsigned char *) calloc(B_size, sizeof(unsigned char));
33+
if (z->B == NULL) goto error;
34+
}
35+
36+
return z;
37+
error:
38+
SN_close_env(z, S_size);
39+
return NULL;
40+
}
41+
42+
extern void SN_close_env(struct SN_env * z, int S_size)
43+
{
44+
if (z == NULL) return;
45+
if (S_size)
46+
{
47+
int i;
48+
for (i = 0; i < S_size; i++)
49+
{
50+
lose_s(z->S[i]);
51+
}
52+
free(z->S);
53+
}
54+
free(z->I);
55+
free(z->B);
56+
if (z->p) lose_s(z->p);
57+
free(z);
58+
}
59+
60+
extern int SN_set_current(struct SN_env * z, int size, const symbol * s)
61+
{
62+
int err = replace_s(z, 0, z->l, size, s, NULL);
63+
z->c = 0;
64+
return err;
65+
}
66+

libstemmer_c/runtime/api.h

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
2+
#ifndef SNOWBALL_RUNTIME_API_H
#define SNOWBALL_RUNTIME_API_H

/* This header is reachable both directly and through header.h; the guard
   keeps the typedef and struct from being defined twice in one unit. */

typedef unsigned char symbol;

/* Or replace 'char' above with 'short' for 16 bit characters.

   More precisely, replace 'char' with whatever type guarantees the
   character width you need. Note however that sizeof(symbol) should divide
   HEAD, defined in header.h as 2*sizeof(int), without remainder, otherwise
   there is an alignment problem. In the unlikely event of a problem here,
   consult Martin Porter.

*/

/* State of one stemmer instance. */
struct SN_env {
    symbol * p;          /* working string; obtained from create_s() */
    int c;               /* reset to 0 by SN_set_current() */
    int l;               /* length of the content in p */
    int lb;              /* NOTE(review): presumably the backward limit - confirm in utilities.c */
    int bra;             /* NOTE(review): presumably the slice start - confirm in utilities.c */
    int ket;             /* NOTE(review): presumably the slice end - confirm in utilities.c */
    symbol * * S;        /* S_size strings, each obtained from create_s() */
    int * I;             /* I_size zero-initialised ints */
    unsigned char * B;   /* B_size zero-initialised bytes */
};

/* Allocate an environment with the given slot counts; NULL on failure. */
extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size);

/* Free an environment; S_size is the count given to SN_create_env(). */
extern void SN_close_env(struct SN_env * z, int S_size);

/* Replace the environment's content with `size` symbols from `s`. */
extern int SN_set_current(struct SN_env * z, int size, const symbol * s);

#endif /* SNOWBALL_RUNTIME_API_H */
26+

libstemmer_c/runtime/header.h

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
2+
#include <limits.h>
3+
4+
#include "api.h"
5+
6+
#define MAXINT INT_MAX
#define MININT INT_MIN

/* Byte size of two ints. Parenthesised so that the macro expands as a
   single operand in any expression (e.g. `n % HEAD`, `n / HEAD`). */
#define HEAD (2 * sizeof(int))

/* Accessors for the two ints stored immediately before address p:
   SIZE is the int at index -1, CAPACITY the int at index -2.
   Arguments and expansions are fully parenthesised; SIZE(p) and CAPACITY(p)
   remain usable as assignment targets. */
#define SIZE(p)        (((int *)(p))[-1])
#define SET_SIZE(p, n) (((int *)(p))[-1] = (n))
#define CAPACITY(p)    (((int *)(p))[-2])
14+
15+
struct among
16+
{ int s_size; /* number of chars in string */
17+
const symbol * s; /* search string */
18+
int substring_i;/* index to longest matching substring */
19+
int result; /* result of the lookup */
20+
int (* function)(struct SN_env *);
21+
};
22+
23+
/* Declarations of the runtime helpers shared by the generated stemmers.
   Their definitions are not part of this header (presumably
   runtime/utilities.c - confirm). */

/* Symbol-string allocation: api.c obtains z->p and every z->S[i] from
   create_s() and releases them with lose_s(). */
extern symbol * create_s(void);
24+
extern void lose_s(symbol * p);
25+
26+
extern int skip_utf8(const symbol * p, int c, int lb, int l, int n);
27+
28+
/* Grouping tests. NOTE(review): the _U suffix presumably marks UTF-8
   variants and _b backward-moving variants - confirm. */
extern int in_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
29+
extern int in_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
30+
extern int out_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
31+
extern int out_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
32+
33+
extern int in_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
34+
extern int in_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
35+
extern int out_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
36+
extern int out_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
37+
38+
/* String comparison against the environment's content. */
extern int eq_s(struct SN_env * z, int s_size, const symbol * s);
39+
extern int eq_s_b(struct SN_env * z, int s_size, const symbol * s);
40+
extern int eq_v(struct SN_env * z, const symbol * p);
41+
extern int eq_v_b(struct SN_env * z, const symbol * p);
42+
43+
/* Table lookup over an array of v_size `struct among` entries. */
extern int find_among(struct SN_env * z, const struct among * v, int v_size);
44+
extern int find_among_b(struct SN_env * z, const struct among * v, int v_size);
45+
46+
/* Editing of the environment's content. SN_set_current() in api.c calls
   replace_s(z, 0, z->l, size, s, NULL) and treats a non-zero return as
   failure; `adjustment` may therefore be NULL. */
extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s, int * adjustment);
47+
extern int slice_from_s(struct SN_env * z, int s_size, const symbol * s);
48+
extern int slice_from_v(struct SN_env * z, const symbol * p);
49+
extern int slice_del(struct SN_env * z);
50+
51+
extern int insert_s(struct SN_env * z, int bra, int ket, int s_size, const symbol * s);
52+
extern int insert_v(struct SN_env * z, int bra, int ket, const symbol * p);
53+
54+
extern symbol * slice_to(struct SN_env * z, symbol * p);
55+
extern symbol * assign_to(struct SN_env * z, symbol * p);
56+
57+
/* NOTE(review): presumably a diagnostic dump of the environment - confirm. */
extern void debug(struct SN_env * z, int number, int line_count);
58+

0 commit comments

Comments
 (0)