operators.candidates.context.ContextAggregator
class operators.candidates.context.ContextAggregator
operators.candidates.extractor.DateSpanFeaturizer
class operators.candidates.extractor.DateSpanFeaturizer(field, col_suffix=None)
operators.candidates.extractor.DocEntityDictSpanFeaturizer
class operators.candidates.extractor.DocEntityDictSpanFeaturizer(entity_dict_path, doc_entity_dict_path, field, ignore_case=False, link_entities=True, col_suffix=None)
operators.candidates.extractor.EmailAddressSpanFeaturizer
class operators.candidates.extractor.EmailAddressSpanFeaturizer(field, col_suffix=None)
operators.candidates.extractor.EmptySpanFeaturizer
class operators.candidates.extractor.EmptySpanFeaturizer(field, col_suffix=None)
operators.candidates.extractor.EntityDictRegexSpanFeaturizer
class operators.candidates.extractor.EntityDictRegexSpanFeaturizer(entity_dict_path, field, ignore_case=False, link_entities=True, col_suffix=None)
operators.candidates.extractor.EntityDictSpanFeaturizer
class operators.candidates.extractor.EntityDictSpanFeaturizer(entity_dict_path, field, ignore_case=False, link_entities=True, col_suffix=None)
operators.candidates.extractor.HardCodedSpanFeaturizer
class operators.candidates.extractor.HardCodedSpanFeaturizer(char_starts, char_ends, context_uids, span_fields, initial_labels, span_entities, col_suffix=None)
operators.candidates.extractor.NumericSpanFeaturizer
class operators.candidates.extractor.NumericSpanFeaturizer(field, col_suffix=None)
operators.candidates.extractor.ParagraphSpanFeaturizer
class operators.candidates.extractor.ParagraphSpanFeaturizer(field, col_suffix=None)
operators.candidates.extractor.RegexSpanFeaturizer
class operators.candidates.extractor.RegexSpanFeaturizer(regex, field, ignore_case=False, capture_group=0, col_suffix=None)
operators.candidates.extractor.SpansFileFeaturizer
class operators.candidates.extractor.SpansFileFeaturizer(path, source_type='CSV', col_suffix=None)
operators.candidates.extractor.USCurrencySpanFeaturizer
class operators.candidates.extractor.USCurrencySpanFeaturizer(field, col_suffix=None)
operators.candidates.extractor_spacy.NounChunkSpanFeaturizer
class operators.candidates.extractor_spacy.NounChunkSpanFeaturizer(field, **spacy_span_kwargs)
operators.candidates.extractor_spacy.SpacyNERSpanFeaturizer
class operators.candidates.extractor_spacy.SpacyNERSpanFeaturizer(tag, field, match_longest=True, **spacy_span_kwargs)
operators.candidates.extractor_spacy.TagSpanFeaturizer
class operators.candidates.extractor_spacy.TagSpanFeaturizer(field, tag, attr='ent_type_', match_longest=True, **spacy_span_kwargs)
operators.candidates.extractor_spacy.TokenSpanFeaturizer
class operators.candidates.extractor_spacy.TokenSpanFeaturizer(field, tokenizer='spacy', **spacy_span_kwargs)
operators.candidates.span_preview.SpanPreviewPreprocessor
class operators.candidates.span_preview.SpanPreviewPreprocessor(char_window=200, feature_suffix='')
operators.embedding.EmbeddingCandidateFeaturizer
class operators.embedding.EmbeddingCandidateFeaturizer(field, candidate_field, target_field=None, embedding_type='simcse')
operators.embedding.EmbeddingFeaturizer
class operators.embedding.EmbeddingFeaturizer(field, target_field=None, embedding_type='simcse')
operators.spacy.NounChunkFeaturizer
class operators.spacy.NounChunkFeaturizer(field, target_field=None, **spacy_span_kwargs)
operators.spacy.SentenceFeaturizer
class operators.spacy.SentenceFeaturizer(field, target_field=None, **spacy_span_kwargs)
operators.spacy.SpacyPreprocessor
class operators.spacy.SpacyPreprocessor(field, target_field='doc', model='en_core_web_sm', disable=None, **spacy_kwargs)
operators.spacy.SpacyTokenizer
class operators.spacy.SpacyTokenizer(text_field, tokens_field=None)
operators.spacy.VerbPhraseFeaturizer
class operators.spacy.VerbPhraseFeaturizer(field, target_field=None, min_length=2, **spacy_span_kwargs)
operators.special_char.AsciiCharFilter
class operators.special_char.AsciiCharFilter(field)
operators.special_char.LatinCharFilter
class operators.special_char.LatinCharFilter(field)
operators.truncate.TruncatePreprocessor
class operators.truncate.TruncatePreprocessor(field, target_field=None, length=5000, by='words')
operators.whitespace.WhitespacePreprocessor
class operators.whitespace.WhitespacePreprocessor(fields, to_replace=None, output_field_suffix='')