API Reference¶
DeepSearch Toolkit
artifacts
¶
artifact_manager
¶
ARTF_META_FILENAME = os.getenv('DEEPSEARCH_ARTIFACT_META_FILENAME', default='meta.info')
module-attribute
¶
ARTF_META_URL_FIELD = os.getenv('DEEPSEARCH_ARTIFACT_URL_FIELD', default='static_url')
module-attribute
¶
DFLT_ARTFCT_CACHE_DIR = os.getenv('DEEPSEARCH_ARTIFACT_CACHE', default=Path(platformdirs.user_cache_dir('deepsearch', 'ibm')) / 'artifact_cache')
module-attribute
¶
DFLT_ARTFCT_INDEX_DIR = os.getenv('DEEPSEARCH_ARTIFACT_INDEX', default=os.getcwd())
module-attribute
¶
ArtifactManager
¶
HitStrategy
¶
__init__(index=None, cache=None)
¶
download_artifact_to_cache(artifact_name, unpack_archives=True, hit_strategy=HitStrategy.OVERWRITE, with_progress_bar=False)
¶
get_artifact_path_in_cache(artifact_name)
¶
get_artifacts_in_cache()
¶
get_artifacts_in_index()
¶
get_cache_path()
¶
get_index_path()
¶
chemistry
¶
queries
¶
molecules
¶
CHEMVECDB_COLLECTIONS = {MolQueryType.SIMILARITY: 'patcid_tanimoto', MolQueryType.SUBSTRUCTURE: 'patcid_substructure'}
module-attribute
¶
MolIdType
¶
MolQueryLang
¶
MolQueryType
¶
MoleculeQuery(query, query_type, query_lang=MolQueryLang.SMILES, num_items=10)
¶
Use the vector database in Deep Search for querying molecules
by substructure or similarity.
The result is contained in the molecules
output of the response.
MoleculesInPatentsQuery(patents, num_items=10, partial_lookup=False)
¶
List all molecules contained in a list of patents.
The result is contained in the molecules
output of the response.
PatentsWithMoleculesQuery(molecules, num_items=10)
¶
List all patents containing any of the input molecules.
The result is contained in the patents
output of the response.
cps
¶
__all__ = ['CpsApi', 'CpsApiClient']
module-attribute
¶
CpsApi
¶
data_catalogs: CpsApiDataCatalogs
instance-attribute
¶
data_indices: CpsApiDataIndices
instance-attribute
¶
documents: DSApiDocuments
instance-attribute
¶
elastic: CpsApiElastic
instance-attribute
¶
knowledge_graphs: CpsApiKnowledgeGraphs
instance-attribute
¶
projects: CpsApiProjects
instance-attribute
¶
queries: CpsApiQueries
instance-attribute
¶
tasks: CpsApiTasks
instance-attribute
¶
uploader: DSApiUploader
instance-attribute
¶
__init__(client)
¶
from_env(profile_name=None)
classmethod
¶
Create an API object resolving the required settings from the environment if possible, otherwise from a stored profile.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
profile_name |
Optional[str]
|
profile to use if resolution from environment not possible. Defaults to None (active profile). |
None
|
Returns:
Name | Type | Description |
---|---|---|
CpsApi |
CpsApi
|
the created API object |
from_settings(settings)
classmethod
¶
Create an API object from the provided settings.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
settings |
ProfileSettings
|
the settings to use. |
required |
Returns:
Name | Type | Description |
---|---|---|
CpsApi |
CpsApi
|
the created API object |
refresh_token(admin=False)
¶
Refresh access token
Parameters:
Name | Type | Description | Default |
---|---|---|---|
admin |
bool
|
controls whether an admin token should be requested. Defaults to False. |
False
|
Raises:
Type | Description |
---|---|
RuntimeError
|
raised in case API Key or User is invalid |
CpsApiClient
¶
data_indices
¶
utils
¶
logger = logging.getLogger(__name__)
module-attribute
¶
process_external_cos(api, coords, s3_coordinates, progress_bar=False)
¶
Individual files are processed before upload.
process_local_file(api, coords, local_file, progress_bar=False, conv_settings=None, target_settings=None)
¶
Individual files are uploaded for conversion and storage in data index.
process_url_input(api, coords, urls, url_chunk_size, progress_bar=False)
¶
Individual urls are uploaded for conversion and storage in data index.
upload_files(api, coords, url=None, local_file=None, s3_coordinates=None, conv_settings=None, target_settings=None, url_chunk_size=1)
¶
Orchestrate document conversion and upload to an index in a project
kg
¶
workflow
¶
wf_functions
¶
run(wf, config)
¶
Run the workflow against the given KG :param wf: Workflow object :type wf: Workflow :param config: Knowledge Graph API Configuration :type config: Configuration :returns: workflow results
validate(wf, config)
¶
Validate the workflow DAG :param wf: Workflow object :type wf: Workflow :param config: Knowledge Graph API Configuration :type config: Configuration
workflow
¶
Workflow
¶
__add__(workflow)
¶__and__(workflow)
¶__init__(starting_node=None)
¶__mul__(workflow)
¶__or__(workflow)
¶as_output(limit=None)
¶Set node type as output :param limit: Response limit :type limit: int
combine(*workflows)
¶Combine result
:param *workflows
: Nodes to combine
:type *workflows
: List['Workflow']
edge_traversal(edges=[], include=[])
¶Traverse edges :param edges: The edges to traverse :type edges: List[str] :param include: Include nodes in operation :type include: List['Workflow']
filter(filter_type='cut-off', field_operation='==', field_value='', include=[])
¶Filter values :param filter_type: Filter type. Possible values "cut-off", "field-value" :type filter_type: str :param field_operation: The field operation to use if filter type is "field-value". Possible values "<", "==", ">" :type field_operation: str :param field_value: The field value to filter by :type field_value: str :param include: Include nodes in operation :type include: List['Workflow']
filter_categories(*categories, include=[])
¶Filter node type by category :param categories: the categories to filter :type categories: List[str] :param include: Include nodes in operation :type include: List['Workflow']
get_operations()
¶Return workflow operations
intersect(*workflows)
¶Intersect result
:param *workflows
: Nodes to intersect
:type *workflows
: List['Workflow']
matrix_function(matrix_function='abs', include=[])
¶Run result through matrix function :param matrix_function: Matrix function to use. Possible values "e^A", "cosh", "sinh" :type matrix_function: str :param include: Include nodes in operation :type include: List['Workflow']
multiply(*workflows)
¶Multiply result
:param *workflows
: Nodes to multiply
:type *workflows
: List['Workflow']
negate(*workflows)
¶Negate result
:param *workflows
: Nodes to negate
:type *workflows
: List['Workflow']
normalize(normalize_type='RENORMALIZE_L2', include=[])
¶Normalize result :param normalize_type: Normalize type to use. Possible values "RENORMALIZE_L1", "RENORMALIZE_L2", "RENORMALIZE_LINF" :type normalize_type: str :param include: Include nodes in operation :type include: List['Workflow']
pearson_traversal(edges=[], include=[])
¶Traverse edges using pearson traversal :param edges: The edges to traverse :type edges: List[str] :param include: Include nodes in operation :type include: List['Workflow']
scalar_function(scalar_function='abs', include=[])
¶Run result through scalar function :param scalar_function: Scalar function to use. Possible values "uniform", "abs", "inv", "sigmoid", "softmax" :type scalar_function: str :param include: Include nodes in operation :type include: List['Workflow']
search_nodes_by_approximation(*args, tolerance=0.8, include=[])
¶Search nodes where the arguments are approximate
:param *args
: the search arguments
:type *args
: List[str]
:param tolerance: the tolerance
:type tolerance: float
:param include: Include nodes in operation
:type include: List['Workflow']
search_nodes_by_db_id_pair(*args, include=[])
¶Search nodes that contain the db\id pair
:param *args
: the db\id pairs in format {"_db": "db value", "_id": "id value"}
:type *args
: List[str]
:param include: Include nodes in operation
:type include: List['Workflow']
search_nodes_by_index(indices=[], weights=[], include=[])
¶Search nodes by index :param indices: the indices to search :type indices: List[str] :param weights: the weights to search :type weights: List[float] :param include: Include nodes in operation :type include: List['Workflow']
search_nodes_by_regex(*args, include=[])
¶Search nodes by regex that match args
:param *args
: the search arguments
:type *args
: List[str]
:param include: Include nodes in operation
:type include: List['Workflow']
search_nodes_containing(*args, include=[])
¶Search nodes that contain the args
:param *args
: the search arguments
:type *args
: List[str]
:param include: Include nodes in operation
:type include: List['Workflow']
search_nodes_equal(*args, include=[])
¶Search nodes that equal the args
:param *args
: the search arguments
:type *args
: List[str]
:param include: Include nodes in operation
:type include: List['Workflow']
search_nodes_in_category(*categories, include=[])
¶Search nodes in categories :param categories: the categories to search :type categories: List[str] :param include: Include nodes in operation :type include: List['Workflow']
set_to_field_value(field_name='', include=[])
¶Set node to field value :param field_name: The field name :type field_name: str :param include: Include nodes in operation :type include: List['Workflow']
split(times=1)
¶Add children to node :param times: Number of children to add :type times: int :returns: the child nodes
sum(*workflows)
¶Sum result
:param *workflows
: Nodes to sum
:type *workflows
: List['Workflow']
to_json(indent=2)
¶Return workflow as json string :param indent: result indentation :type indent: int
queries
¶
ConstrainedWeight = Annotated[float, Field(strict=True, ge=0.0, le=1.0, multiple_of=0.1)]
module-attribute
¶
DataQuery(search_query, *, source=None, aggregations=None, highlight=None, sort=None, limit=20, search_after=None, coordinates)
¶
Fts(search_query, collection_name, kg)
¶
RAGQuery(question, *, project, data_source, retr_k=10, rerank=False, text_weight=0.1, model_id=None, prompt_template=None, gen_params=None, gen_ctx_extr_method='window', gen_ctx_window_size=5000, gen_ctx_window_lead_weight=0.5, return_prompt=False, chunk_refs=None, gen_timeout=None)
¶
Create a RAG query
Parameters:
Name | Type | Description | Default |
---|---|---|---|
question |
str
|
the natural-language query |
required |
project |
Union[str, Project]
|
project to use |
required |
data_source |
DataSource
|
the data source to query |
required |
retr_k |
int
|
num of items to retrieve; defaults to 10 |
10
|
rerank |
bool
|
whether to rerank retrieval results; defaults to False |
False
|
text_weight |
ConstrainedWeight
|
lexical weight for hybrid search; allowed values: {0.0, 0.1, 0.2, ..., 1.0}; defaults to 0.1 |
0.1
|
model_id |
str
|
the LLM to use for generation; defaults to None, i.e. determined by system |
None
|
prompt_template |
str
|
the prompt template to use; defaults to None, i.e. determined by system |
None
|
gen_params |
dict
|
the generation params to send to the Gen AI platforms; defaults to None, i.e. determined by system |
None
|
gen_ctx_extr_method |
Literal['window', 'page']
|
method for gen context extraction from document; defaults to "window" |
'window'
|
gen_ctx_window_size |
int
|
(relevant only if gen_ctx_extr_method=="window") max chars to use for extracted gen context (actual extraction quantized on doc item level); defaults to 5000 |
5000
|
gen_ctx_window_lead_weight |
float
|
(relevant only if gen_ctx_extr_method=="window") weight of leading text for distributing remaining window size after extracting the `main_path`; defaults to 0.5 |
0.5
|
return_prompt |
bool
|
whether to return the instantiated prompt; defaults to False |
False
|
chunk_refs |
Optional[List[ChunkRef]]
|
list of explicit chunk references to use instead of performing retrieval; defaults to None (i.e. retrieval-mode) |
None
|
gen_timeout |
float
|
timeout for LLM generation; defaults to None, i.e. determined by system |
None
|
SemanticQuery(question, *, project, data_source, retr_k=10, rerank=False, text_weight=0.1)
¶
Create a semantic retrieval query
Parameters:
Name | Type | Description | Default |
---|---|---|---|
question |
str
|
the natural-language query |
required |
document_hash |
str
|
hash of target document |
required |
project |
Union[str, Project]
|
project to use |
required |
data_source |
DataSource
|
the data source to query |
required |
retr_k |
int
|
num of items to retrieve; defaults to 10 |
10
|
rerank |
bool
|
whether to rerank retrieval results; defaults to False |
False
|
text_weight |
ConstrainedWeight
|
lexical weight for hybrid search; allowed values: {0.0, 0.1, 0.2, ..., 1.0}; defaults to 0.1 |
0.1
|
Wf(wf_query, kg)
¶
results
¶
ChunkRef
¶
GenerationError
¶
Bases: SemanticError
__init__(msg='', *args, **kwargs)
¶
NoSearchResultsError
¶
Bases: SemanticError
__init__(msg='Search returned no results', *args, **kwargs)
¶
RAGAnswerItem
¶
RAGGroundingInfo
¶
RAGResult
¶
SearchResult
¶
SearchResultItem
¶
SemanticError
¶
Bases: Exception
documents
¶
core
¶
common_routines
¶
ERROR_MSG = f'{dashes}Suggestion:(1) Check your input.(2) Contact Deep Search developers if problem persists.{dashes}'
module-attribute
¶
WELCOME = f'{dashes}{''}Welcome to the Deep Search Toolkit{dashes}'
module-attribute
¶
dashes = f'{'-' * 86}'
module-attribute
¶
progressbar = ProgressBarParameters()
module-attribute
¶
progressbar_length = 30
module-attribute
¶
convert
¶
TASK_STOP_STATUS = ['SUCCESS', 'FAILURE']
module-attribute
¶
logger = logging.getLogger(__name__)
module-attribute
¶
check_ccs_single_task_status(api, ccs_proj_key, task_id)
¶
Check status of individual tasks.
check_cps_single_task_status(sw_api, cps_proj_key, task_id, wait=2)
¶
Check cps status of individual tasks.
check_cps_status_running_tasks(api, cps_proj_key, task_ids, progress_bar=False)
¶
Check status of multiple running cps tasks and optionally display progress with progress bar.
check_status_running_tasks(cps_proj_key, task_ids, api=None, progress_bar=False)
¶
Check status of multiple running tasks and optionally display progress with progress bar.
download_converted_documents(result_dir, download_urls, progress_bar=False)
¶
get_download_url(cps_proj_key, task_ids, api=None)
¶
Get the urls of converted documents.
make_payload(source, target, conversion_settings, collection_name='_default')
¶
Create payload for requesting conversion
send_files_for_conversion(api, cps_proj_key, source_path, target, conversion_settings, root_dir, progress_bar=False)
¶
Send multiple files for conversion.
send_urls_for_conversion(api, cps_proj_key, urls, target, conversion_settings, progress_bar=False)
¶
Send multiple online documents for conversion.
submit_conversion_payload(api, cps_proj_key, source, target, conversion_settings)
¶
Convert an online pdf using DeepSearch Technology.
create_report
¶
logger = logging.getLogger(__name__)
module-attribute
¶
get_multiple_reports(api, cps_proj_key, task_ids, source_files, result_dir, progress_bar=False)
¶
Generates reports for multiple task_ids and associated documents.
get_single_report(api, cps_proj_key, task_id)
¶
Get report of document conversion per individual task id
export
¶
JsonToHTML
¶
__init__()
¶
clean(data, escape=True)
¶
enum_has_ids(enums)
¶
execute(data)
¶
get_body_new(data)
¶
get_page(item)
¶
get_refs(ref)
¶
get_style(item)
¶
get_tablecell_span(cell, ix)
¶
get_title(data)
¶
make_bbox(page, bbox_rect)
¶
make_bbox_dict(page, bbox_rect)
¶
split_item_in_boxes(item)
¶
template()
¶
write_enum(item)
¶
write_table(item)
¶
write_table_simple(item)
¶
export_to_html(document)
¶
export_to_markdown(document)
¶
input_process
¶
process_cos_input(api, cps_proj_key, source_cos, target, conversion_settings, progress_bar=False)
¶
Classify the user provided COS input and take appropriate action.
process_local_input(api, cps_proj_key, source_path, target, conversion_settings, progress_bar=False)
¶
Classify the user provided local input and take appropriate action.
process_urls_input(api, cps_proj_key, urls, target, conversion_settings, progress_bar=False)
¶
Classify user provided url(s) and take appropriate action.
lookup
¶
main
¶
convert_documents(proj_key, api, urls=None, source_path=None, source_cos=None, target=None, conversion_settings=None, progress_bar=False)
¶
Document conversion via Deep Search Technology. Function to orchestrate document conversion.
Inputs¶
proj_key : string [REQUIRED] Your DeepSearch CPS Project Key. Contact DeepSearch Developers to request one.
urls : string [OPTIONAL] For converting documents from the web, please provide a single url or list of urls.
source_path : path [OPTIONAL] For converting local files, please provide absolute path to file or to directory containing multiple files.
source_cos : S3Coordinates [OPTIONAL] For converting all documents in a COS bucket, please provide the S3 credentials, including bucket and key_prefix.
target : deepsearch.documents.core.models.ExportTarget [OPTIONAL] Specify to which target the documents should be exported. Available options: ZIP file, Elastic index, MongoDB collection
progress_bar : Boolean (default is False in code, True in CLI) Show progress bar for processing, submitting, converting input and downloading converted document.
NOTE: Either urls, source_path or source_cos should be supplied.
models
¶
ConversionModel = Union[DefaultConversionModel, ProjectConversionModel]
module-attribute
¶
ExportTarget = Union[ZipTarget, MongoS3Target, ElasticS3Target, COSTarget]
module-attribute
¶
OcrEngine = Union[AlpineOcrEngine, TesseractOcrEngine]
module-attribute
¶
T = TypeVar('T', bound=Hashable)
module-attribute
¶
UniqueList = Annotated[List[T], AfterValidator(_validate_unique_list), Field(json_schema_extra={'uniqueItems': True})]
module-attribute
¶
AlpineOcrEngine
¶
AlpineOcrLanguage
¶
COSTarget
¶
ConversionMetadata
¶
Bases: BaseModel
description: str = ''
class-attribute
instance-attribute
¶
display_name: str = ''
class-attribute
instance-attribute
¶
license: str = ''
class-attribute
instance-attribute
¶
source: str = ''
class-attribute
instance-attribute
¶
version: str = ''
class-attribute
instance-attribute
¶
from_ccs_spec(obj)
classmethod
¶
to_ccs_spec()
¶
ConversionPipelineSettings
¶
ConversionSettings
¶
Bases: BaseModel
metadata: Optional[ConversionMetadata] = None
class-attribute
instance-attribute
¶
ocr: Optional[OCRSettings] = None
class-attribute
instance-attribute
¶
pipeline: Optional[ConversionPipelineSettings] = None
class-attribute
instance-attribute
¶
from_defaults(api)
classmethod
¶
from_project(api, proj_key)
classmethod
¶
to_ccs_spec()
¶
DefaultConversionModel
¶
DocumentExistsInTargetAction
¶
Bases: str
, Enum
What to do if the document already exists on the target.
- replace
will replace the document, destroying any external modifications.
- skip
will not touch the document on the target, leaving it as-is.
Using skip
will result in a performance increase; however, if the document
is modified externally, CCS will not update it back to the original state.
ElasticIndexCoordinates
¶
ElasticS3Target
¶
Bases: BaseModel
add_annotations: bool = False
class-attribute
instance-attribute
¶
add_cells: bool = False
class-attribute
instance-attribute
¶
add_raw_pages: bool = False
class-attribute
instance-attribute
¶
coordinates: ElasticS3TargetCoordinates
instance-attribute
¶
escape_ref_fields: bool = Field(default=True, description='If true, `$ref` fields are renamed to `__ref`. This allows the data to then be written into a MongoDB collection.')
class-attribute
instance-attribute
¶
if_document_exists: DocumentExistsInTargetAction = DocumentExistsInTargetAction.REPLACE
class-attribute
instance-attribute
¶
type: Literal['elastic_s3'] = 'elastic_s3'
class-attribute
instance-attribute
¶
ElasticS3TargetCoordinates
¶
MongoCollectionCoordinates
¶
MongoS3Target
¶
MongoS3TargetCoordinates
¶
OCRModeEnum
¶
OCRSettings
¶
Bases: BaseModel
enabled: bool = False
class-attribute
instance-attribute
¶
engine: Optional[OcrEngine] = None
class-attribute
instance-attribute
¶
merge_mode: OCRModeEnum = OCRModeEnum.prioritize_ocr
class-attribute
instance-attribute
¶
from_ccs_spec(obj)
classmethod
¶
get_backends(api)
classmethod
¶
to_ccs_spec()
¶
ProjectConversionModel
¶
S3Coordinates
¶
Bases: BaseModel
access_key: str
instance-attribute
¶
bucket: str
instance-attribute
¶
external_endpoint: Optional[str] = None
class-attribute
instance-attribute
¶
host: str
instance-attribute
¶
key_infix_format: str = Field('', description=dedent('\n Control the infix of the object keys that are saved on the document\'s `_s3_data`, after `key_prefix`,\n and before `PDFDocuments/{document_hash}.pdf` or `PDFPages/{page_hash}.pdf`.\n\n By default, the infix is empty.\n For using the name of the index in the coordinates, you can use `key_infix_format = "{index_name}"`.\n\n For example, if:\n\n ```\n key_prefix = "my_prefix/"\n key_infix_format = "{index_name}"\n index_name = "my_elastic_index"\n\n document_hash = "123"\n ```\n\n Then, the document above would be uploaded to: `my_prefix/my_elastic_index/PDFDocuments/123.pdf`.\n\n If one were to set `key_infix_format = ""`, it would be uploaded to `my_prefix/PDFDocuments/123.pdf`.\n\n If one were to set `key_infix_format = "foo"`, it would be uploaded to `my_prefix/foo/PDFDocuments/123.pdf`\n\n Finally, one can combine `{index_name}` with constants and even path separators.\n\n So, `{index_name}/test` would produce `my_prefix/my_elastic_index/test/PDFDocuments/123.pdf`\n '))
class-attribute
instance-attribute
¶
key_prefix: str = ''
class-attribute
instance-attribute
¶
location: str
instance-attribute
¶
port: int
instance-attribute
¶
secret_key: str
instance-attribute
¶
ssl: bool
instance-attribute
¶
verify_ssl: bool
instance-attribute
¶
TargetSettings
¶
TesseractOcrEngine
¶
Bases: BaseModel
TesseractOcrLanguage
¶
Bases: str
, Enum
Arabic = 'Arabic'
class-attribute
instance-attribute
¶
Armenian = 'Armenian'
class-attribute
instance-attribute
¶
Bengali = 'Bengali'
class-attribute
instance-attribute
¶
Canadian_Aboriginal = 'Canadian_Aboriginal'
class-attribute
instance-attribute
¶
Cherokee = 'Cherokee'
class-attribute
instance-attribute
¶
Cyrillic = 'Cyrillic'
class-attribute
instance-attribute
¶
Devanagari = 'Devanagari'
class-attribute
instance-attribute
¶
Ethiopic = 'Ethiopic'
class-attribute
instance-attribute
¶
Fraktur = 'Fraktur'
class-attribute
instance-attribute
¶
Georgian = 'Georgian'
class-attribute
instance-attribute
¶
Greek = 'Greek'
class-attribute
instance-attribute
¶
Gujarati = 'Gujarati'
class-attribute
instance-attribute
¶
Gurmukhi = 'Gurmukhi'
class-attribute
instance-attribute
¶
HanS = 'HanS'
class-attribute
instance-attribute
¶
HanS_vert = 'HanS_vert'
class-attribute
instance-attribute
¶
HanT = 'HanT'
class-attribute
instance-attribute
¶
HanT_vert = 'HanT_vert'
class-attribute
instance-attribute
¶
Hangul = 'Hangul'
class-attribute
instance-attribute
¶
Hangul_vert = 'Hangul_vert'
class-attribute
instance-attribute
¶
Hebrew = 'Hebrew'
class-attribute
instance-attribute
¶
Japanese = 'Japanese'
class-attribute
instance-attribute
¶
Japanese_vert = 'Japanese_vert'
class-attribute
instance-attribute
¶
Kannada = 'Kannada'
class-attribute
instance-attribute
¶
Khmer = 'Khmer'
class-attribute
instance-attribute
¶
Lao = 'Lao'
class-attribute
instance-attribute
¶
Latin = 'Latin'
class-attribute
instance-attribute
¶
Malayalam = 'Malayalam'
class-attribute
instance-attribute
¶
Myanmar = 'Myanmar'
class-attribute
instance-attribute
¶
Oriya = 'Oriya'
class-attribute
instance-attribute
¶
Sinhala = 'Sinhala'
class-attribute
instance-attribute
¶
Syriac = 'Syriac'
class-attribute
instance-attribute
¶
Tamil = 'Tamil'
class-attribute
instance-attribute
¶
Telugu = 'Telugu'
class-attribute
instance-attribute
¶
Thaana = 'Thaana'
class-attribute
instance-attribute
¶
Thai = 'Thai'
class-attribute
instance-attribute
¶
Tibetan = 'Tibetan'
class-attribute
instance-attribute
¶
Vietnamese = 'Vietnamese'
class-attribute
instance-attribute
¶
afr = 'afr'
class-attribute
instance-attribute
¶
amh = 'amh'
class-attribute
instance-attribute
¶
ara = 'ara'
class-attribute
instance-attribute
¶
asm = 'asm'
class-attribute
instance-attribute
¶
aze = 'aze'
class-attribute
instance-attribute
¶
aze_cyrl = 'aze_cyrl'
class-attribute
instance-attribute
¶
bel = 'bel'
class-attribute
instance-attribute
¶
ben = 'ben'
class-attribute
instance-attribute
¶
bod = 'bod'
class-attribute
instance-attribute
¶
bos = 'bos'
class-attribute
instance-attribute
¶
bre = 'bre'
class-attribute
instance-attribute
¶
bul = 'bul'
class-attribute
instance-attribute
¶
cat = 'cat'
class-attribute
instance-attribute
¶
ceb = 'ceb'
class-attribute
instance-attribute
¶
ces = 'ces'
class-attribute
instance-attribute
¶
chi_sim = 'chi_sim'
class-attribute
instance-attribute
¶
chi_sim_vert = 'chi_sim_vert'
class-attribute
instance-attribute
¶
chi_tra = 'chi_tra'
class-attribute
instance-attribute
¶
chi_tra_vert = 'chi_tra_vert'
class-attribute
instance-attribute
¶
chr = 'chr'
class-attribute
instance-attribute
¶
cos = 'cos'
class-attribute
instance-attribute
¶
cym = 'cym'
class-attribute
instance-attribute
¶
dan = 'dan'
class-attribute
instance-attribute
¶
deu = 'deu'
class-attribute
instance-attribute
¶
div = 'div'
class-attribute
instance-attribute
¶
dzo = 'dzo'
class-attribute
instance-attribute
¶
ell = 'ell'
class-attribute
instance-attribute
¶
eng = 'eng'
class-attribute
instance-attribute
¶
enm = 'enm'
class-attribute
instance-attribute
¶
epo = 'epo'
class-attribute
instance-attribute
¶
est = 'est'
class-attribute
instance-attribute
¶
eus = 'eus'
class-attribute
instance-attribute
¶
fao = 'fao'
class-attribute
instance-attribute
¶
fas = 'fas'
class-attribute
instance-attribute
¶
fil = 'fil'
class-attribute
instance-attribute
¶
fin = 'fin'
class-attribute
instance-attribute
¶
fra = 'fra'
class-attribute
instance-attribute
¶
frk = 'frk'
class-attribute
instance-attribute
¶
frm = 'frm'
class-attribute
instance-attribute
¶
fry = 'fry'
class-attribute
instance-attribute
¶
gla = 'gla'
class-attribute
instance-attribute
¶
gle = 'gle'
class-attribute
instance-attribute
¶
glg = 'glg'
class-attribute
instance-attribute
¶
grc = 'grc'
class-attribute
instance-attribute
¶
guj = 'guj'
class-attribute
instance-attribute
¶
hat = 'hat'
class-attribute
instance-attribute
¶
heb = 'heb'
class-attribute
instance-attribute
¶
hin = 'hin'
class-attribute
instance-attribute
¶
hrv = 'hrv'
class-attribute
instance-attribute
¶
hun = 'hun'
class-attribute
instance-attribute
¶
hye = 'hye'
class-attribute
instance-attribute
¶
iku = 'iku'
class-attribute
instance-attribute
¶
ind = 'ind'
class-attribute
instance-attribute
¶
isl = 'isl'
class-attribute
instance-attribute
¶
ita = 'ita'
class-attribute
instance-attribute
¶
ita_old = 'ita_old'
class-attribute
instance-attribute
¶
jav = 'jav'
class-attribute
instance-attribute
¶
jpn = 'jpn'
class-attribute
instance-attribute
¶
jpn_vert = 'jpn_vert'
class-attribute
instance-attribute
¶
kan = 'kan'
class-attribute
instance-attribute
¶
kat = 'kat'
class-attribute
instance-attribute
¶
kat_old = 'kat_old'
class-attribute
instance-attribute
¶
kaz = 'kaz'
class-attribute
instance-attribute
¶
khm = 'khm'
class-attribute
instance-attribute
¶
kir = 'kir'
class-attribute
instance-attribute
¶
kmr = 'kmr'
class-attribute
instance-attribute
¶
kor = 'kor'
class-attribute
instance-attribute
¶
kor_vert = 'kor_vert'
class-attribute
instance-attribute
¶
lao = 'lao'
class-attribute
instance-attribute
¶
lat = 'lat'
class-attribute
instance-attribute
¶
lav = 'lav'
class-attribute
instance-attribute
¶
lit = 'lit'
class-attribute
instance-attribute
¶
ltz = 'ltz'
class-attribute
instance-attribute
¶
mal = 'mal'
class-attribute
instance-attribute
¶
mar = 'mar'
class-attribute
instance-attribute
¶
mkd = 'mkd'
class-attribute
instance-attribute
¶
mlt = 'mlt'
class-attribute
instance-attribute
¶
mon = 'mon'
class-attribute
instance-attribute
¶
mri = 'mri'
class-attribute
instance-attribute
¶
msa = 'msa'
class-attribute
instance-attribute
¶
mya = 'mya'
class-attribute
instance-attribute
¶
nep = 'nep'
class-attribute
instance-attribute
¶
nld = 'nld'
class-attribute
instance-attribute
¶
nor = 'nor'
class-attribute
instance-attribute
¶
oci = 'oci'
class-attribute
instance-attribute
¶
ori = 'ori'
class-attribute
instance-attribute
¶
osd = 'osd'
class-attribute
instance-attribute
¶
pan = 'pan'
class-attribute
instance-attribute
¶
pol = 'pol'
class-attribute
instance-attribute
¶
por = 'por'
class-attribute
instance-attribute
¶
pus = 'pus'
class-attribute
instance-attribute
¶
que = 'que'
class-attribute
instance-attribute
¶
ron = 'ron'
class-attribute
instance-attribute
¶
rus = 'rus'
class-attribute
instance-attribute
¶
san = 'san'
class-attribute
instance-attribute
¶
sin = 'sin'
class-attribute
instance-attribute
¶
slk = 'slk'
class-attribute
instance-attribute
¶
slv = 'slv'
class-attribute
instance-attribute
¶
snd = 'snd'
class-attribute
instance-attribute
¶
spa = 'spa'
class-attribute
instance-attribute
¶
spa_old = 'spa_old'
class-attribute
instance-attribute
¶
sqi = 'sqi'
class-attribute
instance-attribute
¶
srp = 'srp'
class-attribute
instance-attribute
¶
srp_latn = 'srp_latn'
class-attribute
instance-attribute
¶
sun = 'sun'
class-attribute
instance-attribute
¶
swa = 'swa'
class-attribute
instance-attribute
¶
swe = 'swe'
class-attribute
instance-attribute
¶
syr = 'syr'
class-attribute
instance-attribute
¶
tam = 'tam'
class-attribute
instance-attribute
¶
tat = 'tat'
class-attribute
instance-attribute
¶
tel = 'tel'
class-attribute
instance-attribute
¶
tgk = 'tgk'
class-attribute
instance-attribute
¶
tha = 'tha'
class-attribute
instance-attribute
¶
tir = 'tir'
class-attribute
instance-attribute
¶
ton = 'ton'
class-attribute
instance-attribute
¶
tur = 'tur'
class-attribute
instance-attribute
¶
uig = 'uig'
class-attribute
instance-attribute
¶
ukr = 'ukr'
class-attribute
instance-attribute
¶
urd = 'urd'
class-attribute
instance-attribute
¶
uzb = 'uzb'
class-attribute
instance-attribute
¶
uzb_cyrl = 'uzb_cyrl'
class-attribute
instance-attribute
¶
vie = 'vie'
class-attribute
instance-attribute
¶
yid = 'yid'
class-attribute
instance-attribute
¶
yor = 'yor'
class-attribute
instance-attribute
¶
ZipPackageContentType
¶
ZipTarget
¶
Bases: BaseModel
Specify how the documents should be exported to a Zip file. If the [coordinates] are not specified, the project's coordinates will be used.
render
¶
results
¶
DocumentConversionResult
¶
An instance of DocumentConversionResult is generated when document conversion is requested.
proj_key = proj_key
instance-attribute
¶
statuses = statuses
instance-attribute
¶
task_ids = task_ids
instance-attribute
¶
__init__(proj_key, task_ids, statuses, api, source_path=None, source_urls=None, batched_files=None)
¶
__iter__()
¶
download_all(result_dir, progress_bar=False)
¶
Download all converted documents.
Input¶
result_dir : path local directory where converted documents will be saved progress_bar: boolean, optional (default = False) shows progress bar if True
generate_report(result_dir, progress_bar=False)
¶
Saves a csv report file for detailed information about the document conversion job. Returns a dictionary object containing counts of files/urls converted.
DocumentResult
¶
Instance of an individual DocumentConversionResult.
proj_key = proj_key
instance-attribute
¶
status = status
instance-attribute
¶
task_id = task_id
instance-attribute
¶
__init__(proj_key, task_id, status, api)
¶
download(result_dir, progress_bar=False)
¶
Download result of an individual conversion task.
Input¶
result_dir : path local directory where converted documents are stored progress_bar: boolean, optional (default = False) shows progress bar if True
url_json()
¶
Returns the url of a converted json object.
utils
¶
ALLOWED_FILE_EXTENSIONS = ['.pdf', '.jpg', '.jpeg', '.tiff', '.tif', '.png', '.gif']
module-attribute
¶
IteratedDocument
¶
URLNavigator
¶
api = api
instance-attribute
¶
url_host = self.api.client.swagger_client.configuration.host
instance-attribute
¶
url_linked_ccs = urllib.parse.urljoin(self.url_host, '/api/linked-ccs')
instance-attribute
¶
url_user_management = '/user/v1'
instance-attribute
¶
__init__(api)
¶
url_collection_settings(ccs_proj_key, collection_name)
¶
url_conversion_defaults()
¶
url_convert(ccs_proj_key)
¶
url_project_models(ccs_proj_key)
¶
url_report_metrics(ccs_proj_key, task_id)
¶
url_report_tasks(ccs_proj_key, task_id)
¶
url_request_status(ccs_proj_key, task_id)
¶
url_result(ccs_proj_key, task_id)
¶
url_system_models()
¶
url_system_ocr_backends()
¶
batch_single_files(source_path, root_dir, progress_bar=False)
¶
Batch individual input files into zip files.
Output: bfiles : List[List[str]], where the outer list corresponds to the batches and each inner list corresponds to the individual files in a batch.
cleanup(root_dir)
¶
Clean temporarily created zip batches.
collect_all_local_files(source_path, root_dir)
¶
Function to scan directory and collect all batches for conversion
Input:¶
source_path : Path
user provided path
root_dir : Path
path for temporary batched files
create_root_dir()
¶
Creates root directory labelled with timestamp
download_url(url, save_path, chunk_size=128)
¶
Download contents from a url.
iterate_converted_files(result_dir)
¶
Iterate through all the converted documents in the downloaded results.
read_lines(file_path)
¶
Returns list of lines from input file.
write_taskids(result_dir, list_to_write)
¶
Write lines in result_dir
model
¶
base
¶
controller
¶
types
¶
Annotations
¶
Bases: StrictModel
deepsearch_res_ibm_com_x_attempt_number: str = Field(..., alias='deepsearch.res.ibm.com/x-attempt-number')
class-attribute
instance-attribute
¶
deepsearch_res_ibm_com_x_deadline: datetime = Field(..., alias='deepsearch.res.ibm.com/x-deadline')
class-attribute
instance-attribute
¶
deepsearch_res_ibm_com_x_max_attempts: str = Field(..., alias='deepsearch.res.ibm.com/x-max-attempts')
class-attribute
instance-attribute
¶
deepsearch_res_ibm_com_x_transaction_id: str = Field(..., alias='deepsearch.res.ibm.com/x-transaction-id')
class-attribute
instance-attribute
¶
BaseAppPredInput
¶
Bases: StrictModel
BaseModelConfig
¶
Bases: BaseModelMetadata
kind: Kind
instance-attribute
¶
BaseModelMetadata
¶
Bases: StrictModel
author: Optional[str] = None
class-attribute
instance-attribute
¶
description: Optional[str] = None
class-attribute
instance-attribute
¶
expected_compute_time: Optional[PositiveFloat] = None
class-attribute
instance-attribute
¶
name: str
instance-attribute
¶
url: Optional[str] = None
class-attribute
instance-attribute
¶
version: str
instance-attribute
¶
CtrlInfoOutputDefs
¶
Kind
¶
Metadata
¶
Bases: StrictModel
annotations: Annotations
instance-attribute
¶
ModelInfoOutputDefsSpec
¶
StrictModel
¶
Bases: BaseModel
examples
¶
dummy_qa_generator
¶
model
¶
DummyQAGenerator
¶
Bases: BaseQAGenerator
A dummy QA generator which answers a question with the question itself.
simple_geo_nlp_annotator
¶
entities
¶
model
¶
logger = logging.getLogger('cps-nlp')
module-attribute
¶
SimpleGeoNLPAnnotator
¶
Bases: BaseNLPModel
entity_names = list(self._ent_annots.keys())
instance-attribute
¶
property_names = []
instance-attribute
¶
relationship_names = list(self._rel_annots.keys())
instance-attribute
¶
__init__()
¶
annotate_batched_entities(object_type, items, entity_names)
¶
annotate_batched_properties(object_type, items, entities, property_names)
¶
annotate_batched_relationships(object_type, items, entities, relationship_names)
¶
get_nlp_config()
¶
relationships
¶
cities_to_countries_annotator
¶
CitiesToCountriesAnnotator
¶
Bases: MultiEntitiesRelationshipAnnotator
__init__()
¶
cities_to_provincies_annotator
¶
CitiesToProvinciesAnnotator
¶
Bases: MultiEntitiesRelationshipAnnotator
__init__()
¶
common
¶
provincies_to_countries_annotator
¶
ProvinciesToCountriesAnnotator
¶
Bases: MultiEntitiesRelationshipAnnotator
__init__()
¶
kinds
¶
nlp
¶
controller
¶
model
¶
BaseNLPModel
¶
Bases: BaseDSModel
annotate_batched_entities(object_type, items, entity_names)
abstractmethod
¶
annotate_batched_properties(object_type, items, entities, property_names)
abstractmethod
¶
annotate_batched_relationships(object_type, items, entities, relationship_names)
abstractmethod
¶
get_config()
¶
get_nlp_config()
abstractmethod
¶
types
¶
AnnotateEntitiesOutput = List[Dict[str, List[AnnotateEntitiesEntry]]]
module-attribute
¶
AnnotatePropertiesOutput = List[Dict]
module-attribute
¶
AnnotateRelationshipsOutput = List[Dict[str, AnnotateRelationshipsEntry]]
module-attribute
¶
NLPCtrlPredOutput = Union[NLPEntsCtrlPredOuput, NLPRelsCtrlPredOutput, NLPPropsCtrlPredOutput]
module-attribute
¶
NLPReqSpec = Union[NLPEntitiesReqSpec, NLPRelationshipsReqSpec, NLPPropertiesReqSpec]
module-attribute
¶
AnnotateEntitiesEntry
¶
Bases: StrictModel
AnnotateRelationshipsEntry
¶
Bases: StrictModel
AnnotationLabels
¶
Bases: StrictModel
EntityLabel
¶
Bases: StrictModel
FindEntitiesText
¶
Bases: StrictModel
FindPropertiesText
¶
Bases: StrictModel
FindRelationshipsText
¶
Bases: StrictModel
NLPAppPredInput
¶
Bases: BaseAppPredInput
NLPConfig
¶
Bases: BaseModelConfig
NLPEntitiesReqSpec
¶
Bases: StrictModel
findEntities: FindEntitiesText
instance-attribute
¶
NLPEntsCtrlPredOuput
¶
Bases: StrictModel
entities: AnnotateEntitiesOutput
instance-attribute
¶
NLPInfoOutput
¶
Bases: CtrlInfoOutput
definitions: NLPInfoOutputDefinitions
instance-attribute
¶
NLPInfoOutputDefinitions
¶
Bases: CtrlInfoOutputDefs
NLPInfoOutputDefinitionsSpec
¶
Bases: ModelInfoOutputDefsSpec
metadata: NLPModelMetadata
instance-attribute
¶
NLPModelMetadata
¶
Bases: BaseModelMetadata
supported_object_types: List[Literal['text', 'table', 'image']]
instance-attribute
¶
NLPPropertiesReqSpec
¶
Bases: StrictModel
findProperties: FindPropertiesText
instance-attribute
¶
NLPPropsCtrlPredOutput
¶
Bases: StrictModel
properties: AnnotatePropertiesOutput
instance-attribute
¶
NLPRelationshipsReqSpec
¶
Bases: StrictModel
findRelationships: FindRelationshipsText
instance-attribute
¶
NLPRelsCtrlPredOutput
¶
Bases: StrictModel
relationships: AnnotateRelationshipsOutput
instance-attribute
¶
PropertyLabel
¶
Bases: StrictModel
RelationshipColumn
¶
Bases: StrictModel
RelationshipLabel
¶
Bases: StrictModel
qagen
¶
controller
¶
model
¶
BaseQAGenerator
¶
Bases: BaseDSModel
types
¶
GenerateAnswersOutput = List[GenerateAnswersOutEntry]
module-attribute
¶
ContextEntry
¶
Bases: StrictModel
GenerateAnswers
¶
Bases: StrictModel
GenerateAnswersOutEntry
¶
Bases: StrictModel
QAGenAppPredInput
¶
Bases: BaseAppPredInput
QAGenConfig
¶
Bases: BaseModelConfig
kind: Literal[Kind.QAGenModel]
instance-attribute
¶
QAGenCtrlPredOutput
¶
Bases: StrictModel
answers: GenerateAnswersOutput
instance-attribute
¶
QAGenInfoOutput
¶
Bases: StrictModel
definitions: QAGenInfoOutputDefinitions
instance-attribute
¶
QAGenInfoOutputDefinitions
¶
Bases: CtrlInfoOutputDefs
kind: Literal[Kind.QAGenModel]
instance-attribute
¶
QAGenReqSpec
¶
Bases: StrictModel
generateAnswers: GenerateAnswers
instance-attribute
¶
server
¶
config
¶
inference_types
¶
AppModelInfoOutput = Union[NLPInfoOutput, QAGenInfoOutput]
module-attribute
¶
AppPredInput = Union[NLPAppPredInput, QAGenAppPredInput]
module-attribute
¶
CtrlPredInput = Union[NLPReqSpec, QAGenReqSpec]
module-attribute
¶
CtrlPredOutput = Union[NLPCtrlPredOutput, QAGenCtrlPredOutput]
module-attribute
¶
model_app
¶
logger = logging.getLogger('cps-fastapi')
module-attribute
¶
ModelApp
¶
app = FastAPI()
instance-attribute
¶
__init__(settings)
¶
register_model(model, name=None, controller=None)
¶
Registers a model with the app.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
model | BaseDSModel | the model to register. | required |
name | Optional[str] | an optional name under which to register the model; if not set, the model's default name is used. | None |
controller | Optional[BaseController] | an optional custom controller to use; if not set, the default controller for the kind is used. | None |