IXP - Get results from stream

ixp

latest

false

Communications Mining user guide

Get results from stream

API endpoint for fetching comments and predictions from a Communications Mining stream using the recommended /results route, replacing the legacy /fetch route.

Permissions required: Consume streams, View labels, View sources.

Note:

The /results route is the new way of fetching comments and their predictions from a stream, replacing the existing /fetch route (Streams - legacy). We maintain the /fetch route for legacy support, but we recommend that all the new use cases use the /results route, as it supports all the possible use cases, including those using generative extraction.

Bash assignment

curl -X GET 'https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams/dispute/results?max_results=5&max_filtered=15' \
-H "Authorization: Bearer $REINFER_TOKEN"
curl -X GET 'https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams/dispute/results?max_results=5&max_filtered=15' \
-H "Authorization: Bearer $REINFER_TOKEN"

Node assignment

const request = require("request");
request.get(
{
url: "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams/dispute/results?max_results=5&max_filtered=15",
headers: {
Authorization: "Bearer " + process.env.REINFER_TOKEN,
},
},
function (error, response, json) {
// digest response
console.log(JSON.stringify(json, null, 2));
}
);
const request = require("request");
request.get(
{
url: "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams/dispute/results?max_results=5&max_filtered=15",
headers: {
Authorization: "Bearer " + process.env.REINFER_TOKEN,
},
},
function (error, response, json) {
// digest response
console.log(JSON.stringify(json, null, 2));
}
);

Python assignment

import json
import os
import requests
response = requests.get(
"https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams/dispute/results",
headers={"Authorization": "Bearer " + os.environ["REINFER_TOKEN"]},
params={"max_results": 5, "max_filtered": 15},
)
print(json.dumps(response.json(), indent=2, sort_keys=True))
import json
import os
import requests
response = requests.get(
"https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams/dispute/results",
headers={"Authorization": "Bearer " + os.environ["REINFER_TOKEN"]},
params={"max_results": 5, "max_filtered": 15},
)
print(json.dumps(response.json(), indent=2, sort_keys=True))

Response assignment

{
"status": "ok",
"results": [
{
"comment": {
"uid": "18ba5ce699f8da1f.0123456789abcdef",
"id": "0123456789abcdef",
"timestamp": "2018-09-17T09:54:56.332000Z",
"user_properties": {
"number:Messages": 1,
"string:Folder": "Sent (/ Sent)",
"string:Has Signature": "Yes",
"string:Message ID": "<[email protected]>",
"string:Sender": "[email protected]",
"string:Sender Domain": "company.com",
"string:Thread": "<[email protected]>"
},
"messages": [
{
"from": "[email protected]",
"to": [
"[email protected]"
],
"sent_at": "2018-09-17T09:54:56.332000Z",
"body": {
"text": "Hi Bob,\n\nCould you send me today's figures?"
},
"subject": {
"text": "Today's figures"
},
"signature": {
"text": "Thanks,\nAlice"
}
}
],
"text_format": "plain",
"attachments": [],
"source_id": "18ba5ce699f8da1f",
"last_modified": "2024-07-03T13:30:53.991000Z",
"created_at": "2020-12-14T15:07:03.718000Z",
"context": "1",
"has_annotations": true
},
"prediction": {
"taxonomies": [
{
"name": "default",
"labels": [
{
"name": "Margin Call",
"occurrence_confidence": {
"value": 0.9905891418457031,
"thresholds": ["stream"]
},
"extraction_confidence": {
"value": 0.4712367373372217,
"thresholds": []
},
"fields": [
{
"name": "Notification Date",
"value": null
}
]
},
{
"name": "Margin Call > Interest Accrual",
"occurrence_confidence": {
"value": 0.9905891418457031,
"thresholds": []
},
"extraction_confidence": {
"value": 0.9905891418457031,
"thresholds": []
},
"fields": [
{
"name": "Amount",
"value": {
"formatted": "636,000.00"
}
},
{
"name": "Broker number",
"value": null
},
{
"name": "Client name",
"value": null
},
{
"name": "Currency",
"value": {
"formatted": "AUD"
}
}
]
}
],
"general_fields": [
{
"name": "monetary-quantity",
"value": {
"formatted": "636,000.00 GBP"
}
},
{
"name": "MarginCallDateType",
"value": {
"formatted": "2018-09-21 00:00 UTC"
}
},
{
"name": "client-name",
"value": {
"formatted": "Big Client Example Bank"
}
}
]
}
]
},
"continuation": "pmjKYXYBAAADqHUvPkQf1ypNCZFR37vu"
}
],
"num_filtered": 0,
"more_results": true,
"continuation": "pmjKYXYBAAAsXghZ2niXPNP6tOIJtL_8"
}
{
"status": "ok",
"results": [
{
"comment": {
"uid": "18ba5ce699f8da1f.0123456789abcdef",
"id": "0123456789abcdef",
"timestamp": "2018-09-17T09:54:56.332000Z",
"user_properties": {
"number:Messages": 1,
"string:Folder": "Sent (/ Sent)",
"string:Has Signature": "Yes",
"string:Message ID": "<[email protected]>",
"string:Sender": "[email protected]",
"string:Sender Domain": "company.com",
"string:Thread": "<[email protected]>"
},
"messages": [
{
"from": "[email protected]",
"to": [
"[email protected]"
],
"sent_at": "2018-09-17T09:54:56.332000Z",
"body": {
"text": "Hi Bob,\n\nCould you send me today's figures?"
},
"subject": {
"text": "Today's figures"
},
"signature": {
"text": "Thanks,\nAlice"
}
}
],
"text_format": "plain",
"attachments": [],
"source_id": "18ba5ce699f8da1f",
"last_modified": "2024-07-03T13:30:53.991000Z",
"created_at": "2020-12-14T15:07:03.718000Z",
"context": "1",
"has_annotations": true
},
"prediction": {
"taxonomies": [
{
"name": "default",
"labels": [
{
"name": "Margin Call",
"occurrence_confidence": {
"value": 0.9905891418457031,
"thresholds": ["stream"]
},
"extraction_confidence": {
"value": 0.4712367373372217,
"thresholds": []
},
"fields": [
{
"name": "Notification Date",
"value": null
}
]
},
{
"name": "Margin Call > Interest Accrual",
"occurrence_confidence": {
"value": 0.9905891418457031,
"thresholds": []
},
"extraction_confidence": {
"value": 0.9905891418457031,
"thresholds": []
},
"fields": [
{
"name": "Amount",
"value": {
"formatted": "636,000.00"
}
},
{
"name": "Broker number",
"value": null
},
{
"name": "Client name",
"value": null
},
{
"name": "Currency",
"value": {
"formatted": "AUD"
}
}
]
}
],
"general_fields": [
{
"name": "monetary-quantity",
"value": {
"formatted": "636,000.00 GBP"
}
},
{
"name": "MarginCallDateType",
"value": {
"formatted": "2018-09-21 00:00 UTC"
}
},
{
"name": "client-name",
"value": {
"formatted": "Big Client Example Bank"
}
}
]
}
]
},
"continuation": "pmjKYXYBAAADqHUvPkQf1ypNCZFR37vu"
}
],
"num_filtered": 0,
"more_results": true,
"continuation": "pmjKYXYBAAAsXghZ2niXPNP6tOIJtL_8"
}

Once you create a stream, you can query it to fetch comments and their predictions. This includes labels, general fields, and label extractions, containing a set of extraction fields for each instance of that label occurring.

Comment Queue

When you create a stream, you set its initial position to be equal to its creation time. If needed, you can set the stream to a different position (either forwards or backwards in time), using the reset endpoint. The stream returns comments starting from its current position. You determine the position of the comment in the comment queue by the order in which you uploaded the comments.

Advancing Your Position in the Queue

As the stream only returns comments from its current position, you should advance it to the next position after each fetch request, by using the advance endpoint. This way, the API guarantees at least once processing of all comments. If your application fails while processing a batch, it will pick up the same batch on restart.

Note:

Since an application can successfully process a comment but fail at the advance step, you can notice that a comment appears multiple times.

Depending on your application design, you can choose between:

advancing the stream once, for the whole batch. Use the batch's continuation contained in the response.
advancing the stream for each individual comment. Use the comment's continuation, contained in the response.

Comment Filter

If you specify a comment_filter when creating the stream, the results don't include comments not matching the filter, but still count towards the requested max_filtered. You can notice responses where all of max_filtered comments are filtered out, leading to an empty results array. In the following example, you request a batch of 8 comments, all of which are filtered out.

{
  "filtered": 8,
  "results": [],
  "sequence_id": "qs8QcHIBAADJ1p3W2FtmBB3QiOJsCJlR",
  "status": "ok"
}
{
  "filtered": 8,
  "results": [],
  "sequence_id": "qs8QcHIBAADJ1p3W2FtmBB3QiOJsCJlR",
  "status": "ok"
}

Pass the optional max_filtered parameter, to prevent filtered comments from counting towards the requested max_results.

Prediction thresholds

Note:

The legacy /fetch route does not return comments with predictions that did not meet the confidence threshold. In this new /results route, you return all the predictions for a comment, and the confidence``value as well. You also indicate which type(s) of threshold it meets.

  "occurrence_confidence": {
    "value": 0.9905891418457031,
    "thresholds": ["stream"]
  }
  "occurrence_confidence": {
    "value": 0.9905891418457031,
    "thresholds": ["stream"]
  }

The confidence for a prediction 0.9905.. and the thresholds value indicates that the prediction meets the configured threshold for the stream.

When building an automation, look for the stream value, to confirm that the prediction meets the threshold you configured in the stream. Additionally, auto thresholds may be returned, and the expected values for these are: high_recall, high_precision, and balanced.

For more information about generated extractions, and how to work with thresholds, check the Understanding validation on extractions and extraction performance page.

Request Format

NAME	TYPE	REQUIRED	DESCRIPTION
`max_results`	number	no	The number of comments to fetch for this stream. Returns fewer comments if it reaches the end of the batch, or if you filter out comments according to the comment filter.. Max value is 32. Default is 16.
`max_filtered`	number	no	Convenience parameter for streams with a comment filter. When you provide them, up to `max_filtered` filtered comments do not count towards the requested `max_results`. This is useful if you expect a large number of comments to not match the filter. Has no effect on streams without a comment filter. Max value is 1024. Default is null.

Response Format

NAME	TYPE	DESCRIPTION
`status`	string	`ok` if the request is successful, or `error`, in case of an error. To learn more about error responses, check the Overview page.
`num_filtered`	number	Number of comments that were filtered out based on a comment filter. If you created the stream without a filter, this number is always `0`.
`continuation`	string	The batch continuation token. Use it to acknowledge the processing of this batch, and advance the stream to the next batch.
`more_results`	bool	True if there were no additional results in the stream, when you made the request. False otherwise.
`results`	`array<Result>`	An array containing result objects.

Where Result has the following format:

NAME	TYPE	DESCRIPTION
`comment`	Comment	Comment data. For a detailed explanation, check Comment Reference.
`continuation`	string	The comment's continuation token. Used to acknowledge processing of this comment and advance stream to the next comment.
`prediction`	`array<Prediction>`	The prediction for this comment. Is available only if the stream specifies a model version. For more information about generative predictions, check the: Communications Mining™ - Understanding validation on extractions and extraction performance page.

Prediction has the following format:

NAME	TYPE	DESCRIPTION
`taxonomies`	`array<TaxonomyPrediction>`	List of taxonomy predictions. You currently define only one taxonomy per dataset, but you provide it as a list, for future compatibility.

TaxonomyPrediction has the following format:

NAME	TYPE	DESCRIPTION
`name`	string	Name of the taxonomy. The only value is currently `default`.
`labels`	`array<LabelPrediction>`	A list of extracted label predictions with their `occurrence_confidence`, `extraction_confidence` and extracted `fields`. For more information about generative predictions, check the Communications Mining - Understanding validation on extractions and extraction performance page.
`general_fields`	`array<FieldPrediction>`	A list of extracted general field predictions with their `name` and extracted `value`. For more information about generative predictions, check the Communications Mining - Understanding validation on extractions and extraction performance page.

On this page

Comment Queue
Advancing Your Position in the Queue
Comment Filter
Prediction thresholds
Request Format
Response Format

Was this page helpful?

PREVIOUSDelete a stream

NEXTFetch comments from a stream (legacy)

Comment Queue​

Advancing Your Position in the Queue​

Comment Filter​

Prediction thresholds​

Request Format​

Response Format​

Was this page helpful?

Comment Queue

Advancing Your Position in the Queue

Comment Filter

Prediction thresholds

Request Format

Response Format