communications-mining
latest
false
- API docs
- CLI
- Integration guides
- Blog
- How machines learn to understand words: a guide to embeddings in NLP
- Prompt-based learning with Transformers
- Efficient Transformers II: knowledge distillation & fine-tuning
- Efficient Transformers I: attention mechanisms
- Deep hierarchical unsupervised intent modelling: getting value without training data
- Fixing annotating bias with Communications Mining
- Active learning: better ML models in less time
- It's all in the numbers - assessing model performance with metrics
- Why model validation is important
- Comparing Communications Mining and Google AutoML for conversational data intelligence
Create a stream
Communications Mining Developer Guide
Last updated Nov 19, 2024
Create a stream
/api/v1/datasets/<project>/<dataset_name>/streams
/api/v1/datasets/<project>/<dataset_name>/streams
Permissions required: Streams admin, View labels
- Bash
curl -X PUT 'https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams' \ -H "Authorization: Bearer $REINFER_TOKEN" \ -H "Content-Type: application/json" \ -d '{ "stream": { "comment_filter": { "user_properties": { "number:Spend": { "maximum": 100000, "minimum": 100 }, "number:Transactions": { "one_of": [ 1 ] }, "string:Country": { "one_of": [ "uk", "de" ] } } }, "description": "Used by ACME RPA to create tickets for disputes.", "model": { "label_thresholds": [ { "name": [ "Some Label" ], "threshold": 0.37 }, { "name": [ "Another Label" ], "threshold": 0.46 }, { "name": [ "Parent Label", "Child Label" ], "threshold": 0.41 } ], "version": 8 }, "name": "dispute", "title": "Collateral Disputes" } }'
curl -X PUT 'https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams' \ -H "Authorization: Bearer $REINFER_TOKEN" \ -H "Content-Type: application/json" \ -d '{ "stream": { "comment_filter": { "user_properties": { "number:Spend": { "maximum": 100000, "minimum": 100 }, "number:Transactions": { "one_of": [ 1 ] }, "string:Country": { "one_of": [ "uk", "de" ] } } }, "description": "Used by ACME RPA to create tickets for disputes.", "model": { "label_thresholds": [ { "name": [ "Some Label" ], "threshold": 0.37 }, { "name": [ "Another Label" ], "threshold": 0.46 }, { "name": [ "Parent Label", "Child Label" ], "threshold": 0.41 } ], "version": 8 }, "name": "dispute", "title": "Collateral Disputes" } }' - Node
const request = require("request"); request.put( { url: "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams", headers: { Authorization: "Bearer " + process.env.REINFER_TOKEN, }, json: true, body: { stream: { comment_filter: { user_properties: { "number:Spend": { maximum: 100000, minimum: 100 }, "number:Transactions": { one_of: [1] }, "string:Country": { one_of: ["uk", "de"] }, }, }, description: "Used by ACME RPA to create tickets for disputes.", model: { label_thresholds: [ { name: ["Some Label"], threshold: 0.37 }, { name: ["Another Label"], threshold: 0.46 }, { name: ["Parent Label", "Child Label"], threshold: 0.41 }, ], version: 8, }, name: "dispute", title: "Collateral Disputes", }, }, }, function (error, response, json) { // digest response console.log(JSON.stringify(json, null, 2)); } );
const request = require("request"); request.put( { url: "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams", headers: { Authorization: "Bearer " + process.env.REINFER_TOKEN, }, json: true, body: { stream: { comment_filter: { user_properties: { "number:Spend": { maximum: 100000, minimum: 100 }, "number:Transactions": { one_of: [1] }, "string:Country": { one_of: ["uk", "de"] }, }, }, description: "Used by ACME RPA to create tickets for disputes.", model: { label_thresholds: [ { name: ["Some Label"], threshold: 0.37 }, { name: ["Another Label"], threshold: 0.46 }, { name: ["Parent Label", "Child Label"], threshold: 0.41 }, ], version: 8, }, name: "dispute", title: "Collateral Disputes", }, }, }, function (error, response, json) { // digest response console.log(JSON.stringify(json, null, 2)); } ); - Python
import json import os import requests response = requests.put( "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams", headers={"Authorization": "Bearer " + os.environ["REINFER_TOKEN"]}, json={ "stream": { "name": "dispute", "title": "Collateral Disputes", "description": "Used by ACME RPA to create tickets for disputes.", "model": { "version": 8, "label_thresholds": [ {"name": ["Some Label"], "threshold": 0.37}, {"name": ["Another Label"], "threshold": 0.46}, { "name": ["Parent Label", "Child Label"], "threshold": 0.41, }, ], }, "comment_filter": { "user_properties": { "string:Country": {"one_of": ["uk", "de"]}, "number:Spend": {"minimum": 100, "maximum": 100000}, "number:Transactions": {"one_of": [1]}, } }, } }, ) print(json.dumps(response.json(), indent=2, sort_keys=True))
import json import os import requests response = requests.put( "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/streams", headers={"Authorization": "Bearer " + os.environ["REINFER_TOKEN"]}, json={ "stream": { "name": "dispute", "title": "Collateral Disputes", "description": "Used by ACME RPA to create tickets for disputes.", "model": { "version": 8, "label_thresholds": [ {"name": ["Some Label"], "threshold": 0.37}, {"name": ["Another Label"], "threshold": 0.46}, { "name": ["Parent Label", "Child Label"], "threshold": 0.41, }, ], }, "comment_filter": { "user_properties": { "string:Country": {"one_of": ["uk", "de"]}, "number:Spend": {"minimum": 100, "maximum": 100000}, "number:Transactions": {"one_of": [1]}, } }, } }, ) print(json.dumps(response.json(), indent=2, sort_keys=True)) - Response
{ "status": "ok", "stream": { "context": "0", "created_at": "2019-08-03T12:30:00.123456Z", "dataset_id": "abcdef0123456789", "description": "Used by ACME RPA to create tickets for disputes.", "id": "0123456789abcdef", "model": { "version": 8 }, "name": "dispute", "title": "Collateral Disputes", "updated_at": "2019-08-03T12:30:00.123456Z" } }
{ "status": "ok", "stream": { "context": "0", "created_at": "2019-08-03T12:30:00.123456Z", "dataset_id": "abcdef0123456789", "description": "Used by ACME RPA to create tickets for disputes.", "id": "0123456789abcdef", "model": { "version": 8 }, "name": "dispute", "title": "Collateral Disputes", "updated_at": "2019-08-03T12:30:00.123456Z" } }
Streams enable persistent, stateful iteration through comments in a dataset, with predicted labels and general fields computed using a pinned model.
Once a stream is created, the fetch and advance methods can be used to iterate through comments.
NAME | TYPE | REQUIRED | DESCRIPTION |
---|---|---|---|
name | string | yes | API name for the stream, used in URLs. Must be unique within a dataset and must match `[A-Za-z0-9-_]{1,256}`. |
title | string | no | One-line human-readable title for the stream. |
description | string | no | A longer description of the stream. |
model | Model | no | If specified, comments fetched from this stream will contain predictions from a pinned model. |
comment_filter | CommentFilter | no | If specified, comments not matching the filter will not be returned. See the documentation on fetching from a stream for details on how the comment filter will affect the results returned by the stream. |
Where `Model` has the following format:
NAME | TYPE | REQUIRED | DESCRIPTION |
---|---|---|---|
version | integer | yes | A model version that has been pinned via the Models page. |
label_thresholds | array<LabelThreshold> | no | If set, only values matching the given `label_thresholds` are returned. If not set, all labels and all prediction values will be returned. |
Where `LabelThreshold` has the following format:
NAME | TYPE | REQUIRED | DESCRIPTION |
---|---|---|---|
name | array<string> | yes | The name of the label to be returned, formatted as a list of hierarchical labels. For instance, the label "Some Label" will have the format `["Some Label"]`, and the label "Parent Label > Child Label" will have the format `["Parent Label", "Child Label"]`. |
threshold | number | yes | The confidence threshold to use for the label (a number between 0.0 and 1.0). The label will only be returned for a comment if its prediction is above this threshold. |
Where `CommentFilter` has the following format:
NAME | TYPE | REQUIRED | DESCRIPTION |
---|---|---|---|
user_properties | UserPropertyFilter | no | A filter that applies to the user properties of a comment. For more on user properties, see the Comment Reference. |
The `UserPropertyFilter` is a map of user property name to filter. String properties may be filtered to values in a set (`{"one_of": ["val_1", "val_2"]}`). Number properties may be filtered either to values in a set (`{"one_of": [123, 456]}`) or to a range (`{"minimum": 123, "maximum": 456}`).