JSearch in 5 Minutes

Aries Yuwono

Last updated October 15, 2015

Introduction

JSearch is a server-side, high-level JavaScript API that makes common search tasks (search, facets, and aggregates) easy. It's an easier path to learn than the cts.* API for novice users and a shortcut for applying what you already know for more advanced users. JSearch is available in MarkLogic 8.0-4 and above. This tutorial will provide a quick introduction to using the API.

Indexes setup

Before we begin with JSearch, let's set up our database with the indexes that we will use for the examples. We're using the Documents database for our examples, but feel free to create a new database (with a forest) of your choice if you wish. You can run this JavaScript code in Query Console (http://localhost:8000/qconsole) with the Content Source pointing to the Documents database to apply the setup.

// Load the Admin API, read the current configuration, and resolve the
// database that the indexes will be added to.
var admin = require('/MarkLogic/admin');
var config = admin.getConfiguration();
var dbid = xdmp.database('Documents');

// Element range index specifications, one per element used by the examples:
// city (string), popularity (int), distance (double) and date (date).
var rangeIndexes = [
  admin.databaseRangeElementIndex('string', '', 'city', 'http://marklogic.com/collation/', fn.false()),
  admin.databaseRangeElementIndex('int', '', 'popularity', '', fn.false()),
  admin.databaseRangeElementIndex('double', '', 'distance', '', fn.false()),
  admin.databaseRangeElementIndex('date', '', 'date', '', fn.false())
];

// Word lexicon on "city" and geospatial point index on "latLonPoint".
var cityLexicon = admin.databaseElementWordLexicon('', 'city', 'http://marklogic.com/collation/');
var geoIndex = admin.databaseGeospatialElementIndex('', 'latLonPoint', 'wgs84', fn.false());

// Each add call returns an updated configuration, so thread it through.
rangeIndexes.forEach(function (rangeIndex) {
  config = admin.databaseAddRangeElementIndex(config, dbid, rangeIndex);
});
config = admin.databaseAddElementWordLexicon(config, dbid, cityLexicon);
config = admin.databaseAddGeospatialElementIndex(config, dbid, geoIndex);

// Nothing is applied until the accumulated configuration is saved.
admin.saveConfiguration(config);
'Configuration Loaded';

Dataset

This city dataset contains JSON and XML documents. They are inserted into different collections (e.g. "america", "asia", ...). Some of the documents also have a quality metadata value. Run this script in Query Console against the Documents database to insert the data.

// Sample data for the tutorial: three JSON documents and two XML documents,
// all under /example/jsearch/. Each xdmp.documentInsert call passes the URI,
// the document content, null for the permissions argument, a collection name
// and, for some documents, a quality value.

// This script modifies the database, so it must be declared as an update.
declareUpdate();

// London: collection "europe", quality 4.
xdmp.documentInsert(
  '/example/jsearch/doc1.json',
  {
    'city': 'london',
    'distance': 50.4,
    'date': '2007-01-01',
    'metro': true,
    'description': 'Two recent discoveries indicate probable very early settlements near the Thames',
    'popularity': 5,
    'location': {
      'latLonPoint': '51.50, -0.12',
      'latLonPair': {
        'lat': 51.50,
        'long': -0.12
      },
      'latLonParent': {
        'latLonChild': '51.50, -0.12'
      }
    }
  },
  null,
  'europe',
  4
);

// New York: collection "america", quality 1.
xdmp.documentInsert(
  '/example/jsearch/doc2.json',
  {
    'city': 'new york',
    'distance': 23.3,
    'date': '2006-06-23',
    'metro': true,
    'description': 'Henry Hudsons 1609 voyage marked the beginning of European involvement with the area',
    'popularity': 5,
    'location': {
      'latLonPoint': '40.71, -74.01',
      'latLonPair': {
        'lat': 40.71,
        'long': -74.01
      },
      'latLonParent': {
        'latLonChild': '40.71, -74.01'
      }
    }
  },
  null,
  'america',
  1
);

// New Jersey: collection "america", no quality argument.
xdmp.documentInsert(
  '/example/jsearch/doc3.json',
  {
    'city': 'new jersey',
    'distance': 12.9,
    'date': '1971-12-23',
    'metro': false,
    'description': 'American forces under Washington met the forces under General Henry Clinton',
    'popularity': 2,
    'location': {
      'latLonPoint': '40.72, -74.07',
      'latLonPair': {
        'lat': 40.72,
        'long': -74.07
      },
      'latLonParent': {
        'latLonChild': '40.72, -74.07'
      }
    }
  },
  null,
  'america'
);

// Beijing (XML, parsed from a string with xdmp.unquote): collection "asia", quality 5.
xdmp.documentInsert(
  '/example/jsearch/doc4.xml',
  xdmp.unquote('<doc><city>beijing</city><distance direction=\"east\">134.5</distance><date>1981-11-09</date><metro rate=\"3\">true</metro><description>The Miyun Reservoir, on the upper reaches of the Chaobai River, is the largest reservoir within the municipality</description><popularity>5</popularity><location><latLonPoint>39.90,116.40</latLonPoint><latLonParent><latLonChild>39.90,116.40</latLonChild></latLonParent><latLonPair><lat>39.90</lat><long>116.40</long></latLonPair><latLonAttrPair lat=\"39.90\" long=\"116.40\"/></location></doc>'),
  null,
  'asia',
  5
);

// Cape Town (XML): collection "africa", no quality argument.
xdmp.documentInsert(
  '/example/jsearch/doc5.xml',
  xdmp.unquote('<doc><city>cape town</city><distance direction=\"south\">377.9</distance><date>1999-04-22</date><metro rate=\"2\">true</metro><description>The earliest known remnants in the region were found at Peers cave in Fish Hoek</description><popularity>3</popularity><location><latLonPoint>-33.91,18.42</latLonPoint><latLonParent><latLonChild>-33.91,18.42</latLonChild></latLonParent><latLonPair><lat>-33.91</lat><long>18.42</long></latLonPair><latLonAttrPair lat=\"-33.91\" long=\"18.42\"/></location></doc>'),
  null,
  'africa'
);

// The last expression is the value the script reports when it finishes.
'Documents Inserted';

Let's say we want to search cities in the "america" continent; we can do so by searching with a collection scope. Calling jsearch.documents() without any where() clause would match all documents in the database, but because we have specified the collections() scope, this query will match all documents in the "america" collection. Please note that for a larger dataset you'd only see the first 10 documents. We'll take a look at how you can apply pagination to the result set to display potentially all the results.

// Scope the search to the "america" collection; with no where() clause,
// documents() matches everything inside that scope.
var jsearch = require('/MarkLogic/jsearch');
var americaSearch = jsearch.collections('america').documents();
var output = americaSearch.result();
output;

Our output will return the two documents in the "america" collection.

{ 
  "results":[ 
    { 
      "index":0,
      "uri":"/example/jsearch/doc2.json",
      "score":256,
      "confidence":0,
      "fitness":0,
      "document":{ 
        "city":"new york",
        "distance":23.3,
        "date":"2006-06-23",
        "metro":true,
        "description":"Henry Hudsons 1609 voyage marked the beginning of European involvement with the area",
        "popularity":5,
        "location":{ 
          "latLonPoint":"40.71, -74.01",
          "latLonPair":{ 
            "lat":40.71,
            "long":-74.01
          },
          "latLonParent":{ 
            "latLonChild":"40.71, -74.01"
          }
        }
      }
    },
    { 
      "index":1,
      "uri":"/example/jsearch/doc3.json",
      "score":0,
      "confidence":0,
      "fitness":0,
      "document":{ 
        "city":"new jersey",
        "distance":12.9,
        "date":"1971-12-23",
        "metro":false,
        "description":"American forces under Washington met the forces under General Henry Clinton",
        "popularity":2,
        "location":{ 
          "latLonPoint":"40.72, -74.07",
          "latLonPair":{ 
            "lat":40.72,
            "long":-74.07
          },
          "latLonParent":{ 
            "latLonChild":"40.72, -74.07"
          }
        }
      }
    }
  ],
  "estimate":2
}

Adding .where() will narrow the search. Please note that .where() can also be added to a collection-scoped query.

// Match only documents whose "city" property has the value "london".
var jsearch = require('/MarkLogic/jsearch');
var londonQuery = cts.jsonPropertyValueQuery('city', 'london');
var output = jsearch.documents().where(londonQuery).result();
output;

The output should return one document that has "london" as the "city".

With orderBy(), you can sort the results based on a specified index. Ordering on a property / element will always require an index.

// Sort every document under /example/jsearch/ by "city", in descending order.
var jsearch = require('/MarkLogic/jsearch');
var inExampleDirectory = cts.directoryQuery('/example/jsearch/');
var byCityDescending = cts.indexOrder(cts.jsonPropertyReference('city'), 'descending');
var output = jsearch.documents()
  .where(inExampleDirectory)
  .orderBy(byCityDescending)
  .result();
output;

The output will be 5 documents in this order: "new york", "new jersey", "london", "cape town", "beijing".

Adding slice() will apply pagination on the result.

// Paginate the sorted results: return only the first four documents.
var jsearch = require('/MarkLogic/jsearch');
var output =
  jsearch.documents()
  .where(cts.directoryQuery('/example/jsearch/'))
  .orderBy(cts.indexOrder(cts.jsonPropertyReference('city'), 'descending'))
  // slice(start, end): start is zero-based and end is exclusive, so
  // (0, 4) yields four results; (1, 4) would skip the first result and
  // yield only three.
  .slice(0, 4)
  .result();
output;

As we discussed earlier by default you can see the first 10 results. The above query limits the returned documents to only 4. If you'd like to see more than 10 results you can apply the .slice() method to your search. Please make sure that you don't return a large number of documents with your search as it could lead to some unwanted problems.

JSearch provides several ways to tailor the results of your search. The map() provides built-in functions such as snippet and extract. Here is an example of using snippets.

// Use the built-in snippet mapper so each result carries highlighted match text.
var jsearch = require('/MarkLogic/jsearch');
var snippetMapper = {snippet: true};
var output = jsearch.documents()
  .where(cts.wordQuery('new'))
  .map(snippetMapper)
  .result();
output;

The returned document will include match text highlighting.

...
"matches":[ 
        { 
          "path":"fn:doc(\"/example/jsearch/doc2.json\")/text(\"city\")",
          "matchText":[ 
            { 
              "highlight":"new"
            },
            " york"
          ]
        }
      ]
...

extract allows you to return only the portions of a document that match the specified paths.

// Use the built-in extract mapper to return just the /city portion of each match.
var jsearch = require('/MarkLogic/jsearch');
var cityOnly = {
  selected: 'include',
  paths: ['/city']
};
var output = jsearch.documents()
  .where(cts.wordQuery('new'))
  .map({extract: cityOnly})
  .result();
output;

Instead of the entire content of the document, the query will only return the extracted portion.

{ 
  "results":[ 
    { 
      "score":16640,
      "fitness":0.696453452110291,
      "uri":"/example/jsearch/doc2.json",
      "extracted":[ 
        { 
          "city":"new york"
        }
      ],
      "confidence":0.525498688220978,
      "index":0
    },

You can write your own function for the mapper to produce a custom result.

var jsearch = require('/MarkLogic/jsearch');
 
/**
 * Custom mapper: reshapes one search result into a small summary object.
 *
 * @param result  a search result exposing `uri` and `document.distance`
 * @returns an object with `myUri` (the result's URI) and `myDistance`
 *          (the document's distance followed by " miles")
 */
function mapper(result) {
  var summary = {};
  summary.myUri = result.uri;
  summary.myDistance = result.document.distance + ' miles';
  return summary;
}
 
// Run the word search and pass each matching result through the custom mapper.
var newWordSearch = jsearch.documents().where(cts.wordQuery('new'));
var output = newWordSearch.map(mapper).result();
output;

This mapper will transform the result to:

{ 
  "results":[ 
    { 
      "myUri":"/example/jsearch/doc2.json",
      "myDistance":"23.3 miles"
    },
    { 
      "myUri":"/example/jsearch/doc3.json",
      "myDistance":"12.9 miles"
    }
  ],
  "estimate":2
}

Alternatively to map(), you can use reduce() to reduce the result. In broad strokes, a reducer takes in a previous result and a single value and returns either an item to pass to next invocation of the reducer, or a final result. The output from the final invocation becomes the result. Reducers are well suited for computing aggregates over a set of results.

You can use built-in reduce methods as well as custom ones. We will take a look at one of the built-in reducers later; for now, let's take a look at a custom one:

var jsearch = require('/MarkLogic/jsearch');
 
/**
 * Custom reducer that sums the "distance" property of every matched document.
 *
 * @param previous  value returned by the previous invocation (the running total)
 * @param result    the current search result; its document.distance is added
 * @param i         zero-based position of the current result
 * @param state     reducer state object supplied by the caller (not needed here)
 * @returns the running total including the current result
 */
function reducer(previous, result, i, state) {
  // reduce() is called without a seed, so there is no usable running total
  // on the first result: start the sum there, whether or not it is also the
  // last result. Requiring state.isLast as well would only handle a
  // single-result search and yield NaN for anything longer.
  if (i === 0) {
    return result.document.distance;
  }
  return previous + result.document.distance;
}
// Run the word search and fold the matching results with the custom reducer.
var matchingNew = jsearch.documents().where(cts.wordQuery('new'));
var output = matchingNew.reduce(reducer).result();
output;

The above example adds together the distances of the documents that match the specified word query.

{ 
  "results":36.2,
  "estimate":2
}

Add .withOptions() to toggle various options on/off.

// Search the "america" and "asia" collections with extra diagnostics enabled.
var jsearch = require('/MarkLogic/jsearch');
var searchOptions = {
  returnQueryPlan: true,
  returnEstimate: true,
  returnRelevanceTrace: true
};
var output = jsearch.collections(['america', 'asia'])
  .documents()
  .withOptions(searchOptions)
  .result();
output;

In the above example we are returning things like relevancy tracing and the query execution plan.

It is also possible to parse a query string using cts.parse().

// Turn a query string into a cts query with cts.parse(), then search with it.
var jsearch = require('/MarkLogic/jsearch');
var parsedQuery = cts.parse('london OR new york');
var output = jsearch.documents()
  .where(parsedQuery)
  .slice(0, 10)
  .result();
output;

This should give you "london" and "new york". Notice how this search now understands the MarkLogic Search Grammar.

Search using values() to look up the values in a range index over documents. In this example, where() and match() are used to limit the result.

// List "city" values ending in "n", restricted to documents whose
// popularity is at least 3.
var jsearch = require('/MarkLogic/jsearch');
var popularEnough = cts.jsonPropertyRangeQuery('popularity' , '>=', 3);
var output = jsearch.values('city')
  .where(popularEnough)
  .match('*n')
  .result();
output;

The result should return an array of values, in this case: ["cape town", "london"]

Adding aggregate() on values will apply aggregate functions against an index. For example, find the average, maximum, and minimum distance of the cities.

// Compute the average, maximum and minimum over the "distance" values.
var jsearch = require('/MarkLogic/jsearch');
var aggregateNames = ['avg', 'max', 'min'];
var output = jsearch.values('distance').aggregate(aggregateNames).result();
output;

This will give you:

{
  "avg": 119.8,
  "max": 377.9,
  "min": 12.9
}

You can create range indexes and apply bucket names on them. For example, create facets on "date" and "popularity" with user-defined bucket names. Facets are quite similar to values, but they provide a search report. These help users explore the data set.

var jsearch = require('/MarkLogic/jsearch');

// Decade buckets over "date": each date string is the boundary between the
// bucket named before it and the bucket named after it.
var dateFacet = jsearch.facet('dateFacet', 'date').groupInto([
  jsearch.bucketName('1970s'), '1980-01-01',
  jsearch.bucketName('1980s'), '1990-01-01',
  jsearch.bucketName('1990s'), '2000-01-01',
  jsearch.bucketName('2000s')
]);

// Low / medium / high buckets over "popularity", split at 2 and 4.
var popFacet = jsearch.facet('popFacet', cts.elementReference(xs.QName('popularity')))
  .groupInto([
    jsearch.bucketName('low'), 2,
    jsearch.bucketName('medium'), 4,
    jsearch.bucketName('high')
  ]);

var output = jsearch.facets([dateFacet, popFacet]).result();
output;

The result will give you the frequencies and the values on the buckets.

{ 
  "facets":{ 
    "dateFacet":{ 
      "1970s":{ 
        "value":{ 
          "minimum":"1971-12-23",
          "maximum":"1971-12-23",
          "upperBound":"1980-01-01"
        },
        "frequency":1
      },
      "1980s":{ 
        "value":{ 
          "minimum":"1981-11-09",
          "maximum":"1981-11-09",
          "lowerBound":"1980-01-01",
          "upperBound":"1990-01-01"
        },
        "frequency":1
      },
      "1990s":{ 
        "value":{ 
          "minimum":"1999-04-22",
          "maximum":"1999-04-22",
          "lowerBound":"1990-01-01",
          "upperBound":"2000-01-01"
        },
        "frequency":1
      },
      "2000s":{ 
        "value":{ 
          "minimum":"2006-06-23",
          "maximum":"2007-01-01",
          "lowerBound":"2000-01-01"
        },
        "frequency":2
      }
    },
    "popFacet":{ 
      "medium":{ 
        "value":{ 
          "minimum":2,
          "maximum":3,
          "lowerBound":2,
          "upperBound":4
        },
        "frequency":2
      },
      "high":{ 
        "value":{ 
          "minimum":5,
          "maximum":5,
          "lowerBound":4
        },
        "frequency":3
      }
    }
  }
}

Similar to values lookups, you can look up words in word lexicons over documents. For example, find all words within "city" that contain the letter "n".

// Look up words in the "city" word lexicon that contain the letter "n".
// The module path matches the other examples in this tutorial, which all
// load JSearch as '/MarkLogic/jsearch'.
var jsearch = require('/MarkLogic/jsearch');
var output =
  jsearch.words(jsearch.jsonPropertyLexicon('city'))
  .match('*n*')
  .result();
output;

This should give you:

["beijing", "london", "new", "town"]

You can do geospatial searches on your data using points, circles, boxes, or polygons.

var jsearch = require('/MarkLogic/jsearch');
// Region to search: a circle of radius 10 centred on the point (51.48, -0.31).
// A box can be used instead, e.g. cts.box(49.16, -13.41, 60.85, 1.76).
var searchRegion = cts.circle(10, cts.point(51.48, -0.31));
var output = jsearch.documents()
  .where(cts.jsonPropertyGeospatialQuery('latLonPoint', searchRegion))
  .slice(0, 10)
  .result();
output;

For more features, please check the JSearch function documentation or MarkLogic University's On Demand video series on JSearch.

Contents

Introduction

Indexes setup

Dataset