Sparse Index: MongoDB

All sparse indexes are unique index, but not all unique indexes are sparse! There are situations where we want to create unique index on key/field which is not present in all documents. Wherever the key is not present, it’s value will be treated as NULL. If more than 1 document has NULL value to the key we want to make as unique, then it violates unique key rule. In such situations we can make use of Sparse index and create unique key on only those documents which has the key in it.

sparse-index-unique-key-mongodb

example: database name
company: collection name

Insert documents

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
MongoDB shell version: 2.6.1
connecting to: test
> use example
switched to db example
 
> db.company.insert({name: "Apple", product: "iPhone"});
WriteResult({ "nInserted" : 1 })
 
> db.company.insert({name: "Motorola", product: "Smart Watch"});
WriteResult({ "nInserted" : 1 })
 
> db.company.insert({name: "Technotip"});
WriteResult({ "nInserted" : 1 })
 
> db.company.insert({name: "Google"});
WriteResult({ "nInserted" : 1 })
 
 
> db.company.find().pretty()
{
        "_id" : ObjectId("53d9f5125d1942042b4e092b"),
        "name" : "Apple",
        "product" : "iPhone"
}
{
        "_id" : ObjectId("53d9f52d5d1942042b4e092c"),
        "name" : "Motorola",
        "product" : "Smart Watch"
}
{ "_id" : ObjectId("53d9f5385d1942042b4e092d"), "name" : "Technotip" }
{ "_id" : ObjectId("53d9f53f5d1942042b4e092e"), "name" : "Google" }

We have 4 documents in “company” collection. First 2 documents has “name” and “product” keys. Last 2 documents has only “name” key.

Sparse Index – Unique Key: MongoDB


[youtube https://www.youtube.com/watch?v=6pxI5j6TZRU]

YouTube Link: https://www.youtube.com/watch?v=6pxI5j6TZRU [Watch the Video In Full Screen.]



Duplicate key error

1
2
3
4
5
6
7
8
9
> db.company.ensureIndex({product: 1}, {unique: true});
{
        "createdCollectionAutomatically" : false,
        "numIndexesBefore" : 1,
        "ok" : 0,
        "errmsg" : "E11000 duplicate key error index: example.company.$product_1
  dup key: { : null }",
        "code" : 11000
}

Since last 2 documents do not have “product” key, its value will be treated as “NULL”. Since both these documents have value of “product” as “NULL”, trying to create unique key on “product” throws duplicate key error.

Sparse Index

1
2
3
4
5
6
7
> db.company.ensureIndex({product: 1}, {unique: true, sparse: true});
{
        "createdCollectionAutomatically" : false,
        "numIndexesBefore" : 1,
        "numIndexesAfter" : 2,
        "ok" : 1
}

This creates a sparse index on “product” key/field.

system.indexes collection

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
> db.system.indexes.find().pretty()
{
        "v" : 1,
        "key" : {
                "_id" : 1
        },
        "name" : "_id_",
        "ns" : "example.company"
}
{
        "v" : 1,
        "unique" : true,
        "key" : {
                "product" : 1
        },
        "name" : "product_1",
        "ns" : "example.company",
        "sparse" : true
}

The “system.indexes” collection shows that the key “product” is a unique key and is a sparse index.

sort() method

1
2
3
4
5
6
7
8
9
10
11
12
13
> db.company.find().sort({product: 1}).pretty();
{ "_id" : ObjectId("53d9f5385d1942042b4e092d"), "name" : "Technotip" }
{ "_id" : ObjectId("53d9f53f5d1942042b4e092e"), "name" : "Google" }
{
        "_id" : ObjectId("53d9f52d5d1942042b4e092c"),
        "name" : "Motorola",
        "product" : "Smart Watch"
}
{
        "_id" : ObjectId("53d9f5125d1942042b4e092b"),
        "name" : "Apple",
        "product" : "iPhone"
}

This command sorts all the documents in lexicographical order. The output includes all the documents. Those documents which do not have “product” key in them are listed first.

Basic Cursor

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
> db.company.find().sort({product: 1}).explain()
{
        "cursor" : "BasicCursor",
        "isMultiKey" : false,
        "n" : 4,
        "nscannedObjects" : 4,
        "nscanned" : 4,
        "nscannedObjectsAllPlans" : 4,
        "nscannedAllPlans" : 4,
        "scanAndOrder" : true,
        "indexOnly" : false,
        "nYields" : 0,
        "nChunkSkips" : 0,
        "millis" : 0,
        "server" : "Satish-PC:27017",
        "filterSet" : false
}

Since the command is even listing/sorting the documents which do not have “product” key in them, it’s simply making use of Basic Cursor – which do not help in optimizing the command/query performance.

hint() method

1
2
3
4
5
6
7
8
9
10
11
> db.company.find().sort({product: 1}).hint({product: 1}).pretty();
{
        "_id" : ObjectId("53d9f52d5d1942042b4e092c"),
        "name" : "Motorola",
        "product" : "Smart Watch"
}
{
        "_id" : ObjectId("53d9f5125d1942042b4e092b"),
        "name" : "Apple",
        "product" : "iPhone"
}

hint() method tells the mongoDB server to operate only on the sparse key.. hence only retrieving and operating on the documents which has the sparse key in them.

Btree Cursor

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
> db.company.find().sort({product: 1}).hint({product: 1}).explain()
{
        "cursor" : "BtreeCursor product_1",
        "isMultiKey" : false,
        "n" : 2,
        "nscannedObjects" : 2,
        "nscanned" : 2,
        "nscannedObjectsAllPlans" : 2,
        "nscannedAllPlans" : 2,
        "scanAndOrder" : false,
        "indexOnly" : false,
        "nYields" : 0,
        "nChunkSkips" : 0,
        "millis" : 0,
        "indexBounds" : {
                "product" : [
                        [
                                {
                                        "$minElement" : 1
                                },
                                {
                                        "$maxElement" : 1
                                }
                        ]
                ]
        },
        "server" : "Satish-PC:27017",
        "filterSet" : false
}

After using hint() method, mongoDB server is only looking for documents which has the sparse index key in them, so it can directly look for the sparse index key inside “system.indexes” collection, hence it’s using Btree Cursor – and is efficient.

Remove Duplicate Documents: MongoDB

We learnt how to create unique key/index using {unique: true} option with ensureIndex() method. Now lets see how we can create unique key when there are duplicate entries/documents already present inside the collection.

dropDups-unique-key-index-mongodb

Insert documents

1
2
3
4
5
6
7
8
9
10
11
12
> db.system.indexes.find()
{ "v" : 1, "key" : { "_id" : 1 }, "name" : "_id_", "ns" : "foo.test" }
 
 
> db.test.insert({name: "Satish", age: 27});
WriteResult({ "nInserted" : 1 })
 
> db.test.insert({name: "Kiran", age: 28});
WriteResult({ "nInserted" : 1 })
 
> db.test.insert({name: "Satish", age: 27});
WriteResult({ "nInserted" : 1 })

Here we have 3 documents. First and the last document has same value for “name” and “age” fields.

dropDups() To Remove Duplicate Documents: MongoDB


[youtube https://www.youtube.com/watch?v=aQXdtDWKBiU]

YouTube Link: https://www.youtube.com/watch?v=aQXdtDWKBiU [Watch the Video In Full Screen.]



Creating unique key on field “name”

1
2
3
4
5
6
7
8
9
> db.test.ensureIndex({name: 1}, {unique: true});
{
        "createdCollectionAutomatically" : false,
        "numIndexesBefore" : 1,
        "ok" : 0,
        "errmsg" : "E11000 duplicate key error index: foo.test.$name_1  dup key:
 { : \"Satish\" }",
        "code" : 11000
}

This creates error, as the collection “test” already has duplicate entries/documents.

Create Unique Key by dropping random duplicate entries

1
2
3
4
5
6
7
8
9
10
11
> db.test.ensureIndex({name: 1}, {unique: true, dropDups: true});
{
        "createdCollectionAutomatically" : false,
        "numIndexesBefore" : 1,
        "numIndexesAfter" : 2,
        "ok" : 1
}
 
> db.test.find();
{ "_id" : ObjectId("53d8f1268019dce2ce61eb86"), "name" : "Satish", "age" : 27 }
{ "_id" : ObjectId("53d8f12f8019dce2ce61eb87"), "name" : "Kiran", "age" : 28 }

dropDups() method retains only 1 document randomly and deletes/removes/drops all other duplicate entries/documents permanently.

Note: Since the documents are deleted randomly and can not be restored, you need to be very careful while making use of dropDup() method.

Creating Unique Key/index: MongoDB

We have learnt how to create a key/index so far – today lets learn how to create unique key/index in MongoDB.

creating-unique-key-index-mongodb

Related Read:
ObjectId ( _id ) as Primary Key: MongoDB
index creation: MongoDB

foo: database name
name: collection name

Primary Key in MongoDB: _id

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
> db.name.insert({_id: 1, a: 1});
WriteResult({ "nInserted" : 1 })
 
> db.system.indexes.find()
{ "v" : 1, "key" : { "_id" : 1 }, "name" : "_id_", "ns" : "foo.name" }
 
> db.name.insert({_id: 1, a: 2});
WriteResult({
        "nInserted" : 0,
        "writeError" : {
                "code" : 11000,
                "errmsg" : "insertDocument :: caused by :: 11000 E11000 
                            duplicate key error index: foo.name.$_id_  dup key: { : 1.0 }"
        }
})

Since “_id” is treated as primary key in mongoDB, we can’t insert duplicate values to it. In above case, we are trying to insert value of “_id” as 1 twice – the second time around it threw an error stating the entered value as duplicate.

Related Read:
DBMS Basics: Getting Started Guide
Primary Foreign Unique Keys, AUTO_INCREMENT: MySQL
Primary Key & Foreign Key Implementation: MySQL

Creating Unique Key/index: MongoDB


[youtube https://www.youtube.com/watch?v=QEy1IctH99w]

YouTube Link: https://www.youtube.com/watch?v=QEy1IctH99w [Watch the Video In Full Screen.]



Creating Key/Index

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
> db.name.insert({a: 1});
WriteResult({ "nInserted" : 1 })
 
> db.name.find()
{ "_id" : ObjectId("53d8cadbbbfe6d81d0bcc364"), "a" : 1 }
{ "_id" : 1, "a" : 1 }
 
> db.name.ensureIndex({a: 1});
{
        "createdCollectionAutomatically" : false,
        "numIndexesBefore" : 1,
        "numIndexesAfter" : 2,
        "ok" : 1
}
> db.system.indexes.find()
{ "v" : 1, "key" : { "_id" : 1 }, "name" : "_id_", "ns" : "foo.name" }
{ "v" : 1, "key" : { "a" : 1 }, "name" : "a_1", "ns" : "foo.name" }

Here we create index on field “a”.

Inserting duplicate values into key field

1
2
3
4
5
6
7
> db.name.insert({a: 1});
WriteResult({ "nInserted" : 1 })
 
> db.name.find()
{ "_id" : ObjectId("53d8cadbbbfe6d81d0bcc364"), "a" : 1 }
{ "_id" : 1, "a" : 1 }
{ "_id" : ObjectId("53d8cb4dbbfe6d81d0bcc365"), "a" : 1 }

insert operation simply inserts the duplicate value to field “a” even though its made as a key/index.

Removing documents and Key/Index on field “a”

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
> db.name.find()
{ "_id" : ObjectId("53d8cadbbbfe6d81d0bcc364"), "a" : 1 }
{ "_id" : 1, "a" : 1 }
{ "_id" : ObjectId("53d8cb4dbbfe6d81d0bcc365"), "a" : 1 }
 
> db.name.remove({a: 1});
WriteResult({ "nRemoved" : 3 })
 
> db.name.find()
 
> db.name.dropIndex({a: 1});
{ "nIndexesWas" : 2, "ok" : 1 }
 
> db.system.indexes.find()
{ "v" : 1, "key" : { "_id" : 1 }, "name" : "_id_", "ns" : "foo.name" }

Before implementing unique key on field “a” we need to first remove the duplicate entries present inside our collection orelse it’ll through errors. Here we also remove the index/key on “a”, so that we can create unique key/index on “a”.

Creating Unique key/index on field “a”

1
2
3
4
5
6
7
> db.name.ensureIndex({a: 1}, {unique: true});
{
        "createdCollectionAutomatically" : false,
        "numIndexesBefore" : 1,
        "numIndexesAfter" : 2,
        "ok" : 1
}

To create unique key/index, we need to make use of ensureIndex() method – first parameter being the field name to be made as unique key along with it’s value 1 or -1. 1 signifies ascending order, -1 signifies descending order. The second parameter {unique: true}, specifies that the key/index must be unique key/index, like that of “_id”.

Duplicate key error on our Unique Key!

1
2
3
4
5
6
7
8
9
10
11
12
> db.name.find()
{ "_id" : ObjectId("53d8cb85bbfe6d81d0bcc366"), "a" : 1 }
 
> db.name.insert({a: 1});
WriteResult({
        "nInserted" : 0,
        "writeError" : {
                "code" : 11000,
                "errmsg" : "insertDocument :: caused by :: 11000 E11000 
                            duplicate key error index: foo.name.$a_1  dup key: { : 1.0 }"
        }
})

Now if we try to insert duplicate values into field “a” it throws duplicate key error.

Fetch Data From MongoDB: Node.js

What’s the use of the data in database, if we can’t fetch it and display on a web page! Today’s video tutorial concentrates on fetching data out of our mongoDB collection and displaying it on the web page.

fetch-data-from-mongoDB-nodejs

Related Read: Save data To MongoDB: Node.js

Route to display registered users
app.js

1
2
3
4
5
6
app.get('/view', function(req, res){
user.find({}, function(err, docs){
if(err) res.json(err);
else    res.render('index', {users: docs});
});
});

Once the user visits /view route, she will be presented with all the registered users information. Inside /view route, we fetch all the user information by using mongoDB’s find() method. find() takes 2 parameters – first parameter is an object with condition, second parameter is a callback method. If we pass in empty object as first parameter, it means, fetch all the data – similar to SELECT * in sql.

If find() could successfully fetch the data out of mongoDB collection, it renders index.jade file and passes an object to it – which contains information of all the fetched users.

Template To Display User Information
view/index.jade

1
2
3
ul
each user in users
 li #{user.name}: #{user.age}: #{user._id}

Here we loop through users object, fetch individual user information, and display as a list item.

Fetch Data From MongoDB: Node.js


[youtube https://www.youtube.com/watch?v=U0zK-Nb2vn8]

YouTube Link: https://www.youtube.com/watch?v=U0zK-Nb2vn8 [Watch the Video In Full Screen.]



Note: Since email address has been made as _id, each registered user will have a unique email address associated with her.

Fetch individual user information
app.js

1
2
3
4
5
6
app.get('/view', function(req, res){
user.find({_id: '[email protected]'}, function(err, docs){
if(err) res.json(err);
else    res.render('index', {users: docs});
});
});

Observe the find() methods first parameter. Now we are passing _id to it, which means, it has to find and retrieve only the information related to the user with that _id.

Full Code To Insert And Fetch Data into/from MongoDB
app.js

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
var express = require('express');
var http = require('http');
var path = require('path');
var mongoose = require('mongoose');
 
var app = express();
 
// all environments
app.set('port', process.env.PORT || 3000);
app.set('views', __dirname + '/views');
app.set('view engine', 'jade');
 
app.use(express.bodyParser());
app.use(express.methodOverride());
app.use(app.router);
app.use(express.static(path.join(__dirname, 'public')));
 
mongoose.connect('mongodb://localhost/Company');
 
var Schema = new mongoose.Schema({
_id    : String,
name: String,
age   : Number
});
 
var user = mongoose.model('emp', Schema);
 
app.get('/view', function(req, res){
user.find({}, function(err, docs){
if(err) res.json(err);
else    res.render('index', {users: docs});
});
});
 
app.post('/new', function(req, res){
new user({
_id    : req.body.email,
name: req.body.name,
age   : req.body.age
}).save(function(err, doc){
if(err) res.json(err);
else    res.redirect('/view');
});
});
 
var server = http.createServer(app).listen(app.get('port'), function(){
  console.log('Express server listening on port ' + app.get('port'));
});

To make this work, go to Save data To MongoDB: Node.js tutorial page and copy the index.html page code and create one for your project and put it inside public directory. Now you can directly access root(html form), enter user information and submit, which redirects to /view route, wherein you get the information of all the registered users.

Note: Make sure MongoDB server is running and turned on, when you start Node.js server and access your application from web browser.

Related Read: MongoDB – Getting Started Guide