Understanding node process (2)

If you have been broken, mend yourself, knowing that you will become more beautiful. And always think before you act: an apology cannot repair something you’ve done without thought.

Last week we talked about multiple processes and inter-process communication. Today we’re going to take a look at their stability.

Having multiple processes work together can improve the utilization of our CPUs to some extent. But how stable is it? What if a process exits unexpectedly?

Node’s child_process module exposes several events, so we can use event listeners to manage our processes.

process restart

Let’s first listen for the exit event and create a new worker when a process exits.

// master.js
// Module-level state of the master process.
var fork = require("child_process").fork,
  // One entry per logical CPU; its length decides how many workers are forked.
  cpus = require("os").cpus(),
  // TCP server whose handle is sent to every worker.
  server = require("net").createServer(),
  // Live workers keyed by pid. The comma after createServer() keeps this inside
  // the `var` list; without it, automatic semicolon insertion ends the
  // declaration early and `workers` becomes an implicit global.
  workers = {};

// The master binds port 1337.
server.listen(1337);

/**
 * Fork one worker, pass it the shared server handle and register it in
 * `workers` under its pid. When that worker exits, its entry is removed
 * from `workers` and a replacement is forked.
 */
var createWorker = function() {
  var child = fork("./test/worker.js");

  // Runs when this particular child has exited.
  var respawn = function() {
    console.log("worker " + child.pid + " exited.");
    delete workers[child.pid];
    createWorker();
  };

  child.send("server", server);
  workers[child.pid] = child;
  console.log("create worker. pid: " + child.pid);
  child.on("exit", respawn);
};

// Start one worker for every entry in `cpus`.
cpus.forEach(function() {
  createWorker();
});

process.on("exit", function() {
  for (pid in workers) {
    workers[pid].kill();
  }
});

The code above defines a createWorker function. For every worker created by this function, the master listens for its exit event and creates a new worker process once it exits. If the master process itself exits, all workers are killed.
The worker.js looks like this:

//worker.js
var http = require('http');

// HTTP server that answers every request with a plain-text line naming the
// process that handled it.
var server = http.createServer();

server.on('request', function(request, response) {
  var body = 'handled by child, pid is ' + process.pid + '\n';
  response.writeHead(200, {'Content-Type': 'text/plain'});
  response.end(body);
});

// The master sends the string 'server' together with a handle. Every
// connection arriving on that handle is forwarded to the local HTTP server.
process.on('message', function(message, handle) {
  if (message !== 'server') {
    return;
  }

  var forward = function(socket) {
    server.emit('connection', socket);
  };
  handle.on('connection', forward);
});

Run node master.js to create those processes.

> node master.js
create worker. pid: 1089
create worker. pid: 1090
create worker. pid: 1091
create worker. pid: 1092

Now let’s kill a process to see what’s going on.

> kill 1089
worker 1089 exited.
create worker. pid: 1280

Aha: one exits, and a new one is created.

Here we just killed a process manually. In most cases, an unexpected exit is caused by an undetected bug. So it is better to use the uncaughtException event to catch uncaught errors and do some synchronous cleanup before shutting down the process. Here, for testing purposes, we just exit the process to see how it works.

var http = require('http');

// HTTP server whose request handler sends the response and then throws,
// so that every request ends in an uncaught exception.
var server = http.createServer();

server.on('request', function(request, response) {
  var body = 'handled by child, pid is ' + process.pid + '\n';
  response.writeHead(200, {'Content-Type': 'text/plain'});
  response.end(body);
  throw new Error("throw exception");
});

// Handle received from the master; stored so the uncaughtException handler
// can close it.
var worker;

// On the 'server' message, remember the handle and forward each of its
// connections to the local HTTP server.
process.on('message', function(message, handle) {
  if (message !== 'server') {
    return;
  }

  worker = handle;
  handle.on('connection', function(socket) {
    server.emit('connection', socket);
  });
});

// Last-resort handler: report the error, ask the master for a replacement,
// stop accepting new connections and exit.
process.on('uncaughtException', function(err) {
  // Record why the worker is going down; otherwise the error is lost.
  console.error(err);

  // process.send only exists when this process has an IPC channel to a parent.
  if (typeof process.send === 'function') {
    process.send({signal: 'suicide'}); //send a suicide signal to master process
  }

  // The exception can fire before the 'server' message delivered the handle;
  // there is nothing to close in that case.
  if (!worker) {
    process.exit(1);
    return;
  }

  // Exit only after the handle has closed. A non-zero code marks the exit as
  // a failure.
  worker.close(function() {
    process.exit(1);
  });
});

To simulate an uncaught exception, we throw an error on every incoming request, so that every time a client connects to the server, a process will exit.

Note that we want the master process to create a replacement as soon as the error happens, instead of waiting for the worker process to exit, so that incoming requests keep being handled as much as possible. This can be done by creating the new worker in the message event handler.

/**
 * Fork a worker, hand it the server handle and track it by pid.
 * A replacement is forked as soon as the worker sends {signal: 'suicide'}.
 * The exit event only removes the worker from `workers`.
 */
var createWorker = function() {
  var child = fork("./test/worker.js");

  // A dying worker announces itself before it has exited.
  var onMessage = function(message) {
    if (message.signal !== 'suicide') {
      return;
    }
    createWorker();
  };

  var onExit = function() {
    console.log("worker " + child.pid + " exited.");
    delete workers[child.pid];
  };

  child.send("server", server);
  workers[child.pid] = child;
  console.log("create worker. pid: " + child.pid);
  child.on("message", onMessage);
  child.on("exit", onExit);
};

limit restarts

One more thing to do. If the error happens while the process is being created, it is not covered by the uncaughtException case. That would cause unlimited process restarts. To avoid this, it is better to limit the number of restarts within a period of time.

//master.js
// Module-level state of the master process.
var fork = require("child_process").fork,
  // One entry per logical CPU; its length decides how many workers are forked.
  cpus = require("os").cpus(),
  // TCP server whose handle is sent to every worker. The trailing comma keeps
  // the names below inside this `var` list instead of making them implicit
  // globals.
  server = require("net").createServer(),
  // Live workers keyed by pid.
  workers = {},
  limit = 10,//restart limit
  // Length in milliseconds (one minute) of the window that `limit` applies to.
  // It is read by isTooFrequently and createWorker but was never declared.
  during = 60 * 1000,
  // Timestamps of the most recent worker creations.
  restart = [];

// The master binds port 1337.
server.listen(1337);

/**
 * Record the current time as a worker-creation attempt and report whether
 * creations are happening too often.
 *
 * Only the newest `limit` timestamps are kept in `restart`.
 *
 * @returns {boolean} true when `limit` timestamps are recorded and the newest
 *   and oldest of them are less than `during` milliseconds apart.
 */
var isTooFrequently = function() {
  restart.push(Date.now());

  // Drop everything older than the newest `limit` entries.
  if (restart.length > limit) {
    restart = restart.slice(-limit);
  }

  // Not enough history yet to judge.
  if (restart.length < limit) {
    return false;
  }

  var newest = restart[restart.length - 1];
  var oldest = restart[0];
  return newest - oldest < during;
};

/**
 * Fork a worker unless workers are being created too frequently.
 *
 * When isTooFrequently() returns true, nothing is forked. Instead a 'giveup'
 * event is emitted on `process` with the number of recorded restarts and the
 * `during` window. Otherwise the worker receives the server handle and is
 * tracked in `workers` by pid. A {signal: 'suicide'} message from it forks a
 * replacement, and its exit removes it from `workers`.
 */
var createWorker = function() {
  if(isTooFrequently()) {
    // `length` does not exist in this scope and used to throw a
    // ReferenceError here; report the size of the restart history instead.
    process.emit('giveup', restart.length, during);
    return;
  }
  var worker = fork("worker.js");
  worker.send("server", server);
  workers[worker.pid] = worker;
  console.log("create worker. pid: " + worker.pid);

  // Replace the worker as soon as it announces that it is about to die.
  worker.on("message", function(message) {
    if(message.signal === 'suicide') {
      createWorker();
    }
  });

  worker.on("exit", function() {
    console.log("worker " + worker.pid + " exited.");
    delete workers[worker.pid];
  });
};

// Try to start one worker for every entry in `cpus`.
cpus.forEach(function() {
  createWorker();
});

process.on("exit", function() {
  for (pid in workers) {
    workers[pid].kill();
  }
});

The isTooFrequently function makes this judgement. When it reports that the restart limit (10) has been reached within a minute, createWorker emits a giveup event instead of forking another worker.

reference