let's have some fun
Use compile flags:
--gdb --without-snapshot
Run your process with:
gdb -d lib/ -d src/ --args \
./node --gdbjit --gdbjit-full script.js
What you get is:
(gdb) b uv_write2 #0 uv_write2 (...) at ../deps/uv/src/unix/stream.c:1232 #1 in uv_write (...) at ../deps/uv/src/unix/stream.c:1319 #2 uv_try_write (...) at ../deps/uv/src/unix/stream.c:1344 #3 in node::StreamWrapCallbacks::TryWrite(...) () #4 in void node::StreamWrap::WriteStringImpl<(...)1>(...) () #5 in v8::internal::FunctionCallbackArguments::Call(...) () #6 in v8::internal::Builtin_HandleApiCall(...) () #7 in STUB: CEntryStub () #8 in createWriteReq (...) at net.js:719 #9 in Socket._writeGeneric (...) at net.js:662 #10 in Socket._write (...) at net.js:685 #11 in doWrite (...) at _stream_writable.js:262 #12 in clearBuffer (...) at _stream_writable.js:368 #13 in Writable.uncork () at _stream_writable.js:217 #14 in OutgoingMessage.end (...) at _http_outgoing.js:539 ^^^ Yeah, that's JS in your gdb ~~~
(gdb) f 14 #14 in OutgoingMessage.end (...) at _http_outgoing.js:539 539 this.connection.uncork(); (gdb) l 534 // Force a flush, HACK. 535 ret = this._send('', 'binary', finish); 536 } 537 538 if (this.connection && data) 539 this.connection.uncork(); 540 541 this.finished = true;
Yes, you can view the JS code from gdb.
Easily find where you're crossing boundaries
(gdb) f 7 #7 in STUB: CEntryStub () ^^^ ~~~ JS/C++ transversal (gdb) l 544 // everything to the socket. 545 debug('outgoing message end.'); 546 if (this.output.length === 0 && 547 this.connection._httpMessage === this) { 548 this._finish(); ^^^ Here's the call ~~~
Tweeted this title at 1:45 AM
Personal note: Don't tweet after midnight...
This may incur some (or much) indignation
Didn't know before planning the topic there would be a QA
... so please be kind
REMEMBER this talk is geared towards high performance and hot code, not for your everyday app
Inlined functions can't contain more than 600 characters
(including comments)
Inlined functions must be declared close to the call site
After N deopts V8 marks the function as not optimizable
(hence the importance to not allow deoptimizations in long running processes)
Some events like DNS and FS are pseudo-async.
(meaning they tie up a thread in the background)
Think really really hard before you decide to use a synchronous method call
Node uses little memory, so investigate using Buffers to cache resources
(reconstructing resources on the fly can be faster than reading from the file system)
For I/O intensive scenarios keep track of request requirement ratios by tracing libuv
(don't need to know C to understand what the libuv API is doing, and it can really help)
Use ktap
to create a quick histogram with this script:
var s = {} trace probe:/path/to/node:uv_[a-z]* { s[argname] += 1 } trace_end { histogram(s) }
And have it look like this:
value ------------- Distribution ------------- count uv_fs_req_cleanup |@@@@@@@@@@ 92945 uv_now |@@@@ 36680 uv_update_time |@@@@ 36680 uv_fs_stat |@@ 18932 uv_fs_open |@@ 18474 uv_fs_read |@@ 18428 uv_fs_close |@@ 18428 uv_fs_fstat |@@ 18428 uv_try_write |@@ 18297 uv_buf_init |@@ 18297 uv_write2 |@@ 18297
Who needs DTrace anyways? ;-P
We choose the amount of abstraction, but not the performance cost that comes with it.
We choose how much to performance tune, but not how ugly it makes the code...
or how much it will make other developers hate you.
It's common practice to nest Functions, usually for one of two reasons:
- Allow variables to propagate or keep them "private"
- Easier to pass as an argument
(thanks @mraleph for the examples)
// Closure-based accessors: every `new ClosureColor()` allocates a fresh
// pair of functions that close over the `name` argument.
function ClosureColor(name) {
  this.setName = function (value) {
    name = value;
  };
  this.getName = function () {
    return name;
  };
}
Instantiates in 280 ns
// Prototype-based accessors: methods are shared on the prototype, so they
// are allocated once rather than per instance.
function PrototypeColor(name) {
  this._name = name;
}

PrototypeColor.prototype.getName = function getName() {
  return this._name;
};

PrototypeColor.prototype.setName = function setName(value) {
  this._name = value;
};
Instantiates in 7 ns
Does it matter?
// Comparator: sort by most hours first.
function sumSort(a, b) { return b.hours - a.hours }

// Returns [{ name, hours }] for every person whose name matches nameReg
// and who has worked at least minHours, sorted by hours descending.
function sumHours(people, nameReg, minHours) {
  var result = [];
  // Filter by regex and work hours.
  Object.keys(people).filter(function(name) {
    // BUG FIX: test the whole name, not just its first character
    // (was `nameReg.test(name[0])`), so any regex works as expected.
    return people[name].hours >= minHours && nameReg.test(name);
    // Add each person with the number of hours they've worked to the array
  }).forEach(function(name) {
    result.push({ name: name, hours: people[name].hours });
  });
  // Sort the names of the people by the most hours.
  return result.sort(sumSort);
}
// Comparator: sort by most hours first.
function sumSort(a, b) { return b.hours - a.hours }

// Loop-based variant: collect every person matching nameReg with at least
// minHours worked, then sort by hours descending.
function sumHours(people, nameReg, minHours) {
  var matched = [];
  var names = Object.keys(people);
  for (var i = 0; i < names.length; i++) {
    var key = names[i];
    var entry = people[key];
    // Skip anyone under the hour threshold or whose name doesn't match.
    if (entry.hours < minHours || !nameReg.test(key))
      continue;
    matched.push({ name: key, hours: entry.hours });
  }
  return matched.sort(sumSort);
}
Functional: 38.1 us/op Not: 21.7 us/op Diff: 45.1%
// Example of deeply nested callbacks ("callback hell"): every async step is
// an anonymous closure over all the outer scopes, so V8 must allocate the
// whole chain fresh on each call and has a hard time optimizing any of it.
// Bodies are elided (`// ...`); `blob`, `self`, `Version`, `tx`, etc. are
// defined elsewhere in the real module.
module.exports = function upload(stream, idOrPath, tag, done) {
// ...
function backoff(err) { /* ... */ }
blob.put(stream, function (err, blobId) {
// ...
self.byUuidOrPath(idOrPath).get(function (err, file) {
// ...
Version.insert(version).execWithin(tx, function (err) {
// ...
self.createQuery(idOrPath, {
// ...
}, function (err, q) {
// ...
q.execWithin(tx, function (err) { /* ... */ });
});
// ...
});
// NOTE(review): declared inside a callback, so it is re-created on every
// upload instead of being hoisted to module scope.
function afterFileExists(err, fileId) {
// ...
FileVersion.insert({fileId: fileId,versionId: version.id})
.execWithin(tx, function (err) {
// ...
File.whereUpdate({id: fileId}, {
// ...
}).execWithin(tx, function (err) { /* ... */ });
})
}
});
});
}
Original response time: 290ms
By only hoisting functions and creating a single Object to maintain the request state, V8 was able to optimize these functions and keep them that way.
After slight refactor: 114ms
Heard complaints about how slow Node was to proxy data compared to nginx. So I built my own.
(https://github.com/trevnorris/psycho-proxy)
Bypasses Streams and EventEmitter
Super fast (40k/sec on loopback, and probably enough to fill up your network pipe regardless)
Makes your eyes bleed to look at
// Pick the transport: a non-numeric pOp is treated as a pipe path,
// otherwise it is a TCP port on loopback.
var connection = isNaN(pOp)
  ? PipeConnect(this, buffer, pOp)
  : TCPConnect(this, buffer, '127.0.0.1', pOp);

// Link the two endpoints to each other so data can be written straight
// back, bypassing all further checks.
connection._return_route = this;
this._return_route = connection;
// The method that proxies the data.
function proxyDataBetween(nread, data) {
var err, req;
if (nread <= 0) {
if (nread < 0)
this.close(onClose);
return;
}
if (this._return_route.fd <= 0) {
err = new Error('Attempt to write to bad file descriptor');
err.code = 'EBADF';
alert(this, err);
return;
}
req = { oncomplete: dataWritten };
err = this._return_route.writeBuffer(req, data);
if (err)
fail(this, err, 'write');
}
Sadly, I may be responsible for the most abstract interface in Node core.
New AsyncListener API... is a bit nuts.
It's about 500 lines of JS (excluding C++). 200 of which are comments.
Gives users the ability to attach a "listener" to an Object that may break the call stack in the future.
Users will be alerted to either all, or a filtered set, of branching asynchronous calls.
Errors are traceable beyond asynchronous call stack bounds.
Write code you can maintain.
Write code that doesn't crash. The slowest code is the code that doesn't run.
Code for your case. Very few people code with Node core performance requirements in mind.