database; /* * This is one way to use Map-Reduce to find the IDs of linked documents which aren't used by any other documents in a collection * * This operation would take the place of an anti-join such as the LEFT JOIN/WHERE/IS NULL pattern in MySQL. * * * Our fictional setup: * We have two collections, eg, User and Server. Multiple users can share a server. * * The User collection has an array of server IDs which correspond to the IDs in the server collection. * * We wish to determine which server are no longer used by anyone so they can be decommisioned. When a user is about to be deleted, we run this * map-reduce command to find which servers are shut down. * * In this example user 5 is being deleted. We want to know which servers he was using which no one else is using */ // drop the old user collection, if needed $mongo->user->drop(); $mongo->server->drop(); // Set up our documents $users = Array( Array('_id' => 1, 'server' => Array('A','B','C','D')), Array('_id' => 2, 'server' => Array('C','D','E','F')), Array('_id' => 3, 'server' => Array('E','F','G','H')), Array('_id' => 4, 'server' => Array('I','J','K','L')), Array('_id' => 5, 'server' => Array('A','B','C','D','E','Q','R','S','T','U')), ); foreach($users as $user){ $mongo->user->insert($user); } $servers = Array('A','B','C','D','E','F','G','H','I','J','K','L','Q','R','S','T','U'); foreach($servers as $serverId){ $mongo->server->insert(Array('_id' => $serverId)); } // 3 -- For each document found (users 1,2,3) add the servers which are also // found in user 5's server list (JS global variable serverToCheck) into // serverToKeep. Emit serverToKeep with ID 1. Since we're agregating all // server IDs regardless of user ID, we're just going to use a single results // ID // // Each of users 1,2 and 3 will emit one result: // emit(1,{'server':['A','B','C','D']}) // emit(1,{'server':['C','D','E']}) // emit(1,{'server':['E']}) $map = new MongoCode( "function(){ var serverToKeep = []; this.server.forEach(function(v){ if(serverToCheck.indexOf(v) != -1){ serverToKeep.push(v); } }); // we set the emit key to 1, because we don't care which // user is using it, we just need a list of the server IDs // this way all the server IDs will end up in one document emit(1,{'server':serverToKeep}); }" ); // 4 -- Reduce the emitted results. Reduce may be called more than once. // k will be the key emitted in Map (1, in our case) and multiple will be // an array of emitted results. If they were all reduced in a single call // the arguments would be: // // function(a,[{'server':['A','B','C','D']},{'server':['C','D','E']},{'server':['E']}) // // We will return a single {'server':[...]} object which may be passed in to reduce // again later. // // The last time reduce is called, we want to return: // {'server':['A','B','C','D','E']} $reduce = new MongoCode( "function(k,multiple){ var serverToKeep = []; for(var j in multiple){ one = multiple[j]; var count = one.server.length; for(var i = 0;iuser->findOne(Array('_id' => 5),Array('server' => TRUE)); $serverToCheck = $user['server']; // 2 -- Now we construct our Mongo command // NOTE: $cmd = Array( // We define which collection will be map-reduced "mapreduce" => "user", // This query will find all user documents (since we set mapreduce => user) which // have one or more of the same servers as the user we are deleting. We exluce // user 5. "query" => Array( "server" => Array('$in' => $serverToCheck), '_id' => Array('$ne' => 5) ), // scope lets us set global JavaScript variables which we can // use in the map, reduce and finalize functions. // Here we make the list of server IDs available to JavaScript "scope" => Array( "serverToCheck" => $serverToCheck ), // The functions are called in this order: map, reduce, finalize // Map -- Each document found by the query above is submitted to // map. We extract the data we are interested in from the document // and emit it. The emitted values are passed into reduce. // // Reduce -- For each key emitted, an array of values emitted is // passed into reduce (maybe more than once per key). Here we // perform our agregation and return a single object (even if // multiple objects were passed in). // // Finalize is called once per key. Since we set our key to '1' // this is called only once in our case. "map" => $map, "reduce" => $reduce, "finalize" => $finalize, // Instead of putting the results into a new collection we will // return them inline "out" => Array("inline" => TRUE), ); $result = $mongo->command($cmd); // 6 -- Now we can use the results! print_r($result); /* Array ( [results] => Array ( [0] => Array ( [_id] => 1 [value] => Array ( [server] => Array ( [0] => Q [1] => R [2] => S [3] => T [4] => U ) ) ) ) [timeMillis] => 17 [counts] => Array ( [input] => 3 [emit] => 3 [reduce] => 1 [output] => 1 ) [ok] => 1 ) */ // 7 -- Remove servers no one is using foreach($result['results'] as $k => $v){ $servers = $v['value']['server']; $mongo->server->remove(Array('_id' => Array('$in' => $servers))); } // 8 -- Remove the user $mongo->user->remove(Array('_id' => 5));