convore.json/groups/pycon-2011/mrjob-distributed-computing-for-everyone/messages.json


			
				
					
					
						
						
							
							
							[{"user_id": 18972, "stars": [], "topic_id": 12419, "date_created": 1299871540.99651, "message": "ctr.py example would be better if the reducer(self, _, events) was defined as reducer(self, _, user_clicked). #readability", "group_id": 373, "id": 325820}, {"user_id": 7372, "stars": [], "topic_id": 12419, "date_created": 1299871572.858027, "message": "well the events is both impressions and clicks right?", "group_id": 373, "id": 325829}, {"user_id": 18972, "stars": [{"date_created": 1299871357.8472841, "user_id": 1376}], "topic_id": 12419, "date_created": 1299871333.9445269, "message": "http://us.pycon.org/2011/schedule/presentations/216/", "group_id": 373, "id": 325776}, {"user_id": 20693, "stars": [], "topic_id": 12419, "date_created": 1299871658.3856781, "message": "@gpshead I bet most of it is from the JSON field names", "group_id": 373, "id": 325846}, {"user_id": 18972, "stars": [], "topic_id": 12419, "date_created": 1299871428.2933929, "message": "@dabeaz Yelp logs an entire Superboard worth of data for every ad impression they show.", "group_id": 373, "id": 325797}, {"user_id": 18972, "stars": [], "topic_id": 12419, "date_created": 1299871861.838099, "message": "<-- not in the habit of writing mapreduces; but I like how this is being presented.", "group_id": 373, "id": 325891}, {"user_id": 18972, "stars": [], "topic_id": 12419, "date_created": 1299871647.9868879, "message": "it looked like the individual yielded values from the mapper become the args to the reducer to me.", "group_id": 373, "id": 325844}, {"user_id": 1, "stars": [], "topic_id": 12419, "date_created": 1299871358.8009701, "message": "Not in the talk, is it pronounced \"M R Job\" or \"Mr. Job\"", "group_id": 373, "id": 325783}, {"user_id": 18972, "stars": [], "topic_id": 12419, "date_created": 1299871366.2802751, "message": "M R Job", "group_id": 373, "id": 325787}, {"user_id": 18972, "stars": [], "topic_id": 12419, "date_created": 1299871369.9984851, "message": "mr == mapreduce", "group_id": 373, "id": 325788}, {"user_id": 1, "stars": [], "topic_id": 12419, "date_created": 1299871395.7497411, "message": "Awww", "group_id": 373, "id": 325792}, {"user_id": 20693, "stars": [], "topic_id": 12419, "date_created": 1299872307.5682931, "message": "infochimp", "group_id": 373, "id": 326043}, {"user_id": 18972, "stars": [], "topic_id": 12419, "date_created": 1299871690.056046, "message": "not sure but its built on top of hadoop so...", "group_id": 373, "id": 325854}, {"user_id": 18972, "stars": [], "topic_id": 12419, "date_created": 1299871732.9174931, "message": "okay by the time hes updated it for ctr_fatigue it makes more sense now that its less simple", "group_id": 373, "id": 325864}, {"user_id": 18972, "stars": [], "topic_id": 12419, "date_created": 1299871830.8122671, "message": "i wonder if sorted() being used in the reducer is a good idea. sorting in the mappers and merging before input to the reducer is possible (but requires mappers to buffer so... maybe a no go?)", "group_id": 373, "id": 325885}, {"user_id": 6431, "stars": [], "topic_id": 12419, "date_created": 1299871899.5730591, "message": "needs more cmd-+", "group_id": 373, "id": 325902}, {"user_id": 18972, "stars": [], "topic_id": 12419, "date_created": 1299872574.890717, "message": "his example given stated something that took a day to run on his laptop taking an hour to run via an amazon elastic MR.  one questioner was not impressed by that speedup.  i have to agree.", "group_id": 373, "id": 326118}, {"user_id": 20693, "stars": [], "topic_id": 12419, "date_created": 1299871668.5484331, "message": "Does this use the Hadoop streaming API", "group_id": 373, "id": 325847}, {"user_id": 18972, "stars": [], "topic_id": 12419, "date_created": 1299871961.0773129, "message": "hmm yes, i'm in the second row.  good feedback for any presenter: ++font size", "group_id": 373, "id": 325922}, {"user_id": 19665, "stars": [], "topic_id": 12419, "date_created": 1299872295.9202349, "message": "what was the site that he got the aliens and stock datatsets from?", "group_id": 373, "id": 326037}, {"user_id": 20693, "stars": [], "topic_id": 12419, "date_created": 1299871892.2878499, "message": "if you're doing a sort in the reduce, probably doing something wrong", "group_id": 373, "id": 325899}, {"user_id": 18972, "stars": [], "topic_id": 12419, "date_created": 1299872154.612433, "message": "\"yelp had their own hadoop cluster, there were having some issues with dumbo(?) a year or two ago. they moved to the virtual amazon elastic MR and storage.\"", "group_id": 373, "id": 325990}, {"user_id": 18972, "stars": [], "topic_id": 12419, "date_created": 1299872082.2382231, "message": "since this is hadoop and hadoop is java, does this run under jython or is hadoop executing cpython?", "group_id": 373, "id": 325963}, {"user_id": 18972, "stars": [], "topic_id": 12419, "date_created": 1299872265.838295, "message": "\"everyone I know calls it Mr. Job, inside the group its about 50/50 m r job vs Mr. Job.\" questioner: you should stop that, Mr. Job is cool.", "group_id": 373, "id": 326033}, {"user_id": 18972, "stars": [], "topic_id": 12419, "date_created": 1299872311.1164379, "message": "haha", "group_id": 373, "id": 326044}, {"user_id": 18972, "stars": [], "topic_id": 12419, "date_created": 1299872329.4149411, "message": "when you make your fortune off of that tip, remember us!", "group_id": 373, "id": 326055}, {"user_id": 20693, "stars": [], "topic_id": 12419, "date_created": 1299872141.735204, "message": "you could use bash if you wanted", "group_id": 373, "id": 325986}, {"user_id": 20693, "stars": [], "topic_id": 12419, "date_created": 1299872047.23929, "message": "yes, it looks like this uses the Hadoop Streaming jar", "group_id": 373, "id": 325951}, {"user_id": 20693, "stars": [], "topic_id": 12419, "date_created": 1299872136.654737, "message": "@gpshead, Hadoop Streaming lets you use any executable as the mapper/reducer", "group_id": 373, "id": 325981}, {"user_id": 20695, "stars": [], "topic_id": 12419, "date_created": 1299872155.2229731, "message": "could be useful as mrjob vs dumbo reference info - http://news.ycombinator.com/item?id=2122474", "group_id": 373, "id": 325991}, {"user_id": 18972, "stars": [], "topic_id": 12419, "date_created": 1299872232.8216131, "message": "shipping data out to mappers: local filename as input supplied? it'll be staged into s3 first my mrjob.", "group_id": 373, "id": 326021}, {"user_id": 19665, "stars": [], "topic_id": 12419, "date_created": 1299872312.3799341, "message": "thanks!", "group_id": 373, "id": 326045}, {"user_id": 13604, "stars": [], "topic_id": 12419, "date_created": 1299873363.7776861, "message": "the real benefit is not the clock time itself, but the Hadoop API. i'll to write a python 5 liner that will parse gigs of data in an hour, take no resources from anyone, and if it sucks I can tweak as i go", "group_id": 373, "id": 326227}]