// Handles one page of the submissions listing.
//
// For every result the submission's base domain and its author are queued for
// profiling. While `firsttime` is set the callback pages forward in steps of
// 100 (one request per second) until offset 900; after that it resets to
// offset 0 and polls once a minute.
//
// Transport errors AND unparseable bodies are logged and retried after one
// second. Previously a non-JSON body made JSON.parse throw inside this
// callback, which nothing catches, so the polling loop stopped.
// NOTE(review): `scrape` is presumably the function that issues this request;
// confirm against the enclosing file.
request(options, function (error, response, body) {
    if (error) {
        console.log("scrape.error (submissions.js#36) " + error);
        return setTimeout(scrape, 1000);
    }

    var jbody;
    try {
        jbody = JSON.parse(body);
    } catch (parseError) {
        console.log("scrape.error (submissions.js#36) " + parseError);
        return setTimeout(scrape, 1000);
    }

    // `jbody` can legitimately be null (the JSON literal `null`), so guard
    // before dereferencing it.
    var results = jbody && jbody.results;
    if (results && results.length > 0) {
        for (var i = 0; i < results.length; i++) {
            var item = results[i].item;
            // Submissions without a url are grouped under "selfpost".
            var domain = utils.baseurl(item.url || "selfpost");
            data.queueDomain(domain);
            data.queueUser(item.username, null, true);
        }
    }

    // Initial backfill: keep paging until offset 900 has been fetched.
    // `<` instead of `!=` so an unexpected offset cannot page forever.
    if (firsttime && offset < 900) {
        offset += 100;
        return setTimeout(scrape, 1000);
    }

    // Backfill finished (or not needed): poll the first page once a minute.
    firsttime = false;
    offset = 0;
    return setTimeout(scrape, 60000);
});
// Builds the report entry for each requested domain and appends it to
// `returndata`.
//
// For a known domain, every user listed in `dprofile.users` is expanded into a
// per-user summary (or, when no user profile is available yet, the user is
// queued, `inserts` is incremented and the queue status is reported instead).
// The summaries are attached as `dprofile.userprofiles`, ordered by
// `domainusersubmissions`, highest first.
//
// NOTE(review): whatever `data.domainData` returns is pushed even when it is
// falsy, so unknown domains appear as empty entries in `returndata`; confirm
// callers rely on that positional correspondence.
domains.forEach(function (domain) {
    var key = domain.toLowerCase();
    var dprofile = data.domainData(key);

    if (dprofile) {
        var newusers = [];

        for (var username in dprofile.users) {
            // Skip anything inherited through the prototype chain.
            if (!Object.prototype.hasOwnProperty.call(dprofile.users, username)) {
                continue;
            }

            var uprofile = data.userData(username);
            if (uprofile) {
                newusers.push({
                    username: username,
                    domain: key,
                    daysold: daysOld(uprofile.created),
                    submissions: uprofile.submissions,
                    comments: uprofile.comments,
                    lastcomment: daysOld(uprofile.lastcomment),
                    lastsubmission: daysOld(uprofile.lastsubmission),
                    domainusersubmissions: uprofile.domains[key],
                    domaintotalsubmissions: dprofile.submissions,
                    domainsubmitters: dprofile.submitters
                });
            } else {
                // No profile yet: schedule a fetch and report the queue state.
                data.queueUser(username);
                inserts++;
                newusers.push(data.queueStatus(username));
            }
        }

        // Descending by per-domain submission count. Entries without a count
        // (queue-status placeholders) are treated as 0 so they sort last. The
        // previous comparator never returned 0 and answered -1 for both
        // orderings of an undefined value, which gives an inconsistent,
        // engine-dependent order.
        newusers.sort(function (a, b) {
            return (b.domainusersubmissions || 0) - (a.domainusersubmissions || 0);
        });

        dprofile.userprofiles = newusers;
    }

    returndata.push(dprofile);
});
// Handles one page of submissions for the domain currently being scraped.
//
// Every submitter on the page is queued for profiling. While `offset` is below
// both the reported hit count and 900, the next page is requested one second
// later via `geturl`. Otherwise the domain is removed from `queue`, marked in
// `scraped`, and `scrape` is scheduled again.
//
// Transport errors and unparseable bodies are logged and retried through
// `scrape` after one second. Previously a non-JSON body made JSON.parse throw
// inside this callback, which nothing catches.
request(options, function (error, response, body) {
    if (error) {
        console.log("scrape.error (domains.js#40) " + error);
        return setTimeout(scrape, 1000);
    }

    var jbody;
    try {
        jbody = JSON.parse(body);
    } catch (parseError) {
        console.log("scrape.error (domains.js#40) " + parseError);
        return setTimeout(scrape, 1000);
    }

    // `jbody` can be null (the JSON literal `null`); guard before use.
    var results = jbody && jbody.results;
    if (results && results.length > 0) {
        for (var i = 0; i < results.length; i++) {
            data.queueUser(results[i].item.username);
        }
    }

    // More pages to fetch. `<` instead of `!=` so an unexpected offset cannot
    // page forever.
    if (jbody && offset < jbody.hits && offset < 900) {
        offset += 100;
        return setTimeout(geturl, 1000);
    }

    // Done with this domain. Only splice when the domain is actually queued:
    // indexOf returns -1 for a missing entry and splice(-1, 1) would silently
    // drop the LAST queued domain instead.
    var queueIndex = queue.indexOf(domain);
    if (queueIndex !== -1) {
        queue.splice(queueIndex, 1);
    }
    scraped[domain] = true;
    return setTimeout(scrape, 1000);
});
// Resolves every requested entry into `returndata`.
//
// An entry is either a plain "username" or "username:domain[:story]" (the
// story segment is only honoured when the entry has exactly three parts).
//
//  - user with domain: pushes one flat record to `returndata.users` and the
//    matching domain summary to `returndata.domains`.
//  - plain user: pushes one record to `returndata.users` carrying a `domains`
//    array with one element per domain in the user's profile.
//
// When no user profile is available the user is queued at position `inserts`,
// the queue status is pushed instead, and `inserts` is incremented.
usernames.forEach(function (user) {
    if (!returndata.users) {
        returndata.users = [];
    }

    // user with domain
    if (user.indexOf(":") > -1) {
        // Created independently of `users`: previously `domains` was only
        // initialised together with `users`, so a plain entry followed by a
        // "user:domain" entry crashed on `returndata.domains.push`.
        if (!returndata.domains) {
            returndata.domains = [];
        }

        var uparts = user.split(":");
        var username = uparts[0];
        var domain = uparts[1].toLowerCase();
        var story = uparts.length === 3 ? uparts[2] : "";
        var uprofile = data.userData(username);

        if (!uprofile) {
            data.queueUser(username, inserts);
            returndata.users.push(data.queueStatus(username));
            inserts++;
            return;
        }

        // -1 marks "domain not known". The key must be `submitters`, which is
        // what is read below; the old fallback used `domainsubmitters` and so
        // produced undefined instead of -1.
        var dprofile = data.domainData(domain) || { submissions: -1, submitters: -1 };

        // submission is newer than our data
        if (!uprofile.domains[domain]) {
            console.log("re-queueing", username, domain);
            data.queueUser(username, 0, true);
        }

        returndata.users.push({
            username: username,
            domain: domain,
            story: story,
            daysold: daysOld(uprofile.created),
            submissions: uprofile.submissions,
            comments: uprofile.comments,
            lastcomment: daysOld(uprofile.lastcomment),
            lastsubmission: daysOld(uprofile.lastsubmission),
            domainusersubmissions: uprofile.domains[domain],
            domaintotalsubmissions: dprofile.submissions,
            domainsubmitters: dprofile.submitters,
            domainssubmitted: Object.keys(uprofile.domains).length
        });

        returndata.domains.push({
            domain: domain,
            submissions: dprofile.submissions,
            submitters: dprofile.submitters
        });
        return;
    }

    // plain user
    var profile = data.userData(user);

    if (!profile) {
        data.queueUser(user, inserts);
        returndata.users.push(data.queueStatus(user));
        inserts++;
        return;
    }

    var udata = {
        // Was `username`, a variable only assigned in the "user:domain"
        // branch, so plain lookups were reported with an undefined name.
        username: user,
        daysold: daysOld(profile.created),
        submissions: profile.submissions,
        comments: profile.comments,
        lastcomment: daysOld(profile.lastcomment),
        lastsubmission: daysOld(profile.lastsubmission),
        domains: []
    };

    for (var userDomain in profile.domains) {
        if (!Object.prototype.hasOwnProperty.call(profile.domains, userDomain)) {
            continue;
        }
        var domainProfile = data.domainData(userDomain) || { submissions: -1, submitters: -1 };
        udata.domains.push({
            domain: userDomain,
            domainusersubmissions: profile.domains[userDomain],
            domaintotalsubmissions: domainProfile.submissions,
            domainsubmitters: domainProfile.submitters
        });
    }

    returndata.users.push(udata);
});