94 lines
2.3 KiB
Ruby
94 lines
2.3 KiB
Ruby
require 'open3'
|
|
require 'json'
|
|
require 'nokogiri'
|
|
require 'date'
|
|
require 'rubygems'
|
|
require 'lda-ruby'
|
|
require 'lingua/stemmer'
|
|
require 'textoken'
|
|
require 'stopwords'
|
|
require "lemmatizer"
|
|
# Loads the scraper settings from a JSON file and exposes the two entries
# the rest of this script reads. Both values are used elsewhere in this file
# as command strings handed to Open3.capture3.
class Config
  # Settings file used when no path is given; resolved against the current
  # working directory.
  DEFAULT_PATH = "config.json".freeze

  # @return [Object] the parsed JSON document
  attr_reader :json

  # @param path [String] location of the JSON settings file
  # @raise [Errno::ENOENT] if the file does not exist
  # @raise [JSON::ParserError] if the file is not valid JSON
  def initialize(path = DEFAULT_PATH)
    @json = JSON.parse(File.read(path))
  end

  # @return [Object, nil] the value stored under "body_url" (nil when absent)
  def get_body
    json["body_url"]
  end

  # @return [Object, nil] the value stored under "sub_url" (nil when absent)
  def get_url
    json["sub_url"]
  end
end
|
|
# Runs an external command and extracts one text fragment from the HTML the
# command prints on standard output.
class Cmd
  # Location of the element whose text is returned by #run_command.
  CONTENT_XPATH = "/html/body/div/div[2]/div[2]/div/div[2]/div".freeze

  # @return [String, nil] the command line to execute
  attr_reader :url

  # @param url [String, nil] command line to execute; optional so that the
  #   argument-less `Cmd.new` keeps working
  def initialize(url = nil)
    @url = url
  end

  # Executes the command, parses its standard output as HTML and returns the
  # stripped text found at CONTENT_XPATH.
  #
  # @return [String] extracted text ("" when nothing matches)
  # @raise [ArgumentError] if no command was supplied
  def run_command
    raise ArgumentError, "no command configured" if url.nil?

    # capture3 returns [stdout, stderr, status]; only stdout is needed here.
    stdout, _stderr, _status = Open3.capture3(url)
    Nokogiri::HTML(stdout).xpath(CONTENT_XPATH).text.strip
  end
end
|
|
# Runs the configured "sub_url" command, pulls link targets out of the HTML
# it prints, and appends them to OUTPUT_FILE, one per line.
class GetUrls < Config
  # File the collected link targets are appended to.
  OUTPUT_FILE = "DarknetMarketsNoobs_urls.txt".freeze

  # Positions probed in the listing container. XPath positional predicates
  # start at 1, so position 0 can never match; 1..19 covers every position
  # the previous 0..19 loop could actually hit.
  # NOTE(review): if 20 entries per page were intended, this should end at 20 - confirm.
  LINK_POSITIONS = (1..19).freeze

  # Fetches the listing page and appends every non-empty href to OUTPUT_FILE.
  #
  # @return [Integer] number of bytes written
  def run
    # This object is itself a Config, so the settings are already loaded.
    command = get_url
    puts command

    # capture3 returns [stdout, stderr, status]; only stdout is needed here.
    stdout, _stderr, _status = Open3.capture3(command)
    page = Nokogiri::HTML(stdout)

    links = LINK_POSITIONS.map do |position|
      anchors = page.xpath("/html/body/div/div[2]/div[3]/div[#{position}]/div[1]/a").css('a')
      anchors.attribute('href').to_s.strip
    end
    links = links.reject(&:empty?)

    # The file is opened in append mode, so every entry must end with its own
    # newline; otherwise the last link of one run and the first link of the
    # next end up fused on a single line.
    File.open(OUTPUT_FILE, "a") do |file|
      file.write(links.map { |link| "#{link}\n" }.join)
    end
  end
end
|
|
# Normalizes one piece of text into a list of lower-cased tokens with
# punctuation and English stop words removed.
class Clean
  # @return [String] the raw text given to the constructor
  attr_reader :line

  # @param line [String] raw text to clean
  def initialize(line)
    @line = line
  end

  # Lower-cases the text, tokenizes it with Textoken (punctuation excluded)
  # and drops English stop words using the Snowball list.
  #
  # @return [Array] the tokens that survive stop-word filtering
  def cleanize
    stopword_filter = Stopwords::Snowball::Filter.new "en"
    tokens = Textoken(line.downcase, exclude: 'punctuations').tokens
    stopword_filter.filter(tokens)
  end
end
|
|
# Fetches every page listed in the URL file, extracts the post text, cleans
# it, and writes all cleaned posts to a JSON file keyed by today's date.
class GetBody < Config
  # Reads "DarknetMarketsNoobs_urls.txt" line by line, runs the configured
  # "body_url" command once per entry, and stores the cleaned text of each
  # page in "fucking_Work.json" as { "MM/DD/YYYY" => [post, ...] }.
  #
  # @return [Integer] number of bytes written to the output file
  # @raise [Errno::ENOENT] if the URL file does not exist
  def run
    date = Time.now.strftime("%m/%d/%Y")
    # This object is itself a Config, so the settings are already loaded;
    # read the command template once instead of re-parsing the file per line.
    base_command = get_body.to_s
    @f_json = []

    File.readlines("DarknetMarketsNoobs_urls.txt").each do |raw_line|
      path = raw_line.strip
      # A blank line carries no path; fetching the bare base URL for it would
      # add an unrelated page to the results.
      next if path.empty?

      # Block form inserts the path literally; a replacement *string* would
      # interpret backslash sequences that may occur in scraped paths.
      # NOTE(review): the path comes from scraped hrefs and ends up inside a
      # command line run by Open3.capture3 - confirm the input is trusted or
      # switch to an argument-array invocation.
      command = base_command.gsub(".onion") { ".onion#{path}" }

      # capture3 returns [stdout, stderr, status]; only stdout is needed here.
      stdout, _stderr, _status = Open3.capture3(command)
      page = Nokogiri::HTML(stdout)
      post_text = page.xpath("/html/body/div/div[2]/div[2]/div/div[2]/div").text.strip

      @f_json << Clean.new(post_text).cleanize.join(" ")
    end

    report = { date => @f_json }
    File.open("fucking_Work.json", "w") { |file| file.write(JSON.pretty_generate(report)) }
  end
end
|
|
|
|
# Script entry point: collect the link list first, then fetch and clean the
# body of every collected link. Order matters because the second stage reads
# the file the first stage appends to.
[GetUrls, GetBody].each do |stage|
  stage.new.run
end
|