stuff
This commit is contained in:
Netflix 2021-03-08 03:01:20 +01:00
parent a4a7271313
commit 617f4da7cb
1 changed files with 94 additions and 0 deletions

94
test.rb Normal file
View File

@ -0,0 +1,94 @@
require 'json'
require 'date'
#p File.readlines("fucking_Work.json")
require "lemmatizer"
require 'lda-ruby'
require 'date'
require 'epitome'
require 'fileutils'
require 'lda-ruby'
require 'lingua/stemmer'
require 'tty-table'
require 'textoken'
require 'stopwords'
require 'json'
require 'tf_idf'
# Persists a JSON payload to a per-day file inside the "jsons" directory.
#
# The directory is resolved relative to the current working directory and is
# created on construction if it does not exist yet.
class Reports
  # json      - the payload passed to the constructor; written as-is.
  # file_name - target path, "jsons/<MM-DD-YYYY>.json", fixed at construction.
  attr_reader :json, :file_name

  # json - the content that #write_json will later write to disk.
  def initialize(json)
    @json = json
    @file_name = File.join("jsons", get_date)
    FileUtils.mkdir_p("jsons")
  end

  # Returns the current local date formatted as "MM-DD-YYYY.json".
  def get_date
    "#{Time.now.strftime('%m-%d-%Y')}.json"
  end

  # Appends the payload to the day's file. The file is opened in append mode,
  # so content written earlier to the same file is kept.
  #
  # Returns the value of IO#write (the number of bytes written).
  def write_json
    File.open(file_name, "a") do |out|
      out.write(json)
    end
  end
end
# Topic-modelling pipeline that prints its result on standard output.
#
# Reads a JSON document, lower-cases and tokenizes every string found in its
# values (punctuation excluded), passes the tokens through the Snowball "en"
# stop-word filter, loads the result into an Lda::Corpus, runs LDA and prints
# the model's `top_words` as JSON.
class VersionOne
  # Runs the pipeline.
  #
  # input_path      - path of the JSON file to read.
  # vocabulary_path - file handed to Lda::Lda#load_vocabulary.
  # num_topics      - value assigned to the model's `num_topics`.
  #
  # The defaults are the values that used to be hard-coded, so calling `run`
  # without arguments behaves as before.
  #
  # Returns the result of `puts` (nil).
  def run(input_path: "fucking_Work.json", vocabulary_path: "vocab.txt", num_topics: 15)
    lda = Lda::Lda.new(build_corpus(input_path))
    lda.load_vocabulary(vocabulary_path)
    lda.num_topics = num_topics
    lda.em('random')
    puts lda.top_words.to_json
  end

  private

  # Builds an Lda::Corpus with one Lda::TextDocument per string found in the
  # (flattened) values of the JSON document at input_path.
  def build_corpus(input_path)
    texts = JSON.parse(File.read(input_path)).to_h.values.flatten
    corpus = Lda::Corpus.new
    stopword_filter = Stopwords::Snowball::Filter.new "en"
    texts.each do |text|
      tokens = Textoken(text.downcase, exclude: 'punctuations').tokens
      kept = stopword_filter.filter(tokens)
      # The surviving tokens are handed to TextDocument as one comma-joined string.
      corpus.add_document(Lda::TextDocument.new(corpus, kept.join(",")))
    end
    corpus
  end
end
# Topic-modelling pipeline that stores its result through Reports.
#
# Reads a JSON document, lower-cases and tokenizes every string found in its
# values (punctuation excluded), passes the tokens through the Snowball "en"
# stop-word filter, loads the result into an Lda::Corpus, runs LDA and hands
# the pretty-printed `top_words` to Reports#write_json.
class VersionTwo
  # Runs the pipeline.
  #
  # input_path - path of the JSON file to read.
  # num_topics - value assigned to the model's `num_topics`.
  #
  # The defaults are the values that used to be hard-coded, so calling `run`
  # without arguments behaves as before.
  #
  # Returns the value of the model's `top_words`.
  def run(input_path: "fucking_Work.json", num_topics: 15)
    populate_corpus(read_texts(input_path))
    lda = Lda::Lda.new(@corpus)
    lda.num_topics = num_topics
    lda.em('random')
    topics = lda.top_words
    Reports.new(JSON.pretty_generate(topics)).write_json
    topics
  end

  private

  # Returns the flattened values of the JSON document stored at input_path.
  def read_texts(input_path)
    JSON.parse(File.read(input_path)).to_h.values.flatten
  end

  # Fills a fresh corpus with one Lda::TextDocument per entry of texts.
  # The corpus is kept in @corpus, as it was before this refactoring.
  def populate_corpus(texts)
    @corpus = Lda::Corpus.new
    stopword_filter = Stopwords::Snowball::Filter.new "en"
    texts.each do |text|
      tokens = Textoken(text.downcase, exclude: 'punctuations').tokens
      kept = stopword_filter.filter(tokens)
      # The surviving tokens are handed to TextDocument as one comma-joined string.
      @corpus.add_document(Lda::TextDocument.new(@corpus, kept.join(",")))
    end
    @corpus
  end
end
# Script entry point: build the VersionTwo pipeline and run it with its defaults.
pipeline = VersionTwo.new
pipeline.run