1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
|
# Record describing a single video. Instances are built either by hand
# (see initialize) or from a database row via the DB.mapping declaration
# at the bottom of the class.
class Video
# Converter referenced by the `info` column mapping: reads the column as a
# String and parses it into HTTP::Params.
module HTTPParamConverter
def self.from_rs(rs)
HTTP::Params.parse(rs.read(String))
end
end
# Converter referenced by the `html` column mapping: reads the column as a
# String and parses it as an HTML document.
module XMLConverter
def self.from_rs(rs)
XML.parse_html(rs.read(String))
end
end
# Builds a record from already-parsed values. The argument order is the same
# as the order of the array returned by to_a.
def initialize(id, info, html, updated, title, views, likes, dislikes, wilson_score)
@id = id
@info = info
@html = html
@updated = updated
@title = title
@views = views
@likes = likes
@dislikes = dislikes
@wilson_score = wilson_score
end
# Returns every field as an array, in constructor order. get_video passes
# this array as the $1..$9 arguments of its INSERT and UPDATE statements.
def to_a
return [@id, @info, @html, @updated, @title, @views, @likes, @dislikes, @wilson_score]
end
# Column-to-type mapping used when a row is read with `as: Video`.
# `info` and `html` are stored as strings and go through the converters above.
# NOTE(review): the `default` values are presumably used when the column is
# absent or NULL - confirm against the crystal-db DB.mapping documentation.
DB.mapping({
id: String,
info: {
type: HTTP::Params,
default: HTTP::Params.parse(""),
converter: Video::HTTPParamConverter,
},
html: {
type: XML::Node,
default: XML.parse_html(""),
converter: Video::XMLConverter,
},
updated: Time,
title: String,
views: Int64,
likes: Int32,
dislikes: Int32,
wilson_score: Float64,
})
end
# Lower bound of the Wilson score confidence interval for the fraction of
# positive ratings.
# See http://www.evanmiller.org/how-not-to-sort-by-average-rating.html
#
# pos - number of positive ratings
# n   - total number of ratings
# z   - normal quantile for the desired confidence level; the default of
#       1.96 represents a confidence level of 0.95
#
# Returns 0.0 when there are no ratings, otherwise the lower bound as a float.
def ci_lower_bound(pos, n, z = 1.96)
  # With no ratings there is nothing to estimate. A negative total is
  # meaningless, so it is treated the same way instead of producing a score.
  return 0.0 if n <= 0

  phat = 1.0 * pos / n
  z_squared = z * z

  center = phat + z_squared / (2 * n)
  margin = z * Math.sqrt((phat * (1 - phat) + z_squared / (4 * n)) / n)

  (center - margin) / (1 + z_squared / n)
end
# Formats a duration for display.
#
# elapsed - any object responding to total_milliseconds
#
# Returns the value rounded to two decimals, suffixed with "ms" when it is at
# least one millisecond and with "µs" (after scaling by 1000) otherwise.
def elapsed_text(elapsed)
  ms = elapsed.total_milliseconds

  if ms >= 1
    "#{ms.round(2)}ms"
  else
    "#{(ms * 1000).round(2)}µs"
  end
end
# Takes the first client out of the pool, waiting until one is available.
#
# pool - a collection responding to empty? and shift
#
# While the pool is empty, sleeps for a random 0-10 ms and checks again.
# Returns the element removed from the front of the pool.
def get_client(pool)
  loop do
    break unless pool.empty?
    sleep rand(0..10).milliseconds
  end

  pool.shift
end
# Downloads the metadata and watch page for a video and builds a Video.
#
# id     - video id, interpolated into both request paths
# client - HTTP client responding to get(path) with an object exposing body
#
# Raises with the server-provided "reason" when the info response has one.
# Returns a new Video stamped with the current time.
def fetch_video(id, client)
  info_body = client.get("/get_video_info?video_id=#{id}&el=detailpage&ps=default&eurl=&gl=US&hl=en").body
  watch_body = client.get("/watch?v=#{id}").body

  html = XML.parse_html(watch_body)
  info = HTTP::Params.parse(info_body)

  # The info endpoint reports failures through a "reason" parameter.
  raise info["reason"] if info["reason"]?

  title = info["title"]
  views = info["view_count"].to_i64

  # Counts are scraped from the watch page; thousands separators are removed
  # before conversion. A missing like button counts as 1 like, a missing
  # dislike button as 0 dislikes.
  like_node = html.xpath_node(%q(//button[@title="I like this"]/span))
  likes = if like_node
            like_node.content.delete(",").to_i
          else
            1
          end

  dislike_node = html.xpath_node(%q(//button[@title="I dislike this"]/span))
  dislikes = if dislike_node
               dislike_node.content.delete(",").to_i
             else
               0
             end

  wilson_score = ci_lower_bound(likes, likes + dislikes)

  Video.new(id, info, html, Time.now, title, views, likes, dislikes, wilson_score)
end
# Returns the Video for `id`, using the `videos` table as a cache.
#
# id      - video id
# client  - HTTP client handed to fetch_video
# db      - database handle responding to query_one? and exec
# refresh - when true (the default), a cached row older than one hour is
#           re-fetched and written back
#
# A missing row is fetched and inserted. Errors raised by fetch_video or by
# the database propagate to the caller.
def get_video(id, client, db, refresh = true)
  # A single lookup both tests for existence and loads the row, which avoids
  # a second round trip and the window in which the row could disappear
  # between an EXISTS check and the SELECT.
  video = db.query_one?("SELECT * FROM videos WHERE id = $1", id, as: Video)

  if video
    # If record was last updated over an hour ago, refresh (expire param in response lasts for 6 hours)
    if refresh && Time.now - video.updated > 1.hours
      video = fetch_video(id, client)
      db.exec("UPDATE videos SET info = $2, html = $3, updated = $4,title = $5, views = $6, likes = $7, dislikes = $8, wilson_score = $9 WHERE id = $1", video.to_a)
    end
  else
    video = fetch_video(id, client)
    db.exec("INSERT INTO videos VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)", video.to_a)
  end

  video
end
# Runs a search and yields one value per video result found on the page.
#
# query  - search terms, interpolated into the request URL as given
# client - HTTP client responding to get(url) with an object exposing body
#
# For each result item that has a video lockup with a thumbnail link, yields
# the text between the first and second "=" of the link's href.
def search(query, client)
  response = client.get("https://www.youtube.com/results?q=#{query}&sp=EgIQAVAU")
  document = XML.parse_html(response.body)

  document.xpath_nodes(%q(//ol[@class="item-section"]/li)).each do |entry|
    lockup = entry.xpath_node(%q(div[contains(@class,"yt-lockup-video")]/div))
    next unless lockup

    href = lockup.xpath_node(%q(div[contains(@class,"yt-lockup-thumbnail")]/a/@href))
    next unless href

    yield href.content.split("=")[1]
  end
end
|