-
Notifications
You must be signed in to change notification settings - Fork 107
/
Copy pathapi_client.rb
317 lines (271 loc) · 8.96 KB
/
api_client.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
require 'open-uri'
require 'net/http'
require 'digest/sha1'
require 'fileutils'
require 'json'
require 'ghtorrent/logging'
require 'ghtorrent/settings'
require 'ghtorrent/ghtime'
require 'version'
module GHTorrent
module APIClient
include GHTorrent::Logging
include GHTorrent::Settings
include GHTorrent::Logging
# A paged request. Used when the result can expand to more than one
# result page.
#
# Fetches +url+ and, while the response carries a `link` header with a
# `next` relation, recursively fetches the following pages. The parsed pages
# are merged with the `|` operator (a set union for arrays, so elements that
# compare equal are kept once).
#
# url   - URL of the first page to fetch.
# pages - maximum number of pages to retrieve. Zero or a negative value
#         follows every page. Defaults to the :mirror_history_pages_back
#         configuration setting.
# last  - URL of the last page, taken from the first `link` header seen; it
#         is only handed on to the recursive calls.
#
# Returns the parsed result of all visited pages, or [] when the raw request
# returned nil.
def paged_api_request(url, pages = config(:mirror_history_pages_back),
                      last = nil)
  response = api_request_raw(ensure_max_per_page(url))
  return [] if response.nil?

  # No link header: the result fits in a single page.
  link_header = response.meta['link']
  return parse_request_result(response) if link_header.nil?

  rels = parse_links(link_header)
  last = rels['last'] if last.nil?

  # A positive page budget is consumed one page at a time; the page that
  # exhausts it is returned without following `next`.
  if pages > 0
    pages -= 1
    return parse_request_result(response) if pages == 0
  end

  next_url = rels['next']
  return parse_request_result(response) if next_url.nil?

  parse_request_result(response) | paged_api_request(next_url, pages, last)
end
# A normal (non-paged) request. Returns a hash or an array of hashes
# representing the parsed JSON result.
#
# url        - URL to fetch; it is passed through ensure_max_per_page first.
# media_type - media type handed to api_request_raw (used there for the
#              request's Accept header). Defaults to the empty string.
def api_request(url, media_type = '')
  full_url = ensure_max_per_page(url)
  raw_response = api_request_raw(full_url, media_type)
  parse_request_result(raw_response)
end
# Return the last modified time for the resource identified by the provided
# URL, as returned from GitHub in the `last-modified` response header.
#
# The request is sent with the given etag (normalised through fmt_etag).
# An HTTP 304 answer reaches this method as an OpenURI::HTTPError; it is the
# expected "not modified" outcome and is logged as a successful request.
# Any other HTTP error is logged as a warning and re-raised.
#
# url  - URL of the resource to check.
# etag - etag of a previously retrieved version of the resource; may be nil.
#
# Returns a Time parsed from the `last-modified` header, or Time.at(86400)
# when the response carries no such header.
# Raises OpenURI::HTTPError for every HTTP error status other than 304.
def last_updated(url, etag)
  ts = Time.now
  begin
    response = do_request(url, '', fmt_etag(etag))
    info "Successful etag request. URL: #{url}, HTTP: #{response.status.first}, old_etag: #{fmt_etag(etag)}, new_etag: #{fmt_etag(response.meta['etag'])}, Remaining: #{@remaining}, Total: #{Time.now.to_ms - ts.to_ms} ms"
  rescue OpenURI::HTTPError => e
    response = e.io
    if response.status.first != '304'
      # A genuine failure: report it instead of discarding the message.
      warn etag_request_error_message(url, e, etag)
      raise e
    end
    # 304 Not Modified: the error object still carries the response headers.
    info "Successful etag request. URL: #{url}, HTTP: 304, etag: #{fmt_etag(etag)}, Remaining: #{@remaining}, Total: #{Time.now.to_ms - ts.to_ms} ms"
  end

  last_modified = response.meta['last-modified']
  return Time.at(86400) if last_modified.nil?

  Time.parse(last_modified)
end
# Determine the number of pages contained in a multi-page API response.
#
# Requests +url+ and reads the `page` query parameter of the `last` relation
# in the response's `link` header.
#
# url - URL of the (first page of the) resource.
#
# Returns the page count as an Integer. Returns 1 whenever it cannot be
# determined: no response, no `link` header, no `last` relation, a `last`
# URL without a query string, or no positive `page` parameter in it.
def num_pages(url)
  data = api_request_raw(ensure_max_per_page(url))
  return 1 if data.nil? || data.meta.nil? || data.meta['link'].nil?

  links = parse_links(data.meta['link'])
  return 1 if links.nil? || links['last'].nil?

  query = URI.parse(links['last']).query
  return 1 if query.nil?

  # URI is already loaded through open-uri/net/http, so no further require
  # is needed to decode the query string.
  _name, page = URI.decode_www_form(query).assoc('page')
  count = page.to_i
  count > 0 ? count : 1
end
private
# Append `per_page=100` to URLs that mention `page` but do not yet carry a
# `per_page` parameter.
#
# Both checks are plain substring tests on the whole URL. A URL that does
# not contain the text `page` at all is returned unchanged.
#
# url - the request URL.
#
# Returns the URL, with `per_page=100` added using `&` or `?` depending on
# whether the URL already contains a `?`.
def ensure_max_per_page(url)
  return url unless url.include?('page')
  return url if url.include?('per_page')

  suffix = url.include?('?') ? '&per_page=100' : '?per_page=100'
  url + suffix
end
# Parse a Github `link` header into a hash that maps each relation name
# (e.g. "next", "last") to its target URL.
#
# Every `<url>; rel="name"` pair found in the header is collected. Text that
# does not have this shape is ignored, URLs may contain commas, and further
# link parameters after `rel` are skipped.
#
# links - the raw header value; nil is treated like an empty header.
#
# Returns a Hash of relation name => URL, empty when nothing matched.
def parse_links(links)
  relations = {}
  # [^>]* and [^"]* keep each capture inside its own <...> / "..." pair, so
  # one entry can never swallow its neighbours.
  links.to_s.scan(/<([^>]*)>\s*;\s*rel="([^"]*)"/) do |target, rel|
    relations[rel] = target
  end
  relations
end
# Parse the JSON body of a raw response.
#
# result - object returned by api_request_raw, or nil. It must respond to
#          #read (the body) and #meta (the response headers).
#
# Returns the parsed JSON value. When the value is a single entity (a Hash)
# and the response has an `etag` header, the formatted etag is stored under
# the 'etag' key. Returns [] for a nil result and for a nil or blank body.
def parse_request_result(result)
  return [] if result.nil?

  json = result.read
  # A body that was never sent is the empty string, not nil; parsing it
  # would raise, so treat it like "no data".
  return [] if json.nil? || json.strip.empty?

  parsed = JSON.parse(json)
  etag = result.meta['etag']
  # Add the etag to the response only for individual entities
  parsed['etag'] = fmt_etag(etag) if etag && parsed.is_a?(Hash)
  parsed
end
# Normalise an etag by deleting every `W`, `"` and `/` character from it.
#
# etag - the etag string, or nil.
#
# Returns the stripped copy, or nil when +etag+ is nil.
def fmt_etag(etag)
  return nil if etag.nil?

  etag.tr('W"/', '')
end
# Shorten an access token for use in log messages.
#
# token - the token string, or nil.
#
# Returns the first eleven characters of the token, or the placeholder
# '<empty-token>' when the token is nil or empty.
def fmt_token(token)
  return '<empty-token>' if token.nil? || token.empty?

  token[0..10]
end
# Build the one-line log message for a request that failed with an HTTP
# error.
#
# url       - the URL that was requested.
# exception - the error; its #io.status must hold the status code at index 0
#             and the status text at index 1.
#
# The message also reports the shortened access token, the attached IP and
# the remaining request quota held in instance variables. Every run of
# whitespace in the final text is collapsed to a single space.
def request_error_msg(url, exception)
  parts = [
    "Failed request. URL: #{url}, Status code: #{exception.io.status[0]},",
    "Status: #{exception.io.status[1]},",
    "Access: #{fmt_token(@token)},",
    "IP: #{@attach_ip}, Remaining: #{@remaining}"
  ]
  parts.join("\n").strip.gsub(/\s+/, ' ')
end
# Build the one-line log message for a request that failed with an error
# other than an HTTP status error.
#
# url       - the URL that was requested.
# exception - the error; its #message is included in the text.
#
# The message also reports the shortened access token, the attached IP and
# the remaining request quota held in instance variables. Every run of
# whitespace in the final text is collapsed to a single space.
def error_msg(url, exception)
  parts = [
    "Failed request. URL: #{url}, Exception: #{exception.message},",
    "Access: #{fmt_token(@token)},",
    "IP: #{@attach_ip}, Remaining: #{@remaining}"
  ]
  parts.join("\n").strip.gsub(/\s+/, ' ')
end
# Build the log message for a failed etag request: the regular request
# error message followed by the etag that was sent.
#
# url       - the URL that was requested.
# exception - the HTTP error, passed on to request_error_msg.
# etag      - the etag used for the request; may be nil.
def etag_request_error_message(url, exception, etag)
  base_message = request_error_msg(url, exception)
  "#{base_message} etag: #{etag}"
end
# Do the actual request and return the result object.
#
# HTTP errors are handled by status code:
# * 400, 403, 404, 409, 422 and 451 are logged and answered with nil;
# * 401 is logged (with the access token shortened by fmt_token, never in
#   full) and re-raised;
# * every other status is logged and re-raised.
# Errors that are not HTTP status errors are logged and re-raised.
#
# Whatever the outcome, the method afterwards blocks until the rate limit
# window resets if the remaining quota has dropped below the request limit.
#
# url        - URL to fetch.
# media_type - media type handed to do_request. Defaults to the empty
#              string.
#
# Returns the object produced by do_request, or nil for the tolerated HTTP
# errors listed above.
# Raises OpenURI::HTTPError or the original StandardError otherwise.
def api_request_raw(url, media_type = '')
  begin
    start_time = Time.now
    contents = do_request(url, media_type)
    total = Time.now.to_ms - start_time.to_ms
    info "Successful request. URL: #{url}, Remaining: #{@remaining}, Total: #{total} ms"
    contents
  rescue OpenURI::HTTPError => e
    # Error responses carry rate limit headers too; keep the counters fresh.
    @remaining = e.io.meta['x-ratelimit-remaining'].to_i
    @reset = e.io.meta['x-ratelimit-reset'].to_i

    case e.io.status[0].to_i
    # The following indicate valid Github return codes
    when 400, # Bad request
         403, # Forbidden
         404, # Not found
         409, # Conflict -- returned on gets of empty repos
         422  # Unprocessable entity
      warn request_error_msg(url, e)
      return nil
    when 401 # Unauthorized
      warn request_error_msg(url, e)
      # Only a shortened token is written to the log.
      warn "Unauthorised request with token: #{fmt_token(@token)}"
      raise e
    when 451 # DMCA takedown
      warn request_error_msg(url, e)
      warn "Repo was taken down (DMCA)"
      return nil
    else # Server error or HTTP conditions that Github does not report
      warn request_error_msg(url, e)
      raise e
    end
  rescue StandardError => e
    warn error_msg(url, e)
    raise e
  ensure
    throttle_if_rate_limited
  end
end

# Sleep until the rate limit window resets when the remaining quota is below
# the configured request limit; a helper thread logs a countdown meanwhile.
def throttle_if_rate_limited
  # The counters are unset when the request failed before they were
  # initialised; there is nothing to throttle on in that case.
  return if @remaining.nil? || @req_limit.nil? || @reset.nil?
  return unless @remaining < @req_limit

  # The exact limit is only enforced upon the first @reset
  # No idea how many requests are available on this key. Sleep if we have run out
  to_sleep = @reset - Time.now.to_i + 2
  warn "Request limit reached, reset in: #{to_sleep} secs"
  countdown = Thread.new do
    slept = 0
    loop do
      debug "Sleeping for #{to_sleep - slept} seconds"
      sleep 1
      slept += 1
    end
  end

  begin
    sleep([0, to_sleep].max)
  ensure
    # Stop the countdown even when the sleep is interrupted.
    countdown.exit
  end
end
# Decide how requests are authenticated.
#
# token - the configured access token, or nil.
#
# Returns :token when a non-empty token is available, :none otherwise.
def auth_method(token)
  (token.nil? || token.empty?) ? :none : :token
end
# Perform one HTTP GET through open-uri and record the rate limit counters
# reported by the response.
#
# On first use the connection settings (attach IP, token, user agent,
# request limit) are read from the configuration and cached in instance
# variables.
#
# url        - URL to fetch. Square brackets in it are percent-encoded.
# media_type - value of the Accept header; 'application/json' is used when
#              it is nil or empty.
# etag       - optional value for the If-None-Match header.
#
# Returns the object produced by open-uri for the response.
# HTTP error statuses surface as OpenURI::HTTPError (see the callers'
# rescue clauses).
def do_request(url, media_type, etag = nil)
  @attach_ip  ||= config(:attach_ip)
  @token      ||= config(:github_token)
  @user_agent ||= config(:user_agent)
  @remaining  ||= 5000
  @reset      ||= Time.now.to_i + 3600
  @auth_type  ||= auth_method(@token)
  @req_limit  ||= config(:req_limit)

  media_type = 'application/json' if media_type.nil? || media_type.empty?

  headers = {
    'User-Agent' => @user_agent,
    'Accept' => media_type
  }
  headers['Authorization'] = "token #{@token}" if auth_method(@token) == :token
  headers['If-None-Match'] = etag if etag

  # Only way to encode square brackets in standard Ruby
  if url =~ %r{\[|\]}
    protocol, host, path = url.split(%r{/+}, 3)
    path = path.gsub('[', '%5B').gsub(']', '%5D')
    url = "#{protocol}//#{host}/#{path}"
  end

  # Kernel#open stopped handling URLs in Ruby 3.0 and is unsafe for them on
  # older versions; prefer URI.open and fall back to Kernel#open only where
  # URI.open is not available.
  fetch = lambda do
    URI.respond_to?(:open) ? URI.open(url, headers) : open(url, headers)
  end

  result = if @attach_ip.nil? || @attach_ip.eql?('0.0.0.0')
             fetch.call
           else
             attach_to(@attach_ip) { fetch.call }
           end

  @remaining = result.meta['x-ratelimit-remaining'].to_i
  @reset = result.meta['x-ratelimit-reset'].to_i
  result
end
# Attach to a specific IP address if the machine has multiple.
#
# For the duration of the given block TCPSocket.open is replaced by a
# version that passes +ip+ as the local host to the original method. The
# original method is put back when the block finishes, whether it returned
# or raised.
#
# NOTE(review): the replacement is made on the TCPSocket class itself, so it
# is visible to everything that calls TCPSocket.open while the block runs.
# It only has an effect when the HTTP library connects through
# TCPSocket.open -- confirm for the Ruby versions in use.
#
# ip - local IP address to bind outgoing connections to.
#
# Returns the value of the block.
def attach_to(ip)
  TCPSocket.instance_eval do
    (class << self; self; end).instance_eval do
      alias_method :original_open, :open
      # Anchored so that only Ruby 1.8.x / 1.9.x select the two-argument
      # form; an unanchored /1.8/ would also match e.g. "2.1.8".
      if RUBY_VERSION =~ /\A1\.[89]\./
        define_method(:open) do |conn_address, conn_port|
          original_open(conn_address, conn_port, ip)
        end
      else
        define_method(:open) do |conn_address, conn_port, _local_host, local_port|
          original_open(conn_address, conn_port, ip, local_port)
        end
      end
    end
  end

  begin
    yield
  ensure
    # Always restore the original method, also when the block raised.
    TCPSocket.instance_eval do
      (class << self; self; end).instance_eval do
        alias_method :open, :original_open
        remove_method :original_open
      end
    end
  end
end
end
end