Class: Arachni::URI
Overview
The URI class automatically normalizes the URLs it is passed to parse
while maintaining compatibility with Ruby's URI core class.
It also provides cached (to keep latency low) helper class methods to
ease common operations such as parsing, normalizing, encoding/decoding and converting relative URLs to absolute ones.
Defined Under Namespace
Classes: Error, Scope
Constant Summary
collapse
- CACHE_SIZES =
{
parse: 2_500,
normalize: 2_500,
to_absolute: 2_500,
encode: 1_000,
decode: 1_000,
scope: 1_000
}
- CACHE =
{
parser: ::URI::Parser.new
}
- QUERY_CHARACTER_CLASS =
Addressable::URI::CharacterClasses::QUERY.sub( '\\&', '' )
- VALID_SCHEMES =
Set.new(%w(http https))
- PARTS =
%w(scheme userinfo host port path query)
- TO_ABSOLUTE_PARTS =
%w(scheme userinfo host port)
Class Method Summary
collapse
Instance Method Summary
collapse
caller_location, debug?, debug_level, debug_level_1?, debug_level_2?, debug_level_3?, debug_level_4?, debug_off, debug_on, disable_only_positives, error_buffer, error_log_fd, error_logfile, has_error_log?, included, log_error, mute, muted?, only_positives, only_positives?, print_bad, print_debug, print_debug_backtrace, print_debug_exception, print_debug_level_1, print_debug_level_2, print_debug_level_3, print_debug_level_4, print_error, print_error_backtrace, print_exception, print_info, print_line, print_ok, print_status, print_verbose, reroute_to_file, reroute_to_file?, reset_output_options, set_error_logfile, unmute, verbose?, verbose_off, verbose_on
Methods included from Utilities
available_port, available_port_mutex, bytes_to_kilobytes, bytes_to_megabytes, caller_name, caller_path, cookie_decode, cookie_encode, cookies_from_file, cookies_from_parser, cookies_from_response, exception_jail, exclude_path?, follow_protocol?, form_decode, form_encode, forms_from_parser, forms_from_response, full_and_absolute_url?, generate_token, get_path, hms_to_seconds, html_decode, html_encode, include_path?, links_from_parser, links_from_response, normalize_url, page_from_response, page_from_url, parse_set_cookie, path_in_domain?, path_too_deep?, port_available?, rand_port, random_seed, redundant_path?, regexp_array_match, remove_constants, request_parse_body, seconds_to_hms, skip_page?, skip_path?, skip_resource?, skip_response?, uri_decode, uri_encode, uri_parse, uri_parse_query, uri_parser, uri_rewrite
Constructor Details
#initialize(url) ⇒ URI
Note:
Will discard the fragment component, if there is one.
Returns a new instance of URI.
410
411
412
413
414
415
416
417
418
419
420
|
# File 'lib/arachni/uri.rb', line 410
# Parses `url` via {.fast_parse} and copies every component listed in PARTS
# into an instance variable of the same name.
#
# @note The fragment component, if any, is discarded.
# @param url [String] URL to parse.
# @raise [Error] when the URL cannot be parsed.
def initialize(url)
  @data = self.class.fast_parse(url)
  fail Error, 'Failed to parse URL.' unless @data

  PARTS.each { |name| instance_variable_set("@#{name}", @data[name.to_sym]) }

  reset_userpass
end
|
Class Method Details
._load(url) ⇒ Object
783
784
785
|
# File 'lib/arachni/uri.rb', line 783
# Ruby Marshal deserialization hook: builds a new instance from the String
# produced by #_dump (which returns #to_s).
#
# @param url [String] serialized URL.
# @return [Object] result of `new url`.
def self._load( url )
new url
end
|
.decode(string) ⇒ String
106
107
108
109
110
111
112
|
# File 'lib/arachni/uri.rb', line 106
# URL-decodes `string`, treating `+` as an encoded space.
#
# Results are stored in the `:decode` cache, keyed by the input string.
#
# @param string [String] text to decode.
# @return [String] decoded (and recoded) text.
def decode(string)
  CACHE[:decode].fetch(string) do
    Addressable::URI.unencode(string.gsub('+', '%20')).tap do |decoded|
      decoded.recode! if decoded
    end
  end
end
|
.encode(string, good_characters = nil) ⇒ String
90
91
92
93
94
95
96
97
98
99
|
# File 'lib/arachni/uri.rb', line 90
# URL-encodes `string`, additionally escaping literal `+` characters as `%2B`.
#
# Results are stored in the `:encode` cache, keyed by both arguments.
#
# @param string [String] text to encode.
# @param good_characters [String, Regexp, nil] characters to leave unencoded;
#   omitted from the underlying call when nil.
# @return [String] encoded text.
def encode(string, good_characters = nil)
  CACHE[:encode].fetch([string, good_characters]) do
    arguments = [string, good_characters].compact
    encoded   = Addressable::URI.encode_component(*arguments)

    encoded.recode!
    encoded.gsub!('+', '%2B')
    encoded
  end
end
|
.fast_parse(url) ⇒ Hash
Performs a parse that is less resource intensive than Ruby's URI lib's
method while normalizing the URL (will also discard the fragment and
path parameters).
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
|
# File 'lib/arachni/uri.rb', line 152
# Splits `url` into its components by hand (no URI library involved) while
# normalizing it: the fragment and path parameters are discarded, scheme and
# host are down-cased, and path/query are decoded and then re-encoded.
#
# @param url [String] URL to parse.
# @return [Hash, nil] Hash with the keys :scheme, :userinfo, :host, :port,
#   :path and :query (the :scheme key is removed entirely for input starting
#   with `//`). Returns nil for nil/empty input, for input starting with `#`,
#   `javascript:` or `data:`, and when a StandardError is raised while parsing.
def fast_parse( url )
return if !url || url.empty?
# Fragment-only references carry nothing to parse.
return if url.start_with?( '#' )
durl = url.downcase
return if durl.start_with?( 'javascript:' ) ||
durl.start_with?( 'data:' )
# Work on a copy so the caller's String is not mutated by the bang methods below.
url = url.dup
# Discard the fragment.
url.sub!( /#.*/, '' )
# NOTE: c_url references the same String object as url here, so the in-place
# `insert` below is visible through c_url too; url is only rebound to a
# different object by the html_decode assignment.
c_url = url
components = {
scheme: nil,
userinfo: nil,
host: nil,
port: nil,
path: nil,
query: nil
}
begin
# Protocol-relative URL: temporarily prefix a scheme so the authority can be
# parsed; the :scheme key is deleted again before returning.
if (schemeless = url.start_with?( '//' ))
url.insert 0, 'http:'
end
url = html_decode( url )
# Snapshot of the decoded URL; the query is later extracted from this copy
# because url itself gets reduced to the path.
dupped_url = url.dup
has_path = true
splits = url.split( ':' )
# Only treat the text before the first ':' as a scheme when it is one of VALID_SCHEMES.
if !splits.empty? && VALID_SCHEMES.include?( splits.first.downcase )
splits = url.split( '://', 2 )
components[:scheme] = splits.shift
components[:scheme].downcase! if components[:scheme]
# url becomes whatever follows '://', or nil when there was no '://'.
if (url = splits.shift)
# Drop the query, then separate the authority from the path.
userinfo_host, url =
url.to_s.split( '?' ).first.to_s.split( '/', 2 )
url = url.to_s
splits = userinfo_host.to_s.split( '@', 2 )
if splits.size > 1
components[:userinfo] = splits.first
end
if !splits.empty?
splits = splits.last.split( '/', 2 )
# Separate host from port.
splits = splits.first.split( ':', 2 )
if splits.size == 2
host = splits.first
if splits.last && !splits.last.empty?
components[:port] = splits.last.to_i
end
# Port 80 is dropped here regardless of the scheme.
if components[:port] == 80
components[:port] = nil
end
else
host = splits.last
end
if (components[:host] = host)
components[:host].downcase!
end
else
# Empty authority.
has_path = false
end
else
has_path = false
end
end
if has_path
splits = url.split( '?', 2 )
if (components[:path] = splits.shift)
# With a scheme, url no longer has its leading slash (it was consumed by the
# authority/path split above), so restore it.
if components[:scheme]
components[:path] = "/#{components[:path]}"
end
# Collapse runs of slashes.
components[:path].gsub!( /\/+/, '/' )
# Strip everything from the first ';' onwards (path parameters).
components[:path].sub!( /\;.*/, '' )
if components[:path]
# Decode then re-encode so the path ends up in one canonical encoding.
components[:path] =
encode( decode( components[:path] ),
Addressable::URI::CharacterClasses::PATH ).dup
components[:path].gsub!( ';', '%3B' )
end
end
# The presence check uses c_url (the URL before html_decode); the query text
# itself comes from the decoded snapshot.
if c_url.include?( '?' ) &&
!(query = dupped_url.split( '?', 2 ).last).empty?
# A limit of -1 keeps trailing empty pairs; each '&'-separated pair is
# normalized independently so the separators themselves are preserved.
components[:query] = (query.split( '&', -1 ).map do |pair|
encode( decode( pair ), QUERY_CHARACTER_CLASS )
end).join( '&' )
end
end
if schemeless
components.delete :scheme
end
# A URL that has a scheme but no path gets '/'; otherwise a missing path stays nil.
components[:path] ||= components[:scheme] ? '/' : nil
components
rescue => e
print_debug "Failed to parse '#{c_url}'."
print_debug "Error: #{e}"
print_debug_backtrace( e )
nil
end
end
|
.full_and_absolute?(url) ⇒ Bool
Returns true
if the URL is full and absolute, false
otherwise.
397
398
399
400
401
402
403
404
|
# File 'lib/arachni/uri.rb', line 397
# Tells whether `url` parses successfully and is absolute.
#
# @param url [#to_s] URL to check.
# @return [Bool] false for blank or unparseable input, otherwise the parsed
#   URI's `absolute?` value.
def full_and_absolute?(url)
  as_string = url.to_s
  return false if as_string.empty?

  uri = parse(as_string)
  uri ? uri.absolute? : false
end
|
.normalize(url) ⇒ String
Note:
This method's results are cached for performance reasons.
If you plan on doing something destructive with its return value
duplicate it first because there may be references to it elsewhere.
Uses parse to parse and normalize the URL and then converts it to
a common String format.
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
|
# File 'lib/arachni/uri.rb', line 344
# Parses `url` with {.parse} and returns its String form.
#
# @note Results (including failures, stored as `:err`) are cached, and the
#   returned String is frozen — duplicate it before modifying it.
# @param url [String] URL to normalize; surrounding whitespace is stripped.
# @return [String, nil] normalized URL; nil for nil/empty input or when
#   normalization raised an error.
def normalize(url)
  return if !url || url.empty?

  store   = CACHE[:normalize]
  cleaned = url.to_s.strip
  # Separate copy used as the cache key and in the debug output.
  key     = cleaned.dup

  begin
    hit = store[cleaned]
    return if hit == :err
    return hit if hit

    store[key] = parse(cleaned).to_s.freeze
  rescue => e
    print_debug "Failed to normalize '#{key}'."
    print_debug "Error: #{e}"
    print_debug_backtrace(e)

    store[key] = :err
    nil
  end
end
|
.parse(url) ⇒ Object
Note:
This method's results are cached for performance reasons.
If you plan on doing something destructive with its return value
duplicate it first because there may be references to it elsewhere.
Cached version of #initialize, if there's a chance that the same
URL will be needed to be parsed multiple times you should use this method.
122
123
124
125
126
127
128
129
130
131
132
133
134
135
|
# File 'lib/arachni/uri.rb', line 122
# Cached constructor: builds (or fetches from the `:parse` cache) the URI
# object for `url`.
#
# @note Cached objects may be shared — duplicate before mutating.
# @param url [String, Arachni::URI, nil] URL to parse.
# @return [Arachni::URI, nil] `url` itself when it is nil/false or already an
#   Arachni::URI; otherwise the parsed object, or nil when construction raised.
def parse(url)
  return url unless url
  return url if url.is_a?(Arachni::URI)

  CACHE[:parse].fetch(url) do
    begin
      new(url)
    rescue => e
      print_debug "Failed to parse '#{url}'."
      print_debug "Error: #{e}"
      print_debug_backtrace(e)
      nil
    end
  end
end
|
.parse_query(url) ⇒ Hash
Extracts inputs from a URL query.
385
386
387
388
389
390
|
# File 'lib/arachni/uri.rb', line 385
# Extracts inputs from a URL query.
#
# @param url [String, Arachni::URI] URL whose query should be read.
# @return [Hash] decoded query parameters; an empty Hash when `url` cannot
#   be parsed.
def parse_query(url)
  parsed = parse(url)
  return {} unless parsed

  # Reuse the object obtained above rather than calling parse a second time.
  parsed.query_parameters
end
|
.parser ⇒ URI::Parser
Returns cached URI parser.
77
78
79
|
# File 'lib/arachni/uri.rb', line 77
# @return [URI::Parser] the parser instance stored under `:parser` in CACHE.
def parser
  CACHE[:parser]
end
|
.rewrite(url, rules = Arachni::Options.scope.url_rewrites) ⇒ String
376
377
378
|
# File 'lib/arachni/uri.rb', line 376
# Parses `url` and applies the rewrite `rules` to it.
#
# @param url [String, Arachni::URI] URL to rewrite.
# @param rules [Enumerable] rewrite rules handed to the parsed URI's `rewrite`.
# @return [String] String form of the rewritten URL.
def rewrite(url, rules = Arachni::Options.scope.url_rewrites)
  parsed    = parse(url)
  rewritten = parsed.rewrite(rules)
  rewritten.to_s
end
|
.to_absolute(relative, reference = Options.instance.url.to_s) ⇒ String
Note:
This method's results are cached for performance reasons.
If you plan on doing something destructive with its return value
duplicate it first because there may be references to it elsewhere.
Normalizes and converts a relative
URL to an absolute
one by merging in with a reference
URL.
Pretty much a cached version of #to_absolute.
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
|
# File 'lib/arachni/uri.rb', line 300
# Normalizes `relative` and resolves it against `reference`.
#
# @note Results (including failures, stored as `:err`) are cached, and the
#   returned String is frozen — duplicate it before modifying it.
# @param relative [String, nil] URL to resolve.
# @param reference [String] absolute URL to resolve against.
# @return [String, nil] the normalized `reference` when `relative` is blank;
#   the reference's String form when `relative` cannot be parsed; the absolute
#   URL otherwise; nil when resolution raised an error.
def to_absolute(relative, reference = Options.instance.url.to_s)
  return normalize(reference) if !relative || relative.empty?

  # The key is computed before `relative` is possibly rewritten below.
  key   = [relative, reference].hash
  store = CACHE[:to_absolute]

  begin
    cached = store[key]
    return if cached == :err
    return cached if cached

    base = parse(reference)

    # Protocol-relative input inherits the scheme of the reference.
    relative = "#{base.scheme}:#{relative}" if relative.start_with?('//')

    target = parse(relative)
    return base.to_s unless target

    store[key] = target.to_absolute(base).to_s.freeze
  rescue
    store[key] = :err
    nil
  end
end
|
Instance Method Details
#==(other) ⇒ Object
429
430
431
|
# File 'lib/arachni/uri.rb', line 429
# Two URIs are equal when their String representations are equal.
#
# @param other [#to_s] object to compare with.
# @return [Boolean]
def ==(other)
  theirs = other.to_s
  mine   = to_s
  mine == theirs
end
|
779
780
781
|
# File 'lib/arachni/uri.rb', line 779
# Ruby Marshal serialization hook: the object is dumped as its String form.
#
# @param _ [Object] ignored.
# @return [String] value of #to_s.
def _dump( _ )
to_s
end
|
#absolute? ⇒ Boolean
433
434
435
|
# File 'lib/arachni/uri.rb', line 433
# @return [Boolean] true when a scheme is set, false otherwise.
def absolute?
  @scheme ? true : false
end
|
590
591
592
593
594
595
596
597
598
599
600
|
# File 'lib/arachni/uri.rb', line 590
# Derives the domain from the host (memoized in @domain).
#
# IP addresses and hosts made of one or two dot-separated labels are
# returned as they are; for longer hosts the first label is dropped.
#
# @return [String, nil] domain name, or nil when there is no host.
def domain
  return unless host
  return @domain if @domain
  return @domain = host if ip_address?

  labels = host.split('.')

  @domain =
    case labels.size
    when 1 then labels.first
    when 2 then host
    else labels[1..-1].join('.')
    end
end
|
770
771
772
773
774
775
776
777
|
# File 'lib/arachni/uri.rb', line 770
# Creates a copy without running #initialize (and therefore without
# re-parsing): every truthy instance variable is duplicated into a freshly
# allocated instance; values whose `dup` raises are shared as they are.
#
# @return [Object] the copy.
def dup
  copy = self.class.allocate

  instance_variables.each do |ivar|
    value = instance_variable_get(ivar)
    next unless value

    duplicate =
      begin
        value.dup
      rescue StandardError
        value
      end

    copy.instance_variable_set(ivar, duplicate)
  end

  copy
end
|
787
788
789
|
# File 'lib/arachni/uri.rb', line 787
# Hash code derived from the String representation, so objects with equal
# #to_s values produce equal hash codes.
#
# @return [Integer]
def hash
to_s.hash
end
|
692
693
694
|
# File 'lib/arachni/uri.rb', line 692
# @return [String, nil] the host component (value of @host).
def host
@host
end
|
696
697
698
699
700
701
702
703
|
# File 'lib/arachni/uri.rb', line 696
# Sets the host and invalidates every memoized value that embeds it.
#
# @param h [String, nil] new host.
# @return [String, nil] the assigned host.
def host=(h)
  @to_s          = nil
  @up_to_port    = nil
  # #up_to_path is built on top of #up_to_port, so it embeds the host as well.
  @up_to_path    = nil
  @without_query = nil
  @domain        = nil

  @host = h
end
|
#ip_address? ⇒ Boolean
Returns true
if the URI contains an IP address, false
otherwise.
621
622
623
|
# File 'lib/arachni/uri.rb', line 621
# Checks whether the host is an IP address by attempting to build an
# IPAddr from it.
#
# @return [Boolean] true when IPAddr accepts the host, false when it raises.
def ip_address?
  IPAddr.new(host)
  true
rescue StandardError
  false
end
|
673
674
675
|
# File 'lib/arachni/uri.rb', line 673
# @return [Object, nil] value of @password.
#   NOTE(review): presumably populated by reset_userpass from the userinfo;
#   that method is not visible here — confirm.
def password
@password
end
|
705
706
707
|
# File 'lib/arachni/uri.rb', line 705
# @return [String, nil] the path component (value of @path).
def path
@path
end
|
709
710
711
712
713
714
715
716
717
|
# File 'lib/arachni/uri.rb', line 709
# Sets the path and clears every memoized value that is derived from it.
#
# @param p [String, nil] new path.
# @return [String, nil] the assigned path.
def path=(p)
  @up_to_path = @resource_name = @resource_extension = nil
  @without_query = @to_s = nil

  @path = p
end
|
#persistent_hash ⇒ Object
791
792
793
|
# File 'lib/arachni/uri.rb', line 791
# Delegates to `persistent_hash` on the String representation.
# NOTE(review): String#persistent_hash is not a core Ruby method — presumably
# a project-provided extension; confirm where it is defined.
def persistent_hash
to_s.persistent_hash
end
|
677
678
679
|
# File 'lib/arachni/uri.rb', line 677
# @return [Integer, nil] the port component (value of @port).
def port
@port
end
|
681
682
683
684
685
686
687
688
689
690
|
# File 'lib/arachni/uri.rb', line 681
# Sets the port and invalidates every memoized value that embeds it.
#
# @param p [#to_i, nil] new port; nil (or false) clears it.
# @return [Integer, nil] the assigned port.
def port=(p)
  # #up_to_port renders the port and #up_to_path is built on top of
  # #up_to_port, so both must be recomputed along with the full strings.
  @up_to_port    = nil
  @up_to_path    = nil
  @without_query = nil
  @to_s          = nil

  @port = p ? p.to_i : nil
end
|
625
626
627
|
# File 'lib/arachni/uri.rb', line 625
# @return [String, nil] the query component (value of @query).
def query
@query
end
|
#query=(q) ⇒ Object
629
630
631
632
633
634
635
636
637
638
|
# File 'lib/arachni/uri.rb', line 629
# Sets the query and clears the memoized values derived from it.
#
# @param q [#to_s, nil] new query; anything whose String form is empty
#   clears the query.
# @return [String, nil] the assigned query.
def query=(q)
  @to_s = @without_query = @query_parameters = nil

  normalized = q.to_s
  @query = normalized.empty? ? nil : normalized
end
|
#query_parameters ⇒ Hash
Returns Extracted inputs from a URL query.
642
643
644
645
646
647
648
649
650
651
652
653
654
|
# File 'lib/arachni/uri.rb', line 642
# Splits the query on `&`, then each pair on its first `=`, and form-decodes
# both sides. The result is memoized in @query_parameters.
#
# @return [Hash<String, String>] decoded parameters; an empty Hash when
#   there is no query. A pair without `=` maps to an empty String.
def query_parameters
  raw = query
  return {} if raw.to_s.empty?

  @query_parameters ||= raw.split('&').each_with_object({}) do |pair, params|
    key, val = pair.split('=', 2)

    params[::URI.decode_www_form_component(key.to_s)] =
      ::URI.decode_www_form_component(val.to_s)
  end
end
|
#relative? ⇒ Boolean
437
438
439
|
# File 'lib/arachni/uri.rb', line 437
# @return [Boolean] the negation of #absolute?.
def relative?
!absolute?
end
|
#resource_extension ⇒ String?
Returns the extension of the URI's #resource_name, nil
if there is none.
549
550
551
552
553
554
|
# File 'lib/arachni/uri.rb', line 549
# Extension of the #resource_name, i.e. the text after its last dot.
#
# @return [String, nil] the extension; nil when the resource name has no
#   dot or when nothing follows the last dot (e.g. "file.").
def resource_extension
  name = resource_name.to_s

  last_dot = name.rindex('.')
  return unless last_dot

  extension = name[(last_dot + 1)..-1]
  # Splitting on '.' would drop the trailing empty field and report the text
  # *before* a trailing dot as the extension.
  return if extension.empty?

  @resource_extension ||= extension
end
|
#resource_name ⇒ String
Returns Name of the resource.
543
544
545
|
# File 'lib/arachni/uri.rb', line 543
# Name of the resource, i.e. the last `/`-separated segment of the path.
#
# @return [String, nil] the resource name; nil when the path is nil (which
#   the parser can produce for scheme-less URLs without a path) or has no
#   segments.
def resource_name
  # to_s guards against a nil path instead of raising NoMethodError.
  @resource_name ||= path.to_s.split('/').last
end
|
#rewrite(rules = Arachni::Options.scope.url_rewrites) ⇒ URI
607
608
609
610
611
612
613
614
615
616
617
|
# File 'lib/arachni/uri.rb', line 607
# Applies the first rule that changes the String form of this URL.
#
# @param rules [Enumerable<Array>] each entry is splatted into `String#gsub`
#   (e.g. pattern and substitution).
# @return [URI] a new URI built from the rewritten String, or a duplicate of
#   self when no rule changed anything.
def rewrite(rules = Arachni::Options.scope.url_rewrites)
  current = to_s

  rules.each do |gsub_args|
    candidate = current.gsub(*gsub_args)
    next if candidate == current

    return Arachni::URI(candidate)
  end

  dup
end
|
719
720
721
|
# File 'lib/arachni/uri.rb', line 719
# @return [String, nil] the scheme component (value of @scheme).
def scheme
@scheme
end
|
#scheme=(s) ⇒ Object
723
724
725
726
727
728
729
|
# File 'lib/arachni/uri.rb', line 723
# Sets the scheme and invalidates every memoized value that embeds it.
#
# @param s [String, nil] new scheme.
# @return [String, nil] the assigned scheme.
def scheme=(s)
  @up_to_port    = nil
  # #up_to_path is built on top of #up_to_port, so it embeds the scheme too.
  @up_to_path    = nil
  @without_query = nil
  @to_s          = nil

  @scheme = s
end
|
423
424
425
426
427
|
# File 'lib/arachni/uri.rb', line 423
# @return [Scope] the Scope for this URI, fetched from (or created through)
#   the `:scope` cache with self as the key.
def scope
  CACHE[:scope].fetch(self) do
    Scope.new(self)
  end
end
|
#seed_in_host? ⇒ Bool
Returns true
if the scan seed (Utilities.random_seed) is included in the
host, false
otherwise.
527
528
529
|
# File 'lib/arachni/uri.rb', line 527
# @return [Bool] true when the host contains `Utilities.random_seed`,
#   false otherwise (including when there is no host).
def seed_in_host?
  hostname = host.to_s
  hostname.include?(Utilities.random_seed)
end
|
#to_absolute(reference) ⇒ Object
531
532
533
|
# File 'lib/arachni/uri.rb', line 531
# Non-destructive counterpart of #to_absolute!: resolves a duplicate of
# self against `reference` and leaves the receiver untouched.
#
# @param reference [Arachni::URI, #to_s] URL to resolve against.
# @return [Object] whatever #to_absolute! returns for the duplicate.
def to_absolute(reference)
  copy = dup
  copy.to_absolute!(reference)
end
|
#to_absolute!(reference) ⇒ Arachni::URI
Converts self into an absolute URL using reference
to fill in the
missing data.
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
|
# File 'lib/arachni/uri.rb', line 449
# Converts self into an absolute URL using `reference` to fill in the
# missing data.
#
# Every component listed in TO_ABSOLUTE_PARTS that self lacks is copied from
# `reference`, then the path is merged with the reference's path, resolving
# `.` and `..` segments.
#
# @param reference [Arachni::URI, #to_s] absolute URL to resolve against;
#   anything that is not an instance of this class is parsed from its
#   String form first.
# @return [Arachni::URI] self
def to_absolute!(reference)
  reference = self.class.new(reference.to_s) unless reference.is_a?(self.class)

  # Inherit each component we are missing from the reference.
  TO_ABSOLUTE_PARTS.each do |part|
    next if send(part)

    ref_part = reference.send(part)
    next unless ref_part

    send("#{part}=", ref_part)
  end

  # to_s makes a nil path behave like an empty one instead of raising.
  base_path = reference.path.to_s.split(%r{/+}, -1)
  rel_path  = path.to_s.split(%r{/+}, -1)

  # Resolve the '..' segments of the base path itself.
  base_path << '' if base_path.last == '..'
  while (i = base_path.index('..'))
    base_path.slice!(i - 1, 2)
  end

  # A leading empty segment means our path is absolute: ignore the base.
  if (first = rel_path.first) && first.empty?
    base_path.clear
    rel_path.shift
  end

  rel_path.push('') if rel_path.last == '.' || rel_path.last == '..'
  rel_path.delete('.')

  # Collapse 'dir/..' pairs inside the relative path; leading '..' segments
  # are kept so that they can climb the base path below.
  tmp = []
  rel_path.each do |segment|
    if segment == '..' && !(tmp.empty? || tmp.last == '..')
      tmp.pop
    else
      tmp << segment
    end
  end

  add_trailer_slash = !tmp.empty?
  if base_path.empty?
    # Keep '/' for the root directory.
    base_path = ['']
  elsif add_trailer_slash
    # Drop the last base segment; the relative path replaces it.
    base_path.pop
  end

  while (segment = tmp.shift)
    if segment == '..'
      # Never climb above the root.
      base_path.pop if base_path.size > 1
    else
      # First real segment: append it and everything that follows.
      base_path << segment
      tmp.each { |t| base_path << t }
      add_trailer_slash = false
      break
    end
  end
  base_path.push('') if add_trailer_slash

  # Go through the setter so that memoized values derived from the path
  # (the cached String form in particular) do not go stale.
  self.path = base_path.join('/')

  self
end
|
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
|
# File 'lib/arachni/uri.rb', line 732
# Assembles the String form of the URL from its components (memoized in
# @to_s).
#
# The port is only rendered for `http`/`https` URLs whose port differs from
# the scheme's default (80/443); for any other scheme, or none, it is omitted.
#
# @return [String]
def to_s
  return @to_s if @to_s

  buffer = ''
  buffer << @scheme << '://' if @scheme
  buffer << @userinfo << '@' if @userinfo

  if @host
    buffer << @host

    default_port = { 'http' => 80, 'https' => 443 }[@scheme]
    buffer << ':' << @port.to_s if @port && default_port && @port != default_port
  end

  buffer << @path.to_s
  buffer << '?' << @query if @query

  @to_s = buffer
end
|
#up_to_path ⇒ String
Returns The URL up to its path component (no resource name, query, fragment, etc).
558
559
560
561
562
563
564
565
566
567
568
569
|
# File 'lib/arachni/uri.rb', line 558
# The URL up to its directory (memoized in @up_to_path).
#
# When the path has a file extension its last segment is treated as a
# resource and dropped; the result always ends with `/`.
#
# @return [String, nil] #up_to_port followed by the directory path, or nil
#   when there is no path.
def up_to_path
  return unless path
  return @up_to_path if @up_to_path

  directory = File.extname(path).empty? ? path.dup : File.dirname(path)
  directory << '/' unless directory.end_with?('/')

  @up_to_path = up_to_port + directory
end
|
#up_to_port ⇒ String
Returns Scheme, host & port only.
573
574
575
576
577
578
579
580
581
582
583
584
585
586
|
# File 'lib/arachni/uri.rb', line 573
# Scheme, host and port only (memoized in @up_to_port).
#
# The port is appended only for `http`/`https` URLs whose port differs from
# the scheme's default (80/443).
#
# @return [String]
def up_to_port
  return @up_to_port if @up_to_port

  origin       = "#{scheme}://#{host}"
  default_port = { 'http' => 80, 'https' => 443 }[scheme]

  origin << ":#{port}" if port && default_port && port != default_port

  @up_to_port = origin
end
|
669
670
671
|
# File 'lib/arachni/uri.rb', line 669
# @return [Object, nil] value of @user.
#   NOTE(review): presumably populated by reset_userpass from the userinfo;
#   that method is not visible here — confirm.
def user
@user
end
|
665
666
667
|
# File 'lib/arachni/uri.rb', line 665
# @return [String, nil] the userinfo component (value of @userinfo), i.e.
#   the text that #to_s renders before the `@`.
def userinfo
@userinfo
end
|
#userinfo=(ui) ⇒ Object
656
657
658
659
660
661
662
663
|
# File 'lib/arachni/uri.rb', line 656
# Sets the userinfo, clears the memoized String forms and finally calls
# reset_userpass (from an `ensure` clause, so it runs unconditionally).
#
# @param ui [String, nil] new userinfo.
# @return [String, nil] the assigned userinfo.
def userinfo=(ui)
  begin
    @to_s = @without_query = nil
    @userinfo = ui
  ensure
    reset_userpass
  end
end
|
#without_query ⇒ String
Returns the URL up to and including its resource component, i.e. without the query.
537
538
539
|
# File 'lib/arachni/uri.rb', line 537
# The String form of the URL with everything from the first `?` onwards
# removed (memoized in @without_query).
#
# @return [String]
def without_query
  @without_query ||= begin
    head, = to_s.split('?', 2)
    head.to_s
  end
end
|