Class: RDF::NTriples::Reader

Inherits:
Reader
  • Object
show all
Includes:
Util::Logger
Defined in:
lib/rdf/ntriples/reader.rb

Overview

N-Triples parser.

** RDF-star

Supports statements as resources using <<(s p o)>>.

Examples:

Obtaining an NTriples reader class

RDF::Reader.for(:ntriples)     #=> RDF::NTriples::Reader
RDF::Reader.for("etc/doap.nt")
RDF::Reader.for(file_name:      "etc/doap.nt")
RDF::Reader.for(file_extension: "nt")
RDF::Reader.for(content_type:   "application/n-triples")

Parsing RDF statements from an NTriples file

RDF::NTriples::Reader.open("etc/doap.nt") do |reader|
  reader.each_statement do |statement|
    puts statement.inspect
  end
end

Parsing RDF statements from an NTriples string

data = StringIO.new(File.read("etc/doap.nt"))
RDF::NTriples::Reader.new(data) do |reader|
  reader.each_statement do |statement|
    puts statement.inspect
  end
end

See Also:

Direct Known Subclasses

RDF::NQuads::Reader

Constant Summary collapse

ESCAPE_CHARS =
["\b", "\f", "\t", "\n", "\r", "\"", "'", "\\"].freeze
UCHAR4 =
/\\u([0-9A-Fa-f]{4,4})/.freeze
UCHAR8 =
/\\U([0-9A-Fa-f]{8,8})/.freeze
UCHAR =
Regexp.union(UCHAR4, UCHAR8).freeze
U_CHARS1 =

Terminals from rdf-turtle.

Unicode regular expressions.

Regexp.compile(<<-EOS.gsub(/\s+/, ''))
  [\\u00C0-\\u00D6]|[\\u00D8-\\u00F6]|[\\u00F8-\\u02FF]|
  [\\u0370-\\u037D]|[\\u037F-\\u1FFF]|[\\u200C-\\u200D]|
  [\\u2070-\\u218F]|[\\u2C00-\\u2FEF]|[\\u3001-\\uD7FF]|
  [\\uF900-\\uFDCF]|[\\uFDF0-\\uFFFD]|[\\u{10000}-\\u{EFFFF}]
EOS
U_CHARS2 =
Regexp.compile("\\u00B7|[\\u0300-\\u036F]|[\\u203F-\\u2040]").freeze
IRI_RANGE =
Regexp.compile("[[^<>\"{}\|\^`\\\\]&&[^\\x00-\\x20]]").freeze
PN_CHARS_BASE =
/[A-Z]|[a-z]|#{U_CHARS1}/.freeze
PN_CHARS_U =
/_|#{PN_CHARS_BASE}/.freeze
PN_CHARS =
/-|[0-9]|#{PN_CHARS_U}|#{U_CHARS2}/.freeze
ECHAR =
/\\[tbnrf"'\\]/.freeze
IRIREF =
/<((?:#{IRI_RANGE}|#{UCHAR})*)>/.freeze
BLANK_NODE_LABEL =
/_:((?:[0-9]|#{PN_CHARS_U})(?:(?:#{PN_CHARS}|\.)*#{PN_CHARS})?)/.freeze
LANG_DIR =
/@([a-zA-Z]+(?:-[a-zA-Z0-9]+)*(?:--[a-zA-Z]+)?)/.freeze
STRING_LITERAL_QUOTE =
/"((?:[^\"\\\n\r]|#{ECHAR}|#{UCHAR})*)"/.freeze
TT_START =
/^<<\(/.freeze
TT_END =
/^\s*\)>>/.freeze
QT_START =

DEPRECATED

/^<</.freeze
QT_END =

DEPRECATED

/^\s*>>/.freeze
COMMENT =
/^#\s*(.*)$/.freeze
NODEID =
/^#{BLANK_NODE_LABEL}/.freeze
URIREF =
/^#{IRIREF}/.freeze
LITERAL_PLAIN =
/^#{STRING_LITERAL_QUOTE}/.freeze
LITERAL_WITH_LANGUAGE =
/^#{STRING_LITERAL_QUOTE}#{LANG_DIR}/.freeze
LITERAL_WITH_DATATYPE =
/^#{STRING_LITERAL_QUOTE}\^\^#{IRIREF}/.freeze
DATATYPE_URI =
/^\^\^#{IRIREF}/.freeze
LITERAL =
Regexp.union(LITERAL_WITH_LANGUAGE, LITERAL_WITH_DATATYPE, LITERAL_PLAIN).freeze
SUBJECT =
Regexp.union(URIREF, NODEID).freeze
PREDICATE =
Regexp.union(URIREF).freeze
OBJECT =
Regexp.union(URIREF, NODEID, LITERAL).freeze
END_OF_STATEMENT =
/^\s*\.\s*(?:#.*)?$/.freeze
LANGTAG =

LANGTAG is deprecated

LANG_DIR
ESCAPE_CHARS_ESCAPED =

cache constants to optimize escaping the escape chars in self.unescape

{
  "\\b"   =>  "\b",
  "\\f"   =>  "\f",
  "\\t"   =>  "\t",
  "\\n"   =>  "\n",
  "\\r"   =>  "\r",
  "\\\""  =>  "\"",
  "\\'"   =>  "'",
  "\\\\"  =>  "\\"
} .freeze
ESCAPE_CHARS_ESCAPED_REGEXP =
Regexp.union(
  ESCAPE_CHARS_ESCAPED.keys
).freeze

Constants included from Util::Logger

Util::Logger::IOWrapper

Instance Attribute Summary

Attributes inherited from Reader

#options

Class Method Summary collapse

Instance Method Summary collapse

Methods included from Util::Logger

#log_debug, #log_depth, #log_error, #log_fatal, #log_info, #log_recover, #log_recovering?, #log_statistics, #log_warn, #logger

Methods inherited from Reader

#base_uri, #canonicalize?, #close, each, #each_pg_statement, #each_statement, #each_triple, #encoding, #fail_object, #fail_predicate, #fail_subject, for, format, #initialize, #intern?, #lineno, open, options, #prefix, #prefixes, #prefixes=, #read_statement, #rewind, #to_sym, to_sym, #valid?, #validate?

Methods included from Util::Aliasing::LateBound

#alias_method

Methods included from Enumerable

#canonicalize, #canonicalize!, #dump, #each_graph, #each_object, #each_predicate, #each_quad, #each_statement, #each_subject, #each_term, #each_triple, #enum_graph, #enum_object, #enum_predicate, #enum_quad, #enum_statement, #enum_subject, #enum_term, #enum_triple, #graph?, #graph_names, #invalid?, #method_missing, #object?, #objects, #predicate?, #predicates, #project_graph, #quad?, #quads, #respond_to_missing?, #statement?, #statements, #subject?, #subjects, #supports?, #term?, #terms, #to_a, #to_h, #to_set, #triple?, #triples, #valid?, #validate!

Methods included from Countable

#count, #empty?

Methods included from Readable

#readable?

Constructor Details

This class inherits a constructor from RDF::Reader

Dynamic Method Handling

This class handles dynamic methods through the method_missing method in the class RDF::Enumerable

Class Method Details

.parse_literal(input, **options) ⇒ RDF::Term, RDF::Literal

Reconstructs an RDF value from its serialized N-Triples representation.

Parameters:

Options Hash (**options):

Returns:



154
155
156
157
158
159
160
161
162
163
# File 'lib/rdf/ntriples/reader.rb', line 154

# Parses a serialized N-Triples literal into an RDF::Literal.
#
# The literal patterns are tried in order: language-tagged, datatyped, plain.
# Capture group 1 is the (still escaped) lexical form; group 4 is the language
# tag or datatype IRI (groups 2 and 3 belong to the UCHAR alternatives that are
# embedded in the quoted-string pattern).
#
# @param input [String] serialized literal
# @param options [Hash] accepted for interface compatibility; not used here
# @return [RDF::Literal, nil] nil when input matches none of the patterns
def self.parse_literal(input, **options)
  case input
  when LITERAL_WITH_LANGUAGE
    lexical, language = Regexp.last_match.values_at(1, 4)
    RDF::Literal.new(unescape(lexical), language: language)
  when LITERAL_WITH_DATATYPE
    lexical, datatype = Regexp.last_match.values_at(1, 4)
    RDF::Literal.new(unescape(lexical), datatype: datatype)
  when LITERAL_PLAIN
    RDF::Literal.new(unescape(Regexp.last_match(1)))
  end
end

.parse_node(input, **options) ⇒ RDF::Term, RDF::Node

Reconstructs an RDF value from its serialized N-Triples representation.

Parameters:

Options Hash (**options):

Returns:



135
136
137
138
139
# File 'lib/rdf/ntriples/reader.rb', line 135

# Parses a serialized blank node (`_:label`) into an RDF::Node.
#
# @param input [String] serialized term
# @param options [Hash] accepted for interface compatibility; not used here
# @return [RDF::Node, nil] nil when input does not start with a blank node label
def self.parse_node(input, **options)
  return unless input =~ NODEID

  RDF::Node.new(Regexp.last_match(1))
end

.parse_object(input, **options) ⇒ RDF::Term

Reconstructs an RDF value from its serialized N-Triples representation.

Parameters:

Options Hash (**options):

Returns:



128
129
130
# File 'lib/rdf/ntriples/reader.rb', line 128

# Parses a serialized term that may appear in object position.
#
# Tries, in order, the URI parser, the blank node parser and the literal
# parser, returning the first truthy result.
#
# @param input [String] serialized term
# @param options [Hash] forwarded unchanged to each parser
# @return [RDF::Term, nil] the result of the last parser tried when none succeeds
def self.parse_object(input, **options)
  term = parse_uri(input, **options)
  term ||= parse_node(input, **options)
  term || parse_literal(input, **options)
end

.parse_predicate(input, **options) ⇒ RDF::Term, RDF::URI

Reconstructs an RDF value from its serialized N-Triples representation.

Parameters:

Options Hash (**options):

Returns:



122
123
124
# File 'lib/rdf/ntriples/reader.rb', line 122

# Parses a serialized term that may appear in predicate position.
#
# Delegates to parse_uri and always requests the interned form
# (`intern: true`). The `options` are accepted but not forwarded.
#
# @param input [String] serialized term
# @param options [Hash] accepted for interface compatibility; not used here
# @return [RDF::URI, nil] whatever parse_uri returns for this input
def self.parse_predicate(input, **options)
  parse_uri(input, intern: true)
end

.parse_subject(input, **options) ⇒ RDF::Term, RDF::Resource

Reconstructs an RDF value from its serialized N-Triples representation.

Parameters:

Options Hash (**options):

Returns:



115
116
117
# File 'lib/rdf/ntriples/reader.rb', line 115

# Parses a serialized term that may appear in subject position.
#
# Tries the URI parser first and falls back to the blank node parser.
#
# @param input [String] serialized term
# @param options [Hash] forwarded unchanged to each parser
# @return [RDF::Resource, nil] the blank node parser's result when the URI parser fails
def self.parse_subject(input, **options)
  resource = parse_uri(input, **options)
  resource || parse_node(input, **options)
end

.parse_uri(input, intern: false, **options) ⇒ RDF::Term, RDF::URI

Reconstructs an RDF value from its serialized N-Triples representation.

Parameters:

  • input (String)
  • options ({Symbol => Object})
  • intern (Boolean) (defaults to: false)

    (false) Use Interned URI

Options Hash (**options):

Returns:



145
146
147
148
149
# File 'lib/rdf/ntriples/reader.rb', line 145

# Parses a serialized IRI reference (`<...>`) into an RDF::URI.
#
# @param input [String] serialized term
# @param intern [Boolean] when truthy, build the URI through `RDF::URI.intern`
#   instead of `RDF::URI.new`
# @param options [Hash] accepted for interface compatibility; not used here
# @return [RDF::URI, nil] nil when input does not start with an IRI reference
def self.parse_uri(input, intern: false, **options)
  return unless input =~ URIREF

  constructor = intern ? :intern : :new
  RDF::URI.send(constructor, unescape(Regexp.last_match(1)))
end

.unescape(string) ⇒ String



186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
# File 'lib/rdf/ntriples/reader.rb', line 186

# Replaces N-Triples escape sequences in `string` with the characters they
# denote.
#
# Handles the single-character escapes listed in ESCAPE_CHARS_ESCAPED
# (`\t`, `\n`, `\"`, `\\`, ...) and numeric escapes (`\uXXXX`, `\UXXXXXXXX`).
# A backslash that does not start a recognized escape is copied through
# unchanged.
#
# @param string [String] escaped text; it is never mutated
# @return [String] a new UTF-8 string with all escapes resolved
def self.unescape(string)
  # Only copy the input when it has to be re-tagged as UTF-8; an input that is
  # already UTF-8 is scanned in place.
  string = string.dup.force_encoding(Encoding::UTF_8) unless string.encoding == Encoding::UTF_8
  scanner = StringScanner.new(string)

  # Explicitly mutable buffer: appending to a `""` literal raises FrozenError
  # under `frozen_string_literal: true` and emits a deprecation warning on
  # Ruby 3.4+ (chilled string literals).
  buffer = String.new(encoding: Encoding::UTF_8)

  until scanner.eos?
    buffer << if scanner.scan(ESCAPE_CHARS_ESCAPED_REGEXP)
      ESCAPE_CHARS_ESCAPED[scanner.matched]
    elsif scanner.scan(UCHAR)
      # Group 1 holds the 4-digit form, group 2 the 8-digit form.
      [(scanner[1] || scanner[2]).hex].pack('U')
    else
      # Not an escape: copy one character through.
      scanner.getch
    end
  end

  buffer
end

.unserialize(input, **options) ⇒ RDF::Term

Reconstructs an RDF value from its serialized N-Triples representation.

Parameters:

Options Hash (**options):

Returns:



105
106
107
108
109
110
# File 'lib/rdf/ntriples/reader.rb', line 105

# Reconstructs an RDF term from its serialized N-Triples form.
#
# A nil input yields nil. Anything else is handed to a fresh reader, created
# with an empty-array logger plus the caller's options, and the reader's
# `read_value` result is returned.
#
# @param input [String, nil] serialized term
# @param options [Hash] forwarded to the reader constructor
# @return [RDF::Term, nil]
def self.unserialize(input, **options)
  return nil if input.nil?

  new(input, logger: [], **options).read_value
end

Instance Method Details

#read_comment ⇒ Boolean

Returns:

  • (Boolean)

See Also:



280
281
282
# File 'lib/rdf/ntriples/reader.rb', line 280

# Tries to read a comment by delegating to `match` with the COMMENT pattern
# (a `#` followed by the rest of the line).
#
# NOTE(review): `match` is defined outside this excerpt; presumably it tests
# the pattern against the current line and consumes what it matched — confirm.
#
# @return the result of `match(COMMENT)` (documented as Boolean)
def read_comment
  match(COMMENT)
end

#read_eos ⇒ Boolean

Returns:

  • (Boolean)

See Also:



339
340
341
# File 'lib/rdf/ntriples/reader.rb', line 339

# Tries to read the end-of-statement marker by delegating to `match` with the
# END_OF_STATEMENT pattern (optional whitespace, a `.`, and an optional
# trailing `#` comment).
#
# NOTE(review): `match` is defined outside this excerpt; presumably it tests
# the pattern against the current line and consumes what it matched — confirm.
#
# @return the result of `match(END_OF_STATEMENT)` (documented as Boolean)
def read_eos
  match(END_OF_STATEMENT)
end

#read_literal ⇒ RDF::Literal

Returns:

See Also:



313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
# File 'lib/rdf/ntriples/reader.rb', line 313

# Reads a literal from the current input position.
#
# After the quoted string, an optional language tag (with optional `--dir`
# base direction) or a `^^<datatype>` suffix is read. A base direction is only
# accepted when the `:rdfstar` option is set; otherwise the literal is reported
# as invalid.
#
# @return [RDF::Literal, nil] nil when no quoted string is found; when the
#   literal is invalid, the result of `log_error` (which is asked to raise
#   RDF::ReaderError)
def read_literal
  literal_str = match(LITERAL_PLAIN)
  return unless literal_str

  literal_str = self.class.unescape(literal_str)
  literal =
    if lang_dir = match(LANG_DIR)
      language, direction = lang_dir.split('--')
      raise ArgumentError if direction && !@options[:rdfstar]
      RDF::Literal.new(literal_str, language: language, direction: direction)
    elsif match(/^(\^\^)/) # FIXME
      RDF::Literal.new(literal_str, datatype: read_uriref || fail_object)
    else
      RDF::Literal.new(literal_str) # plain string literal
    end
  literal.validate!     if validate?
  literal.canonicalize! if canonicalize?
  literal
rescue ArgumentError
  # Report the offending text. The token must be the text itself; it was
  # previously the literal two-character string "#v".
  found = lang_dir ? "#{literal_str}@#{lang_dir}" : literal_str.to_s
  log_error("Invalid Literal (found: \"#{found}\")", lineno: lineno, token: found, exception: RDF::ReaderError)
end

#read_node ⇒ RDF::Node

Returns:

See Also:



303
304
305
306
307
308
# File 'lib/rdf/ntriples/reader.rb', line 303

# Reads a blank node label from the current input position.
#
# Nodes are cached per label in `@nodes`, so repeated occurrences of the same
# label within this reader return the same RDF::Node instance.
#
# @return [RDF::Node, nil] nil when no blank node label is found
def read_node
  label = match(NODEID)
  return unless label

  @nodes ||= {}
  @nodes[label] ||= RDF::Node.new(label)
end

#read_quotedTriple ⇒ RDF::Statement

Deprecated.

Quoted triples are now deprecated (not supported when validating)

Returns:



263
264
265
266
267
268
269
270
271
272
273
274
275
# File 'lib/rdf/ntriples/reader.rb', line 263

# Reads a deprecated RDF-star quoted triple (`<< s p o >>`).
#
# Only attempted when the `:rdfstar` option is set, the input does not start a
# triple term (TT_START), it does start with QT_START, and the reader is not
# validating. Emits a deprecation warning each time one is read.
#
# @deprecated Quoted triples are deprecated (not supported when validating)
# @return [RDF::Statement, nil] a statement flagged `quoted: true`, or nil
#   when the preconditions above are not met
def read_quotedTriple
  return unless @options[:rdfstar] && !match(TT_START) && match(QT_START) && !validate?

  warn "[DEPRECATION] RDF-star quoted triples are deprecated and will be removed in a future version.\n" +
       "Called from #{Gem.location_of_caller.join(':')}"
  subj = read_uriref || read_node || read_quotedTriple || fail_subject
  pred = read_uriref(intern: true) || fail_predicate
  obj  = read_uriref || read_node || read_literal || read_quotedTriple || fail_object
  unless match(QT_END)
    log_error("Expected end of statement (found: #{current_line.inspect})", lineno: lineno, exception: RDF::ReaderError)
  end
  RDF::Statement.new(subj, pred, obj, quoted: true)
end

#read_triple ⇒ Array



223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
# File 'lib/rdf/ntriples/reader.rb', line 223

# Reads input line by line until a line yields a triple, then returns its
# three terms.
#
# Lines for which `blank?` or `read_comment` is truthy are skipped. For any
# other line a subject, predicate and object are read in that order; each
# `fail_*` helper is the last alternative when no reader matches
# (NOTE(review): the helpers are defined outside this excerpt and presumably
# raise RDF::ReaderError — confirm).
#
# The end-of-statement check only runs when validating.
#
# @return [Array] `[subject, predicate, object]`
# @raise [EOFError] at end of input (per the inline note on `readline`)
# @raise [RDF::ReaderError] re-raised after `@line` has been reset to the line
#   as it was before parsing started
def read_triple
  loop do
    readline.strip! # EOFError thrown on end of input
    line = @line    # for backtracking input in case of parse error

    begin
      unless blank? || read_comment
        subject   = read_uriref || read_node || read_quotedTriple || fail_subject
        predicate = read_uriref(intern: true) || fail_predicate
        object    = read_uriref || read_node || read_literal || read_tripleTerm || read_quotedTriple || fail_object

        # A missing terminator is only reported in validating mode.
        if validate? && !read_eos
          log_error("Expected end of statement (found: #{current_line.inspect})", lineno: lineno, exception: RDF::ReaderError)
        end
        return [subject, predicate, object]
      end
    rescue RDF::ReaderError => e
      @line = line  # this allows #read_value to work
      raise e
    end
  end
end

#read_tripleTerm ⇒ RDF::Statement

Returns:



248
249
250
251
252
253
254
255
256
257
258
# File 'lib/rdf/ntriples/reader.rb', line 248

# Reads an RDF-star triple term (`<<( s p o )>>`).
#
# Only attempted when the `:rdfstar` option is set and the input starts with
# TT_START. The object position may itself be a nested triple term.
#
# @return [RDF::Statement, nil] a statement flagged `tripleTerm: true`, or nil
#   when the option is off or no triple term starts here
def read_tripleTerm
  return unless @options[:rdfstar] && match(TT_START)

  subj = read_uriref || read_node || fail_subject
  pred = read_uriref(intern: true) || fail_predicate
  obj  = read_uriref || read_node || read_literal || read_tripleTerm || fail_object
  unless match(TT_END)
    log_error("Expected end of statement (found: #{current_line.inspect})", lineno: lineno, exception: RDF::ReaderError)
  end
  RDF::Statement.new(subj, pred, obj, tripleTerm: true)
end

#read_uriref(intern: false, **options) ⇒ RDF::URI

Parameters:

  • intern (Boolean) (defaults to: false)

    (false) Use Interned URI

Returns:

See Also:



288
289
290
291
292
293
294
295
296
297
298
# File 'lib/rdf/ntriples/reader.rb', line 288

# Reads an IRI reference (`<...>`) from the current input position.
#
# The URI is built through `RDF::URI.intern` only when both the reader-level
# `intern?` setting and the `intern` argument are truthy; otherwise through
# `RDF::URI.new`. It is validated and/or canonicalized when the reader is
# configured to do so.
#
# @param intern [Boolean] request an interned URI
# @param options [Hash] accepted for interface compatibility; not used here
# @return [RDF::URI, nil] nil when no IRI reference is found; when an
#   ArgumentError is raised while building or checking the URI, the result of
#   `log_error` (which is asked to raise RDF::ReaderError)
def read_uriref(intern: false, **options)
  iri_text = match(URIREF)
  return unless iri_text

  iri_text = self.class.unescape(iri_text)
  constructor = intern? && intern ? :intern : :new
  iri = RDF::URI.send(constructor, iri_text)
  iri.validate!     if validate?
  iri.canonicalize! if canonicalize?
  iri
rescue ArgumentError
  log_error("Invalid URI (found: \"<#{iri_text}>\")", lineno: lineno, token: "<#{iri_text}>", exception: RDF::ReaderError)
end

#read_value ⇒ RDF::Term

Returns:



210
211
212
213
214
215
216
217
218
# File 'lib/rdf/ntriples/reader.rb', line 210

# Reads the next value from the input.
#
# First tries to read a complete statement. If that raises RDF::ReaderError,
# falls back to reading a single term (URI, blank node, literal, triple term
# or quoted triple, in that order) and then calls `log_recover`.
#
# @return [RDF::Term] the statement read, or the single term from the fallback
#   (nil when no term reader matches)
def read_value
  read_statement
rescue RDF::ReaderError
  term = read_uriref || read_node || read_literal || read_tripleTerm || read_quotedTriple
  log_recover
  term
end