From 300264fa6f0ef327a1851220fb02caab02d995ba Mon Sep 17 00:00:00 2001 From: Armin Date: Mon, 8 May 2017 09:49:44 +0200 Subject: [PATCH 1/8] gitignore idea --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 6368c33..6dbabec 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ Gemfile.lock .bundle vendor *.swp +.idea From 5fe6539e74be41c34337c6fc5171950c11493669 Mon Sep 17 00:00:00 2001 From: Armin Date: Mon, 8 May 2017 10:10:44 +0200 Subject: [PATCH 2/8] cleanup dirty import --- lib/logstash/filters/grok/timeout_enforcer.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/logstash/filters/grok/timeout_enforcer.rb b/lib/logstash/filters/grok/timeout_enforcer.rb index 505a2bf..a00e40d 100644 --- a/lib/logstash/filters/grok/timeout_enforcer.rb +++ b/lib/logstash/filters/grok/timeout_enforcer.rb @@ -11,7 +11,7 @@ def initialize(logger, timeout_nanos) # Stores running matches with their start time, this is used to cancel long running matches # Is a map of Thread => start_time @threads_to_start_time = {} - @state_lock = java.util.concurrent.locks.ReentrantLock.new + @state_lock = ReentrantLock.new end def grok_till_timeout(event, grok, field, value) From 54a03131d702c1ffcff2dd397902d3537fdcd209 Mon Sep 17 00:00:00 2001 From: Armin Date: Mon, 8 May 2017 10:16:35 +0200 Subject: [PATCH 3/8] #110 reproducing test --- spec/filters/grok_spec.rb | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/spec/filters/grok_spec.rb b/spec/filters/grok_spec.rb index 424d02b..57dc763 100644 --- a/spec/filters/grok_spec.rb +++ b/spec/filters/grok_spec.rb @@ -759,6 +759,33 @@ def pattern_path(path) end end + describe "patterns with file glob on directory that contains subdirectories" do + require 'tmpdir' + require 'tempfile' + + let(:tmpdir) { Dir.mktmpdir(nil, "/tmp") } + + before do + @file3 = Tempfile.new(['grok', '.pattern'], tmpdir) + @file3.write('WORD \b[0-1]\b') + @file3.close + Dir.mktmpdir(nil, tmpdir) + end + + let(:config) do + "filter { grok { patterns_dir => \"#{tmpdir}\" patterns_files_glob => \"*\" match => { \"message\" => \"%{WORD:word}\" } } }" + end + + sample("message" => '0') do + insist { subject.get("tags") } == nil + end + + after do + @file3.unlink + FileUtils.remove_entry tmpdir + end + end + describe "grok with nil coerced value" do config <<-CONFIG filter { From 20998cd0d638a48d5e1ccef694423af479deac84 Mon Sep 17 00:00:00 2001 From: Armin Date: Mon, 8 May 2017 10:30:17 +0200 Subject: [PATCH 4/8] #110 ignore globbed subdirectories --- lib/logstash/filters/grok.rb | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/lib/logstash/filters/grok.rb b/lib/logstash/filters/grok.rb index c81b10f..d8b7224 100644 --- a/lib/logstash/filters/grok.rb +++ b/lib/logstash/filters/grok.rb @@ -138,7 +138,7 @@ # `SYSLOGBASE` pattern which itself is defined by other patterns. # # Another option is to define patterns _inline_ in the filter using `pattern_definitions`. - # This is mostly for convenience and allows user to define a pattern which can be used just in that + # This is mostly for convenience and allows user to define a pattern which can be used just in that # filter. This newly defined patterns in `pattern_definitions` will not be available outside of that particular `grok` filter. # class LogStash::Filters::Grok < LogStash::Filters::Base @@ -168,7 +168,7 @@ class LogStash::Filters::Grok < LogStash::Filters::Base # necessarily need to define this yourself unless you are adding additional # patterns. You can point to multiple pattern directories using this setting. # Note that Grok will read all files in the directory matching the patterns_files_glob - # and assume it's a pattern file (including any tilde backup files). + # and assume it's a pattern file (including any tilde backup files). # [source,ruby] # patterns_dir => ["/opt/logstash/patterns", "/opt/logstash/extra_patterns"] # @@ -183,9 +183,9 @@ class LogStash::Filters::Grok < LogStash::Filters::Base # The patterns are loaded when the pipeline is created. config :patterns_dir, :validate => :array, :default => [] - # A hash of pattern-name and pattern tuples defining custom patterns to be used by - # the current filter. Patterns matching existing names will override the pre-existing - # definition. Think of this as inline patterns available just for this definition of + # A hash of pattern-name and pattern tuples defining custom patterns to be used by + # the current filter. Patterns matching existing names will override the pre-existing + # definition. Think of this as inline patterns available just for this definition of # grok config :pattern_definitions, :validate => :hash, :default => {} @@ -237,7 +237,7 @@ class LogStash::Filters::Grok < LogStash::Filters::Base config :overwrite, :validate => :array, :default => [] attr_reader :timeout_enforcer - + # Register default pattern paths @@patterns_path ||= Set.new @@patterns_path += [ @@ -298,7 +298,7 @@ def filter(event) @patterns.each do |field, groks| success = match(groks, field, event) - + if success matched = true break if @break_on_match @@ -337,7 +337,7 @@ def match(groks, field, event) @logger.warn("Grok regexp threw exception", :exception => e.message, :backtrace => e.backtrace, :class => e.class.name) return false end - + private def match_against_groks(groks, field, input, event) input = input.to_s @@ -351,7 +351,7 @@ def match_against_groks(groks, field, input, event) break if @break_on_match end end - + matched end @@ -390,7 +390,7 @@ def patterns_files_from_paths(paths, glob) Dir.glob(path).each do |file| @logger.trace("Grok loading patterns from file", :path => file) - patternfiles << file + patternfiles << file unless File.directory?(file) end end patternfiles From ff85d3b85b45cf9571c355705d63476f193d78d8 Mon Sep 17 00:00:00 2001 From: Armin Date: Mon, 8 May 2017 13:16:08 +0200 Subject: [PATCH 5/8] #110 use stud for temp directory --- logstash-filter-grok.gemspec | 1 + spec/filters/grok_spec.rb | 42 +++++++++++++++--------------------- 2 files changed, 18 insertions(+), 25 deletions(-) diff --git a/logstash-filter-grok.gemspec b/logstash-filter-grok.gemspec index 9b3db31..96d5ed9 100644 --- a/logstash-filter-grok.gemspec +++ b/logstash-filter-grok.gemspec @@ -23,6 +23,7 @@ Gem::Specification.new do |s| s.add_runtime_dependency "logstash-core-plugin-api", ">= 1.60", "<= 2.99" s.add_runtime_dependency 'jls-grok', '~> 0.11.3' + s.add_runtime_dependency 'stud', '~> 0.0.22' s.add_runtime_dependency 'logstash-patterns-core' s.add_development_dependency 'logstash-devutils' diff --git a/spec/filters/grok_spec.rb b/spec/filters/grok_spec.rb index 57dc763..ba21260 100644 --- a/spec/filters/grok_spec.rb +++ b/spec/filters/grok_spec.rb @@ -1,6 +1,6 @@ # encoding: utf-8 require "logstash/devutils/rspec/spec_helper" - +require "stud/temporary" module LogStash::Environment # running the grok code outside a logstash package means @@ -668,15 +668,13 @@ def pattern_path(path) end describe "patterns in the 'patterns/' dir override core patterns" do - require 'tmpdir' - require 'tempfile' let(:pattern_dir) { File.join(LogStash::Environment::LOGSTASH_HOME, "patterns") } let(:has_pattern_dir?) { Dir.exist?(pattern_dir) } before do FileUtils.mkdir(pattern_dir) unless has_pattern_dir? - @file = Tempfile.new('grok', pattern_dir) + @file = File.new(File.join(pattern_dir, 'grok.pattern'), 'w+') @file.write('WORD \b[2-5]\b') @file.close end @@ -690,25 +688,23 @@ def pattern_path(path) end after do - @file.unlink + File.unlink @file FileUtils.rm_rf(pattern_dir) if has_pattern_dir? end end describe "patterns in custom dir override those in 'patterns/' dir" do - require 'tmpdir' - require 'tempfile' - let(:tmpdir) { Dir.mktmpdir } + let(:tmpdir) { Stud::Temporary.directory } let(:pattern_dir) { File.join(LogStash::Environment::LOGSTASH_HOME, "patterns") } let(:has_pattern_dir?) { Dir.exist?(pattern_dir) } before do FileUtils.mkdir(pattern_dir) unless has_pattern_dir? - @file1 = Tempfile.new('grok', pattern_dir) + @file1 = File.new(File.join(pattern_dir, 'grok.pattern'), 'w+') @file1.write('WORD \b[2-5]\b') @file1.close - @file2 = Tempfile.new('grok', tmpdir) + @file2 = File.new(File.join(tmpdir, 'grok.pattern'), 'w+') @file2.write('WORD \b[0-1]\b') @file2.close end @@ -722,24 +718,22 @@ def pattern_path(path) end after do - @file1.unlink - @file2.unlink + File.unlink @file1 + File.unlink @file2 FileUtils.remove_entry tmpdir FileUtils.rm_rf(pattern_dir) unless has_pattern_dir? end end describe "patterns with file glob" do - require 'tmpdir' - require 'tempfile' - let(:tmpdir) { Dir.mktmpdir(nil, "/tmp") } + let(:tmpdir) { Stud::Temporary.directory } before do - @file3 = Tempfile.new(['grok', '.pattern'], tmpdir) + @file3 = File.new(File.join(tmpdir, 'grok.pattern'), 'w+') @file3.write('WORD \b[0-1]\b') @file3.close - @file4 = Tempfile.new(['grok', '.pattern.old'], tmpdir) + @file4 = File.new(File.join(tmpdir, 'grok.pattern.old'), 'w+') @file4.write('WORD \b[2-5]\b') @file4.close end @@ -753,23 +747,21 @@ def pattern_path(path) end after do - @file3.unlink - @file4.unlink + File.unlink @file3 + File.unlink @file4 FileUtils.remove_entry tmpdir end end describe "patterns with file glob on directory that contains subdirectories" do - require 'tmpdir' - require 'tempfile' - let(:tmpdir) { Dir.mktmpdir(nil, "/tmp") } + let(:tmpdir) { Stud::Temporary.directory } before do - @file3 = Tempfile.new(['grok', '.pattern'], tmpdir) + @file3 = File.new(File.join(tmpdir, 'grok.pattern'), 'w+') @file3.write('WORD \b[0-1]\b') @file3.close - Dir.mktmpdir(nil, tmpdir) + Dir.mkdir(File.join(tmpdir, "subdir")) end let(:config) do @@ -781,7 +773,7 @@ def pattern_path(path) end after do - @file3.unlink + File.unlink @file3 FileUtils.remove_entry tmpdir end end From de2b5b6234b13160e62e057d1be2a50e4b52746e Mon Sep 17 00:00:00 2001 From: Armin Date: Tue, 9 May 2017 23:24:38 +0200 Subject: [PATCH 6/8] #110 reverted trailing space cleanup --- lib/logstash/filters/grok.rb | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/lib/logstash/filters/grok.rb b/lib/logstash/filters/grok.rb index d8b7224..89e8a66 100644 --- a/lib/logstash/filters/grok.rb +++ b/lib/logstash/filters/grok.rb @@ -138,7 +138,7 @@ # `SYSLOGBASE` pattern which itself is defined by other patterns. # # Another option is to define patterns _inline_ in the filter using `pattern_definitions`. - # This is mostly for convenience and allows user to define a pattern which can be used just in that + # This is mostly for convenience and allows user to define a pattern which can be used just in that # filter. This newly defined patterns in `pattern_definitions` will not be available outside of that particular `grok` filter. # class LogStash::Filters::Grok < LogStash::Filters::Base @@ -168,7 +168,7 @@ class LogStash::Filters::Grok < LogStash::Filters::Base # necessarily need to define this yourself unless you are adding additional # patterns. You can point to multiple pattern directories using this setting. # Note that Grok will read all files in the directory matching the patterns_files_glob - # and assume it's a pattern file (including any tilde backup files). + # and assume it's a pattern file (including any tilde backup files). # [source,ruby] # patterns_dir => ["/opt/logstash/patterns", "/opt/logstash/extra_patterns"] # @@ -183,9 +183,9 @@ class LogStash::Filters::Grok < LogStash::Filters::Base # The patterns are loaded when the pipeline is created. config :patterns_dir, :validate => :array, :default => [] - # A hash of pattern-name and pattern tuples defining custom patterns to be used by - # the current filter. Patterns matching existing names will override the pre-existing - # definition. Think of this as inline patterns available just for this definition of + # A hash of pattern-name and pattern tuples defining custom patterns to be used by + # the current filter. Patterns matching existing names will override the pre-existing + # definition. Think of this as inline patterns available just for this definition of # grok config :pattern_definitions, :validate => :hash, :default => {} @@ -237,7 +237,7 @@ class LogStash::Filters::Grok < LogStash::Filters::Base config :overwrite, :validate => :array, :default => [] attr_reader :timeout_enforcer - + # Register default pattern paths @@patterns_path ||= Set.new @@patterns_path += [ @@ -298,7 +298,7 @@ def filter(event) @patterns.each do |field, groks| success = match(groks, field, event) - + if success matched = true break if @break_on_match @@ -337,7 +337,7 @@ def match(groks, field, event) @logger.warn("Grok regexp threw exception", :exception => e.message, :backtrace => e.backtrace, :class => e.class.name) return false end - + private def match_against_groks(groks, field, input, event) input = input.to_s @@ -351,7 +351,7 @@ def match_against_groks(groks, field, input, event) break if @break_on_match end end - + matched end From 9a78649e7f35debd7fb03698047568f69fbdc4f9 Mon Sep 17 00:00:00 2001 From: Armin Date: Tue, 9 May 2017 23:26:20 +0200 Subject: [PATCH 7/8] #110 log skipped folders --- lib/logstash/filters/grok.rb | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/logstash/filters/grok.rb b/lib/logstash/filters/grok.rb index 89e8a66..e43079f 100644 --- a/lib/logstash/filters/grok.rb +++ b/lib/logstash/filters/grok.rb @@ -390,7 +390,11 @@ def patterns_files_from_paths(paths, glob) Dir.glob(path).each do |file| @logger.trace("Grok loading patterns from file", :path => file) - patternfiles << file unless File.directory?(file) + if File.directory?(file) + @logger.debug("Skipping path because it is a directory", :path => file) + else + patternfiles << file + end end end patternfiles From 44afc8b8e85c4f0bc949b69536d5867cd19b8e56 Mon Sep 17 00:00:00 2001 From: Armin Date: Tue, 9 May 2017 23:51:17 +0200 Subject: [PATCH 8/8] #110 version + changelog --- CHANGELOG.md | 3 +++ logstash-filter-grok.gemspec | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3e083a7..d279b6d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +## 3.4.1 + - Fix subdirectories in a pattern folder causing an exception in some cases + ## 3.4.0 - Add option to define patterns inline in the filter using `pattern_definitions` configuration. diff --git a/logstash-filter-grok.gemspec b/logstash-filter-grok.gemspec index 96d5ed9..7a3a456 100644 --- a/logstash-filter-grok.gemspec +++ b/logstash-filter-grok.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-filter-grok' - s.version = '3.4.0' + s.version = '3.4.1' s.licenses = ['Apache License (2.0)'] s.summary = "Parse arbitrary text and structure it." s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"