/* Copyright 2019 Google Inc.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

	http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package gonids

import (
	"fmt"
	"os"
)

var fuzzInit = false

// FuzzParseRule is used by OSS-Fuzz to fuzz the library.
func FuzzParseRule(data []byte) int {
	if !fuzzInit {
		fmt.Printf("GODEBUG=%s", os.Getenv("GODEBUG"))
		fuzzInit = true
	}
	r, err := ParseRule(string(data))
	if err != nil {
		// Handle parse error.
		return 0
	}
	r.OptimizeHTTP()
	_ = r.String()
	return 1
}
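// The snippet below is a minimal, hand-rolled smoke test for the fuzz entry
// point; the fuzzSmokeTest helper and its sample rule are illustrative
// assumptions, not part of the OSS-Fuzz harness.
func fuzzSmokeTest() int {
	sample := []byte(`alert http $HOME_NET any -> $EXTERNAL_NET any (msg:"smoke"; content:"foo"; sid:1; rev:1;)`)
	// FuzzParseRule returns 1 when the input parsed and 0 when it was rejected.
	return FuzzParseRule(sample)
}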
/* Copyright 2016 Google Inc.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

	http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package gonids

import (
	"errors"
	"fmt"
	"strings"
	"unicode"
	"unicode/utf8"
)

// item represents a token or text string returned from the lexer.
type item struct {
	typ   itemType // The type of this item.
	value string   // The value of this item.
}

// String returns a string describing an item.
func (i item) String() string {
	switch i.typ {
	case itemEOF:
		return "EOF"
	case itemError:
		return i.value
	}
	return fmt.Sprintf("%q: %s", i.typ, i.value)
}

type itemType int

const (
	itemError itemType = iota
	itemComment
	itemAction
	itemProtocol
	itemSourceAddress
	itemSourcePort
	itemDirection
	itemDestinationAddress
	itemDestinationPort
	itemNot
	itemOptionKey
	itemOptionValue
	itemOptionNoValue
	itemOptionValueString
	itemEOR
	itemEOF
)

const eof = -1

// stateFn represents the state of the scanner as a function that returns the next state.
type stateFn func(*lexer) stateFn

// lexer holds the state of the scanner.
type lexer struct {
	input string    // the string being scanned
	state stateFn   // the next lexing function to enter
	pos   int       // current position in the input
	start int       // start position of this item
	width int       // width of last rune read from input
	items chan item // channel of scanned items
}

// next returns the next rune in the input.
func (l *lexer) next() rune {
	if l.pos >= len(l.input) {
		l.width = 0
		return eof
	}
	r, w := utf8.DecodeRuneInString(l.input[l.pos:])
	if r == utf8.RuneError && w == 1 {
		// The whole input string has been validated at init.
		panic("invalid UTF-8 character")
	}
	l.width = w
	l.pos += l.width
	return r
}

// skipNext skips over the next rune in the input.
func (l *lexer) skipNext() {
	l.next()
	l.ignore()
}

// len returns the current length of the item in processing.
func (l *lexer) len() int {
	if l.pos >= len(l.input) {
		return -1
	}
	return l.pos - l.start
}

// backup steps back one rune. Can only be called once per call of next.
func (l *lexer) backup() {
	if l.width == -1 {
		panic("double backup")
	}
	l.pos -= l.width
	l.width = -1
}

// emit passes an item back to the client; trimSpaces can be used to trim spaces around the item
// value before emitting.
func (l *lexer) emit(t itemType, trimSpaces bool) {
	input := l.input[l.start:l.pos]
	if trimSpaces {
		input = strings.TrimSpace(input)
	}
	// This is a bit of a hack. We lex until `;` now so we end up with extra `"`.
	input = strings.TrimSuffix(input, `"`)
	l.items <- item{t, input}
	l.start = l.pos
}

// ignore skips over the pending input before this point.
func (l *lexer) ignore() {
	l.start = l.pos
}

// acceptRun consumes a run of runes from the valid set.
func (l *lexer) acceptRun(valid string) {
	for strings.ContainsRune(valid, l.next()) {
	}
	l.backup()
}

// ignoreSpaces ignores all spaces at the start of the input.
func (l *lexer) ignoreSpaces() {
	for unicode.IsSpace(l.next()) {
		l.ignore()
	}
	l.backup()
}

// errorf returns an error token and terminates the scan by passing
// back a nil pointer that will be the next state, terminating l.nextItem.
func (l *lexer) errorf(format string, args ...interface{}) stateFn {
	l.items <- item{itemError, fmt.Sprintf(format, args...)}
	return nil
}

func (l *lexer) unexpectedEOF() stateFn {
	return nil
}

// nextItem returns the next item from the input.
func (l *lexer) nextItem() item {
	r, more := <-l.items
	if !more {
		return item{itemError, "unexpected EOF"}
	}
	return r
}

// lex initializes and runs a new scanner for the input string.
func lex(input string) (*lexer, error) {
	if !utf8.ValidString(input) {
		return nil, errors.New("input is not a valid UTF-8 string")
	}
	l := &lexer{
		input: input,
		items: make(chan item, 0x1000),
	}
	go l.run()
	return l, nil
}

// TODO: handle error and corner case in all states.

// run runs the state machine for the lexer.
func (l *lexer) run() {
	for l.state = lexRule; l.state != nil; {
		l.state = l.state(l)
	}
	close(l.items)
}

func (l *lexer) close() {
	// Reads all items until channel close to be sure goroutine has ended.
	more := true
	for more {
		_, more = <-l.items
	}
}

// lexRule starts the scan of a rule.
func lexRule(l *lexer) stateFn {
	r := l.next()
	switch {
	case unicode.IsSpace(r):
		l.ignore()
		return lexRule
	case r == '#':
		return lexComment
	case r == eof:
		l.emit(itemEOF, false)
		return nil
	}
	return lexAction
}

// lexComment consumes a commented rule.
func lexComment(l *lexer) stateFn {
	// Ignore leading spaces and #.
	l.ignore()
	for {
		r := l.next()
		if unicode.IsSpace(r) || r == '#' {
			l.ignore()
		} else {
			break
		}
	}
	l.backup()
	for {
		switch l.next() {
		case '\r', '\n':
			l.emit(itemComment, false)
			return lexRule
		case eof:
			l.backup()
			l.emit(itemComment, false)
			return lexRule
		}
	}
}

// lexAction consumes a rule action.
func lexAction(l *lexer) stateFn {
	for {
		r := l.next()
		switch {
		case r == ' ':
			l.emit(itemAction, true)
			return lexProtocol
		case !unicode.IsLetter(r):
			return l.errorf("invalid character %q for a rule action", r)
		}
	}
}

// lexProtocol consumes a rule protocol.
func lexProtocol(l *lexer) stateFn {
	l.ignoreSpaces()
	for {
		r := l.next()
		switch {
		case r == ' ':
			l.emit(itemProtocol, true)
			return lexSourceAddress
		case !(unicode.IsLetter(r) || unicode.IsDigit(r) || (l.len() > 0 && r == '-')):
			return l.errorf("invalid character %q for a rule protocol", r)
		}
	}
}

// lexSourceAddress consumes a source address.
func lexSourceAddress(l *lexer) stateFn {
	l.ignoreSpaces()
	for {
		switch l.next() {
		case ' ':
			l.emit(itemSourceAddress, true)
			return lexSourcePort
		case eof:
			return l.unexpectedEOF()
		}
	}
}

// lexSourcePort consumes a source port.
func lexSourcePort(l *lexer) stateFn {
	l.ignoreSpaces()
	for {
		switch l.next() {
		case ' ':
			l.emit(itemSourcePort, true)
			return lexDirection
		case eof:
			return l.unexpectedEOF()
		}
	}
}

// lexDirection consumes a rule direction.
func lexDirection(l *lexer) stateFn {
	l.ignoreSpaces()
	l.acceptRun("<->")
	if r := l.next(); r != ' ' {
		return l.errorf("invalid character %q for a rule direction", r)
	}
	l.emit(itemDirection, true)
	return lexDestinationAddress
}

// lexDestinationAddress consumes a destination address.
func lexDestinationAddress(l *lexer) stateFn {
	l.ignoreSpaces()
	for {
		switch l.next() {
		case ' ':
			l.emit(itemDestinationAddress, true)
			return lexDestinationPort
		case eof:
			return l.unexpectedEOF()
		}
	}
}

// lexDestinationPort consumes a destination port.
func lexDestinationPort(l *lexer) stateFn {
	for {
		switch l.next() {
		case '(':
			l.backup()
			l.emit(itemDestinationPort, true)
			l.skipNext()
			return lexOptionKey
		case eof:
			return l.unexpectedEOF()
		}
	}
}

// lexOptionKey scans a key from the rule options.
func lexOptionKey(l *lexer) stateFn {
	for {
		switch l.next() {
		case ':':
			l.backup()
			l.emit(itemOptionKey, true)
			l.skipNext()
			return lexOptionValueBegin
		case ';':
			l.backup()
			if l.pos > l.start {
				l.emit(itemOptionKey, true)
				l.emit(itemOptionNoValue, true)
			}
			l.skipNext()
			return lexOptionKey
		case ')':
			l.backup()
			if l.pos > l.start {
				l.emit(itemOptionKey, true)
			}
			l.skipNext()
			return lexRuleEnd
		case eof:
			return l.unexpectedEOF()
		}
	}
}

// lexOptionValueBegin scans the beginning of a value from the rule option.
func lexOptionValueBegin(l *lexer) stateFn {
	switch l.next() {
	case '"':
		l.ignore()
		return lexOptionValueString
	case ' ':
		l.ignore()
		return lexOptionValueBegin
	case '!':
		l.emit(itemNot, true)
		return lexOptionValueBegin
	}
	return lexOptionValue
}

// lexOptionValueString consumes the inner content of a string value from the rule options.
func lexOptionValueString(l *lexer) stateFn {
	escaped := false
	for {
		switch l.next() {
		case ';':
			l.backup()
			l.emit(itemOptionValueString, false)
			l.skipNext()
			return lexOptionKey
		case '\\':
			escaped = !escaped
			if l.next() != ';' || !escaped {
				l.backup()
			}
		case eof:
			return l.unexpectedEOF()
		default:
			escaped = false
		}
	}
}

// lexOptionValue scans a value from the rule options.
func lexOptionValue(l *lexer) stateFn {
	for {
		switch l.next() {
		case ';':
			l.backup()
			l.emit(itemOptionValue, true)
			l.skipNext()
			return lexOptionKey
		case eof:
			return l.unexpectedEOF()
		}
	}
}

// lexRuleEnd marks the end of a rule.
func lexRuleEnd(l *lexer) stateFn {
	l.emit(itemEOR, false)
	return lexRule
}
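// A minimal sketch of how the lexer is consumed inside this package; the
// lexExample helper and its sample rule are illustrative assumptions, not
// part of the public API. The real consumer is the parser, which switches on
// it.typ to build a Rule.
func lexExample() error {
	l, err := lex(`alert tcp any any -> any any (msg:"demo"; sid:1;)`)
	if err != nil {
		return err
	}
	defer l.close()
	// Drain items until the end-of-rule, EOF, or error marker.
	for it := l.nextItem(); it.typ != itemEOR && it.typ != itemEOF && it.typ != itemError; it = l.nextItem() {
		fmt.Println(it)
	}
	return nil
}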
/* Copyright 2016 Google Inc.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

	http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package gonids

import (
	"bytes"
	"strings"
)

// ShouldBeHTTP returns true if a rule looks like the protocol should be http, but is not.
func (r *Rule) ShouldBeHTTP() bool {
	// If the rule is already HTTP, then stop looking.
	if r.Protocol == "http" {
		return false
	}
	// If we look at http buffers or sticky buffers, we should use the HTTP protocol.
	for _, c := range r.Contents() {
		if strings.HasPrefix(c.DataPosition.String(), "http_") {
			return true
		}
		for _, co := range c.Options {
			if strings.HasPrefix(co.Name, "http_") {
				return true
			}
		}
	}
	return false
}

// TODO: See if ET folks have any data around this.
// Minimum length of a content to be considered safe for use with a PCRE.
const minPCREContentLen = 5

// Some of these may be caught by the min length check, but including for completeness.
// All lower case for case insensitive checks.
// Many of these come from: https://github.com/EmergingThreats/IDSDeathBlossom/blob/master/config/fpblacklist.txt
var bannedContents = []string{
	"get", "post", "/",
	"user-agent", "user-agent: mozilla", "host",
	"index.php", "index.php?id=", "index.html",
	"content-length", ".htm", ".html", ".php", ".asp", ".aspx",
	"content-disposition",
	"wp-content/plugins", "wp-content/themes",
	"activexobject",
	"default.asp", "default.aspx",
}

// ExpensivePCRE returns true if a rule appears to use a PCRE without
// conditions (such as a sufficiently long content match) that would make it
// cheap to compute.
func (r *Rule) ExpensivePCRE() bool {
	// No PCRE, not expensive.
	if len(r.PCREs()) < 1 {
		return false
	}
	// If we have PCRE, but no contents, this is probably expensive.
	cs := r.Contents()
	if len(cs) < 1 {
		return true
	}
	// Look for a content with sufficient length to make performance acceptable.
	short := true
	for _, c := range cs {
		// TODO: Identify a reasonable length.
		if len(c.Pattern) >= minPCREContentLen {
			short = false
		}
	}
	if short {
		return true
	}
	// If all content matches are common strings, also not good.
	common := true
	for _, c := range cs {
		if !inSlice(strings.ToLower(strings.Trim(string(c.Pattern), "\r\n :/?")), bannedContents) {
			common = false
		}
	}
	return common
}

// SnortHTTPHeader returns true if any content contains double CRLF at the end.
func (r *Rule) SnortHTTPHeader() bool {
	cs := r.Contents()
	if len(cs) < 1 {
		return false
	}
	for _, c := range cs {
		if c.SnortHTTPHeader() {
			return true
		}
	}
	return false
}

// SnortHTTPHeader returns true if a specific content contains double CRLF at the end.
func (c Content) SnortHTTPHeader() bool {
	for _, o := range c.Options {
		if o.Name == "http_header" {
			if bytes.HasSuffix(c.Pattern, []byte("\r\n\r\n")) {
				return true
			}
		}
	}
	return false
}

// NoReferences returns true if there are no references in the rule.
func (r *Rule) NoReferences() bool {
	return len(r.References) == 0
}

// Length at which we warn if all matchers are Contents of this length or shorter.
// Possibly align this with minPCREContentLen.
const shortContentLen = 4

// OnlyShortContents returns true if all Matchers are Contents and all matches are very short.
func (r *Rule) OnlyShortContents() bool {
	cs := r.Contents()
	// There are non-Content matchers in the rule.
	if len(r.Matchers) != len(cs) {
		return false
	}
	for _, c := range cs {
		// Some content is longer than the threshold.
		if len(c.Pattern) > shortContentLen {
			return false
		}
	}
	return true
}
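// A minimal sketch tying the quality checks above together; the qualityReport
// helper and its finding strings are illustrative assumptions.
func qualityReport(raw string) ([]string, error) {
	r, err := ParseRule(raw)
	if err != nil {
		return nil, err
	}
	var findings []string
	if r.ShouldBeHTTP() {
		findings = append(findings, "uses http buffers but protocol is not http")
	}
	if r.ExpensivePCRE() {
		findings = append(findings, "pcre without a strong anchoring content match")
	}
	if r.SnortHTTPHeader() {
		findings = append(findings, "http_header content ends with double CRLF")
	}
	if r.NoReferences() {
		findings = append(findings, "rule has no references")
	}
	if r.OnlyShortContents() {
		findings = append(findings, "all matchers are very short contents")
	}
	return findings, nil
}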
/* Copyright 2016 Google Inc.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

	http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package gonids

import (
	"bytes"
)

// Suricata 4.x content options mapped to Suricata 5.0 sticky buffers.
var cOptToStickyBuffer = map[string]DataPos{
	// HTTP content modifiers.
	"http_client_body":  httpClientBody,
	"http_cookie":       httpCookie,
	"http_header":       httpHeader,
	"http_host":         httpHost,
	"http_method":       httpMethod,
	"http_raw_header":   httpHeaderRaw,
	"http_raw_host":     httpHostRaw,
	"http_raw_uri":      httpURIRaw,
	"http_request_line": httpRequestLine5,
	"http_server_body":  httpServerBody,
	"http_stat_code":    httpStatCode,
	"http_stat_msg":     httpStatMsg,
	"http_uri":          httpURI,
	"http_user_agent":   httpUserAgent,
}

var suri4StickyTo5Sticky = map[DataPos]DataPos{
	fileData: fileData5,
	// HTTP
	httpAccept:       httpAccept5,
	httpAcceptEnc:    httpAcceptEnc5,
	httpAcceptLang:   httpAcceptLang5,
	httpConnection:   httpConnection5,
	httpContentLen:   httpContentLen5,
	httpContentType:  httpContentType5,
	httpHeaderNames:  httpHeaderNames5,
	httpProtocol:     httpProtocol5,
	httpReferer:      httpReferer5,
	httpRequestLine:  httpRequestLine5,
	httpResponseLine: httpResponseLine5,
	httpStart:        httpStart5,
	// TLS
	tlsCertSubject:     tlsCertSubject5,
	tlsCertIssuer:      tlsCertIssuer5,
	tlsCertSerial:      tlsCertSerial5,
	tlsCertFingerprint: tlsCertFingerprint5,
	tlsSNI:             tlsSNI5,
	// JA3
	ja3Hash:   ja3Hash5,
	ja3String: ja3String5,
	// SSH
	sshProto:    sshProto5,
	sshSoftware: sshSoftware5,
	// DNS
	dnsQuery: dnsQuery5,
}

// OptimizeHTTP tunes an old style rule to leverage port independent HTTP detection.
func (r *Rule) OptimizeHTTP() bool {
	if !r.ShouldBeHTTP() {
		return false
	}
	// Switch protocol to HTTP.
	r.Protocol = "http"
	// Make detection port independent.
	for i, p := range r.Source.Ports {
		if p == "$HTTP_PORTS" {
			r.Source.Ports[i] = "any"
		}
	}
	for i, p := range r.Destination.Ports {
		if p == "$HTTP_PORTS" {
			r.Destination.Ports[i] = "any"
		}
	}
	// Annotate rule to indicate modification.
	r.Metas = append(r.Metas, MetadataModifier("http_optimize"))
	return true
}

// SnortURILenFix will optimize a urilen keyword from a Snort rule for Suricata.
func (r *Rule) SnortURILenFix() bool {
	var modified bool
	// Update this once we parse urilen in a better structure.
	for _, l := range r.LenMatchers() {
		if l.Kind == uriLen && l.Operator == "<>" {
			l.Min--
			l.Max++
			modified = true
		}
		if l.Kind == uriLen {
			setRaw := true
			for _, o := range l.Options {
				if o == "norm" || o == "raw" {
					// If Snort rule specified norm or raw, trust author.
					setRaw = false
					break
				}
			}
			// If author did not specify, set 'raw'.
			if setRaw {
				modified = true
				l.Options = append(l.Options, "raw")
			}
		}
	}
	if modified {
		r.Metas = append(r.Metas, MetadataModifier("snort_urilen"))
	}
	return modified
}

// SnortHTTPHeaderFix will fix broken http_header matches.
func (r *Rule) SnortHTTPHeaderFix() bool {
	var modified bool
	if !r.SnortHTTPHeader() {
		return false
	}
	for i, m := range r.Matchers {
		// If this is a content, check it out.
		if c, ok := m.(*Content); ok {
			if c.SnortHTTPHeader() {
				modified = true
				c.Pattern = bytes.TrimSuffix(c.Pattern, []byte("\r\n"))
				if err := r.InsertMatcher(&ByteMatch{Kind: isDataAt, Negate: true, NumBytes: "1"}, i+1); err != nil {
					return false
				}
			}
		}
	}
	if modified {
		r.Metas = append(r.Metas, MetadataModifier("snort_http_header"))
	}
	return modified
}

// UpgradeToSuri5 optimizes a Suricata 4.x rule to use Suricata 5.x features.
func (r *Rule) UpgradeToSuri5() bool {
	var modified bool
	for _, c := range r.Contents() {
		for i, opt := range c.Options {
			if sticky, ok := cOptToStickyBuffer[opt.Name]; ok {
				// Remove the old modifier.
				// TODO(duane): Find a better way to handle this. If I break this into another function I need
				// to iterate again across everything.
				if i < len(c.Options)-1 {
					copy(c.Options[i:], c.Options[i+1:])
				}
				c.Options[len(c.Options)-1] = nil // or the zero value of T
				c.Options = c.Options[:len(c.Options)-1]
				c.DataPosition = sticky
				modified = true
			}
		}
		// Old sticky buffer to new sticky buffer.
		if sticky, ok := suri4StickyTo5Sticky[c.DataPosition]; ok {
			c.DataPosition = sticky
			modified = true
		}
	}
	if modified {
		r.Metas = append(r.Metas, MetadataModifier("upgrade_to_suri5"))
	}
	return modified
}

// MetadataModifier returns a metadata that identifies a given modification.
func MetadataModifier(s string) *Metadata {
	return &Metadata{Key: "gonids", Value: s}
}
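// A minimal sketch of chaining the fixups above after parsing a rule; the
// optimizeExample helper is an illustrative assumption. Each pass returns
// true, and appends a gonids metadata marker, only if it changed the rule.
func optimizeExample(raw string) (string, error) {
	r, err := ParseRule(raw)
	if err != nil {
		return "", err
	}
	r.OptimizeHTTP()
	r.SnortURILenFix()
	r.SnortHTTPHeaderFix()
	r.UpgradeToSuri5()
	return r.String(), nil
}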
/* Copyright 2016 Google Inc.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

	http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package gonids implements a basic parser of IDS rules.
//
// For now the parser is very basic and it only parses a subset of fields.
// We intentionally omit http_encode as it doesn't seem to be used in practice.
package gonids

import (
	"encoding/hex"
	"errors"
	"fmt"
	"net"
	"regexp"
	"strconv"
	"strings"
)

// hexRE matches hexadecimal content, e.g. |41 41 41|.
var hexRE = regexp.MustCompile(`(?i)(\|(?:\s*[a-f0-9]{2}\s*)+\|)`)

// escapeRE matches characters that need to be escaped in a regexp.
var escapeRE = regexp.MustCompile(`([()+.'\\])`)

// escapeContent matches escaped special characters.
var escapeContent = regexp.MustCompile(`\\([\\;":])`)

// metaSplitRE splits metadata into key-value pairs.
var metaSplitRE = regexp.MustCompile(`,\s*`)

// nestedNetRE matches nested network groups.
var nestedNetRE = regexp.MustCompile(`,(!?\[[^]]*\])`)

// portSplitRE splits port lists and ranges for validation.
var portSplitRE = regexp.MustCompile(`[:,]`)

var appLayerProtocols = []string{
	"dcerpc", "dhcp", "dnp3", "dns", "enip",
	"ftp", "ftp-data", "http", "http2",
	"icmp", "icmpv4", "icmpv6", "ikev2", "imap",
	"ip", "ip4", "ip6", "ipv4", "ipv6", "irc",
	"jabber", "krb5", "modbus", "mqtt", "nfs", "ntp",
	"pkthdr", "rdp", "rfb", "sctp", "sip", "smb",
	"smtp", "snmp", "ssh", "tcp", "tcp-pkt", "tcp-stream",
	"tftp", "tls", "udp",
}

// parseContent decodes rule content match. For now it only takes care of escaped and hex
// encoded content.
func parseContent(content string) ([]byte, error) {
	// Decode and replace all occurrences of hexadecimal content.
	var errpanic error
	defer func() {
		r := recover()
		if r != nil {
			errpanic = fmt.Errorf("recovered from panic: %v", r)
		}
	}()

	if containsUnescaped(content) {
		return nil, fmt.Errorf("invalid special character escaping")
	}

	b := escapeContent.ReplaceAllString(content, "$1")

	b = hexRE.ReplaceAllStringFunc(b,
		func(h string) string {
			r, err := hex.DecodeString(strings.Replace(strings.Trim(h, "|"), " ", "", -1))
			if err != nil {
				panic("invalid hexRE regexp")
			}
			return string(r)
		})
	return []byte(b), errpanic
}

// parsePCRE parses the components of a PCRE. Returns PCRE struct.
func parsePCRE(s string) (*PCRE, error) {
	c := strings.Count(s, "/")
	if c < 2 {
		return nil, fmt.Errorf("all pcre patterns must contain at least 2 '/', found: %d", c)
	}

	l := strings.LastIndex(s, "/")
	if l < 0 {
		return nil, fmt.Errorf("couldn't find options in PCRE")
	}

	i := strings.Index(s, "/")
	if i < 0 {
		return nil, fmt.Errorf("couldn't find start of pattern")
	}

	return &PCRE{
		Pattern: []byte(s[i+1 : l]),
		Options: []byte(s[l+1:]),
	}, nil
}

// parseLenMatch parses a LenMatch (like urilen).
func parseLenMatch(k lenMatchType, s string) (*LenMatch, error) {
	m := new(LenMatch)
	m.Kind = k
	switch {
	// Simple case, no operators.
	case !strings.ContainsAny(s, "><"):
		// Ignore options after ','.
		numTmp := strings.Split(s, ",")[0]
		num, err := strconv.Atoi(strings.TrimSpace(numTmp))
		if err != nil {
			return nil, fmt.Errorf("%v is not an integer", s)
		}
		m.Num = num
	// Leading operator, single number.
	case strings.HasPrefix(s, ">") || strings.HasPrefix(s, "<"):
		m.Operator = s[0:1]
		// Strip leading < or >.
		numTmp := strings.TrimLeft(s, "><")
		// Ignore options after ','.
		numTmp = strings.Split(numTmp, ",")[0]
		num, err := strconv.Atoi(strings.TrimSpace(numTmp))
		if err != nil {
			return nil, fmt.Errorf("%v is not an integer", s)
		}
		m.Num = num
	// Min/Max center operator.
	case strings.Contains(s, "<>"):
		m.Operator = "<>"
		parts := strings.Split(s, "<>")
		if len(parts) != 2 {
			return nil, fmt.Errorf("must have exactly 2 parts for min/max operator. got %d", len(parts))
		}
		var min, max int
		var err error
		min, err = strconv.Atoi(strings.TrimSpace(parts[0]))
		if err != nil {
			return nil, fmt.Errorf("%v is not an integer", strings.TrimSpace(parts[0]))
		}
		maxTmp := strings.Split(parts[1], ",")[0]
		max, err = strconv.Atoi(strings.TrimSpace(maxTmp))
		if err != nil {
			return nil, fmt.Errorf("%v is not an integer", strings.TrimSpace(maxTmp))
		}
		m.Min = min
		m.Max = max
	}

	// Parse options after ','.
	if strings.Contains(s, ",") {
		opts := strings.Split(s, ",")[1:]
		for i, o := range opts {
			opts[i] = strings.TrimSpace(o)
		}
		m.Options = opts
	}
	return m, nil
}

func parseBase64Decode(k byteMatchType, s string) (*ByteMatch, error) {
	if k != b64Decode {
		return nil, fmt.Errorf("kind %v is not base64_decode", k)
	}
	b := new(ByteMatch)
	b.Kind = k

	// All options to base64_decode are optional, and specified by their keyword.
	for _, p := range strings.Split(s, ",") {
		v := strings.TrimSpace(p)
		switch {
		case strings.HasPrefix(v, "bytes"):
			b.NumBytes = strings.TrimSpace(strings.SplitAfter(v, "bytes")[1])
		case strings.HasPrefix(v, "offset"):
			val := strings.TrimSpace(strings.SplitAfter(v, "offset")[1])
			i, err := strconv.Atoi(val)
			if err != nil {
				return nil, fmt.Errorf("offset is not an int: %s; %s", val, err)
			}
			if i < 1 {
				return nil, fmt.Errorf("offset must be positive, non-zero values only")
			}
			b.Offset = i
		case strings.HasPrefix(v, "relative"):
			b.Options = []string{"relative"}
		}
	}
	return b, nil
}

// parseByteMatch parses a ByteMatch.
func parseByteMatch(k byteMatchType, s string) (*ByteMatch, error) {
	b := new(ByteMatch)
	b.Kind = k

	parts := strings.Split(s, ",")

	// Num bytes is required for all byteMatchType keywords.
	if len(parts) < 1 {
		return nil, fmt.Errorf("%s keyword has %d parts", s, len(parts))
	}
	b.NumBytes = strings.TrimSpace(parts[0])

	if len(parts) < b.Kind.minLen() {
		return nil, fmt.Errorf("invalid %s length: %d", b.Kind, len(parts))
	}

	if k == bExtract || k == bJump {
		// Parse offset.
		offset, err := strconv.Atoi(strings.TrimSpace(parts[1]))
		if err != nil {
			return nil, fmt.Errorf("%s offset is not an int: %v; %s", b.Kind, parts[1], err)
		}
		b.Offset = offset
	}

	if k == bExtract {
		// Parse variable name.
		name := parts[2]
		b.Variable = name
	}

	if k == bTest {
		// Parse operator.
		b.Operator = strings.TrimSpace(parts[1])
		// Parse value. Can use a variable.
		b.Value = strings.TrimSpace(parts[2])
		// Parse offset.
		offset, err := strconv.Atoi(strings.TrimSpace(parts[3]))
		if err != nil {
			return nil, fmt.Errorf("%s offset is not an int: %v; %s", b.Kind, parts[3], err)
		}
		b.Offset = offset
	}

	// The rest of the options, for all types but base64_decode.
	for i, l := b.Kind.minLen(), len(parts); i < l; i++ {
		parts[i] = strings.TrimSpace(parts[i])
		b.Options = append(b.Options, parts[i])
	}
	return b, nil
}

// parseFlowbit parses a flowbit.
func parseFlowbit(s string) (*Flowbit, error) {
	parts := strings.Split(s, ",")
	if len(parts) < 1 {
		return nil, fmt.Errorf("couldn't parse flowbit string: %s", s)
	}
	// Ensure all actions are of valid type.
	a := strings.TrimSpace(parts[0])
	if !inSlice(a, []string{"noalert", "isset", "isnotset", "set", "unset", "toggle"}) {
		return nil, fmt.Errorf("invalid action for flowbit: %s", a)
	}
	fb := &Flowbit{
		Action: a,
	}
	if fb.Action == "noalert" && len(parts) > 1 {
		return nil, fmt.Errorf("noalert shouldn't have a value")
	}
	if len(parts) == 2 {
		fb.Value = strings.TrimSpace(parts[1])
	}
	return fb, nil
}

// parseXbit parses an xbit.
func parseXbit(s string) (*Xbit, error) {
	parts := strings.Split(s, ",")
	// All xbits must have an action, name and track.
	if len(parts) < 3 {
		return nil, fmt.Errorf("not enough parts for xbits: %s", s)
	}
	// Ensure all actions are of valid type.
	a := strings.TrimSpace(parts[0])
	if !inSlice(a, []string{"set", "unset", "isset", "isnotset", "toggle"}) {
		return nil, fmt.Errorf("invalid action for xbits: %s", a)
	}
	xb := &Xbit{
		Action: a,
		Name:   strings.TrimSpace(parts[1]),
	}

	// Track.
	t := strings.Fields(parts[2])
	if len(t) != 2 {
		return nil, fmt.Errorf("wrong number of parts for track: %v", t)
	}
	if t[0] != "track" {
		return nil, fmt.Errorf("%s should be 'track'", t[0])
	}
	xb.Track = t[1]

	// Expire.
	if len(parts) == 4 {
		e := strings.Fields(parts[3])
		if len(e) != 2 {
			return nil, fmt.Errorf("wrong number of parts for expire: %v", e)
		}
		if e[0] != "expire" {
			return nil, fmt.Errorf("%s should be 'expire'", e[0])
		}
		xb.Expire = e[1]
	}
	return xb, nil
}

// parseFlowint parses a flowint.
func parseFlowint(s string) (*Flowint, error) {
	parts := strings.Split(s, ",")
	// All flowints must have a name and modifier.
	if len(parts) < 2 {
		return nil, fmt.Errorf("not enough parts for flowint: %s", s)
	}
	// Ensure all modifiers are of valid type.
	m := strings.TrimSpace(parts[1])
	if !inSlice(m, []string{"+", "-", "=", ">", "<", ">=", "<=", "==", "!=", "isset", "isnotset"}) {
		return nil, fmt.Errorf("invalid modifier for flowint: %s", m)
	}
	fi := &Flowint{
		Name:     strings.TrimSpace(parts[0]),
		Modifier: m,
	}
	if len(parts) == 3 {
		fi.Value = strings.TrimSpace(parts[2])
	}
	return fi, nil
}

// containsUnescaped reports whether the content contains unescaped special characters.
func containsUnescaped(s string) bool {
	esc := false
	for _, b := range s {
		if esc {
			switch b {
			case '\\', ';', '"', ':':
				esc = false
			default:
				return true
			}
		} else {
			switch b {
			case '\\':
				esc = true
			case ';', '"':
				return true
			}
		}
	}
	return esc
}

func unquote(s string) string {
	if strings.IndexByte(s, '"') < 0 {
		return s
	}
	return strings.Replace(s, `\"`, `"`, -1)
}

func inSlice(str string, strings []string) bool {
	for _, k := range strings {
		if str == k {
			return true
		}
	}
	return false
}

// comment decodes a comment (commented rule, or just a comment.)
func (r *Rule) comment(key item, l *lexer) error {
	if key.typ != itemComment {
		panic("item is not a comment")
	}
	if r.Disabled {
		// Ignoring comment for rule with empty action.
		return nil
	}
	rule, err := parseRuleAux(key.value, true)
	// If there was an error this means the comment is not a rule.
	if err != nil {
		return fmt.Errorf("this is not a rule: %s", err)
	}
	// We parsed a rule, this was a comment so set the rule to disabled.
	rule.Disabled = true
	// Overwrite the rule we're working on with the recently parsed, disabled rule.
	*r = *rule
	return nil
}

// action decodes an IDS rule action based on its key.
func (r *Rule) action(key item, l *lexer) error {
	if key.typ != itemAction {
		panic("item is not an action")
	}
	if !inSlice(key.value, []string{"alert", "drop", "pass"}) {
		return fmt.Errorf("invalid action: %v", key.value)
	}
	r.Action = key.value
	return nil
}

// protocol decodes an IDS rule protocol based on its key.
func (r *Rule) protocol(key item, l *lexer) error {
	if key.typ != itemProtocol {
		panic("item is not a protocol")
	}
	if !inSlice(key.value, appLayerProtocols) {
		return fmt.Errorf("invalid protocol: %v", key.value)
	}
	r.Protocol = key.value
	return nil
}

// network decodes an IDS rule network (networks and ports) based on its key.
func (r *Rule) network(key item, l *lexer) error {
	// This is a hack. We use a regexp to replace the outer `,` with `___`
	// to give us a discrete string to split on, avoiding the inner `,`.
	// Specify TrimSuffix and TrimPrefix to ensure only one instance of `[` and `]` are trimmed.
	tmp := strings.TrimSuffix(strings.TrimPrefix(key.value, "["), "]")
	items := strings.Split(nestedNetRE.ReplaceAllString(tmp, "___${1}"), "___")

	// Validate that no items contain spaces.
	for _, i := range items {
		if len(strings.Fields(i)) > 1 || len(strings.TrimSpace(i)) != len(i) {
			return fmt.Errorf("network component contains spaces: %v", i)
		}
	}
	switch key.typ {
	case itemSourceAddress:
		if validNetworks(items) {
			r.Source.Nets = append(r.Source.Nets, items...)
		} else {
			return fmt.Errorf("some or all source ips are invalid: %v", items)
		}
	case itemSourcePort:
		if portsValid(items) {
			r.Source.Ports = append(r.Source.Ports, items...)
		} else {
			return fmt.Errorf("some or all source ports are invalid: %v", items)
		}
	case itemDestinationAddress:
		if validNetworks(items) {
			r.Destination.Nets = append(r.Destination.Nets, items...)
		} else {
			return fmt.Errorf("some or all destination ips are invalid: %v", items)
		}
	case itemDestinationPort:
		if portsValid(items) {
			r.Destination.Ports = append(r.Destination.Ports, items...)
		} else {
			return fmt.Errorf("some or all destination ports are invalid: %v", items)
		}
	default:
		panic("item is not a network component")
	}
	return nil
}

// Validate that every port is a variable, a grouping, or in the range 0 to 65535.
func portsValid(p []string) bool {
	for _, u := range p {
		if strings.Count(u, "[") != strings.Count(u, "]") {
			// Unbalanced groups.
			return false
		}
		u = strings.TrimPrefix(u, "!")
		// If this port range is a grouping, check the inner group.
		if strings.HasPrefix(u, "[") {
			if portsValid(strings.Split(strings.Trim(u, "[]"), ",")) {
				continue
			}
			return false
		}
		ports := portSplitRE.Split(u, -1)
		for _, port := range ports {
			port = strings.TrimPrefix(port, "!")
			if port == "any" || port == "" || strings.HasPrefix(port, "$") {
				continue
			}
			x, err := strconv.Atoi(port)
			if err != nil {
				return false
			}
			if x > 65535 || x < 0 {
				return false
			}
		}
	}
	return true
}

// Validate item is either a valid ip or ip range.
func validNetwork(i string) bool {
	_, _, err := net.ParseCIDR(i)
	if err == nil {
		return true
	}
	if net.ParseIP(i) != nil {
		return true
	}
	return false
}

// Validate every item is either a valid ip or ip range.
func validNetworks(nets []string) bool {
	for _, net := range nets {
		if strings.Count(net, "[") != strings.Count(net, "]") {
			// Unbalanced groups.
			return false
		}
		net = strings.TrimPrefix(net, "!")
		// If this network is a grouping, check the inner group.
		if strings.HasPrefix(net, "[") || strings.Contains(net, ",") {
			if validNetworks(strings.Split(strings.Trim(net, "[]"), ",")) {
				continue
			}
			return false
		}
		switch {
		case net == "any":
			continue
		case strings.HasPrefix(net, "$"):
			continue
		case !validNetwork(net):
			return false
		}
	}
	return true
}

// direction decodes an IDS rule direction based on its key.
func (r *Rule) direction(key item, l *lexer) error {
	if key.typ != itemDirection {
		panic("item is not a direction")
	}
	switch key.value {
	case "->":
		r.Bidirectional = false
	case "<>":
		r.Bidirectional = true
	default:
		return fmt.Errorf("invalid direction operator %q", key.value)
	}
	return nil
}

var dataPosition = pktData

// option decodes an IDS rule option based on its key.
func (r *Rule) option(key item, l *lexer) error {
	if key.typ != itemOptionKey {
		panic("item is not an option key")
	}
	switch {
	// TODO: Many of these simple tags could be factored into nicer structures.
	case inSlice(key.value, []string{"classtype", "flow", "tag", "priority", "app-layer-protocol", "noalert", "target", "flags", "ipopts", "ip_proto", "geoip", "fragbits", "fragoffset", "tos", "window", "threshold", "detection_filter", "dce_iface", "dce_opnum", "dce_stub_data", "asn1"}):
		nextItem := l.nextItem()
		if nextItem.typ != itemOptionValue {
			return fmt.Errorf("no valid value for %s tag", key.value)
		}
		if r.Tags == nil {
			r.Tags = make(map[string]string)
		}
		r.Tags[key.value] = nextItem.value
	case inSlice(key.value, []string{"sameip", "tls.store", "ftpbounce"}):
		r.Statements = append(r.Statements, key.value)
	case inSlice(key.value, tlsTags):
		t := &TLSTag{
			Key: key.value,
		}
		nextItem := l.nextItem()
		if nextItem.typ == itemNot {
			t.Negate = true
			nextItem = l.nextItem()
		}
		t.Value = nextItem.value
		r.TLSTags = append(r.TLSTags, t)
	case key.value == "stream_size":
		nextItem := l.nextItem()
		parts := strings.Split(nextItem.value, ",")
		if len(parts) != 3 {
			return fmt.Errorf("invalid number of parts for stream_size: %d", len(parts))
		}
		num, err := strconv.Atoi(strings.TrimSpace(parts[2]))
		if err != nil {
			return fmt.Errorf("comparison number is not an integer: %v", parts[2])
		}
		r.StreamMatch = &StreamCmp{
			Direction: parts[0],
			Operator:  parts[1],
			Number:    num,
		}
	case key.value == "reference":
		nextItem := l.nextItem()
		if nextItem.typ != itemOptionValue {
			return errors.New("no valid value for reference")
		}
		refs := strings.SplitN(nextItem.value, ",", 2)
		if len(refs) != 2 {
			return fmt.Errorf("invalid reference definition: %s", refs)
		}
		r.References = append(r.References, &Reference{Type: refs[0], Value: refs[1]})
	case key.value == "metadata":
		nextItem := l.nextItem()
		if nextItem.typ != itemOptionValue {
			return errors.New("no valid value for metadata")
		}
		metas := metaSplitRE.Split(nextItem.value, -1)
		for _, kv := range metas {
			metaTmp := strings.SplitN(kv, " ", 2)
			if len(metaTmp) != 2 {
				return fmt.Errorf("invalid metadata definition: %s", metaTmp)
			}
			r.Metas = append(r.Metas, &Metadata{Key: strings.TrimSpace(metaTmp[0]), Value: strings.TrimSpace(metaTmp[1])})
		}
	case key.value == "sid":
		nextItem := l.nextItem()
		if nextItem.typ != itemOptionValue {
			return errors.New("no value for option sid")
		}
		sid, err := strconv.Atoi(nextItem.value)
		if err != nil {
			return fmt.Errorf("invalid sid %s", nextItem.value)
		}
		r.SID = sid
	case key.value == "rev":
		nextItem := l.nextItem()
		if nextItem.typ != itemOptionValue {
			return errors.New("no value for option rev")
		}
		rev, err := strconv.Atoi(nextItem.value)
		if err != nil {
			return fmt.Errorf("invalid rev %s", nextItem.value)
		}
		r.Revision = rev
	case key.value == "msg":
		nextItem := l.nextItem()
		if nextItem.typ != itemOptionValueString {
			return errors.New("no value for option msg")
		}
		r.Description = nextItem.value
	case isStickyBuffer(key.value):
		var d DataPos
		var err error
		if d, err = StickyBuffer(key.value); err != nil {
			return err
		}
		dataPosition = d
	case inSlice(key.value, []string{"content", "uricontent"}):
		nextItem := l.nextItem()
		negate := false
		if nextItem.typ == itemNot {
			nextItem = l.nextItem()
			negate = true
		}
		if nextItem.typ == itemOptionValueString {
			c, err := parseContent(nextItem.value)
			if err != nil {
				return err
			}
			var options []*ContentOption
			if key.value == "uricontent" {
				options = append(options, &ContentOption{Name: "http_uri"})
			}
			con := &Content{
				DataPosition: dataPosition,
				Pattern:      c,
				Negate:       negate,
				Options:      options,
			}
			r.Matchers = append(r.Matchers, con)
		} else {
			return fmt.Errorf("invalid type %q for option content", nextItem.typ)
		}
	case inSlice(key.value, []string{"http_cookie", "http_raw_cookie", "http_method", "http_header", "http_raw_header", "http_uri", "http_raw_uri", "http_user_agent", "http_stat_code", "http_stat_msg", "http_client_body", "http_server_body", "http_host", "nocase", "rawbytes", "startswith", "endswith"}):
		lastContent := r.LastContent()
		if lastContent == nil {
			return fmt.Errorf("invalid content option %q with no content match", key.value)
		}
		lastContent.Options = append(lastContent.Options, &ContentOption{Name: key.value})
	case inSlice(key.value, []string{"depth", "distance", "offset", "within"}):
		lastContent := r.LastContent()
		if lastContent == nil {
			return fmt.Errorf("invalid content option %q with no content match", key.value)
		}
		nextItem := l.nextItem()
		if nextItem.typ != itemOptionValue {
			return fmt.Errorf("no value for content option %s", key.value)
		}
		lastContent.Options = append(lastContent.Options, &ContentOption{Name: key.value, Value: nextItem.value})
	case key.value == "fast_pattern":
		lastContent := r.LastContent()
		if lastContent == nil {
			return fmt.Errorf("invalid content option %q with no content match", key.value)
		}
		var (
			only   bool
			offset int
			length int
		)
		nextItem := l.nextItem()
		if nextItem.typ == itemOptionValue {
			v := nextItem.value
			switch {
			case v == "only":
				only = true
			case strings.Contains(v, ","):
				s := strings.Split(v, ",")
				i, err := strconv.Atoi(s[0])
				if err != nil {
					return fmt.Errorf("fast_pattern offset is not an int: %s; %s", s[0], err)
				}
				offset = i
				i, err = strconv.Atoi(s[1])
				if err != nil {
					return fmt.Errorf("fast_pattern length is not an int: %s; %s", s[1], err)
				}
				length = i
			}
		}
		lastContent.FastPattern = FastPattern{true, only, offset, length}
	case key.value == "pcre":
		nextItem := l.nextItem()
		negate := false
		if nextItem.typ == itemNot {
			nextItem = l.nextItem()
			negate = true
		}
		if nextItem.typ == itemOptionValueString {
			p, err := parsePCRE(unquote(nextItem.value))
			if err != nil {
				return err
			}
			p.DataPosition = dataPosition
			p.Negate = negate
			r.Matchers = append(r.Matchers, p)
		} else {
			return fmt.Errorf("invalid type %q for option pcre", nextItem.typ)
		}
	case inSlice(key.value, allbyteMatchTypeNames()):
		k, err := byteMatcher(key.value)
		if err != nil {
			return fmt.Errorf("%s is not a supported byteMatchType keyword", key.value)
		}
		// Handle negation logic here, don't want to pass lexer to parseByteMatch.
		nextItem := l.nextItem()
		var negate bool
		if k == isDataAt && nextItem.typ == itemNot {
			negate = true
			nextItem = l.nextItem()
		}
		var b *ByteMatch
		// Parse base64_decode differently as it has odd semantics.
		if k == b64Decode {
			b, err = parseBase64Decode(k, nextItem.value)
			if err != nil {
				return fmt.Errorf("could not parse base64Decode: %v", err)
			}
			// base64_decode allows NumBytes to be empty, an int or a variable.
			if b.NumBytes != "" {
				if i, err := strconv.Atoi(b.NumBytes); err != nil {
					// NumBytes is not an int, check if it is a variable from byte_extract.
					if !r.HasVar(b.NumBytes) {
						return fmt.Errorf("number of bytes is not an int, or an extracted variable: %s; %s", b.NumBytes, err)
					}
				} else if i < 1 {
					return fmt.Errorf("bytes must be positive, non-zero values only: %d", i)
				}
			}
		} else {
			b, err = parseByteMatch(k, nextItem.value)
			if err != nil {
				return fmt.Errorf("could not parse byteMatch: %v", err)
			}
			if _, err := strconv.Atoi(b.NumBytes); err != nil {
				// NumBytes is not an int, check if it is a variable from byte_extract.
				if !r.HasVar(b.NumBytes) {
					return fmt.Errorf("number of bytes is not an int, or an extracted variable: %s; %s", b.NumBytes, err)
				}
			}
		}
		b.Negate = negate
		r.Matchers = append(r.Matchers, b)
	case inSlice(key.value, allLenMatchTypeNames()):
		k, err := lenMatcher(key.value)
		if err != nil {
			return fmt.Errorf("%s is not a supported lenMatch keyword", key.value)
		}
		nextItem := l.nextItem()
		m, err := parseLenMatch(k, nextItem.value)
		if err != nil {
			return fmt.Errorf("could not parse LenMatch: %v", err)
		}
		m.DataPosition = dataPosition
		r.Matchers = append(r.Matchers, m)
	case key.value == "flowbits":
		nextItem := l.nextItem()
		fb, err := parseFlowbit(nextItem.value)
		if err != nil {
			return fmt.Errorf("error parsing flowbit: %v", err)
		}
		r.Flowbits = append(r.Flowbits, fb)
	case key.value == "xbits":
		nextItem := l.nextItem()
		xb, err := parseXbit(nextItem.value)
		if err != nil {
			return fmt.Errorf("error parsing xbits: %v", err)
		}
		r.Xbits = append(r.Xbits, xb)
	case key.value == "flowint":
		nextItem := l.nextItem()
		fi, err := parseFlowint(nextItem.value)
		if err != nil {
			return fmt.Errorf("error parsing flowint: %v", err)
		}
		r.Flowints = append(r.Flowints, fi)
	default:
		return &UnsupportedOptionError{
			Options: []string{key.value},
		}
	}
	return nil
}

// UnsupportedOptionError contains a partially parsed rule, and the options that aren't
// supported for parsing.
type UnsupportedOptionError struct {
	Rule    *Rule
	Options []string
}

// Error returns a string for UnsupportedOptionError.
func (uoe *UnsupportedOptionError) Error() string {
	return fmt.Sprintf("rule contains unsupported option(s): %s", strings.Join(uoe.Options, ","))
}

// parseRuleAux parses an IDS rule, optionally ignoring comments.
func parseRuleAux(rule string, commented bool) (*Rule, error) {
	l, err := lex(rule)
	if err != nil {
		return nil, err
	}
	defer l.close()
	dataPosition = pktData
	r := &Rule{}
	var unsupportedOptions = make([]string, 0, 3)
	for item := l.nextItem(); item.typ != itemEOR && item.typ != itemEOF && err == nil; item = l.nextItem() {
		switch item.typ {
		case itemComment:
			if r.Action != "" || commented {
				// Ignore comment ending rule.
				return r, nil
			}
			err = r.comment(item, l)
			// Error here means that the comment was not a commented rule.
			// So we're not parsing a rule and we need to break out.
			if err != nil {
				break
			}
			// This line was a commented rule.
			return r, nil
		case itemAction:
			err = r.action(item, l)
		case itemProtocol:
			err = r.protocol(item, l)
		case itemSourceAddress, itemDestinationAddress, itemSourcePort, itemDestinationPort:
			err = r.network(item, l)
		case itemDirection:
			err = r.direction(item, l)
		case itemOptionKey:
			err = r.option(item, l)
			// We will continue to parse a rule with unsupported options.
			if uerr, ok := err.(*UnsupportedOptionError); ok {
				unsupportedOptions = append(unsupportedOptions, uerr.Options...)
				// This is ugly but allows the parsing to continue.
				err = nil
			}
		case itemError:
			err = errors.New(item.value)
		}
		// Unrecoverable parse error.
		if err != nil {
			return nil, err
		}
	}

	// If we encountered one or more unsupported keys, return an UnsupportedOptionError.
	if len(unsupportedOptions) > 0 {
		return nil, &UnsupportedOptionError{
			Rule:    r,
			Options: unsupportedOptions,
		}
	}
	return r, nil
}

// ParseRule parses an IDS rule and returns a struct describing the rule.
func ParseRule(rule string) (*Rule, error) {
	return parseRuleAux(rule, false)
}
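// A minimal sketch of calling ParseRule and recovering the partially parsed
// rule when unsupported options are present; the parseLenient helper is an
// illustrative assumption.
func parseLenient(raw string) (*Rule, error) {
	r, err := ParseRule(raw)
	if uerr, ok := err.(*UnsupportedOptionError); ok {
		// uerr.Rule holds everything that did parse; uerr.Options lists the
		// keywords this parser does not understand.
		return uerr.Rule, nil
	}
	if err != nil {
		return nil, err
	}
	return r, nil
}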
/* Copyright 2016 Google Inc. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ package gonids import ( "bytes" "fmt" "regexp" "strconv" "strings" ) // Rule describes an IDS rule. type Rule struct { // Disbled identifies if the rule is disabled/commented out. Disabled bool // Action is the action the rule will take (alert, pass, drop, etc.). Action string // Protocol is the protocol the rule looks at. Protocol string // Source is the address and ports for the source of the traffic. Source Network // Destination is the address and ports for the source of the traffic. Destination Network // Bidirectional indicates the directionality of a rule (-> or <>). Bidirectional bool // SID is the identifier of the rule. SID int // Revision is the revision of the rule. Revision int // Description is the msg field of the rule. Description string // References contains references associated to the rule (e.g. CVE number). References []*Reference // Contents are all the decoded content matches. Tags map[string]string // Statements is a slice of string. These items are similar to Tags, but have no value. (e.g. 'sameip;') Statements []string // TLSTags is a slice of TLS related matches. TLSTags []*TLSTag // StreamMatch holds stream_size parameters. StreamMatch *StreamCmp // Metas is a slice of Metadata. Metas Metadatas // Flowbits is a slice of Flowbit. Flowbits []*Flowbit // Xbits is a slice of Xbit Xbits []*Xbit // Flowints is a slice of Flowint Flowints []*Flowint // Matchers are internally used to ensure relative matches are printed correctly. // Make this private before checkin? Matchers []orderedMatcher } type orderedMatcher interface { String() string } // Metadata describes metadata tags in key-value struct. type Metadata struct { Key string Value string } // Flowbit describes a flowbit. A flowbit consists of an Action, and optional Value. type Flowbit struct { Action string Value string } // Flowint describes a flowint. type Flowint struct { Name string Modifier string Value string } // Xbit describes an Xbit. // TODO: Consider adding more structure to Track and Expire. type Xbit struct { Action string Name string Track string // Expire should be an int, default 0 value makes stringer difficult because this is an // optional parameter. If we can confirm that this must be > 0 we can convert to int. Expire string } // Metadatas allows for a Stringer on []*Metadata type Metadatas []*Metadata // Network describes the IP addresses and port numbers used in a rule. // TODO: Ensure all values either begin with $ (variable) or they are valid IPNet/int. type Network struct { Nets []string // Currently just []string because these can be variables $HOME_NET, not a valid IPNet. Ports []string // Currently just []string because these can be variables $HTTP_PORTS, not just ints. } // DataPos indicates the data position for content matches. These should be referenced for creation // by using their Suricata keywords and the StickyBuffer() function. 
type DataPos int const ( pktData DataPos = iota fileData base64Data // // Suricata 4.x Sticky Buffers // // HTTP Sticky buffers httpAcceptEnc httpAccept httpAcceptLang httpConnection httpContentLen httpContentType httpHeaderNames httpProtocol httpReferer httpRequestLine httpResponseLine httpStart // TLS Sticky Buffers tlsCertSubject tlsCertIssuer tlsCertSerial tlsCertFingerprint tlsSNI // JA3 Sticky Buffers ja3Hash ja3String // SSH Sticky Buffers sshProto sshSoftware // Kerberos Sticky Buffers krb5Cname krb5Sname // DNS Sticky Buffers dnsQuery // SMB Sticky Buffers smbNamedPipe smbShare // // Suricata 5.0 Sticky Buffers // fileData5 // HTTP Sticky Buffers httpAccept5 httpAcceptEnc5 httpAcceptLang5 httpClientBody httpConnection5 httpContentLen5 httpContentType5 httpCookie httpHeader httpHeaderNames5 httpHeaderRaw httpHost httpHostRaw httpLocation httpMethod httpProtocol5 httpReferer5 httpRequestBody httpRequestLine5 httpResponseBody httpResponseLine5 httpServer httpServerBody httpStart5 httpStatCode httpStatMsg httpURI httpURIRaw httpUserAgent // TLS Sticky Buffers tlsCertSubject5 tlsCertIssuer5 tlsCertSerial5 tlsCertFingerprint5 tlsSNI5 // JA3 Sticky Buffers ja3Hash5 ja3String5 ja3sHash ja3sString // SSH Sticky Buffers sshProto5 sshSoftware5 // Kerberos Sticky Buffers - Unchanged from Suricata 4.x // DNS Sticky Buffers dnsQuery5 // SMB - Documentation lacking. Unknown. ) // Contains both Suricata 4.x and 5.0 buffers. Some day we'll deprecate the 4.x ones. var stickyBuffers = map[DataPos]string{ pktData: "pkt_data", fileData: "file_data", base64Data: "base64_data", // Suricata 4.X Sticky Buffers // HTTP Sticky Buffers httpAcceptEnc: "http_accept_enc", httpAccept: "http_accept", httpAcceptLang: "http_accept_lang", httpConnection: "http_connection", httpContentLen: "http_content_len", httpContentType: "http_content_type", httpHeaderNames: "http_header_names", httpProtocol: "http_protocol", httpReferer: "http_referer", httpRequestLine: "http_request_line", httpResponseLine: "http_response_line", httpStart: "http_start", // TLS Sticky Buffers tlsCertSubject: "tls_cert_subject", tlsCertIssuer: "tls_cert_issuer", tlsCertSerial: "tls_cert_serial", tlsCertFingerprint: "tls_cert_fingerprint", tlsSNI: "tls_sni", // JA3 Sticky Buffers ja3Hash: "ja3_hash", ja3String: "ja3_string", // SSH Sticky Buffers sshProto: "ssh_proto", sshSoftware: "ssh_software", // Kerberos Sticky Buffers krb5Cname: "krb5_cname", krb5Sname: "krb5_sname", // DNS Sticky Buffers dnsQuery: "dns_query", // SMB Sticky Buffers smbNamedPipe: "smb_named_pipe", smbShare: "smb_share", // Suricata 5.0 Sticky Buffers fileData5: "file.data", // HTTP Sticky Buffers httpAccept5: "http.accept", httpAcceptEnc5: "http.accept_enc", httpAcceptLang5: "http.accept_lang", httpClientBody: "http.client_body", httpConnection5: "http.connection", httpContentLen5: "http.content_len", httpContentType5: "http.content_type", httpCookie: "http.cookie", httpHeader: "http.header", httpHeaderNames5: "http.header_names", httpHeaderRaw: "http.header.raw", httpHost: "http.host", httpHostRaw: "http.host.raw", httpLocation: "http.location", httpMethod: "http.method", httpProtocol5: "http.protocol", httpReferer5: "http.referer", httpRequestBody: "http.request_body", httpRequestLine5: "http.request_line", httpResponseBody: "http.response_body", httpResponseLine5: "http.response_line", httpServer: "http.server", httpServerBody: "http.server_body", httpStart5: "http.start", httpStatCode: "http.stat_code", httpStatMsg: "http.stat_msg", httpURI: "http.uri", httpURIRaw: 
"http.uri.raw", httpUserAgent: "http.user_agent", // TLS Sticky Buffers tlsCertSubject5: "tls.cert_subject", tlsCertIssuer5: "tls.cert_issuer", tlsCertSerial5: "tls.cert_serial", tlsCertFingerprint5: "tls.cert_fingerprint", tlsSNI5: "tls.sni", // JA3 Sticky Buffers ja3Hash5: "ja3.hash", ja3String5: "ja3.string", ja3sHash: "ja3s.hash", ja3sString: "ja3s.string", // SSH Sticky Buffers sshProto5: "ssh.proto", sshSoftware5: "ssh.software", // Kerberos Sticky Buffers - Unchanged from Suricata 4.x // DNS Sticky Buffers dnsQuery5: "dns.query", // SMB - Documentation lacking. Unknown. } func (d DataPos) String() string { return stickyBuffers[d] } // StickyBuffer returns the data position value for the string representation of a sticky buffer name (e.g. "file_data") func StickyBuffer(s string) (DataPos, error) { for k, v := range stickyBuffers { if v == s { return k, nil } } return pktData, fmt.Errorf("%s is not a sticky buffer", s) } // isStickyBuffer returns true if the provided string is a known sticky buffer. func isStickyBuffer(s string) bool { _, err := StickyBuffer(s) return err == nil } // Content describes a rule content. A content is composed of a pattern followed by options. type Content struct { // DataPosition defaults to pkt_data state, can be modified to apply to file_data, base64_data locations. // This value will apply to all following contents, to reset to default you must reset DataPosition during processing. DataPosition DataPos // FastPattern settings for the content. FastPattern FastPattern // Pattern is the pattern match of a content (e.g. HTTP in content:"HTTP"). Pattern []byte // Negate is true for negated content match. Negate bool // Options are the option associated to the content (e.g. http_header). Options []*ContentOption } // byteMatchType describes the kinds of byte matches and comparisons that are supported. type byteMatchType int const ( bUnknown byteMatchType = iota bExtract bTest bJump isDataAt b64Decode ) var byteMatchTypeVals = map[byteMatchType]string{ bExtract: "byte_extract", bJump: "byte_jump", bTest: "byte_test", isDataAt: "isdataat", b64Decode: "base64_decode", } // allbyteMatchTypeNames returns a slice of valid byte_* keywords. func allbyteMatchTypeNames() []string { b := make([]string, len(byteMatchTypeVals)) var i int for _, n := range byteMatchTypeVals { b[i] = n i++ } return b } // String returns the string representation of a byte_* keyword. func (b byteMatchType) String() string { return byteMatchTypeVals[b] } // byteMatcher returns a byteMatchType iota for a provided String. func byteMatcher(s string) (byteMatchType, error) { for k, v := range byteMatchTypeVals { if v == s { return k, nil } } return bUnknown, fmt.Errorf("%s is not a byteMatchType* keyword", s) } // lenMatcher returns an lenMatchType or an error for a given string. func lenMatcher(s string) (lenMatchType, error) { for k, v := range lenMatchTypeVals { if v == s { return k, nil } } return lUnknown, fmt.Errorf("%s is not an lenMatch keyword", s) } // Returns the number of mandatory parameters for a byteMatchType keyword, -1 if unknown. func (b byteMatchType) minLen() int { switch b { case bExtract: return 3 case bJump: return 2 case bTest: return 4 case isDataAt: return 1 case b64Decode: return 0 } return -1 } // ByteMatch describes a byte matching operation, similar to a Content. type ByteMatch struct { // DataPosition defaults to pkt_data state, can be modified to apply to file_data, base64_data locations. 
// This value will apply to all following contents, to reset to default you must reset DataPosition during processing. DataPosition DataPos // Kind is a specific operation type we're taking. Kind byteMatchType // Negate indicates negation of a value, currently only used for isdataat. Negate bool // A variable name being extracted by byte_extract. Variable string // Number of bytes to operate on. "bytes to convert" in Snort Manual. This can be an int, or a var from byte_extract. NumBytes string // Operator for comparison in byte_test. Operator string // Value to compare against using byte_test. Value string // Offset within given buffer to operate on. Offset int // Other specifics required for jump/test here. This might make sense to pull out into a "ByteMatchOption" later. Options []string } // lenMatchType describes the type of length matches and comparisons that are supported. type lenMatchType int const ( lUnknown lenMatchType = iota iType iCode iID iSeq uriLen dSize ipTTL ipID tcpSeq tcpACK bSize ) // lenMatchTypeVals map len types to string representations. var lenMatchTypeVals = map[lenMatchType]string{ iType: "itype", iCode: "icode", iID: "icmp_id", iSeq: "icmp_seq", uriLen: "urilen", dSize: "dsize", ipTTL: "ttl", ipID: "id", tcpSeq: "seq", tcpACK: "ack", bSize: "bsize", } // allLenMatchTypeNames returns a slice of string containing all length match keywords. func allLenMatchTypeNames() []string { i := make([]string, len(lenMatchTypeVals)) var j int for _, n := range lenMatchTypeVals { i[j] = n j++ } return i } // String returns the string keyword for an lenMatchType. func (i lenMatchType) String() string { return lenMatchTypeVals[i] } // LenMatch holds the values to represent an Length Match. type LenMatch struct { // DataPosition defaults to pkt_data state, can be modified to apply to file_data, base64_data locations. // This value will apply to all following contents, to reset to default you must reset DataPosition during processing. DataPosition DataPos Kind lenMatchType Min int Max int Num int Operator string Options []string } // PCRE describes a PCRE item of a rule. type PCRE struct { // DataPosition defaults to pkt_data state, can be modified to apply to file_data, base64_data locations. // This value will apply to all following contents, to reset to default you must reset DataPosition during processing. DataPosition DataPos Pattern []byte Negate bool Options []byte } // FastPattern describes various properties of a fast_pattern value for a content. type FastPattern struct { Enabled bool Only bool Offset int Length int } // ContentOption describes an option set on a rule content. type ContentOption struct { // Name is the name of the option (e.g. offset). Name string // Value is the value associated to the option, default to "" for option without value. Value string } // Reference describes a gonids reference in a rule. type Reference struct { // Type is the system name for the reference: (url, cve, md5, etc.) Type string // Value is the identifier in the system: (address, cvd-id, hash) Value string } // TODO: Add support for tls_cert_nobefore, tls_cert_notafter, tls_cert_expired, tls_cert_valid. // Valid keywords for extracting TLS matches. Does not include tls.store, or sticky buffers. var tlsTags = []string{"ssl_version", "ssl_state", "tls.version", "tls.subject", "tls.issuerdn", "tls.fingerprint"} // TLSTag describes a TLS specific match (non-sticky buffer based). type TLSTag struct { // Is the match negated (!). 
Negate bool // Key holds the thing we're inspecting (tls.version, tls.fingerprint, etc.). Key string // TODO: Consider string -> []byte and handle hex input. // TODO: Consider supporting []struct if we can support things like: tls.version:!1.2,!1.3 // Value holds the value for the match. Value string } // StreamCmp represents a stream comparison (stream_size:>20). type StreamCmp struct { // Direction of traffic to inspect: server, client, both, either. Direction string // Operator is the comparison operator to apply >, <, !=, etc. Operator string // TODO: Can this number be a variable, if yes s/int/string. // Number is the size to compare against Number int } // escape escapes special char used in regexp. func escape(r string) string { return escapeRE.ReplaceAllString(r, `\$1`) } // within returns the within value for a specific content. func within(options []*ContentOption) string { for _, o := range options { if o.Name == "within" { return o.Value } } return "" } // RE returns all content matches as a single and simple regexp. func (r *Rule) RE() string { var re string for _, c := range r.Contents() { // TODO: handle pcre, depth, offset, distance. if d, err := strconv.Atoi(within(c.Options)); err == nil && d > 0 { re += fmt.Sprintf(".{0,%d}", d) } else { re += ".*" } re += escape(string(c.Pattern)) } return re } // CVE extracts CVE from a rule. func (r *Rule) CVE() string { for _, ref := range r.References { if ref.Type == "cve" { return ref.Value } } return "" } // LenMatchers returns all *LenMatch for a rule. func (r *Rule) LenMatchers() []*LenMatch { lms := make([]*LenMatch, 0, len(r.Matchers)) for _, m := range r.Matchers { if lm, ok := m.(*LenMatch); ok { lms = append(lms, lm) } } return lms } // Contents returns all *Content for a rule. func (r *Rule) Contents() []*Content { cs := make([]*Content, 0, len(r.Matchers)) for _, m := range r.Matchers { if c, ok := m.(*Content); ok { cs = append(cs, c) } } return cs } // LastContent returns the last *Content from Matchers func (r *Rule) LastContent() *Content { for i := range r.Matchers { if co, ok := r.Matchers[len(r.Matchers)-i-1].(*Content); ok { return co } } return nil } // ByteMatchers returns all *ByteMatch for a rule. func (r *Rule) ByteMatchers() []*ByteMatch { bs := make([]*ByteMatch, 0, len(r.Matchers)) for _, m := range r.Matchers { if b, ok := m.(*ByteMatch); ok { bs = append(bs, b) } } return bs } // PCREs returns all *PCRE for a rule. func (r *Rule) PCREs() []*PCRE { var ps []*PCRE for _, m := range r.Matchers { if p, ok := m.(*PCRE); ok { ps = append(ps, p) } } return ps } func netString(netPart []string) string { var s strings.Builder if len(netPart) > 1 { s.WriteString("[") } for i, n := range netPart { s.WriteString(n) if i < len(netPart)-1 { s.WriteString(",") } } if len(netPart) > 1 { s.WriteString("]") } return s.String() } // String retunrs a string for a Network. func (n Network) String() string { return fmt.Sprintf("%s %s", netString(n.Nets), netString(n.Ports)) } // String returns a string for a FastPattern. func (f FastPattern) String() string { if !f.Enabled { return "" } // This is an invalid state. if f.Only && (f.Offset != 0 || f.Length != 0) { return "" } var s strings.Builder s.WriteString("fast_pattern") if f.Only { s.WriteString(":only;") return s.String() } // "only" and "chop" modes are mutually exclusive. if f.Offset != 0 || f.Length != 0 { s.WriteString(fmt.Sprintf(":%d,%d", f.Offset, f.Length)) } s.WriteString(";") return s.String() } // String returns a string for a ContentOption. 
// String returns a string for a ContentOption.
func (co ContentOption) String() string {
	if inSlice(co.Name, []string{"depth", "distance", "offset", "within"}) {
		return fmt.Sprintf("%s:%v;", co.Name, co.Value)
	}
	return fmt.Sprintf("%s;", co.Name)
}

// String returns a string for a Reference.
func (r Reference) String() string {
	return fmt.Sprintf("reference:%s,%s;", r.Type, r.Value)
}

// String returns a string for a Content (ignoring sticky buffers).
func (c Content) String() string {
	var s strings.Builder
	s.WriteString("content:")
	if c.Negate {
		s.WriteString("!")
	}
	s.WriteString(fmt.Sprintf(`"%s";`, c.FormatPattern()))
	for _, o := range c.Options {
		s.WriteString(fmt.Sprintf(" %s", o))
	}
	if c.FastPattern.Enabled {
		s.WriteString(fmt.Sprintf(" %s", c.FastPattern))
	}
	return s.String()
}

// base64DecodeString returns a string for a base64_decode ByteMatch.
func (b ByteMatch) base64DecodeString() string {
	var parts []string
	if b.NumBytes != "" {
		parts = append(parts, fmt.Sprintf("bytes %s", b.NumBytes))
	}
	if b.Offset > 0 {
		parts = append(parts, fmt.Sprintf("offset %d", b.Offset))
	}
	// This should only be "relative", but we'll support anything.
	parts = append(parts, b.Options...)
	if len(parts) == 0 {
		return fmt.Sprintf("%s;", byteMatchTypeVals[b.Kind])
	}
	return fmt.Sprintf("%s:%s;", byteMatchTypeVals[b.Kind], strings.Join(parts, ","))
}

// String returns a string for a ByteMatch.
func (b ByteMatch) String() string {
	// TODO: Support dataPos?
	// TODO: Write tests.
	var s strings.Builder
	s.WriteString(fmt.Sprintf("%s:", byteMatchTypeVals[b.Kind]))
	switch b.Kind {
	case bExtract:
		s.WriteString(fmt.Sprintf("%s,%d,%s", b.NumBytes, b.Offset, b.Variable))
	case bJump:
		s.WriteString(fmt.Sprintf("%s,%d", b.NumBytes, b.Offset))
	case bTest:
		s.WriteString(fmt.Sprintf("%s,%s,%s,%d", b.NumBytes, b.Operator, b.Value, b.Offset))
	case isDataAt:
		if b.Negate {
			s.WriteString("!")
		}
		s.WriteString(b.NumBytes)
	// Logic for this case is a bit different, so it is handled in its own method.
	case b64Decode:
		return b.base64DecodeString()
	}
	for _, o := range b.Options {
		s.WriteString(fmt.Sprintf(",%s", o))
	}
	s.WriteString(";")
	return s.String()
}

// String returns a string for a LenMatch.
func (i LenMatch) String() string {
	var s strings.Builder
	s.WriteString(fmt.Sprintf("%s:", i.Kind))
	switch {
	case i.Operator == "<>":
		s.WriteString(fmt.Sprintf("%d%s%d", i.Min, i.Operator, i.Max))
	case i.Operator != "":
		s.WriteString(fmt.Sprintf("%s%d", i.Operator, i.Num))
	default:
		s.WriteString(fmt.Sprintf("%d", i.Num))
	}
	for _, o := range i.Options {
		s.WriteString(fmt.Sprintf(",%s", o))
	}
	s.WriteString(";")
	return s.String()
}

// String returns a string for all of the metadata values.
func (ms Metadatas) String() string {
	var s strings.Builder
	if len(ms) < 1 {
		return ""
	}
	s.WriteString("metadata:")
	for i, m := range ms {
		if i < len(ms)-1 {
			s.WriteString(fmt.Sprintf("%s %s, ", m.Key, m.Value))
			continue
		}
		s.WriteString(fmt.Sprintf("%s %s;", m.Key, m.Value))
	}
	return s.String()
}

// String returns a string for a TLSTag.
func (t *TLSTag) String() string {
	var s strings.Builder
	s.WriteString(fmt.Sprintf("%s:", t.Key))
	if t.Negate {
		s.WriteString("!")
	}
	// Values for these get wrapped in `"`.
	if inSlice(t.Key, []string{"tls.issuerdn", "tls.subject", "tls.fingerprint"}) {
		s.WriteString(fmt.Sprintf(`"%s";`, t.Value))
	} else {
		s.WriteString(fmt.Sprintf("%s;", t.Value))
	}
	return s.String()
}

// String returns a string for a StreamCmp.
func (s *StreamCmp) String() string {
	return fmt.Sprintf("stream_size:%s,%s,%d;", s.Direction, s.Operator, s.Number)
}
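// contentStringSketch is an illustrative sketch, not part of the original API,
// showing how Content.String renders a negated match: the pattern is quoted
// via FormatPattern and each option follows, space-separated.
func contentStringSketch() string {
	c := Content{
		Negate:  true,
		Pattern: []byte("evil"),
		Options: []*ContentOption{{Name: "nocase"}, {Name: "depth", Value: "4"}},
	}
	// Expected to yield `content:!"evil"; nocase; depth:4;`.
	return c.String()
}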
// String returns a string for a PCRE.
func (p PCRE) String() string {
	pattern := p.Pattern
	if len(pattern) < 1 {
		return ""
	}

	// Escape quote signs, if necessary.
	if bytes.IndexByte(pattern, '"') > -1 {
		pattern = bytes.Replace(pattern, []byte(`"`), []byte(`\"`), -1)
	}

	var s strings.Builder
	s.WriteString("pcre:")
	if p.Negate {
		s.WriteString("!")
	}
	s.WriteString(fmt.Sprintf(`"/%s/%s";`, pattern, p.Options))
	return s.String()
}

// String returns a string for a Flowbit.
func (fb Flowbit) String() string {
	if !inSlice(fb.Action, []string{"noalert", "isset", "isnotset", "set", "unset", "toggle"}) {
		return ""
	}
	var s strings.Builder
	s.WriteString(fmt.Sprintf("flowbits:%s", fb.Action))
	if fb.Value != "" {
		s.WriteString(fmt.Sprintf(",%s", fb.Value))
	}
	s.WriteString(";")
	return s.String()
}

// String returns a string for a Flowint.
func (fi Flowint) String() string {
	var s strings.Builder
	s.WriteString(fmt.Sprintf("flowint:%s", fi.Name))
	if inSlice(fi.Modifier, []string{"isset", "isnotset"}) {
		s.WriteString(fmt.Sprintf(",%s", fi.Modifier))
	}
	if inSlice(fi.Modifier, []string{"+", "-", "=", ">", "<", ">=", "<=", "==", "!="}) && fi.Value != "" {
		s.WriteString(fmt.Sprintf(",%s,%s", fi.Modifier, fi.Value))
	}
	s.WriteString(";")
	return s.String()
}

// String returns a string for an Xbit.
func (xb Xbit) String() string {
	var s strings.Builder
	s.WriteString(fmt.Sprintf("xbits:%s,%s,track %s", xb.Action, xb.Name, xb.Track))
	if xb.Expire != "" {
		s.WriteString(fmt.Sprintf(",expire %s", xb.Expire))
	}
	s.WriteString(";")
	return s.String()
}

// String returns a string for a rule.
func (r Rule) String() string {
	var s strings.Builder
	if r.Disabled {
		s.WriteString("#")
	}
	s.WriteString(fmt.Sprintf("%s %s %s ", r.Action, r.Protocol, r.Source))
	if r.Bidirectional {
		s.WriteString("<> ")
	} else {
		s.WriteString("-> ")
	}
	s.WriteString(fmt.Sprintf(`%s (msg:"%s"; `, r.Destination, r.Description))

	// Pull flow out of tags if it exists; we like flow at the beginning of rules.
	if v, ok := r.Tags["flow"]; ok {
		s.WriteString(fmt.Sprintf("flow:%s; ", v))
	}

	// Write out matchers in order (because things can be relative).
	if len(r.Matchers) > 0 {
		d := pktData
		for _, m := range r.Matchers {
			if c, ok := m.(*Content); ok {
				if d != c.DataPosition {
					d = c.DataPosition
					s.WriteString(fmt.Sprintf("%s; ", d))
				}
			}
			if c, ok := m.(*LenMatch); ok {
				if d != c.DataPosition {
					d = c.DataPosition
					s.WriteString(fmt.Sprintf("%s; ", d))
				}
			}
			if c, ok := m.(*PCRE); ok {
				if d != c.DataPosition {
					d = c.DataPosition
					s.WriteString(fmt.Sprintf("%s; ", d))
				}
			}
			s.WriteString(fmt.Sprintf("%s ", m))
		}
	}

	if r.StreamMatch != nil {
		s.WriteString(fmt.Sprintf("%s ", r.StreamMatch))
	}

	if len(r.TLSTags) > 0 {
		for _, t := range r.TLSTags {
			s.WriteString(fmt.Sprintf("%s ", t))
		}
	}

	if len(r.Metas) > 0 {
		s.WriteString(fmt.Sprintf("%s ", r.Metas))
	}

	for k, v := range r.Tags {
		if k == "flow" {
			continue
		}
		s.WriteString(fmt.Sprintf("%s:%s; ", k, v))
	}

	for _, v := range r.Statements {
		s.WriteString(fmt.Sprintf("%s; ", v))
	}

	for _, fb := range r.Flowbits {
		s.WriteString(fmt.Sprintf("%s ", fb))
	}

	for _, fi := range r.Flowints {
		s.WriteString(fmt.Sprintf("%s ", fi))
	}

	for _, xb := range r.Xbits {
		s.WriteString(fmt.Sprintf("%s ", xb))
	}

	for _, ref := range r.References {
		s.WriteString(fmt.Sprintf("%s ", ref))
	}

	s.WriteString(fmt.Sprintf("sid:%d; rev:%d;)", r.SID, r.Revision))
	return s.String()
}
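// pcreStringSketch is an illustrative sketch, not part of the original API,
// showing how PCRE.String escapes embedded quotes and renders negation and
// options into the `pcre:"/.../flags";` form.
func pcreStringSketch() string {
	p := PCRE{
		Negate:  true,
		Pattern: []byte(`say "hi"`),
		Options: []byte("iU"),
	}
	// Expected to yield `pcre:!"/say \"hi\"/iU";`.
	return p.String()
}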
// ToRegexp returns a string that can be used as a regular expression
// to identify content matches in an ASCII dump of a packet capture (tcpdump -A).
func (c *Content) ToRegexp() string {
	var buffer bytes.Buffer
	for _, b := range c.Pattern {
		if b > 126 || b < 32 {
			buffer.WriteString(".")
		} else {
			buffer.WriteByte(b)
		}
	}
	return regexp.QuoteMeta(buffer.String())
}

// FormatPattern returns a string for a Pattern in a content.
func (c *Content) FormatPattern() string {
	var buffer bytes.Buffer
	pipe := false
	for _, b := range c.Pattern {
		if b != ' ' && (b > 126 || b < 35 || b == ':' || b == ';' || b == '|' || b == '\\') {
			if !pipe {
				buffer.WriteByte('|')
				pipe = true
			} else {
				buffer.WriteString(" ")
			}
			buffer.WriteString(fmt.Sprintf("%.2X", b))
		} else {
			if pipe {
				buffer.WriteByte('|')
				pipe = false
			}
			buffer.WriteByte(b)
		}
	}
	if pipe {
		buffer.WriteByte('|')
	}
	return buffer.String()
}

// InsertMatcher inserts an ordered matcher at the specified position.
func (r *Rule) InsertMatcher(m orderedMatcher, pos int) error {
	if pos < 0 {
		return fmt.Errorf("cannot insert matcher, position %d < 0", pos)
	}
	if pos > len(r.Matchers) {
		return fmt.Errorf("cannot insert matcher, position %d > %d", pos, len(r.Matchers))
	}
	r.Matchers = append(r.Matchers, &Content{})
	copy(r.Matchers[pos+1:], r.Matchers[pos:])
	r.Matchers[pos] = m
	return nil
}

// HasVar returns true if a variable with the provided name exists.
func (r *Rule) HasVar(s string) bool {
	for _, m := range r.Matchers {
		if b, ok := m.(*ByteMatch); ok {
			if b.Variable == s {
				return true
			}
		}
	}
	return false
}

// GetSidMsg returns a string representing a sidmsg.map entry.
func (r *Rule) GetSidMsg() string {
	var sidmsg strings.Builder
	sidmsg.WriteString(fmt.Sprintf("%d || %s", r.SID, r.Description))
	for _, ref := range r.References {
		sidmsg.WriteString(fmt.Sprintf(" || %s,%s", ref.Type, ref.Value))
	}
	return sidmsg.String()
}
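// formatPatternSketch is an illustrative sketch, not part of the original API,
// showing how FormatPattern pipe-escapes bytes that cannot appear literally in
// a content pattern ('|' and ';' here) as uppercase hex.
func formatPatternSketch() string {
	c := &Content{Pattern: []byte("a|b;c")}
	// Expected to yield `a|7C|b|3B|c`.
	return c.FormatPattern()
}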