Regular expressions in Excel VBA

I use the Microsoft regex engine in Excel VBA. I am very new to regex, but now I have a pattern. I need to expand it, and I have problems. Here is my code:

Sub ImportFromDTD()

Dim sDTDFile As Variant
Dim ffile As Long
Dim sLines() As String
Dim i As Long
Dim Reg1 As RegExp
Dim M1 As MatchCollection
Dim M As Match
Dim myRange As Range

Set Reg1 = New RegExp

ffile = FreeFile

sDTDFile = Application.GetOpenFilename("DTD Files,*.XML", , _
"Browse for file to be imported")

If sDTDFile = False Then Exit Sub '(user cancelled import file browser)


Open sDTDFile For Input Access Read As #ffile
  Lines = Split(Input$(LOF(ffile), #ffile), vbNewLine)
Close #ffile

Cells(1, 2) = "From DTD"
J = 2

For i = 0 To UBound(Lines)

  'Debug.Print "Line"; i; "="; Lines(i)

  With Reg1
      '.Pattern = "(\<\!ELEMENT\s)(\w*)(\s*\(\#\w*\)\s*\>)"
      .Pattern = "(\<\!ELEMENT\s)(\w*)(\s*\(\#\w*\)\s*\>)"

      .Global = True
      .MultiLine = True
      .IgnoreCase = False
  End With

  If Reg1.Test(Lines(i)) Then
    Set M1 = Reg1.Execute(Lines(i))
    For Each M In M1
      sExtract = M.SubMatches(1)
      sExtract = Replace(sExtract, Chr(13), "")
      Cells(J, 2) = sExtract
      J = J + 1
      'Debug.Print sExtract
    Next M
  End If
Next i

Set Reg1 = Nothing

End Sub

I am currently mapping the dataset as follows:

 <!ELEMENT DealNumber  (#PCDATA) >

and extract the Dealnumber, but now I need to add one more match to this data:

<!ELEMENT DealParties  (DealParty+) >

and extract only Dealparty without Parens and +

I use this as a link, and it's awesome, but I'm still a bit confused. How to use regular expressions (Regex) in Microsoft Excel both inside and in loops

EDIT

I came across several new scenarios that need to be aligned.

 Extract Deal
 <!ELEMENT Deal  (DealNumber,DealType,DealParties) >

 Extract DealParty the ?,CR are throwing me off
 <!ELEMENT DealParty  (PartyType,CustomerID,CustomerName,CentralCustomerID?,
           LiabilityPercent,AgentInd,FacilityNo?,PartyReferenceNo?,
           PartyAddlReferenceNo?,PartyEffectiveDate?,FeeRate?,ChargeType?) >

 Extract Deals
 <!ELEMENT Deals  (Deal*) >
+4
2

Regex;

  .Pattern = "\<\!ELEMENT\s+(\w+)\s+\((#\w+|(\w+)\+)\)\s+\>"

(#\w+|(\w+)\+)

,

# -z0-9
     -z0-9 +

.

..

(# PCDATA)
(DealParty +)

  1. DealNumber , DealParty

- M.submatches(0)

    Sub ImportFromDTD()

Dim sDTDFile As Variant
Dim ffile As Long
Dim sLines() As String
Dim i As Long
Dim Reg1 As RegExp
Dim M1 As MatchCollection
Dim M As Match
Dim myRange As Range

Set Reg1 = New RegExp
J = 1

strIn = "<!ELEMENT Deal12Number  (#PCDATA) > <!ELEMENT DealParties  (DealParty+) >"

With Reg1
      .Pattern = "\<\!ELEMENT\s+(\w+)\s+\((#\w+|(\w+)\+)\)\s+\>"
      .Global = True
      .MultiLine = True
      .IgnoreCase = False
End With

If Reg1.Test(strIn) Then
    Set M1 = Reg1.Execute(strIn)
    For Each M In M1
      sExtract = M.SubMatches(2)
      If Len(sExtract) = 0 Then sExtract = M.SubMatches(0)
      sExtract = Replace(sExtract, Chr(13), "")
      Cells(J, 2) = sExtract
      J = J + 1
    Next M
End If

Set Reg1 = Nothing

End Sub
+1

. PCDATA DealParty. :

  With Reg1
      .Pattern = "\<!ELEMENT\s+\w+\s+\(\W*(\w+)\W*\)"

      .Global = True
      .MultiLine = True
      .IgnoreCase = False
  End With

: Regex101.

+3

Source: https://habr.com/ru/post/1611796/


All Articles