Hello everyone,
I have been working on an application in Excel that mines a text file, and produces a new text file that contains the unique words and frequencies of each word. I am hoping to share this with colleagues but first want to stand-alone application first. I have attempted not to use anything specific to Excel vba so that conversion would be possible. My question is quite general. How would I convert my vba code to vb.net. My code is presented below for context. Any advice is welcomed.
Thanks.
AMAS
I have been working on an application in Excel that mines a text file, and produces a new text file that contains the unique words and frequencies of each word. I am hoping to share this with colleagues but first want to stand-alone application first. I have attempted not to use anything specific to Excel vba so that conversion would be possible. My question is quite general. How would I convert my vba code to vb.net. My code is presented below for context. Any advice is welcomed.
Thanks.
AMAS
VB.NET:
Option Explicit
Option Base 1
Dim Arr As Variant
Dim x&
Dim r&
Dim newArr As Variant
Dim CiteCount&
Dim Counter&
Dim PctDone As Single
Dim PctTotal&
Dim PrevPct As Double
Dim jag1n1%
Dim StartTime As Double
Dim TI As Boolean, ab As Boolean, KW As Boolean
Dim TopR&
Dim jag%
Dim newTempArr As Variant
Dim n%
Public CancelMe As Boolean
Sub Import_RIS()
Dim UBarr&
Dim DeleteThese As Variant
Dim d%
Dim strFile$
Dim iFF%
Dim FSO As FileSystemObject
Dim mystring$
Dim sDummyPrint$
Dim FSOFile As TextStream
' Set values for variables
StartTime = Time()
CancelMe = False
' Import File
Set FSO = New FileSystemObject
strFile = Application.GetOpenFilename("Text Files,*.txt")
If strFile = "False" Then GoTo ExitTheSub
' Use Freefile to get next available number
iFF = FreeFile
' Open file and convert to string
mystring = Space(FileLen(strFile))
Open strFile For Binary Access Read As #iFF
Get #iFF, , mystring
Close #iFF
' Clean and trim the results
mystring = Trim(StrConv(mystring, vbLowerCase))
' Delete punctuation and non-printing characters (clean)
DeleteThese = Array(Chr(1), Chr(2), Chr(3), Chr(4), Chr(5), Chr(6), Chr(7), Chr(8), Chr(9), Chr(11), Chr(12), Chr(14), Chr(15), _
Chr(16), Chr(17), Chr(18), Chr(19), Chr(20), Chr(21), Chr(22), Chr(23), Chr(24), Chr(25), Chr(26), Chr(27), _
Chr(28), Chr(29), Chr(30), Chr(31), Chr(33), Chr(34), Chr(35), Chr(36), Chr(37), Chr(38), Chr(39), Chr(40), _
Chr(41), Chr(42), Chr(43), Chr(44), Chr(46), Chr(47), Chr(58), Chr(59), Chr(60), Chr(62), Chr(63), Chr(64), _
Chr(91), Chr(92), Chr(93), Chr(94), Chr(95), Chr(96), Chr(123), Chr(124), Chr(125), Chr(126), Chr(127), _
Chr(128), Chr(129), Chr(130), Chr(131), Chr(132), Chr(133), Chr(134), Chr(135), Chr(136), Chr(137), Chr(138), _
Chr(139), Chr(140), Chr(141), Chr(142), Chr(143), Chr(144), Chr(145), Chr(146), Chr(147), Chr(148), Chr(149), _
Chr(150), Chr(151), Chr(152), Chr(153), Chr(154), Chr(155), Chr(156), Chr(157), Chr(158), Chr(159), Chr(160), _
Chr(161), Chr(162), Chr(163), Chr(164), Chr(165), Chr(166), Chr(167), Chr(168), Chr(169), Chr(170), Chr(171), _
Chr(172), Chr(173), Chr(174), Chr(175), Chr(176), Chr(177), Chr(178), Chr(179), Chr(180), Chr(181), Chr(182), _
Chr(183), Chr(184), Chr(185), Chr(186), Chr(187), Chr(188), Chr(189), Chr(190), Chr(191), Chr(192), Chr(193), _
Chr(194), Chr(195), Chr(196), Chr(197), Chr(198), Chr(199), Chr(200), Chr(201), Chr(202), Chr(203), Chr(204), _
Chr(205), Chr(206), Chr(207), Chr(208), Chr(209), Chr(210), Chr(211), Chr(212), Chr(213), Chr(214), Chr(215), _
Chr(216), Chr(217), Chr(218), Chr(219), Chr(220), Chr(221), Chr(222), Chr(223), Chr(224), Chr(225), Chr(226), _
Chr(227), Chr(228), Chr(229), Chr(230), Chr(231), Chr(232), Chr(233), Chr(234), Chr(235), Chr(236), Chr(237), _
Chr(238), Chr(239), Chr(240), Chr(241), Chr(242), Chr(243), Chr(244), Chr(245), Chr(246), Chr(247), Chr(248), _
Chr(249), Chr(250), Chr(251), Chr(252), Chr(253), Chr(254), Chr(255))
For d = LBound(DeleteThese) To UBound(DeleteThese)
mystring = Replace(mystring, DeleteThese(d), "")
Next
' Remove common symbols and words
DeleteThese = Array("=", "-", " a ", " also ", " an ", " and ", " are ", " as ", " at ", " be ", " been ", " but ", _
" by ", " can ", " for ", " from ", " has ", " have ", " in ", " is ", " it ", " its ", " may ", _
" more ", " no ", " not ", " of ", " on ", " one ", " or ", " than ", " that ", " the ", " these ", _
" this ", " three ", " to ", " two ", " use ", " using ", " was ", " we ", " were ", " which ", _
" who ", " with ")
For d = LBound(DeleteThese) To UBound(DeleteThese)
mystring = Replace(" " & mystring & " ", DeleteThese(d), " ")
Next
' Convert to array
Arr = Split(mystring, vbCrLf)
' Read each element from array and update the temp array
For x = LBound(Arr) To UBound(Arr)
If Left(Arr(x), 6) = "ty " Then CiteCount = CiteCount + 1 ' Count the number of citations
Next
' Prepare userform
ProgressBar.LabelProgress.Width = 0
ProgressBar.Show vbModeless
Counter = 0
PctDone = 0
PrevPct = 0
PctTotal = CiteCount ' Update to the number of comparisons
' Set dimensions of jagged array
ReDim arrTemp(1 To 3)
' Redimension the inner arrays
ReDim vTitle(1 To CiteCount) As String
arrTemp(1) = vTitle
Erase vTitle
ReDim vAbstract(1 To CiteCount) As String
arrTemp(2) = vAbstract
Erase vAbstract
ReDim vKeyWords(1 To CiteCount) As String
arrTemp(3) = vKeyWords
Erase vKeyWords
' Reset variables
UBarr = 0: TI = False: ab = False: KW = False
' Read each element from array and update the temp array
For x = LBound(Arr) To UBound(Arr)
Select Case Left(Arr(x), 6)
Case "ty " ' Specific to RIS format
UBarr = UBarr + 1
TI = False: ab = False: KW = False
Case "t1 ", "ti ", "bti "
arrTemp(1)(UBarr) = Mid(Arr(x), 7)
TI = True: ab = False: KW = False
Case "n2 ", "ab "
arrTemp(2)(UBarr) = Mid(Arr(x), 7)
TI = False: ab = True: KW = False
Case "kw ", "rn ", "mh ", "pt "
With arrTemp(3)(UBarr)
If arrTemp(3)(UBarr) = False Then
arrTemp(3)(UBarr) = Mid(Arr(x), 7)
Else
arrTemp(3)(UBarr) = arrTemp(3)(UBarr) & "; " & Mid(Arr(x), 7)
End If
End With
TI = False: ab = False: KW = True
Case Else
If TI = True Or ab = True Or KW = True Then
If TI = True Then
If Mid(Arr(x), 4, 3) = " " Then TI = False Else _
arrTemp(1)(UBarr) = arrTemp(1)(UBarr) & " " & Arr(x)
ElseIf ab = True Then
If Mid(Arr(x), 4, 3) = " " Then ab = False Else _
arrTemp(2)(UBarr) = arrTemp(2)(UBarr) & " " & Arr(x)
ElseIf KW = True Then
If Mid(Arr(x), 4, 3) = " " Then KW = False Else _
arrTemp(3)(UBarr) = arrTemp(3)(UBarr) & "; " & Arr(x)
End If
End If
End Select
Next
' Erase array to free up memory
Erase Arr
' Run Dummy Analysis
jag1n1 = 0
For jag = 1 To 3
newArr = arrTemp(jag)
For n = 1 To 2
r = 1
DummyAnalysis n
Next
Next
' Print Dummy results
ReDim Preserve newTempArr(TopR)
newTempArr(1) = "Title - one word" & Chr(9) & "Freq" & Chr(9) & Chr(9) & "Title - two words" & Chr(9) & "Freq" & Chr(9) & Chr(9) & _
"Abstract - one word" & Chr(9) & "Freq" & Chr(9) & Chr(9) & "Abstract - two words" & Chr(9) & "Freq" & Chr(9) & Chr(9) & _
"Key Words - one word" & Chr(9) & "Freq" & Chr(9) & Chr(9) & "Key Words - two words" & Chr(9) & "Freq"
sDummyPrint = Join(newTempArr, vbCrLf)
strFile = ThisWorkbook.Path & "\" & "Dummy - " & "Title - Abstract - KeyWords" & " (" & _
Format(Date, "yyyy-mm-dd") & ").txt"
Set FSOFile = FSO.OpenTextFile(strFile, 2, True) ' Opens file in write mode (ForReading = 1, ForWriting = 2, ForAppending = 3)
With FSOFile
.Write sDummyPrint ' Write text to file
.Close ' Close file
End With
ExitTheSub:
Unload ProgressBar
iFF = 1
CiteCount = 0
TopR = 0
MsgBox "Task Completed..." & vbCrLf & vbCrLf & "Time for completion: " & _
Format(Time() - StartTime, "Hh:mm:ss"), vbInformation, "DUMMY analysis"
End Sub
Private Sub DummyAnalysis(nWds As Integer)
Dim e&
Dim uE$
Dim y&
Dim d$
Dim arrCitation
Dim z&
Dim AllKeys
Dim AllItems
Dim Dic1 As New Scripting.Dictionary
Dim Dic2 As New Scripting.Dictionary
On Error Resume Next
' Prepare userform
ProgressBar.LabelProgress.Width = 0
ProgressBar.Show vbModeless
Counter = 0
PctDone = 0
PrevPct = 0
PctTotal = CiteCount ' Update to the number of comparisons
' Clear Scripting dictionaries
Dic1.RemoveAll
Dic2.RemoveAll
' Run analysis on all cells in specific range
For e = LBound(newArr) To UBound(newArr)
With Dic1
.CompareMode = vbTextCompare
' Set type of delimiter
If jag > 2 Then d = "; " Else d = " "
' For each citation --> create array with all unique elements(uE)
arrCitation = Split(newArr(e), d)
For x = LBound(arrCitation) To UBound(arrCitation) - nWds + 1
uE = vbNullString
If IsNumeric(arrCitation(y)) = False And arrCitation(y) <> ";" Then
For y = x To x + nWds - 1: uE = uE & arrCitation(y) & d: Next y
.Item(Trim(uE)) = 0
End If
Next x
End With
With Dic2
.CompareMode = vbBinaryCompare
' Update the Scripting.Dictionary array with the new elements
For z = 0 To Dic1.Count - 1
If Not .Exists(Dic1.Keys(z)) Then
.Add Dic1.Keys(z), 1
Else: .Item(Dic1.Keys(z)) = .Item(Dic1.Keys(z)) + 1: End If
Next
End With
' Clear the dictionary
Dic1.RemoveAll
' Next citation
Next
With Dic2
' Convert the Keys and Items into arrays
AllKeys = .Keys
AllItems = .Items
.RemoveAll ' Clean the dictionary
End With
' Enlarge the array if needed to accomodate more results
If jag = 1 And jag1n1 = 0 Then ReDim newTempArr(UBound(AllKeys) + 1)
' Add results to Array
For z = LBound(AllKeys) To UBound(AllKeys)
If AllItems(z) / CiteCount > 0.01 And Len(Trim(Application.Clean(AllKeys(z)))) > 0 And AllKeys(z) <> ";" Then
r = r + 1
If newTempArr(r) = "" Then
Select Case True
Case jag = 1 And n = 1
newTempArr(r) = AllKeys(z) & Chr(9) & AllItems(z)
jag1n1 = 1
Case jag = 1 And n = 2
newTempArr(r) = Chr(9) & Chr(9) & Chr(9) & _
AllKeys(z) & Chr(9) & AllItems(z)
Case jag = 2 And n = 1
newTempArr(r) = Chr(9) & Chr(9) & Chr(9) & _
AllKeys(z) & Chr(9) & AllItems(z)
Case jag = 2 And n = 2
newTempArr(r) = Chr(9) & Chr(9) & Chr(9) & _
Chr(9) & Chr(9) & Chr(9) & _
AllKeys(z) & Chr(9) & AllItems(z)
Case jag = 3 And n = 1
newTempArr(r) = Chr(9) & Chr(9) & Chr(9) & _
Chr(9) & Chr(9) & Chr(9) & _
Chr(9) & Chr(9) & Chr(9) & _
AllKeys(z) & Chr(9) & AllItems(z)
Case jag = 3 And n = 2
newTempArr(r) = Chr(9) & Chr(9) & Chr(9) & _
Chr(9) & Chr(9) & Chr(9) & _
Chr(9) & Chr(9) & Chr(9) & _
AllKeys(z) & Chr(9) & AllItems(z)
End Select
Else
If Right(newTempArr(r), 1) = Chr(9) Then
newTempArr(r) = newTempArr(r) & Chr(9) & AllKeys(z) & Chr(9) & AllItems(z)
Else
newTempArr(r) = newTempArr(r) & Chr(9) & Chr(9) & AllKeys(z) & Chr(9) & AllItems(z)
End If
End If
DoEvents
End If
Next
If r > TopR Then TopR = r
End Sub