Sorting strings which contains numbers
I need to sort a list of file names that are the output of a mail merge.
Each filename has the same prefix and a serial ending number.
I tried to sort filenames with a quicksort algorithm, but the output appear like this:
- myfileprefix0.sxw
- myfileprefix1.sxw
- myfileprefix10.sxw
- myfileprefix11.sxw
...
- myfileprefix2.sxw
- myfileprefix20.sxw
- myfileprefix21.sxw
...
...
and so on...
The result is formally correct, because files are sorted alphabetically, but an human wouldn't never sorted the list in that way.
This is a common problem and was already solved in many languages and contexts.
What you need is called "natural" sort algorithm.
see for example
http://www.naturalordersort.org/
or search with google for "natural sort algorithm"
The following code contains a very simple implementation of this algorithm.
The main work is performed by the function NaturalStrComp()
This function calls RegExpStrReplace() to make its work.
This one is based on the service css.util.TextSearch
Performances are slow.
StarBasic language is not fast as compiled ones and further it does not offer advanced functions for string crunching natively.
Anyway, maybe some little optimizations are stll possible.
To use this code:
copy it into an empty module and run the Sub Test() on top of the module to see an example.
REM ***** BASIC *****
'_______________________________________________________________________________________________
'
' String sorting functions
'_______________________________________________________________________________________________
'
'
' Declarations:
Option Explicit
'_______________________________________________________________________________________________
Sub Test
Dim List(20)
Dim I As Integer
Dim iCaseSens As Integer
Dim iNatural As Integer
Dim iReversed As Integer
'generate an unsorted list of file names
For I = 0 To 20
List(I) = "mytestfile" & Int(199*rnd) & ".sxw"
Next I
msgbox "not sorted"
msgbox Join( list(), chr(10))
msgbox "alphabetically sorted"
ExtendedSortStringList(list())
msgbox Join( list(), chr(10))
msgbox """natural"" sorted (slow)"
'set up some flags
iCaseSens = 0
iNatural = 1
iReversed = 0
ExtendedSortStringList(list(), iCaseSens, iNatural, iReversed)
msgbox Join( list(), chr(10))
End Sub
'_______________________________________________________________________________________________
Sub ExtendedSortStringList(Data(), _
Optional iCaseSensitive As Integer, _
Optional iNaturalSort As Integer, _
Optional iReversed As Integer)
Dim iCaseSens As Integer
Dim iNatural As Integer
Dim iRev As Integer
Dim iMin As Long
Dim iMax As Long
If Not IsMissing(iCaseSensitive) Then
iCaseSens = iCaseSensitive
End If
If Not IsMissing(iNaturalSort) Then
iNatural = iNaturalSort
End If
If Not IsMissing(iReversed) Then
iRev = iReversed
End If
iMin = LBound(Data())
iMax = UBound(Data())
If iNatural = 0 Then
If iRev = 0 Then
SimpleStringSort(Data(), iCaseSens, iMin, iMax)
Else
SimpleStringSortReversed(Data(), iCaseSens, iMin, iMax)
End If
Else
If iRev = 0 Then
NaturalStringSort(Data(), iCaseSens, iMin, iMax)
Else
NaturalStringSortReversed(Data(), iCaseSens, iMin, iMax)
End If
End if
End Sub
'_______________________________________________________________________________________________
Sub SimpleStringSort(Data(), iCaseSensitive As Integer, ByVal Lower As Long, ByVal Upper As Long)
Dim Right As Long
Dim Left As Long
If Lower < Upper Then
Left = Lower + 1
Right = Upper + 1
Do While Left < Right
If StrComp(Data(Left), Data(Lower), iCaseSensitive) <= 0 Then
Left = Left + 1
Else
Right = Right - 1
SwapElements(Data(), Left, Right)
End If
Loop
Left = Left - 1
SwapElements(Data(), Lower, Left)
SimpleStringSort(Data(), iCaseSensitive, Lower, Left - 1)
SimpleStringSort(Data(), iCaseSensitive, Right, Upper)
End If
End Sub
'_______________________________________________________________________________________________
Sub SimpleStringSortReversed(Data(), iCaseSensitive As Integer, ByVal Lower As Long, ByVal Upper As Long)
Dim Right As Long
Dim Left As Long
If Lower < Upper Then
Left = Lower + 1
Right = Upper + 1
Do While Left < Right
If StrComp(Data(Left), Data(Lower), iCaseSensitive) = 1 Then
Left = Left + 1
Else
Right = Right - 1
SwapElements(Data(), Left, Right)
End If
Loop
Left = Left - 1
SwapElements(Data(), Lower, Left)
SimpleStringSortReversed(Data(), iCaseSensitive, Lower, Left - 1)
SimpleStringSortReversed(Data(), iCaseSensitive, Right, Upper)
End If
End Sub
'_______________________________________________________________________________________________
Sub NaturalStringSort(Data(), iCaseSensitive As Integer, ByVal Lower As Long, ByVal Upper As Long)
Dim Right As Long
Dim Left As Long
If Lower < Upper Then
Left = Lower + 1
Right = Upper + 1
Do While Left < Right
If NaturalStrComp(Data(Left), Data(Lower), iCaseSensitive) <= 0 Then
Left = Left + 1
Else
Right = Right - 1
SwapElements(Data(), Left, Right)
End If
Loop
Left = Left - 1
SwapElements(Data(), Lower, Left)
NaturalStringSort(Data(), iCaseSensitive, Lower, Left - 1)
NaturalStringSort(Data(), iCaseSensitive, Right, Upper)
End If
End Sub
'_______________________________________________________________________________________________
Sub NaturalStringSortReversed(Data(), iCaseSensitive As Integer, ByVal Lower As Long, ByVal Upper As Long)
Dim Right As Long
Dim Left As Long
If Lower < Upper Then
Left = Lower + 1
Right = Upper + 1
Do While Left < Right
If NaturalStrComp(Data(Left), Data(Lower), iCaseSensitive) = 1 Then
Left = Left + 1
Else
Right = Right - 1
SwapElements(Data(), Left, Right)
End If
Loop
Left = Left - 1
SwapElements(Data(), Lower, Left)
NaturalStringSortReversed(Data(), iCaseSensitive, Lower, Left - 1)
NaturalStringSortReversed(Data(), iCaseSensitive, Right, Upper)
End If
End Sub
'_______________________________________________________________________________________________
Function NaturalStrComp(sText1 As String, sText2 As String, iCaseSensitive As Integer) As Integer
Dim sLocText1 As String
Dim sLocText2 As String
Dim mNumList1()
Dim mNumList2()
Dim iNum1 As Integer
Dim iNum2 As Integer
Dim I As Integer
Dim iResult As Integer
'replace numbers with a placeholder
sLocText1 = RegExpStrReplace(sText1, "[0-9]+", "#")
sLocText2 = RegExpStrReplace(sText2, "[0-9]+", "#")
'if the two strings are equal we will evaluate the numbers
If StrComp(sLocText1, sLocText2, iCaseSensitive) = 0 Then
'estract numbers from strings
sLocText1 = RegExpStrReplace(sText1, "[^0-9]+",Chr(0))
sLocText2 = RegExpStrReplace(sText2, "[^0-9]+",Chr(0))
mNumList1() = Split(sLocText1,Chr(0))
mNumList2() = Split(sLocText2,Chr(0))
'note that the two lists have the same number of elements
For I = LBound(mNumList1()) To UBound(mNumList1())
iNum1 = mNumList1(I)
iNum2 = mNumList2(I)
Select Case iNum1
Case Is = iNum2
iResult = 0
Case Is > iNum2
iResult = 1
Exit For
Case Is < iNum2
iResult = -1
Exit For
End Select
Next I
Else
'evaluate strings
iResult = StrComp(sText1, sText2, iCaseSensitive)
End if
NaturalStrComp = iResult
End Function
'_______________________________________________________________________________________________
Function RegExpStrReplace(ByVal sText As String, sSearchRegExp As String, sReplace As String) As String
'Notice:
'in general, this function should allow to set the flag "CaseSensitive".
'Since in this implementation this flag would never be used, in order to preserve
'performances the flag and relative code has been removed
Static oTextSearch As Object
Static oSearchOpts As Object
Dim oResult As Object
Dim iStartPos As Integer
Dim iTextLen As Integer
Dim iMatchStartPos As Integer
Dim iMatchEndPos As Integer
Dim iMatchLen As Integer
Dim sMatchString As String
Dim sLocText As String
'initialize the service only if needed
If IsNull(oTextSearch) Then
oTextSearch = CreateUnoService("{@see com.sun.star.util.TextSearch}")
oSearchOpts = CreateUnoStruct("{@see com.sun.star.util.SearchOptions}")
With oSearchOpts
.searchFlag = {@see com.sun.star.util.SearchFlags:REG_EXTENDED}
.algorithmType = {@see com.sun.star.util.SearchAlgorithms:REGEXP}
End With
End if
oSearchOpts.searchString = sSearchRegExp
oTextSearch.setOptions(oSearchOpts)
'do a first search
oResult = oTextSearch.searchForward(sText, iStartPos, Len(sText) )
'iterate the S&R for all occourrences found
Do While oResult.subRegExpressions > 0
iMatchStartPos = oResult.startOffset(0)
iMatchEndPos = oResult.endOffset(0)
iMatchLen = iMatchEndPos - iMatchStartPos
'replace the match in the copy of the original text
sLocText = Left(sText, iMatchStartPos)
sLocText = sLocText & sReplace
sLocText = sLocText & Right(sText, Len(sText) - iMatchEndPos)
sText = sLocText
'next search will start after the current replaced match
iStartPos = iMatchStartPos + Len(sReplace)
'do the next search
oResult = oTextSearch.searchForward(sText, iStartPos, Len(sText) )
Loop
RegExpStrReplace = sText
End Function
'_______________________________________________________________________________________________
Sub SwapElements(Data(), I As Long, J As Long)
Dim vTemp As Variant
vTemp = Data(I)
Data(I) = Data(J)
Data(J) = vTemp
End Sub