08-22-2022, 05:08 PM
I saw the discussion in the MemFile System thread about fast file reads.
That all works fine for files up to 2GB.
I sometimes have to process huge (100GB+) CSV files.
So I created this reader; it is about 2x slower, but handles files of unlimited size:
Code:
t! = Timer
recs~&& = processBigFile("20.csv", Chr$(10))
Print "Done"; " in"; (Timer - t!); "seconds"
End
Function processBigFile~&& (ifile$, eol$)
    Const BLOCKSIZE = 4 * 1024 * 1024 'on average, 4MB blocks seem fastest
    Dim block As String * BLOCKSIZE
    filenum% = FreeFile
    Open ifile$ For Random Access Read As filenum% Len = Len(block)
    blocks~& = .5 + LOF(filenum%) / Len(block) 'number of blocks needed to cover the whole file
    buf$ = "": recs~&& = 0: bufpos~& = 0
    $Checking:Off
    For blck~& = 1 To blocks~&
        'append the next block to the unprocessed remainder of the buffer
        Get filenum%, blck~&, block: buf$ = Mid$(buf$, bufpos~&) + block
        bufpos~& = 1: endline~& = InStr(bufpos~&, buf$, eol$)
        Do While endline~& >= bufpos~& 'hand every complete line in the buffer to processLine
            recs~&& = recs~&& + 1
            lin$ = Mid$(buf$, bufpos~&, endline~& - bufpos~&)
            processLine lin$
            bufpos~& = endline~& + Len(eol$): endline~& = InStr(bufpos~&, buf$, eol$)
        Loop
        Locate , 1, 0: Print recs~&&; 'progress: records processed so far
    Next blck~&
    Print
    $Checking:On
    buf$ = "": Close
    processBigFile~&& = recs~&&
End Function
Sub processLine (lin$)
    'do something with lin$, e.g. pick out a field:
    'f3$ = CSV.field$(lin$, 3)
End Sub
Function CSV.field$ (lin$, n%)
    Const MAXFIELDS = 100
    Static rec$, fld$(1 To MAXFIELDS) 'cache the split of the last line so repeated calls don't re-parse it
    If rec$ <> lin$ Then
        rec$ = lin$
        cf% = 0: q% = 0: i0% = 0: ll% = Len(rec$)
        For i% = 1 To ll%
            cc% = Asc(Mid$(rec$, i%, 1))
            If cc% = 13 Or cc% = 10 Then '13 = CR, 10 = LF: end of record
                Exit For
            ElseIf cc% = 34 Then '34 = ": toggle the inside-quotes flag
                q% = 1 - q%
            ElseIf cc% = 44 And q% = 0 Then '44 = , outside quotes: field separator
                cf% = cf% + 1: fld$(cf%) = Mid$(rec$, i0%, i% - i0%)
                i0% = i% + 1
            End If
        Next i%
        cf% = cf% + 1: fld$(cf%) = Mid$(rec$, i0%, i% - i0%) 'the last field on the line
    End If
    CSV.field$ = fld$(n%)
End Function
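To show how the pieces fit together, here is one way the processLine stub above could be filled in, as a minimal sketch: it totals column 3 of every record via CSV.field$. The running total total3# and the column number are just assumptions for illustration; you would print total3# in the main program before End.

Code:
Dim Shared total3# 'module-level running total (illustration only)

Sub processLine (lin$)
    'hypothetical body: accumulate the numeric value of field 3
    total3# = total3# + Val(CSV.field$(lin$, 3))
End Sub

Note that CSV.field$ returns quoted fields with their quote characters still attached, so strip those in processLine if you need the bare text.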
45y and 2M lines of MBASIC>BASICA>QBASIC>QBX>QB64 experience