Processing huge files
#1
I saw the discussion in the MemFile System thread about fast file reading.
That approach works fine for files up to 2GB.

I sometimes have to process huge (100GB+) csv files.
Therefore I created this reader (2x slower but unlimited size):
Code: (Select All)
' Demo driver: run the block reader over 20.csv (LF-terminated records)
' and report how long the whole pass took.
started! = Timer
total~&& = processBigFile("20.csv", Chr$(10))
elapsed! = Timer - started!
Print "Done"; " in"; elapsed!; "seconds"
End

' processBigFile: stream a file of any size in fixed-size blocks, split it into
' records on eol$, and hand each record (without its delimiter) to processLine.
'
'   ifile$  path of the file to read
'   eol$    record delimiter, e.g. Chr$(10) or Chr$(13) + Chr$(10);
'           an empty string falls back to Chr$(10)
'   returns the number of records passed to processLine; a final record that is
'           not terminated by eol$ is processed and counted as well
'
' A missing file returns 0 without opening anything.
Function processBigFile~&& (ifile$, eol$)
  Const BLOCKSIZE = 4 * 1024 * 1024 'on average 4MB blocks seems fastest
  Dim block As String * BLOCKSIZE

  If _FileExists(ifile$) = 0 Then Exit Function

  ' Work on a local copy of the delimiter: an empty search string would make
  ' InStr return the start position forever and the scan loop would never end.
  sep$ = eol$
  If Len(sep$) = 0 Then sep$ = Chr$(10)

  filenum% = FreeFile
  Open ifile$ For Random Access Read As filenum% Len = Len(block)
  remaining~&& = LOF(filenum%) 'bytes of real file data not yet consumed
  buf$ = "": recs~&& = 0: bufpos~& = 1: blck~& = 0
  $Checking:Off
  Do While remaining~&& > 0
    blck~& = blck~& + 1
    Get filenum%, blck~&, block
    ' Keep the unconsumed tail of the previous buffer (a record that straddles
    ' the block boundary) and append only the bytes that belong to the file:
    ' the last block is usually partial and the rest of the fixed-length
    ' string is not file data.
    If remaining~&& >= BLOCKSIZE Then
      buf$ = Mid$(buf$, bufpos~&) + block
      remaining~&& = remaining~&& - BLOCKSIZE
    Else
      buf$ = Mid$(buf$, bufpos~&) + Left$(block, remaining~&&)
      remaining~&& = 0
    End If
    bufpos~& = 1: endline~& = InStr(bufpos~&, buf$, sep$)
    Do While endline~& >= bufpos~& 'InStr returns 0 when no delimiter is left
      recs~&& = recs~&& + 1
      lin$ = Mid$(buf$, bufpos~&, endline~& - bufpos~&)
      processLine lin$
      bufpos~& = endline~& + Len(sep$): endline~& = InStr(bufpos~&, buf$, sep$)
    Loop
    Locate , 1, 0: Print recs~&&; 'progress: overwrite the counter in place
  Loop
  ' Whatever is left after the last delimiter is a final, unterminated record.
  If bufpos~& <= Len(buf$) Then
    recs~&& = recs~&& + 1
    lin$ = Mid$(buf$, bufpos~&)
    processLine lin$
    Locate , 1, 0: Print recs~&&;
  End If
  Print
  $Checking:On
  buf$ = "": Close filenum% 'close only our own handle, not every open file
  processBigFile~&& = recs~&&
End Function

Sub processLine (lin$)
  ' Per-record hook: processBigFile calls this once for every record it
  ' extracts, passing the record text without the end-of-line delimiter.
  ' Put the per-line work here, for example reading the third CSV field:
  'f3$ = CSV.field$(lin$, 3)
End Sub

' CSV.field$: return field number n% (1-based) of the CSV record lin$.
'
' The record is split on commas that are outside double quotes; the quote
' characters themselves stay part of the field text. Parsing stops at the
' first CR or LF. The split result is cached, so repeated calls with the same
' lin$ only parse once.
'
'   lin$    one CSV record
'   n%      1-based field index
'   returns the field text, or "" when n% is outside 1..(number of fields).
'           At most MAXFIELDS fields are kept; any beyond that are ignored.
Function CSV.field$ (lin$, n%)
  Const MAXFIELDS = 100
  Static rec$, fld$(1 To MAXFIELDS), nflds%
  If rec$ <> lin$ Then
    rec$ = lin$
    ' Long (&) positions so records longer than 32767 characters do not overflow.
    nflds% = 0: q% = 0: i0& = 1: ll& = Len(rec$)
    For i& = 1 To ll&
      cc% = Asc(Mid$(rec$, i&, 1))
      If cc% = 13 Or cc% = 10 Then
        Exit For
      ElseIf cc% = 34 Then '34 = "
        q% = 1 - q% 'toggle the inside-quotes flag
      ElseIf cc% = 44 And q% = 0 Then '44 = ,
        If nflds% < MAXFIELDS Then nflds% = nflds% + 1: fld$(nflds%) = Mid$(rec$, i0&, i& - i0&)
        i0& = i& + 1
      End If
    Next i&
    ' Last field: runs from i0& up to the CR/LF or the end of the record.
    If nflds% < MAXFIELDS Then nflds% = nflds% + 1: fld$(nflds%) = Mid$(rec$, i0&, i& - i0&)
  End If
  ' Only indexes of the current record are valid; fld$() may still hold
  ' text from an earlier, longer record beyond nflds%.
  If n% >= 1 And n% <= nflds% Then CSV.field$ = fld$(n%) Else CSV.field$ = ""
End Function
45y and 2M lines of MBASIC>BASICA>QBASIC>QBX>QB64 experience
Reply


Messages In This Thread
Processing huge files - by mdijkens - 08-22-2022, 05:08 PM
RE: Processing huge files - by mnrvovrfc - 08-24-2022, 10:13 PM
RE: Processing huge files - by mdijkens - 08-24-2022, 10:44 PM
RE: Processing huge files - by mdijkens - 08-24-2022, 10:38 PM



Users browsing this thread: 2 Guest(s)