ssscoring.flysight

Functions and logic for detecting, validating and manipulating FlySight CSV files, including detection in the file system. The functions in this module assume that a data lake exists somewhere in the file system (whether local or cloud-based).

  1# See: https://github.com/pr3d4t0r/SSScoring/blob/master/LICENSE.txt
  2
  3"""
  4Functions and logic for detecting, validating and manipulating
  5FlySight CSV files, including detection in the file system.  The functions in
  6this module assume that a data lake exists somewhere in the file system (whether
  7local or cloud-based).
  8"""
  9
 10
 11from collections import OrderedDict
 12from io import StringIO
 13from pathlib import Path
 14
 15from ssscoring.constants import FLYSIGHT_1_HEADER
 16from ssscoring.constants import FLYSIGHT_2_HEADER
 17from ssscoring.constants import FLYSIGHT_FILE_ENCODING
 18from ssscoring.constants import IGNORE_LIST
 19from ssscoring.constants import MIN_JUMP_FILE_SIZE
 20from ssscoring.datatypes import FlySightVersion
 21from ssscoring.errors import SSScoringError
 22
 23import csv
 24import os
 25import shutil
 26import tempfile
 27
 28import pandas as pd
 29
 30
 31# +++ functions +++
 32
 33def isCRMangledCSV(fileThing) -> bool:
 34    """
 35    Tests if `fileThing` is an Excel or Dropbox DOS file with lines terminated
 36    in CRCRLF.  These occur when someone opens the file with Excel or some other
 37    tool in a Windows system and saves the file back to the file system,
 38    mangling the original format.
 39
 40    Arguments
 41    ---------
 42        fileThing
 43    A string or `pathlib.Path` object associated with what looks like a FlySight
 44    CR mangled file.
 45
 46    Returns
 47    -------
 48    `True` if the file has one or more lines ending in CRCRLF within the first
 49    512 bytes of data.
 50    """
 51    with open (fileThing, 'rb') as file:
 52        rawData = file.read()
 53        return b'\r\r\n' in rawData
 54
 55
 56def fixCRMangledCSV(fileThing):
 57    """
 58    Open the file associated with `fileThing` and repleace all`\r\r\b` with
 59    `\r\n` EOL markers.
 60
 61    Arguments
 62    ---------
 63        fileThing
 64    A string or `pathlib.Path` object associated with what looks like a FlySight
 65    CR mangled file.
 66
 67    See
 68    ---
 69    `ssscoring.flysight.isCRMangledCSV`
 70    """
 71    with open(fileThing, 'rb') as inputFile:
 72        fileContents = inputFile.read()
 73    fileContents = fileContents.replace(b'\r\r\n', b'\r\n')
 74    with tempfile.NamedTemporaryFile(delete = False) as outputFile:
 75        outputFile.write(fileContents)
 76        tempFileName = outputFile.name
 77    shutil.copy(tempFileName, fileThing)
 78    os.unlink(tempFileName)
 79
 80
 81def skipOverFS2MetadataRowsIn(data: pd.DataFrame) -> pd.DataFrame:
 82    """
 83    Returns a clean dataframe on which any metadata rows within the first 100
 84    are skipped.  This function uses the `time` column to detect valid rows.  A
 85    `time == NaN` is considered invalid and skipped.
 86
 87    Arguments
 88    ---------
 89        data
 90    A FlySight 2 dataframe suspected of having dirty N first rows with metadata
 91
 92    Returns
 93    -------
 94    A FlySight 2 clean dataframe without any leading metadata rows.
 95    """
 96    for ref in range(0,100):
 97        if pd.notnull(data.iloc[ref].time):
 98            break
 99    return data.iloc[ref:]
100
101
def validFlySightHeaderIn(fileThingCSV) -> bool:
    """
    Decide whether `fileThingCSV` looks like a FlySight 1 or FlySight 2 CSV.
    The decision is based on the first line only:

    - The line must be sniffable as comma-delimited CSV
    - A first field of `$FLYS` marks a FlySight 2 file
    - Otherwise every column in `FLYSIGHT_1_HEADER` must be present

    Arguments
    ---------
        fileThingCSV
    The thing to verify as a valid FlySight file; a `bytes` buffer is decoded
    with `FLYSIGHT_FILE_ENCODING`, anything else (a string or an instance of
    `pathlib.Path`) is opened as a text file.

    Returns
    -------
    `True` if `fileThingCSV` is a FlySight CSV file, otherwise `False`.
    """
    allowedDelimiters = [',']

    if isinstance(fileThingCSV, bytes):
        source = StringIO(fileThingCSV.decode(FLYSIGHT_FILE_ENCODING))
    else:
        source = open(fileThingCSV, 'r')

    with source:
        firstLine = source.readline()
        try:
            sniffed = csv.Sniffer().sniff(firstLine, delimiters=allowedDelimiters)
        except csv.Error:
            # The sniffer could not find a comma-delimited layout.
            return False

        if sniffed.delimiter not in allowedDelimiters:
            return False

        # Rewind so the csv reader parses the header line from the start.
        source.seek(0)
        header = next(csv.reader(source), None)
        if header is None:
            return False

    if header[0] == '$FLYS':
        return True
    return FLYSIGHT_1_HEADER.issubset(header)
142
143
def getAllSpeedJumpFilesFrom(dataLake: Path) -> dict:
    """
    Get all the speed jump files from a data lake, where data lake is defined
    as a reachable path that contains one or more FlySight CSV files.  This
    function tests each file to ensure that it's a speed skydive FlySight file
    in a valid format and length.  It doesn't validate data like versions
    prior to 1.9.0.

    The cheap checks (file name, size, header) run before the file is parsed
    with pandas, so undersized or non-FlySight CSV files in the data lake are
    skipped without being parsed.  A file that passes those checks but cannot
    be parsed still raises, as before.

    Arguments
    ---------
        dataLake: str
    A valid (absolute or relative) path name to the top level directory where
    the data lake starts.

    Returns
    -------
    A dictionary of speed jump files for later SSScoring processing, sorted by
    key:
        - keys are the file paths, as `pathlib.Path` objects
        - values are a FlySight version string tag, `'1'` or `'2'`
    """
    jumpFiles = OrderedDict()
    for root, dirs, files in os.walk(dataLake):
        if any(name in root for name in IGNORE_LIST):
            continue
        for fileName in files:
            if '.swp' in fileName: # Ignore Vim, other editors swap file
                continue
            if '.CSV' not in fileName.upper():
                continue
            if all(x not in fileName for x in ('EVENT', 'SENSOR', 'TRACK')):
                version = '1'   # FlySight 1 track format
            elif 'TRACK' in fileName:
                version = '2'   # FlySight 2 track custom format
            else:
                # FlySight 2 EVENT/SENSOR files are not speed jump tracks.
                continue
            jumpFileName = Path(root) / fileName
            if os.stat(jumpFileName).st_size < MIN_JUMP_FILE_SIZE:
                continue
            if not validFlySightHeaderIn(jumpFileName):
                continue
            # Parse the file as a final validity check; the dataframe itself is
            # not needed here, only the fact that parsing succeeds.
            if version == '1':
                pd.read_csv(jumpFileName, skiprows = (1, 1), index_col = False)
            else:
                data = pd.read_csv(jumpFileName, names = FLYSIGHT_2_HEADER, skiprows = 6, index_col = False, na_values = ['NA', ])
                data = skipOverFS2MetadataRowsIn(data)
                data.drop('GNSS', inplace = True, axis = 1)
            jumpFiles[jumpFileName] = version
    jumpFiles = OrderedDict(sorted(jumpFiles.items()))
    return jumpFiles
190
191
192def detectFlySightFileVersionOf(fileThing) -> FlySightVersion:
193    """
194    Detects the FlySight file version based on its file name and format.
195
196    Arguments
197    ---------
198        fileThing
199    A string, `bytes` buffer or `pathlib.Path` object corresponding to track
200    file.  If string or `pathlib.Path`, it'll be treated as a file.
201
202    Returns
203    -------
204    An instance of `ssscoring.flysight.FlySightVersion` with a valid version
205    symbolic value.
206
207    Errors
208    ------
209    `ssscoring.errors.SSScoringError` if the file is not a CSV and it's some
210    other invalid format.
211    """
212    match fileThing:
213        case Path():
214            fileName = fileThing.as_posix()
215        case str():
216            fileName = fileThing
217            fileThing = Path(fileThing)
218        case bytes():
219            fileName = '00-00-00.CSV'
220        case _:
221            raise SSScoringError('fileThing must be a Path, str, or bytes')
222
223    delimiters =  [',', ]
224    stream = None
225    if not '.CSV' in fileName.upper():
226        raise SSScoringError('Invalid file extension type')
227    if any(x in fileName for x in ('EVENT.CSV', 'SENSOR.CSV')):
228        raise SSScoringError('Only TRACK.CSV v2 files can be processed at this time')
229    if isinstance(fileThing, Path) or isinstance(fileThing, str):
230        if not fileThing.is_file():
231            raise SSScoringError('%s - file not found in data lake' % fileName)
232        if not validFlySightHeaderIn(fileName):
233            raise SSScoringError('CSV is not a valid FlySight file')
234        stream = open(fileName, 'r')
235    elif isinstance(fileThing, bytes):
236        stream = StringIO(fileThing.decode(FLYSIGHT_FILE_ENCODING))
237
238    try:
239        dialect = csv.Sniffer().sniff(stream.readline(), delimiters = delimiters)
240    except:
241        raise SSScoringError('Error while trying to validate %s file format' % fileName)
242    if dialect.delimiter in delimiters:
243        stream.seek(0)
244        header = next(csv.reader(stream))
245    else:
246        raise SSScoringError('CSV uses a different delimiter from FlySigh')
247    if header[0] == '$FLYS':
248        return FlySightVersion.V2
249    elif FLYSIGHT_1_HEADER.issubset(header):
250        return FlySightVersion.V1
251    else:
252        raise SSScoringError('%s file is not a FlySight v1 or v2 file')
253
254
def readVersion1CSV(fileThing: object) -> pd.DataFrame:
    """
    Load a FlySight version 1 track into a dataframe.  The first line supplies
    the column names and the line right after it is skipped.

    Arguments
    ---------
        fileThing
    A string or a `pathlib.Path` object (relative or absolute path), or any
    other source `pandas.read_csv` accepts, such as an open text buffer.

    Returns
    -------
    A FlySight dataframe with the original column names, normalized for
    manipulation as a dataframe instead of a file or CSV object.
    """
    # Row index 1 is the line that follows the header line.
    rowsToSkip = [1]
    frame = pd.read_csv(fileThing, skiprows = rowsToSkip, index_col = False)
    return frame
272
273
274def _tagVersion1From(fileThing: str) -> str:
275    return fileThing.replace('.CSV', '').replace('.csv', '').replace('/data', '').replace('/', ' ').strip()+':v1'
276
277
278def _tagVersion2From(fileThing: str) -> str:
279    if '/' in fileThing:
280        return fileThing.split('/')[-2]+':v2'
281    else:
282        return fileThing.replace('.CSV', '').replace('.csv', '')+':v2'
283
284
def readVersion2CSV(jumpFile: str) -> pd.DataFrame:
    """
    Load a FlySight version 2 track into a dataframe.  The first six lines are
    skipped, the columns are named after `FLYSIGHT_2_HEADER`, literal `NA`
    values are read as missing, any leading metadata rows are removed, and the
    `GNSS` column is dropped because it just makes dataframe management
    murkier.

    Arguments
    ---------
        jumpFile
    A string or a `pathlib.Path` object (relative or absolute path), or any
    other source `pandas.read_csv` accepts, such as an open text buffer.

    Returns
    -------
    A FlySight dataframe named after the FlySight 2 columns, minus `GNSS`,
    normalized for manipulation as a dataframe instead of a file or CSV object.
    """
    parsed = pd.read_csv(
        jumpFile,
        names = FLYSIGHT_2_HEADER,
        skiprows = 6,
        index_col = False,
        na_values = ['NA', ],
    )
    trimmed = skipOverFS2MetadataRowsIn(parsed)
    return trimmed.drop(columns = 'GNSS')
307
308
309def getFlySightDataFromCSVBuffer(buffer:bytes, bufferName:str) -> tuple:
310    """
311    Ingress a buffer with known FlySight or SkyTrax file data for SSScoring
312    processing.
313
314    Arguments
315    ---------
316        buffer
317    A binary data buffer, bag of bytes, containing a known FlySight track file.
318
319        bufferName
320    An arbitrary name for the buffer of type `str`.  It's used for constructing
321    the full buffer tag value for human identification.
322
323    Returns
324    -------
325    A `tuple` with two items:
326        - `rawData` - a dataframe representation of the CSV with the original
327          headers but without the data type header
328        - `tag` - a string with an identifying tag derived from the path name
329          and file version in the form `some name:vX`.  It uses the current
330          path as metadata to infer the name.  There's no semantics enforcement.
331
332    Raises
333    ------
334    `SSScoringError` if the CSV file is invalid in any way.
335    """
336    if not isinstance(buffer, bytes):
337        raise SSScoringError('buffer must be an instance of bytes, a bytes buffer')
338    try:
339        stringIO = StringIO(buffer.decode(FLYSIGHT_FILE_ENCODING))
340    except Exception as e:
341        raise SSScoringError('invalid buffer endcoding - %s' % str(e))
342    try:
343        version = detectFlySightFileVersionOf(buffer)
344    except Exception:
345        tag = '%s:INVALID' % bufferName
346        rawData = None
347    else:
348        if version == FlySightVersion.V1:
349            rawData = readVersion1CSV(stringIO)
350            tag = _tagVersion1From(bufferName)
351        elif version == FlySightVersion.V2:
352            rawData = readVersion2CSV(stringIO)
353            tag = _tagVersion2From(bufferName)
354    return (rawData, tag)
355
356
def getFlySightDataFromCSVFileName(jumpFile) -> tuple:
    """
    Ingress a known FlySight or SkyTrax file into memory for SSScoring
    processing.

    Arguments
    ---------
        jumpFile
    A string or `pathlib.Path` object; can be a relative or an absolute path.

    Returns
    -------
    A `tuple` with two items:
        - `rawData` - a dataframe representation of the CSV with the original
          headers but without the data type header, or `None` when the
          FlySight version of the file can't be detected
        - `tag` - a string with an identifying tag derived from the path name
          and file version in the form `some name:vX`, or `NA` when the version
          can't be detected.  It uses the current path as metadata to infer the
          name.  There's no semantics enforcement.

    Raises
    ------
    `SSScoringError` if `jumpFile` is neither a string nor a `pathlib.Path`, or
    if the file doesn't have a valid FlySight header.
    """
    if isinstance(jumpFile, Path):
        jumpFile = jumpFile.as_posix()
    elif not isinstance(jumpFile, str):
        raise SSScoringError('jumpFile must be a string or a Path object')
    if not validFlySightHeaderIn(jumpFile):
        # Interpolate the file name so the message identifies the culprit.
        raise SSScoringError('%s is an invalid speed skydiving file' % jumpFile)
    try:
        version = detectFlySightFileVersionOf(jumpFile)
    except Exception:
        # Detection failures are reported through the tag, not raised.
        tag = 'NA'
        rawData = None
    else:
        if version == FlySightVersion.V1:
            rawData = readVersion1CSV(jumpFile)
            tag = _tagVersion1From(jumpFile)
        elif version == FlySightVersion.V2:
            rawData = readVersion2CSV(jumpFile)
            tag = _tagVersion2From(jumpFile)
    return (rawData, tag)
def isCRMangledCSV(fileThing) -> bool:
34def isCRMangledCSV(fileThing) -> bool:
35    """
36    Tests if `fileThing` is an Excel or Dropbox DOS file with lines terminated
37    in CRCRLF.  These occur when someone opens the file with Excel or some other
38    tool in a Windows system and saves the file back to the file system,
39    mangling the original format.
40
41    Arguments
42    ---------
43        fileThing
44    A string or `pathlib.Path` object associated with what looks like a FlySight
45    CR mangled file.
46
47    Returns
48    -------
49    `True` if the file has one or more lines ending in CRCRLF within the first
50    512 bytes of data.
51    """
52    with open (fileThing, 'rb') as file:
53        rawData = file.read()
54        return b'\r\r\n' in rawData

Tests if fileThing is an Excel or Dropbox DOS file with lines terminated in CRCRLF. These occur when someone opens the file with Excel or some other tool in a Windows system and saves the file back to the file system, mangling the original format.

Arguments

fileThing

A string or pathlib.Path object associated with what looks like a FlySight CR mangled file.

Returns

True if the file contains one or more CRCRLF sequences anywhere in its contents (the whole file is read), otherwise False.

def fixCRMangledCSV(fileThing):
57def fixCRMangledCSV(fileThing):
58    """
59    Open the file associated with `fileThing` and repleace all`\r\r\b` with
60    `\r\n` EOL markers.
61
62    Arguments
63    ---------
64        fileThing
65    A string or `pathlib.Path` object associated with what looks like a FlySight
66    CR mangled file.
67
68    See
69    ---
70    `ssscoring.flysight.isCRMangledCSV`
71    """
72    with open(fileThing, 'rb') as inputFile:
73        fileContents = inputFile.read()
74    fileContents = fileContents.replace(b'\r\r\n', b'\r\n')
75    with tempfile.NamedTemporaryFile(delete = False) as outputFile:
76        outputFile.write(fileContents)
77        tempFileName = outputFile.name
78    shutil.copy(tempFileName, fileThing)
79    os.unlink(tempFileName)

Open the file associated with fileThing and replace all `\r\r\n` (CRCRLF) sequences with `\r\n` (CRLF) EOL markers.

Arguments
---------
    fileThing
A string or `pathlib.Path` object associated with what looks like a FlySight
CR mangled file.

See
---
`ssscoring.flysight.isCRMangledCSV`
def skipOverFS2MetadataRowsIn(data: pandas.DataFrame) -> pandas.DataFrame:
 82def skipOverFS2MetadataRowsIn(data: pd.DataFrame) -> pd.DataFrame:
 83    """
 84    Returns a clean dataframe on which any metadata rows within the first 100
 85    are skipped.  This function uses the `time` column to detect valid rows.  A
 86    `time == NaN` is considered invalid and skipped.
 87
 88    Arguments
 89    ---------
 90        data
 91    A FlySight 2 dataframe suspected of having dirty N first rows with metadata
 92
 93    Returns
 94    -------
 95    A FlySight 2 clean dataframe without any leading metadata rows.
 96    """
 97    for ref in range(0,100):
 98        if pd.notnull(data.iloc[ref].time):
 99            break
100    return data.iloc[ref:]

Returns a clean dataframe on which any metadata rows within the first 100 are skipped. This function uses the time column to detect valid rows. A time == NaN is considered invalid and skipped.

Arguments

data

A FlySight 2 dataframe suspected of having dirty N first rows with metadata

Returns

A FlySight 2 clean dataframe without any leading metadata rows.

def validFlySightHeaderIn(fileThingCSV) -> bool:
103def validFlySightHeaderIn(fileThingCSV) -> bool:
104    """
105    Checks if a file is a CSV in FlySight 1 or FlySight 2 formats.  The checks
106    include:
107
108    - Whether the file is a CSV, using a comma delimiter
109    - Checks for the presence of all the documented FlySight 1 headers
110    - Checks for the presence of the FlySight 2 line 1 identifier
111
112    Arguments
113    ---------
114        fileThingCSV
115    A file thing to verify as a valid FlySight file; can be a string, an
116    instance of `libpath.Path`, or a buffer of `bytes`.
117
118    Returns
119    -------
120    `True` if `fileThingCSV` is a FlySight CSV file, otherwise `False`.
121    """
122    delimiters = [',']
123
124    if isinstance(fileThingCSV, bytes):
125        stream = StringIO(fileThingCSV.decode(FLYSIGHT_FILE_ENCODING))
126    else:
127        stream = open(fileThingCSV, 'r')
128
129    with stream:
130        try:
131            dialect = csv.Sniffer().sniff(stream.readline(), delimiters=delimiters)
132        except csv.Error:
133            return False
134
135        if dialect.delimiter not in delimiters:
136            return False
137        stream.seek(0)
138        try:
139            header = next(csv.reader(stream))
140        except StopIteration:
141            return False
142    return header[0] == '$FLYS' or FLYSIGHT_1_HEADER.issubset(header)

Checks if a file is a CSV in FlySight 1 or FlySight 2 formats. The checks include:

  • Whether the file is a CSV, using a comma delimiter
  • Checks for the presence of all the documented FlySight 1 headers
  • Checks for the presence of the FlySight 2 line 1 identifier

Arguments

fileThingCSV

A file thing to verify as a valid FlySight file; can be a string, an instance of pathlib.Path, or a buffer of bytes.

Returns

True if fileThingCSV is a FlySight CSV file, otherwise False.

def getAllSpeedJumpFilesFrom(dataLake: pathlib.Path) -> dict:
145def getAllSpeedJumpFilesFrom(dataLake: Path) -> dict:
146    """
147    Get a list of all the speed jump files from a data lake, where data lake is
148    defined as a reachable path that contains one or more FlySight CSV files.
149    This function tests each file to ensure that it's a speed skydive FlySight
150    file in a valid format and length.  It doesn't validate data like versions
151    prior to 1.9.0.
152
153    Arguments
154    ---------
155        dataLake: str
156    A valid (absolute or relative) path name to the top level directory where
157    the data lake starts.
158
159    Returns
160    -------
161    A dictionary of speed jump file names for later SSScoring processing:
162        - keys are the file names
163        - values are a FlySight version string tag
164    """
165    jumpFiles = OrderedDict()
166    for root, dirs, files in os.walk(dataLake):
167        if any(name in root for name in IGNORE_LIST):
168            continue
169        for fileName in files:
170            data = None
171            if '.swp' in fileName: # Ignore Vim, other editors swap file
172                continue
173            if '.CSV' in fileName.upper():
174                version = '1'
175                jumpFileName = Path(root) / fileName
176                stat = os.stat(jumpFileName)
177                if all(x not in fileName for x in ('EVENT', 'SENSOR', 'TRACK')):
178                    # FlySight 1 track format
179                    data = pd.read_csv(jumpFileName, skiprows = (1, 1), index_col = False)
180                elif 'TRACK' in fileName:
181                    # FlySight 2 track custom format
182                    data = pd.read_csv(jumpFileName, names = FLYSIGHT_2_HEADER, skiprows = 6, index_col = False, na_values = ['NA', ])
183                    data = skipOverFS2MetadataRowsIn(data)
184                    data.drop('GNSS', inplace = True, axis = 1)
185                    version = '2'
186                if data is not None and stat.st_size >= MIN_JUMP_FILE_SIZE and validFlySightHeaderIn(jumpFileName):
187                    # explicit because `not data` is ambiguous for dataframes
188                    jumpFiles[jumpFileName] = version
189    jumpFiles = OrderedDict(sorted(jumpFiles.items()))
190    return jumpFiles

Get a list of all the speed jump files from a data lake, where data lake is defined as a reachable path that contains one or more FlySight CSV files. This function tests each file to ensure that it's a speed skydive FlySight file in a valid format and length. It doesn't validate data like versions prior to 1.9.0.

Arguments

dataLake: str

A valid (absolute or relative) path name to the top level directory where the data lake starts.

Returns

A dictionary of speed jump files for later SSScoring processing, sorted by key:

  • keys are the file paths, as pathlib.Path objects
  • values are a FlySight version string tag, '1' or '2'

def detectFlySightFileVersionOf(fileThing) -> ssscoring.datatypes.FlySightVersion:
193def detectFlySightFileVersionOf(fileThing) -> FlySightVersion:
194    """
195    Detects the FlySight file version based on its file name and format.
196
197    Arguments
198    ---------
199        fileThing
200    A string, `bytes` buffer or `pathlib.Path` object corresponding to track
201    file.  If string or `pathlib.Path`, it'll be treated as a file.
202
203    Returns
204    -------
205    An instance of `ssscoring.flysight.FlySightVersion` with a valid version
206    symbolic value.
207
208    Errors
209    ------
210    `ssscoring.errors.SSScoringError` if the file is not a CSV and it's some
211    other invalid format.
212    """
213    match fileThing:
214        case Path():
215            fileName = fileThing.as_posix()
216        case str():
217            fileName = fileThing
218            fileThing = Path(fileThing)
219        case bytes():
220            fileName = '00-00-00.CSV'
221        case _:
222            raise SSScoringError('fileThing must be a Path, str, or bytes')
223
224    delimiters =  [',', ]
225    stream = None
226    if not '.CSV' in fileName.upper():
227        raise SSScoringError('Invalid file extension type')
228    if any(x in fileName for x in ('EVENT.CSV', 'SENSOR.CSV')):
229        raise SSScoringError('Only TRACK.CSV v2 files can be processed at this time')
230    if isinstance(fileThing, Path) or isinstance(fileThing, str):
231        if not fileThing.is_file():
232            raise SSScoringError('%s - file not found in data lake' % fileName)
233        if not validFlySightHeaderIn(fileName):
234            raise SSScoringError('CSV is not a valid FlySight file')
235        stream = open(fileName, 'r')
236    elif isinstance(fileThing, bytes):
237        stream = StringIO(fileThing.decode(FLYSIGHT_FILE_ENCODING))
238
239    try:
240        dialect = csv.Sniffer().sniff(stream.readline(), delimiters = delimiters)
241    except:
242        raise SSScoringError('Error while trying to validate %s file format' % fileName)
243    if dialect.delimiter in delimiters:
244        stream.seek(0)
245        header = next(csv.reader(stream))
246    else:
247        raise SSScoringError('CSV uses a different delimiter from FlySigh')
248    if header[0] == '$FLYS':
249        return FlySightVersion.V2
250    elif FLYSIGHT_1_HEADER.issubset(header):
251        return FlySightVersion.V1
252    else:
253        raise SSScoringError('%s file is not a FlySight v1 or v2 file')

Detects the FlySight file version based on its file name and format.

Arguments

fileThing

A string, bytes buffer or pathlib.Path object corresponding to track file. If string or pathlib.Path, it'll be treated as a file.

Returns

An instance of ssscoring.datatypes.FlySightVersion with a valid version symbolic value.

Errors

ssscoring.errors.SSScoringError if the file is not a CSV and it's some other invalid format.

def readVersion1CSV(fileThing: object) -> pandas.DataFrame:
256def readVersion1CSV(fileThing: object) -> pd.DataFrame:
257    """
258    Read a FlySight file version 1 into a dataframe.  It scrubes blank rows that
259    get in the way of correct parsing.
260
261    Arguments
262    ---------
263        fileThing
264    A string or a `pathlib.Path` object.  It can be a relative or an absolute
265    path.
266
267    Returns
268    -------
269    A FlySight dataframe with the original column names, normalized for
270    manipulation as a dataframe instead of a file or CSV object.
271    """
272    return pd.read_csv(fileThing, skiprows = (1, 1), index_col = False)

Read a FlySight file version 1 into a dataframe. It scrubs the row that follows the header line, which gets in the way of correct parsing.

Arguments

fileThing

A string or a pathlib.Path object. It can be a relative or an absolute path.

Returns

A FlySight dataframe with the original column names, normalized for manipulation as a dataframe instead of a file or CSV object.

def readVersion2CSV(jumpFile: str) -> pandas.DataFrame:
286def readVersion2CSV(jumpFile: str) -> pd.DataFrame:
287    """
288    Read a FlySight file version 2 into a dataframe.  It scrubes blank rows that
289    get in the way of correct parsing and drops the `GNSS` column because it
290    just makes dataframe management murkier.
291
292    Arguments
293    ---------
294        fileThing
295    A string or a `pathlib.Path` object.  It can be a relative or an absolute
296    path.
297
298    Returns
299    -------
300    A FlySight dataframe with the original column names, normalized for
301    manipulation as a dataframe instead of a file or CSV object.
302    """
303
304    rawData = pd.read_csv(jumpFile, names = FLYSIGHT_2_HEADER, skiprows = 6, index_col = False, na_values=['NA',])
305    rawData = skipOverFS2MetadataRowsIn(rawData)
306    rawData.drop('GNSS', inplace = True, axis = 1)
307    return rawData

Read a FlySight file version 2 into a dataframe. It scrubs the leading metadata rows that get in the way of correct parsing and drops the GNSS column because it just makes dataframe management murkier.

Arguments

jumpFile

A string or a pathlib.Path object. It can be a relative or an absolute path.

Returns

A FlySight dataframe with the original column names, normalized for manipulation as a dataframe instead of a file or CSV object.

def getFlySightDataFromCSVBuffer(buffer: bytes, bufferName: str) -> tuple:
310def getFlySightDataFromCSVBuffer(buffer:bytes, bufferName:str) -> tuple:
311    """
312    Ingress a buffer with known FlySight or SkyTrax file data for SSScoring
313    processing.
314
315    Arguments
316    ---------
317        buffer
318    A binary data buffer, bag of bytes, containing a known FlySight track file.
319
320        bufferName
321    An arbitrary name for the buffer of type `str`.  It's used for constructing
322    the full buffer tag value for human identification.
323
324    Returns
325    -------
326    A `tuple` with two items:
327        - `rawData` - a dataframe representation of the CSV with the original
328          headers but without the data type header
329        - `tag` - a string with an identifying tag derived from the path name
330          and file version in the form `some name:vX`.  It uses the current
331          path as metadata to infer the name.  There's no semantics enforcement.
332
333    Raises
334    ------
335    `SSScoringError` if the CSV file is invalid in any way.
336    """
337    if not isinstance(buffer, bytes):
338        raise SSScoringError('buffer must be an instance of bytes, a bytes buffer')
339    try:
340        stringIO = StringIO(buffer.decode(FLYSIGHT_FILE_ENCODING))
341    except Exception as e:
342        raise SSScoringError('invalid buffer endcoding - %s' % str(e))
343    try:
344        version = detectFlySightFileVersionOf(buffer)
345    except Exception:
346        tag = '%s:INVALID' % bufferName
347        rawData = None
348    else:
349        if version == FlySightVersion.V1:
350            rawData = readVersion1CSV(stringIO)
351            tag = _tagVersion1From(bufferName)
352        elif version == FlySightVersion.V2:
353            rawData = readVersion2CSV(stringIO)
354            tag = _tagVersion2From(bufferName)
355    return (rawData, tag)

Ingress a buffer with known FlySight or SkyTrax file data for SSScoring processing.

Arguments

buffer

A binary data buffer, bag of bytes, containing a known FlySight track file.

bufferName

An arbitrary name for the buffer of type str. It's used for constructing the full buffer tag value for human identification.

Returns

A tuple with two items:

  • rawData - a dataframe representation of the CSV with the original headers but without the data type header, or None when the FlySight version of the buffer can't be detected
  • tag - a string with an identifying tag derived from the buffer name and file version in the form some name:vX, or bufferName:INVALID when the version can't be detected. There's no semantics enforcement.

Raises

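SSScoringError if buffer is not a bytes instance or can't be decoded with the FlySight file encoding. A buffer whose FlySight version can't be detected doesn't raise; it yields (None, 'bufferName:INVALID') instead.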

def getFlySightDataFromCSVFileName(jumpFile) -> tuple:
358def getFlySightDataFromCSVFileName(jumpFile) -> tuple:
359    """
360    Ingress a known FlySight or SkyTrax file into memory for SSScoring
361    processing.
362
363    Arguments
364    ---------
365        jumpFile
366    A string or `pathlib.Path` object; can be a relative or an asbolute path.
367
368    Returns
369    -------
370    A `tuple` with two items:
371        - `rawData` - a dataframe representation of the CSV with the original
372          headers but without the data type header
373        - `tag` - a string with an identifying tag derived from the path name
374          and file version in the form `some name:vX`.  It uses the current
375          path as metadata to infer the name.  There's no semantics enforcement.
376
377    Raises
378    ------
379    `SSScoringError` if the CSV file is invalid in any way.
380    """
381    if isinstance(jumpFile, Path):
382        jumpFile = jumpFile.as_posix()
383    elif isinstance(jumpFile, str):
384        pass
385    else:
386        raise SSScoringError('jumpFile must be a string or a Path object')
387    if not validFlySightHeaderIn(jumpFile):
388        raise SSScoringError('%s is an invalid speed skydiving file')
389    try:
390        version = detectFlySightFileVersionOf(jumpFile)
391    except Exception:
392        tag = 'NA'
393        rawData = None
394    else:
395        if version == FlySightVersion.V1:
396            rawData = readVersion1CSV(jumpFile)
397            tag = _tagVersion1From(jumpFile)
398        elif version == FlySightVersion.V2:
399            rawData = readVersion2CSV(jumpFile)
400            tag = _tagVersion2From(jumpFile)
401    return (rawData, tag)

Ingress a known FlySight or SkyTrax file into memory for SSScoring processing.

Arguments

jumpFile

A string or pathlib.Path object; can be a relative or an absolute path.

Returns

A tuple with two items:

  • rawData - a dataframe representation of the CSV with the original headers but without the data type header, or None when the FlySight version of the file can't be detected
  • tag - a string with an identifying tag derived from the path name and file version in the form some name:vX, or NA when the version can't be detected. It uses the current path as metadata to infer the name. There's no semantics enforcement.

Raises

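SSScoringError if jumpFile is neither a string nor a pathlib.Path, or if the file doesn't have a valid FlySight header. A file whose FlySight version can't be detected doesn't raise; it yields (None, 'NA') instead.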