ssscoring.flysight
Functions and logic for detecting, validating and manipulating FlySight CSV files, including detection in the file system. The functions in this module assume that a data lake exists somewhere in the file system (whether local or cloud-based).
1# See: https://github.com/pr3d4t0r/SSScoring/blob/master/LICENSE.txt 2 3""" 4Functions and logic for detecting, validating and manipulating 5FlySight CSV files, including detection in the file system. The functions in 6this module assume that a data lake exists somewhere in the file system (whether 7local or cloud-based). 8""" 9 10 11from collections import OrderedDict 12from io import StringIO 13from pathlib import Path 14 15from ssscoring.constants import FLYSIGHT_1_HEADER 16from ssscoring.constants import FLYSIGHT_2_HEADER 17from ssscoring.constants import FLYSIGHT_FILE_ENCODING 18from ssscoring.constants import IGNORE_LIST 19from ssscoring.constants import MIN_JUMP_FILE_SIZE 20from ssscoring.datatypes import FlySightVersion 21from ssscoring.errors import SSScoringError 22 23import csv 24import os 25import shutil 26import tempfile 27 28import pandas as pd 29 30 31# +++ functions +++ 32 33def isCRMangledCSV(fileThing) -> bool: 34 """ 35 Tests if `fileThing` is an Excel or Dropbox DOS file with lines terminated 36 in CRCRLF. These occur when someone opens the file with Excel or some other 37 tool in a Windows system and saves the file back to the file system, 38 mangling the original format. 39 40 Arguments 41 --------- 42 fileThing 43 A string or `pathlib.Path` object associated with what looks like a FlySight 44 CR mangled file. 45 46 Returns 47 ------- 48 `True` if the file has one or more lines ending in CRCRLF within the first 49 512 bytes of data. 50 """ 51 with open (fileThing, 'rb') as file: 52 rawData = file.read() 53 return b'\r\r\n' in rawData 54 55 56def fixCRMangledCSV(fileThing): 57 """ 58 Open the file associated with `fileThing` and repleace all`\r\r\b` with 59 `\r\n` EOL markers. 60 61 Arguments 62 --------- 63 fileThing 64 A string or `pathlib.Path` object associated with what looks like a FlySight 65 CR mangled file. 
66 67 See 68 --- 69 `ssscoring.flysight.isCRMangledCSV` 70 """ 71 with open(fileThing, 'rb') as inputFile: 72 fileContents = inputFile.read() 73 fileContents = fileContents.replace(b'\r\r\n', b'\r\n') 74 with tempfile.NamedTemporaryFile(delete = False) as outputFile: 75 outputFile.write(fileContents) 76 tempFileName = outputFile.name 77 shutil.copy(tempFileName, fileThing) 78 os.unlink(tempFileName) 79 80 81def skipOverFS2MetadataRowsIn(data: pd.DataFrame) -> pd.DataFrame: 82 """ 83 Returns a clean dataframe on which any metadata rows within the first 100 84 are skipped. This function uses the `time` column to detect valid rows. A 85 `time == NaN` is considered invalid and skipped. 86 87 Arguments 88 --------- 89 data 90 A FlySight 2 dataframe suspected of having dirty N first rows with metadata 91 92 Returns 93 ------- 94 A FlySight 2 clean dataframe without any leading metadata rows. 95 """ 96 for ref in range(0,100): 97 if pd.notnull(data.iloc[ref].time): 98 break 99 return data.iloc[ref:] 100 101 102def validFlySightHeaderIn(fileThingCSV) -> bool: 103 """ 104 Checks if a file is a CSV in FlySight 1 or FlySight 2 formats. The checks 105 include: 106 107 - Whether the file is a CSV, using a comma delimiter 108 - Checks for the presence of all the documented FlySight 1 headers 109 - Checks for the presence of the FlySight 2 line 1 identifier 110 111 Arguments 112 --------- 113 fileThingCSV 114 A file thing to verify as a valid FlySight file; can be a string, an 115 instance of `libpath.Path`, or a buffer of `bytes`. 116 117 Returns 118 ------- 119 `True` if `fileThingCSV` is a FlySight CSV file, otherwise `False`. 
120 """ 121 delimiters = [','] 122 123 if isinstance(fileThingCSV, bytes): 124 stream = StringIO(fileThingCSV.decode(FLYSIGHT_FILE_ENCODING)) 125 else: 126 stream = open(fileThingCSV, 'r') 127 128 with stream: 129 try: 130 dialect = csv.Sniffer().sniff(stream.readline(), delimiters=delimiters) 131 except csv.Error: 132 return False 133 134 if dialect.delimiter not in delimiters: 135 return False 136 stream.seek(0) 137 try: 138 header = next(csv.reader(stream)) 139 except StopIteration: 140 return False 141 return header[0] == '$FLYS' or FLYSIGHT_1_HEADER.issubset(header) 142 143 144def getAllSpeedJumpFilesFrom(dataLake: Path) -> dict: 145 """ 146 Get a list of all the speed jump files from a data lake, where data lake is 147 defined as a reachable path that contains one or more FlySight CSV files. 148 This function tests each file to ensure that it's a speed skydive FlySight 149 file in a valid format and length. It doesn't validate data like versions 150 prior to 1.9.0. 151 152 Arguments 153 --------- 154 dataLake: str 155 A valid (absolute or relative) path name to the top level directory where 156 the data lake starts. 
157 158 Returns 159 ------- 160 A dictionary of speed jump file names for later SSScoring processing: 161 - keys are the file names 162 - values are a FlySight version string tag 163 """ 164 jumpFiles = OrderedDict() 165 for root, dirs, files in os.walk(dataLake): 166 if any(name in root for name in IGNORE_LIST): 167 continue 168 for fileName in files: 169 data = None 170 if '.swp' in fileName: # Ignore Vim, other editors swap file 171 continue 172 if '.CSV' in fileName.upper(): 173 version = '1' 174 jumpFileName = Path(root) / fileName 175 stat = os.stat(jumpFileName) 176 if all(x not in fileName for x in ('EVENT', 'SENSOR', 'TRACK')): 177 # FlySight 1 track format 178 data = pd.read_csv(jumpFileName, skiprows = (1, 1), index_col = False) 179 elif 'TRACK' in fileName: 180 # FlySight 2 track custom format 181 data = pd.read_csv(jumpFileName, names = FLYSIGHT_2_HEADER, skiprows = 6, index_col = False, na_values = ['NA', ]) 182 data = skipOverFS2MetadataRowsIn(data) 183 data.drop('GNSS', inplace = True, axis = 1) 184 version = '2' 185 if data is not None and stat.st_size >= MIN_JUMP_FILE_SIZE and validFlySightHeaderIn(jumpFileName): 186 # explicit because `not data` is ambiguous for dataframes 187 jumpFiles[jumpFileName] = version 188 jumpFiles = OrderedDict(sorted(jumpFiles.items())) 189 return jumpFiles 190 191 192def detectFlySightFileVersionOf(fileThing) -> FlySightVersion: 193 """ 194 Detects the FlySight file version based on its file name and format. 195 196 Arguments 197 --------- 198 fileThing 199 A string, `bytes` buffer or `pathlib.Path` object corresponding to track 200 file. If string or `pathlib.Path`, it'll be treated as a file. 201 202 Returns 203 ------- 204 An instance of `ssscoring.flysight.FlySightVersion` with a valid version 205 symbolic value. 206 207 Errors 208 ------ 209 `ssscoring.errors.SSScoringError` if the file is not a CSV and it's some 210 other invalid format. 
211 """ 212 match fileThing: 213 case Path(): 214 fileName = fileThing.as_posix() 215 case str(): 216 fileName = fileThing 217 fileThing = Path(fileThing) 218 case bytes(): 219 fileName = '00-00-00.CSV' 220 case _: 221 raise SSScoringError('fileThing must be a Path, str, or bytes') 222 223 delimiters = [',', ] 224 stream = None 225 if not '.CSV' in fileName.upper(): 226 raise SSScoringError('Invalid file extension type') 227 if any(x in fileName for x in ('EVENT.CSV', 'SENSOR.CSV')): 228 raise SSScoringError('Only TRACK.CSV v2 files can be processed at this time') 229 if isinstance(fileThing, Path) or isinstance(fileThing, str): 230 if not fileThing.is_file(): 231 raise SSScoringError('%s - file not found in data lake' % fileName) 232 if not validFlySightHeaderIn(fileName): 233 raise SSScoringError('CSV is not a valid FlySight file') 234 stream = open(fileName, 'r') 235 elif isinstance(fileThing, bytes): 236 stream = StringIO(fileThing.decode(FLYSIGHT_FILE_ENCODING)) 237 238 try: 239 dialect = csv.Sniffer().sniff(stream.readline(), delimiters = delimiters) 240 except: 241 raise SSScoringError('Error while trying to validate %s file format' % fileName) 242 if dialect.delimiter in delimiters: 243 stream.seek(0) 244 header = next(csv.reader(stream)) 245 else: 246 raise SSScoringError('CSV uses a different delimiter from FlySigh') 247 if header[0] == '$FLYS': 248 return FlySightVersion.V2 249 elif FLYSIGHT_1_HEADER.issubset(header): 250 return FlySightVersion.V1 251 else: 252 raise SSScoringError('%s file is not a FlySight v1 or v2 file') 253 254 255def readVersion1CSV(fileThing: object) -> pd.DataFrame: 256 """ 257 Read a FlySight file version 1 into a dataframe. It scrubes blank rows that 258 get in the way of correct parsing. 259 260 Arguments 261 --------- 262 fileThing 263 A string or a `pathlib.Path` object. It can be a relative or an absolute 264 path. 
265 266 Returns 267 ------- 268 A FlySight dataframe with the original column names, normalized for 269 manipulation as a dataframe instead of a file or CSV object. 270 """ 271 return pd.read_csv(fileThing, skiprows = (1, 1), index_col = False) 272 273 274def _tagVersion1From(fileThing: str) -> str: 275 return fileThing.replace('.CSV', '').replace('.csv', '').replace('/data', '').replace('/', ' ').strip()+':v1' 276 277 278def _tagVersion2From(fileThing: str) -> str: 279 if '/' in fileThing: 280 return fileThing.split('/')[-2]+':v2' 281 else: 282 return fileThing.replace('.CSV', '').replace('.csv', '')+':v2' 283 284 285def readVersion2CSV(jumpFile: str) -> pd.DataFrame: 286 """ 287 Read a FlySight file version 2 into a dataframe. It scrubes blank rows that 288 get in the way of correct parsing and drops the `GNSS` column because it 289 just makes dataframe management murkier. 290 291 Arguments 292 --------- 293 fileThing 294 A string or a `pathlib.Path` object. It can be a relative or an absolute 295 path. 296 297 Returns 298 ------- 299 A FlySight dataframe with the original column names, normalized for 300 manipulation as a dataframe instead of a file or CSV object. 301 """ 302 303 rawData = pd.read_csv(jumpFile, names = FLYSIGHT_2_HEADER, skiprows = 6, index_col = False, na_values=['NA',]) 304 rawData = skipOverFS2MetadataRowsIn(rawData) 305 rawData.drop('GNSS', inplace = True, axis = 1) 306 return rawData 307 308 309def getFlySightDataFromCSVBuffer(buffer:bytes, bufferName:str) -> tuple: 310 """ 311 Ingress a buffer with known FlySight or SkyTrax file data for SSScoring 312 processing. 313 314 Arguments 315 --------- 316 buffer 317 A binary data buffer, bag of bytes, containing a known FlySight track file. 318 319 bufferName 320 An arbitrary name for the buffer of type `str`. It's used for constructing 321 the full buffer tag value for human identification. 
322 323 Returns 324 ------- 325 A `tuple` with two items: 326 - `rawData` - a dataframe representation of the CSV with the original 327 headers but without the data type header 328 - `tag` - a string with an identifying tag derived from the path name 329 and file version in the form `some name:vX`. It uses the current 330 path as metadata to infer the name. There's no semantics enforcement. 331 332 Raises 333 ------ 334 `SSScoringError` if the CSV file is invalid in any way. 335 """ 336 if not isinstance(buffer, bytes): 337 raise SSScoringError('buffer must be an instance of bytes, a bytes buffer') 338 try: 339 stringIO = StringIO(buffer.decode(FLYSIGHT_FILE_ENCODING)) 340 except Exception as e: 341 raise SSScoringError('invalid buffer endcoding - %s' % str(e)) 342 try: 343 version = detectFlySightFileVersionOf(buffer) 344 except Exception: 345 tag = '%s:INVALID' % bufferName 346 rawData = None 347 else: 348 if version == FlySightVersion.V1: 349 rawData = readVersion1CSV(stringIO) 350 tag = _tagVersion1From(bufferName) 351 elif version == FlySightVersion.V2: 352 rawData = readVersion2CSV(stringIO) 353 tag = _tagVersion2From(bufferName) 354 return (rawData, tag) 355 356 357def getFlySightDataFromCSVFileName(jumpFile) -> tuple: 358 """ 359 Ingress a known FlySight or SkyTrax file into memory for SSScoring 360 processing. 361 362 Arguments 363 --------- 364 jumpFile 365 A string or `pathlib.Path` object; can be a relative or an asbolute path. 366 367 Returns 368 ------- 369 A `tuple` with two items: 370 - `rawData` - a dataframe representation of the CSV with the original 371 headers but without the data type header 372 - `tag` - a string with an identifying tag derived from the path name 373 and file version in the form `some name:vX`. It uses the current 374 path as metadata to infer the name. There's no semantics enforcement. 375 376 Raises 377 ------ 378 `SSScoringError` if the CSV file is invalid in any way. 
379 """ 380 if isinstance(jumpFile, Path): 381 jumpFile = jumpFile.as_posix() 382 elif isinstance(jumpFile, str): 383 pass 384 else: 385 raise SSScoringError('jumpFile must be a string or a Path object') 386 if not validFlySightHeaderIn(jumpFile): 387 raise SSScoringError('%s is an invalid speed skydiving file') 388 try: 389 version = detectFlySightFileVersionOf(jumpFile) 390 except Exception: 391 tag = 'NA' 392 rawData = None 393 else: 394 if version == FlySightVersion.V1: 395 rawData = readVersion1CSV(jumpFile) 396 tag = _tagVersion1From(jumpFile) 397 elif version == FlySightVersion.V2: 398 rawData = readVersion2CSV(jumpFile) 399 tag = _tagVersion2From(jumpFile) 400 return (rawData, tag)
def isCRMangledCSV(fileThing) -> bool:
    """
    Report whether the file at `fileThing` has lines terminated in CRCRLF.

    Files in this state show up after someone opens a FlySight CSV with Excel
    or a similar tool in a Windows system and saves it back, mangling the
    original EOL markers.

    Arguments
    ---------
        fileThing
    A string or `pathlib.Path` object associated with what looks like a FlySight
    CR mangled file.

    Returns
    -------
    `True` if the CR CR LF byte sequence appears anywhere in the file, otherwise
    `False`.  The whole file is read, in binary mode.
    """
    with open(fileThing, mode = 'rb') as candidate:
        contents = candidate.read()
    return contents.find(b'\r\r\n') >= 0
Tests if fileThing is an Excel or Dropbox DOS file with lines terminated
in CRCRLF. These occur when someone opens the file with Excel or some other
tool in a Windows system and saves the file back to the file system,
mangling the original format.
Arguments
fileThing
A string or pathlib.Path object associated with what looks like a FlySight
CR mangled file.
Returns
True if the file has one or more lines ending in CRCRLF anywhere in its
contents; the whole file is read, not just its first bytes.
def fixCRMangledCSV(fileThing):
    r"""
    Rewrite the file associated with `fileThing` in place, replacing every
    `\r\r\n` sequence with a `\r\n` EOL marker.

    The repaired contents are staged in a temporary file and then copied over
    the original.  Only the data is copied, so the original file keeps its own
    permission bits instead of inheriting the temporary file's.  The temporary
    file is removed even if the write or the copy fails.

    Arguments
    ---------
        fileThing
    A string or `pathlib.Path` object associated with what looks like a FlySight
    CR mangled file.

    See
    ---
    `ssscoring.flysight.isCRMangledCSV`
    """
    with open(fileThing, 'rb') as inputFile:
        fileContents = inputFile.read()
    fileContents = fileContents.replace(b'\r\r\n', b'\r\n')
    stagingFile = tempfile.NamedTemporaryFile(delete = False)
    try:
        with stagingFile:
            stagingFile.write(fileContents)
        # copyfile() moves data only; copy() would also clobber the target's
        # mode with the temporary file's mode.
        shutil.copyfile(stagingFile.name, fileThing)
    finally:
        os.unlink(stagingFile.name)
Open the file associated with fileThing and replace all `\r\r\n` with
`\r\n` EOL markers.
Arguments
---------
fileThing
A string or `pathlib.Path` object associated with what looks like a FlySight
CR mangled file.
See
---
`ssscoring.flysight.isCRMangledCSV`
def skipOverFS2MetadataRowsIn(data: pd.DataFrame) -> pd.DataFrame:
    """
    Returns a clean dataframe on which any leading metadata rows within the
    first 100 are skipped.  This function uses the `time` column to detect valid
    rows.  A `time == NaN` is considered invalid and skipped.

    Arguments
    ---------
        data
    A FlySight 2 dataframe suspected of having dirty N first rows with metadata.
    It may be empty or shorter than 100 rows.

    Returns
    -------
    The rows of `data` starting at the first row, among the first 100, with a
    non-null `time`.  If none of the inspected rows has a valid `time`, all the
    inspected rows are dropped, which yields an empty dataframe when `data` has
    100 rows or fewer.
    """
    inspected = data['time'].head(100).notna().to_numpy()
    # argmax() on a boolean array is the position of the first True
    firstValid = int(inspected.argmax()) if inspected.any() else len(inspected)
    return data.iloc[firstValid:]
Returns a clean dataframe on which any metadata rows within the first 100
are skipped. This function uses the time column to detect valid rows. A
time == NaN is considered invalid and skipped.
Arguments
data
A FlySight 2 dataframe suspected of having dirty N first rows with metadata
Returns
A FlySight 2 clean dataframe without any leading metadata rows.
def validFlySightHeaderIn(fileThingCSV) -> bool:
    """
    Checks if a file is a CSV in FlySight 1 or FlySight 2 formats.  The checks
    include:

    - Whether the file is a CSV, using a comma delimiter
    - Checks for the presence of all the documented FlySight 1 headers
    - Checks for the presence of the FlySight 2 line 1 identifier

    Arguments
    ---------
        fileThingCSV
    A file thing to verify as a valid FlySight file; can be a string, an
    instance of `pathlib.Path`, or a buffer of `bytes`.

    Returns
    -------
    `True` if `fileThingCSV` is a FlySight CSV file, otherwise `False`.  Content
    that cannot be decoded with `FLYSIGHT_FILE_ENCODING` is reported as `False`.
    """
    delimiters = [',']

    if isinstance(fileThingCSV, bytes):
        try:
            stream = StringIO(fileThingCSV.decode(FLYSIGHT_FILE_ENCODING))
        except UnicodeDecodeError:
            return False
    else:
        # Same encoding as the bytes branch, not the platform default.
        stream = open(fileThingCSV, 'r', encoding = FLYSIGHT_FILE_ENCODING)

    with stream:
        try:
            dialect = csv.Sniffer().sniff(stream.readline(), delimiters = delimiters)
        except (csv.Error, UnicodeDecodeError):
            return False
        if dialect.delimiter not in delimiters:
            return False
        stream.seek(0)
        try:
            header = next(csv.reader(stream))
        except StopIteration:
            return False
    if not header:
        return False
    return header[0] == '$FLYS' or FLYSIGHT_1_HEADER.issubset(header)
Checks if a file is a CSV in FlySight 1 or FlySight 2 formats. The checks include:
- Whether the file is a CSV, using a comma delimiter
- Checks for the presence of all the documented FlySight 1 headers
- Checks for the presence of the FlySight 2 line 1 identifier
Arguments
fileThingCSV
A file thing to verify as a valid FlySight file; can be a string, an
instance of pathlib.Path, or a buffer of bytes.
Returns
True if fileThingCSV is a FlySight CSV file, otherwise False.
def getAllSpeedJumpFilesFrom(dataLake: Path) -> dict:
    """
    Get all the speed jump files from a data lake, where data lake is defined as
    a reachable path that contains one or more FlySight CSV files.  This
    function tests each file to ensure that it's a speed skydive FlySight file
    in a valid format and length.  It doesn't validate data like versions prior
    to 1.9.0.

    Candidates go through the cheap checks first (name, size, header) and are
    parsed only if they pass; a candidate that fails to parse is left out of the
    result instead of aborting the scan.

    Arguments
    ---------
        dataLake: str
    A valid (absolute or relative) path name to the top level directory where
    the data lake starts.

    Returns
    -------
    A dictionary of speed jump file names for later SSScoring processing,
    sorted by path:

    - keys are the file names, as `pathlib.Path` objects
    - values are a FlySight version string tag, `'1'` or `'2'`
    """
    jumpFiles = OrderedDict()
    for root, _, files in os.walk(dataLake):
        if any(name in root for name in IGNORE_LIST):
            continue
        for fileName in files:
            if '.swp' in fileName:  # Ignore Vim, other editors swap file
                continue
            if '.CSV' not in fileName.upper():
                continue
            if all(x not in fileName for x in ('EVENT', 'SENSOR', 'TRACK')):
                # FlySight 1 track format
                version = '1'
                reader = readVersion1CSV
            elif 'TRACK' in fileName:
                # FlySight 2 track custom format
                version = '2'
                reader = readVersion2CSV
            else:
                # EVENT and SENSOR files are never speed jump tracks
                continue
            jumpFileName = Path(root) / fileName
            if os.stat(jumpFileName).st_size < MIN_JUMP_FILE_SIZE:
                continue
            if not validFlySightHeaderIn(jumpFileName):
                continue
            try:
                reader(jumpFileName)
            except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError, IndexError):
                continue
            jumpFiles[jumpFileName] = version
    return OrderedDict(sorted(jumpFiles.items()))
Get a list of all the speed jump files from a data lake, where data lake is defined as a reachable path that contains one or more FlySight CSV files. This function tests each file to ensure that it's a speed skydive FlySight file in a valid format and length. It doesn't validate data like versions prior to 1.9.0.
Arguments
dataLake: str
A valid (absolute or relative) path name to the top level directory where the data lake starts.
Returns
A dictionary of speed jump file names for later SSScoring processing: - keys are the file names - values are a FlySight version string tag
193def detectFlySightFileVersionOf(fileThing) -> FlySightVersion: 194 """ 195 Detects the FlySight file version based on its file name and format. 196 197 Arguments 198 --------- 199 fileThing 200 A string, `bytes` buffer or `pathlib.Path` object corresponding to track 201 file. If string or `pathlib.Path`, it'll be treated as a file. 202 203 Returns 204 ------- 205 An instance of `ssscoring.flysight.FlySightVersion` with a valid version 206 symbolic value. 207 208 Errors 209 ------ 210 `ssscoring.errors.SSScoringError` if the file is not a CSV and it's some 211 other invalid format. 212 """ 213 match fileThing: 214 case Path(): 215 fileName = fileThing.as_posix() 216 case str(): 217 fileName = fileThing 218 fileThing = Path(fileThing) 219 case bytes(): 220 fileName = '00-00-00.CSV' 221 case _: 222 raise SSScoringError('fileThing must be a Path, str, or bytes') 223 224 delimiters = [',', ] 225 stream = None 226 if not '.CSV' in fileName.upper(): 227 raise SSScoringError('Invalid file extension type') 228 if any(x in fileName for x in ('EVENT.CSV', 'SENSOR.CSV')): 229 raise SSScoringError('Only TRACK.CSV v2 files can be processed at this time') 230 if isinstance(fileThing, Path) or isinstance(fileThing, str): 231 if not fileThing.is_file(): 232 raise SSScoringError('%s - file not found in data lake' % fileName) 233 if not validFlySightHeaderIn(fileName): 234 raise SSScoringError('CSV is not a valid FlySight file') 235 stream = open(fileName, 'r') 236 elif isinstance(fileThing, bytes): 237 stream = StringIO(fileThing.decode(FLYSIGHT_FILE_ENCODING)) 238 239 try: 240 dialect = csv.Sniffer().sniff(stream.readline(), delimiters = delimiters) 241 except: 242 raise SSScoringError('Error while trying to validate %s file format' % fileName) 243 if dialect.delimiter in delimiters: 244 stream.seek(0) 245 header = next(csv.reader(stream)) 246 else: 247 raise SSScoringError('CSV uses a different delimiter from FlySigh') 248 if header[0] == '$FLYS': 249 return FlySightVersion.V2 
250 elif FLYSIGHT_1_HEADER.issubset(header): 251 return FlySightVersion.V1 252 else: 253 raise SSScoringError('%s file is not a FlySight v1 or v2 file')
Detects the FlySight file version based on its file name and format.
Arguments
fileThing
A string, bytes buffer or pathlib.Path object corresponding to track
file. If string or pathlib.Path, it'll be treated as a file.
Returns
An instance of ssscoring.flysight.FlySightVersion with a valid version
symbolic value.
Errors
ssscoring.errors.SSScoringError if the file is not a CSV and it's some
other invalid format.
def readVersion1CSV(fileThing: object) -> pd.DataFrame:
    """
    Load a FlySight version 1 track into a dataframe.  The line right after the
    header is left out so that it doesn't get in the way of correct parsing.

    Arguments
    ---------
        fileThing
    A string or a `pathlib.Path` object.  It can be a relative or an absolute
    path.  Any other source accepted by `pandas.read_csv` also works.

    Returns
    -------
    A FlySight dataframe with the original column names, normalized for
    manipulation as a dataframe instead of a file or CSV object.
    """
    parseOptions = {
        'skiprows': [1],  # 0-based line index: the line after the header
        'index_col': False,
    }
    return pd.read_csv(fileThing, **parseOptions)
Read a FlySight file version 1 into a dataframe. It scrubs blank rows that get in the way of correct parsing.
Arguments
fileThing
A string or a pathlib.Path object. It can be a relative or an absolute
path.
Returns
A FlySight dataframe with the original column names, normalized for manipulation as a dataframe instead of a file or CSV object.
def readVersion2CSV(jumpFile: str) -> pd.DataFrame:
    """
    Read a FlySight file version 2 into a dataframe.  It skips the first 6
    lines of the file, scrubs leading rows without a valid `time`, and drops the
    `GNSS` column because it just makes dataframe management murkier.

    Arguments
    ---------
        jumpFile
    A string or a `pathlib.Path` object.  It can be a relative or an absolute
    path.  Any other source accepted by `pandas.read_csv` also works.

    Returns
    -------
    A FlySight dataframe with the `FLYSIGHT_2_HEADER` column names, minus
    `GNSS`, normalized for manipulation as a dataframe instead of a file or CSV
    object.
    """
    rawData = pd.read_csv(jumpFile, names = FLYSIGHT_2_HEADER, skiprows = 6, index_col = False, na_values = ['NA', ])
    rawData = skipOverFS2MetadataRowsIn(rawData)
    # Non-mutating drop: the frame above is a slice of the parsed data.
    return rawData.drop('GNSS', axis = 1)
Read a FlySight file version 2 into a dataframe. It scrubs blank rows that
get in the way of correct parsing and drops the GNSS column because it
just makes dataframe management murkier.
Arguments
jumpFile
A string or a pathlib.Path object. It can be a relative or an absolute
path.
Returns
A FlySight dataframe with the original column names, normalized for manipulation as a dataframe instead of a file or CSV object.
def getFlySightDataFromCSVBuffer(buffer:bytes, bufferName:str) -> tuple:
    """
    Ingress a buffer with known FlySight or SkyTrax file data for SSScoring
    processing.

    Arguments
    ---------
        buffer
    A binary data buffer, bag of bytes, containing a known FlySight track file.

        bufferName
    An arbitrary name for the buffer of type `str`.  It's used for constructing
    the full buffer tag value for human identification.

    Returns
    -------
    A `tuple` with two items:
        - `rawData` - a dataframe representation of the CSV with the original
          headers but without the data type header
        - `tag` - a string with an identifying tag derived from `bufferName`
          and the file version in the form `some name:vX`.  There's no
          semantics enforcement.

    If the FlySight version can't be detected the result is
    `(None, '<bufferName>:INVALID')`.

    Raises
    ------
    `SSScoringError` if `buffer` isn't a `bytes` object, if it can't be decoded,
    or if the detected version isn't supported.
    """
    if not isinstance(buffer, bytes):
        raise SSScoringError('buffer must be an instance of bytes, a bytes buffer')
    try:
        stringIO = StringIO(buffer.decode(FLYSIGHT_FILE_ENCODING))
    except Exception as e:
        raise SSScoringError('invalid buffer encoding - %s' % str(e)) from e
    try:
        version = detectFlySightFileVersionOf(buffer)
    except Exception:
        # Best effort: an undetectable buffer is reported through the tag.
        return (None, '%s:INVALID' % bufferName)
    if version == FlySightVersion.V1:
        rawData = readVersion1CSV(stringIO)
        tag = _tagVersion1From(bufferName)
    elif version == FlySightVersion.V2:
        rawData = readVersion2CSV(stringIO)
        tag = _tagVersion2From(bufferName)
    else:
        raise SSScoringError('unsupported FlySight version %s' % str(version))
    return (rawData, tag)
Ingress a buffer with known FlySight or SkyTrax file data for SSScoring processing.
Arguments
buffer
A binary data buffer, bag of bytes, containing a known FlySight track file.
bufferName
An arbitrary name for the buffer of type str. It's used for constructing
the full buffer tag value for human identification.
Returns
A tuple with two items:
- rawData - a dataframe representation of the CSV with the original
headers but without the data type header
- tag - a string with an identifying tag derived from the path name
and file version in the form some name:vX. It uses the current
path as metadata to infer the name. There's no semantics enforcement.
Raises
SSScoringError if the CSV file is invalid in any way.
def getFlySightDataFromCSVFileName(jumpFile) -> tuple:
    """
    Ingress a known FlySight or SkyTrax file into memory for SSScoring
    processing.

    Arguments
    ---------
        jumpFile
    A string or `pathlib.Path` object; can be a relative or an absolute path.

    Returns
    -------
    A `tuple` with two items:
        - `rawData` - a dataframe representation of the CSV with the original
          headers but without the data type header
        - `tag` - a string with an identifying tag derived from the path name
          and file version in the form `some name:vX`.  It uses the current
          path as metadata to infer the name.  There's no semantics enforcement.

    If the header is valid but version detection fails, the result is
    `(None, 'NA')`.

    Raises
    ------
    `SSScoringError` if `jumpFile` is neither a string nor a `pathlib.Path`, if
    the file lacks a valid FlySight header, or if the detected version isn't
    supported.
    """
    if isinstance(jumpFile, Path):
        jumpFile = jumpFile.as_posix()
    elif not isinstance(jumpFile, str):
        raise SSScoringError('jumpFile must be a string or a Path object')
    if not validFlySightHeaderIn(jumpFile):
        raise SSScoringError('%s is an invalid speed skydiving file' % jumpFile)
    try:
        version = detectFlySightFileVersionOf(jumpFile)
    except Exception:
        # Best effort: an undetectable file is reported through the tag.
        return (None, 'NA')
    if version == FlySightVersion.V1:
        rawData = readVersion1CSV(jumpFile)
        tag = _tagVersion1From(jumpFile)
    elif version == FlySightVersion.V2:
        rawData = readVersion2CSV(jumpFile)
        tag = _tagVersion2From(jumpFile)
    else:
        raise SSScoringError('unsupported FlySight version %s' % str(version))
    return (rawData, tag)
Ingress a known FlySight or SkyTrax file into memory for SSScoring processing.
Arguments
jumpFile
A string or pathlib.Path object; can be a relative or an absolute path.
Returns
A tuple with two items:
- rawData - a dataframe representation of the CSV with the original
headers but without the data type header
- tag - a string with an identifying tag derived from the path name
and file version in the form some name:vX. It uses the current
path as metadata to infer the name. There's no semantics enforcement.
Raises
SSScoringError if the CSV file is invalid in any way.