Module pynut.pynut

Binary NutMeg Reader for Python

Expand source code
""" Binary NutMeg Reader for Python """

import os
import re
import errno
import string
import random
from typing import NamedTuple
import numpy as np
import pandas as pd

def _read_next_line_pattern( raw: bytes, pattern: str, reverse: bool = False
                           ) -> str:
    pattern_enc = pattern.encode()
    pattern_idx = raw.rfind(pattern_enc) if reverse else raw.find(pattern_enc)
    return_idx  = raw.find(b'\n', pattern_idx)
    dec         = raw[pattern_idx:return_idx].decode()
    return dec.split(pattern + ':')[1].strip() if dec else ''

def _read_next_block_pattern( raw: bytes, pattern1: str, pattern2: str
                            , reverse: bool = True ) -> dict:
    enc1   = pattern1.encode()
    enc2   = pattern2.encode()
    p1_idx = raw.rfind(enc1) if reverse else raw.find(enc1)
    p2_idx = raw.find(enc2, p1_idx)
    dec    = raw[p1_idx:p2_idx].decode().removeprefix(pattern1).split('\n')
    return { j[1]: {'index': j[0], 'unit': j[2].split(' ')[0] }
             for j in [ i.split('\t') for i in [ d.strip() for d in dec ]
                      ] if j[0] }

def _get_analys_type(plot_name: str) -> str:
    analysis_pattern = "`(.*?)'"
    analysis_match   = re.search(analysis_pattern, plot_name)
    return analysis_match.group(1) if analysis_match\
            else 'dummy_' + ''.join(random.sample(string.ascii_letters, 5))

NutPlot = NamedTuple( 'NutPlot'
                    , [ ( 'plot_name', str )
                      , ( 'flags', str )
                      , ( 'n_points', int )
                      , ( 'variables', list[str] )
                      , ( 'data', np.array )
                      ] )

NutMeg = NamedTuple( 'NutMeg'
                   , [ ( 'title', str )
                     , ( 'date', str )
                     , ( 'plots', dict[str, NutPlot] )
                     ] )

def parse_plot(raw_plot: bytes, values_id: bytes = b'\nBinary:\n') -> NutPlot:
    """
    Parse plot segment of raw data into NutPlot object.
    """
    plot_name   = _read_next_line_pattern(raw_plot, 'Plotname')
    flags       = _read_next_line_pattern(raw_plot, 'Flags')
    n_variables = int(_read_next_line_pattern(raw_plot, 'No. Variables'))
    n_points    = int(_read_next_line_pattern(raw_plot, 'No. Points'))
    variables   = _read_next_block_pattern(raw_plot, 'Variables:', 'Binary:')
    dtypes      = np.dtype( { 'names': list(variables.keys())
                            , 'formats': ( n_variables
                                         * ( [np.complex128]
                                             if 'complex' in flags
                                             else [np.float64])) } )
    data_start  = raw_plot.find(values_id) + len(values_id)
    raw_data    = raw_plot[data_start:None]
    data        = np.frombuffer( raw_data, dtype = dtypes
                               , count = max(1, n_points))
    return NutPlot( plot_name = plot_name
                  , flags     = flags
                  , n_points  = n_points
                  , variables = variables
                  , data      = data )

def to_df(nut: NutPlot) -> pd.DataFrame:
    """ Turn NutPlot into pandas DataFrame. """
    return pd.DataFrame(nut.data.byteswap().newbyteorder())

def read_raw( file_name: str, plots_id: bytes = b'Plotname'
                ) -> NutMeg:
    """
    Parse NutMag raw/binary file.
    """
    if not os.path.isfile(file_name):
        raise( FileNotFoundError( errno.ENOENT
                                , os.strerror(errno.ENOENT)
                                , file_name ) )

    with open(file_name, 'rb') as raw_file:
        raw_data = raw_file.read()

    title     = _read_next_line_pattern(raw_data, 'Title')
    date      = _read_next_line_pattern(raw_data, 'Date')
    psx       = [ idx.start() for idx in re.compile(plots_id).finditer(raw_data) ]
    pex       = psx[1:] + [len(raw_data)]
    raw_plots = [ raw_data[sx:ex] for sx,ex in zip(psx,pex) ]
    plots     = { _get_analys_type( _read_next_line_pattern(plt, 'Plotname')
                              ): parse_plot(plt) for plt in raw_plots }
    return NutMeg( title = title
                 , date  = date
                 , plots = plots )

def plot_dict(nut: NutMeg) -> dict[str, pd.DataFrame]:
    """ Named Tuple as Dict with DataFrames. """
    return { n: to_df(p) for n,p in nut.plots.items() }

Functions

def parse_plot(raw_plot: bytes, values_id: bytes = b'\nBinary:\n') ‑> NutPlot

Parse plot segment of raw data into NutPlot object.

Expand source code
def parse_plot(raw_plot: bytes, values_id: bytes = b'\nBinary:\n') -> NutPlot:
    """
    Parse plot segment of raw data into NutPlot object.
    """
    plot_name   = _read_next_line_pattern(raw_plot, 'Plotname')
    flags       = _read_next_line_pattern(raw_plot, 'Flags')
    n_variables = int(_read_next_line_pattern(raw_plot, 'No. Variables'))
    n_points    = int(_read_next_line_pattern(raw_plot, 'No. Points'))
    variables   = _read_next_block_pattern(raw_plot, 'Variables:', 'Binary:')
    dtypes      = np.dtype( { 'names': list(variables.keys())
                            , 'formats': ( n_variables
                                         * ( [np.complex128]
                                             if 'complex' in flags
                                             else [np.float64])) } )
    data_start  = raw_plot.find(values_id) + len(values_id)
    raw_data    = raw_plot[data_start:None]
    data        = np.frombuffer( raw_data, dtype = dtypes
                               , count = max(1, n_points))
    return NutPlot( plot_name = plot_name
                  , flags     = flags
                  , n_points  = n_points
                  , variables = variables
                  , data      = data )
def plot_dict(nut: NutMeg) ‑> dict[str, pandas.core.frame.DataFrame]

Named Tuple as Dict with DataFrames.

Expand source code
def plot_dict(nut: NutMeg) -> dict[str, pd.DataFrame]:
    """ Named Tuple as Dict with DataFrames. """
    return { n: to_df(p) for n,p in nut.plots.items() }
def read_raw(file_name: str, plots_id: bytes = b'Plotname') ‑> NutMeg

Parse NutMag raw/binary file.

Expand source code
def read_raw( file_name: str, plots_id: bytes = b'Plotname'
                ) -> NutMeg:
    """
    Parse NutMag raw/binary file.
    """
    if not os.path.isfile(file_name):
        raise( FileNotFoundError( errno.ENOENT
                                , os.strerror(errno.ENOENT)
                                , file_name ) )

    with open(file_name, 'rb') as raw_file:
        raw_data = raw_file.read()

    title     = _read_next_line_pattern(raw_data, 'Title')
    date      = _read_next_line_pattern(raw_data, 'Date')
    psx       = [ idx.start() for idx in re.compile(plots_id).finditer(raw_data) ]
    pex       = psx[1:] + [len(raw_data)]
    raw_plots = [ raw_data[sx:ex] for sx,ex in zip(psx,pex) ]
    plots     = { _get_analys_type( _read_next_line_pattern(plt, 'Plotname')
                              ): parse_plot(plt) for plt in raw_plots }
    return NutMeg( title = title
                 , date  = date
                 , plots = plots )
def to_df(nut: NutPlot) ‑> pandas.core.frame.DataFrame

Turn NutPlot into pandas DataFrame.

Expand source code
def to_df(nut: NutPlot) -> pd.DataFrame:
    """ Turn NutPlot into pandas DataFrame. """
    return pd.DataFrame(nut.data.byteswap().newbyteorder())

Classes

class NutMeg (title: str, date: str, plots: dict[str, NutPlot])

NutMeg(title, date, plots)

Ancestors

  • builtins.tuple

Instance variables

var date : str

Alias for field number 1

var plots : dict[str, NutPlot]

Alias for field number 2

var title : str

Alias for field number 0

class NutPlot (plot_name: str, flags: str, n_points: int, variables: list[str], data: )

NutPlot(plot_name, flags, n_points, variables, data)

Ancestors

  • builtins.tuple

Instance variables

var data

Alias for field number 4

var flags : str

Alias for field number 1

var n_points : int

Alias for field number 2

var plot_name : str

Alias for field number 0

var variables : list[str]

Alias for field number 3