siglatools.institution_extracters package#

Submodules#

siglatools.institution_extracters.constants module#

class siglatools.institution_extracters.constants.GoogleSheetsFormat[source]#

Bases: object

composite_variable = 'composite-variable'#
institution_and_composite_variable = 'institution-and-composite-variable'#
multiple_sigla_answer_variable = 'multiple-sigla-answer-variable'#
standard_institution = 'standard-institution'#
class siglatools.institution_extracters.constants.GoogleSheetsInfoField[source]#

Bases: object

sheet_id = 'sheet_id'#
sheet_title = 'sheet_title'#
spreadsheet_title = 'spreadsheet_title'#
class siglatools.institution_extracters.constants.MetaDataField[source]#

Bases: object

data_type = 'data_type'#
date_of_next_uv_column = 'date_of_next_uv_column'#
end_column = 'end_column'#
end_row = 'end_row'#
format = 'format'#
start_column = 'start_column'#
start_row = 'start_row'#
variable_heading = 'variable_heading'#
variable_name = 'variable_name'#

siglatools.institution_extracters.exceptions module#

exception siglatools.institution_extracters.exceptions.IncompleteColumnRangeInA1Notation(info: ErrorInfo)[source]#

Bases: BaseError

exception siglatools.institution_extracters.exceptions.InvalidDateRange(info: ErrorInfo)[source]#

Bases: BaseError

exception siglatools.institution_extracters.exceptions.InvalidRangeInA1Notation(info: ErrorInfo)[source]#

Bases: BaseError

exception siglatools.institution_extracters.exceptions.UnableToAccessSpreadsheet(info: ErrorInfo)[source]#

Bases: BaseError

exception siglatools.institution_extracters.exceptions.UnableToCreateFormattedSheetData(info: ErrorInfo)[source]#

Bases: BaseError

exception siglatools.institution_extracters.exceptions.UnrecognizedGoogleSheetsFormat(info: ErrorInfo)[source]#

Bases: BaseError

siglatools.institution_extracters.google_sheets_institution_extracter module#

class siglatools.institution_extracters.google_sheets_institution_extracter.A1Notation(sheet_id: str, sheet_title: str, start_row: int, end_row: int, start_column: Optional[str] = None, end_column: Optional[str] = None)[source]#

Bases: tuple

A1 notation refers to a group of cells within a bounding rectangle in a sheet. This class doesn’t capture all possible A1 notations: it requires start_row and end_row, even though A1 notation in general allows them to be omitted.

Attributes:
sheet_id: str

The id of the sheet that contains a group of cells.

sheet_title: str

The title of the sheet that contains a group of cells.

start_row: int

The top row boundary of a group of cells.

end_row: int

The bottom row boundary of a group of cells.

start_column: Optional[str] = None

The left column boundary of a group of cells.

end_column: Optional[str] = None

The right column boundary of a group of cells

Create new instance of A1Notation(sheet_id, sheet_title, start_row, end_row, start_column, end_column)

end_column: Optional[str]#
end_row: int#
raise_for_validity() None[source]#

Raise an error if the A1 notation is invalid. See https://developers.google.com/sheets/api/guides/concepts#a1_notation

For a description of an A1 notation, please view the A1Notation class attributes.

sheet_id: str#
sheet_title: str#
start_column: Optional[str]#
start_row: int#
class siglatools.institution_extracters.google_sheets_institution_extracter.GoogleSheetsInstitutionExtracter(credentials_path: str)[source]#

Bases: object

get_spreadsheet_data(spreadsheet_id: str) List[SheetData][source]#

Get the spreadsheet data given a spreadsheet id.

Parameters:

spreadsheet_id (str) – The id of the spreadsheet.

Returns:

spreadsheet_data – The spreadsheet data. Please see the SheetData class to view its attributes.

Return type:

List[SheetData]

get_spreadsheet_ids(master_spreadsheet_id: str) List[str][source]#

Get the list of spreadsheet ids from a master spreadsheet.

Parameters:

master_spreadsheet_id (str) – The id of the master spreadsheet.

Returns:

spreadsheet_ids – The list of spreadsheet ids.

Return type:

List[str]

google_sheets_format_to_function_dict = {'composite-variable': <function _get_composite_variable>, 'institution-and-composite-variable': <function _get_composite_variable>, 'multiple-sigla-answer-variable': <function _get_multilple_sigla_answer_variable>, 'standard-institution': <function _get_standard_institution>}#
static process_sheet_data(sheet_data: SheetData) FormattedSheetData[source]#

Process a sheet to get its data in a format ready to be consumed by the DB.

Parameters:

sheet_data (SheetData) – The data of the sheet.

Returns:

formatted_sheet_data – The data in the required format.

Return type:

FormattedSheetData

siglatools.institution_extracters.utils module#

class siglatools.institution_extracters.utils.FormattedSheetData(spreadsheet_id: str, spreadsheet_title: str, sheet_id: str, sheet_title: str, meta_data: Dict[str, str], formatted_data: List)[source]#

Bases: tuple

The formatted data from a Google Sheet.

Attributes:
spreadsheet_id: str

The id of the spreadsheet that contains the sheet.

spreadsheet_title: str

The title of the spreadsheet that contains the sheet.

sheet_id: str

The id of the sheet.

sheet_title: str

The title of the sheet.

meta_data: Dict[str, str]

The meta data of the sheet, found in the first two rows.

formatted_data: List

The formatted data of the sheet.

Create new instance of FormattedSheetData(spreadsheet_id, spreadsheet_title, sheet_id, sheet_title, meta_data, formatted_data)

formatted_data: List#
meta_data: Dict[str, str]#
sheet_id: str#
sheet_title: str#
spreadsheet_id: str#
spreadsheet_title: str#
class siglatools.institution_extracters.utils.SheetData(spreadsheet_id: str, spreadsheet_title: str, sheet_id: str, sheet_title: str, meta_data: Dict[str, str], data: List[List[str]], next_uv_dates: Optional[List[str]])[source]#

Bases: tuple

The extracted data from a Google Sheet.

Attributes:
spreadsheet_id: str

The id of the spreadsheet that contains the sheet.

spreadsheet_title: str

The title of the spreadsheet that contains the sheet.

sheet_id: str

The id of the sheet.

sheet_title: str

The title of the sheet.

meta_data: Dict[str, str]

The meta data of the sheet, found in the first two rows.

data: List[List[str]]

The data of the sheet.

next_uv_dates: Optional[List[str]]

Dates of next update and verify.

Create new instance of SheetData(spreadsheet_id, spreadsheet_title, sheet_id, sheet_title, meta_data, data, next_uv_dates)

data: List[List[str]]#
meta_data: Dict[str, str]#
next_uv_dates: Optional[List[str]]#
sheet_id: str#
sheet_title: str#
spreadsheet_id: str#
spreadsheet_title: str#
siglatools.institution_extracters.utils.convert_col_to_name(col: int) str[source]#

Convert a zero-indexed column cell reference to a string.

Parameters:

col (int) – The cell column.

Returns:

col_str – The column style string.

Return type:

str

siglatools.institution_extracters.utils.convert_rowcol_to_A1_name(row: int, col: int) str[source]#

Converts row and col to an A1 name.

Parameters:
  • row (int) – The row number.

  • col (int) – The column number.

Returns:

A1_cell – The A1 cell str.

Return type:

str

siglatools.institution_extracters.utils.create_institution_sub_category(sub_categories: str) List[str][source]#

Create a list of institution sub categories.

Parameters:

sub_categories (str) – The list of sub categories, separated by ;.

Returns:

sub_categories_list – The list of sub categories.

Return type:

List[str]

Module contents#