Table

Initializes an instance of a Table with the unique identifier atlas_id, table_name and optional list of columns.

Parameters:

atlas_id (str) –

Atlas ID.
table_name (str) –

The name of the table to be initialized.
columns (List[Column], default: None) –

List of column objects representing the columns in the table.

Usage

from polly.auth import Polly
from polly.atlas import Atlas, Table, Column

Polly.auth("<access_key>")

table = Table(atlas_id="atlas_1", table_name="patient_exposure")

add_column

add_column(column)

Adds a new column to the table.

Parameters:

column (Column) –

The Column object representing the column to add.

Returns:

Column –

The Column object that was added to the table.

Examples:

>>> new_column = Column(column_name='patient_age', data_type='int')
>>> added_column = patient_table.add_column(column=new_column)

add_rows

add_rows(rows)

Adds new rows to the table.

Parameters:

rows (List[dict]) –

A list of dictionaries representing rows to be added.

Examples:

>>> patient_table = Table(atlas_id='atlas_1', table_name='patient')
>>> rows = [
>>>     {"patient_id": "P0311", "patient_age": 23},
>>>     {"patient_id": "P0312", "patient_age": 24},
>>> ]
>>> patient_table.add_rows(rows)

delete_column

delete_column(column_name)

Delete a column from the table based on its name.

Parameters:

column_name (str) –

The name of the column to be deleted.

Examples:

>>> patient_table = Table(atlas_id='atlas_1', table_name='patient')
>>> patient_table.delete_column(column_name='patient_age')

delete_rows

delete_rows(rows)

Delete rows from the table based on their primary key column values.

Parameters:

rows (List[dict]) –

A list of key-value pairs representing rows to delete, where the key is the primary key column name and value is the corresponding entry.

Examples:

>>> patient_table = Table(atlas_id='atlas_1', table_name='patient')
>>> rows = [
>>>     {'patient_id': 'P0311'},
>>>     {'patient_id': 'P0322'}
>>> ]
>>> patient_table.delete_rows(rows=rows)

get_column

get_column(column_name)

Retrieve a specific column from the table based on its name.

Parameters:

column_name (str) –

The name of the column to retrieve.

Returns:

Column –

The Column object representing the specified column.

Raises:

ValueError –

If no column with the specified name is found in the table.

Examples:

>>> patient_table = Table(atlas_id='atlas_1', table_name='patient')
>>> column = patient_table.get_column(column_name='patient_id')

head

head()

Retrieve the first five rows of the table as a Pandas DataFrame.

Returns:

DataFrame –

A Pandas DataFrame containing the first five rows of the table.

Examples:

>>> patient_table = Table(atlas_id='atlas_1', table_name='patient')
>>> head_df = patient_table.head()

iter_rows

iter_rows(page_size)

Iterate over the rows of the table in a paginated manner.

Parameters:

page_size (Optional[int]) –

Page size for iteration over the table. Defaults to 500000 rows.

Yields:

List[Dict[str, Any]] –

A list of dictionaries representing rows of the table, with column names as keys and corresponding values.

Examples:

>>> patient_table = Table(atlas_id='atlas_1', table_name='patient')
>>> for page_rows in patient_table.iter_rows():
>>>     for row in page_rows:
>>>         print(row)

list_columns

list_columns()

Retrieve the list of columns associated with the table.

Returns:

List[Column] –

A list of Column objects representing the columns in the table.

Examples:

>>> patient_table = Table(atlas_id='atlas_1', table_name='patient')
>>> columns = patient_table.list_columns()

rename_column

rename_column(old_column_name, new_column_name)

Rename a column in the table.

Parameters:

old_column_name (str) –

The current name of the column to rename.
new_column_name (str) –

The new name to assign to the column.

Returns:

None –

None.

Examples:

>>> table = Table(atlas_id='my_atlas', table_name='patient')
>>> table.rename_column(old_column_name='age', new_column_name='patient_age')

to_df

to_df()

Return the complete table as a Pandas DataFrame.

Returns:

DataFrame –

A Pandas DataFrame containing the data from the table.

Examples:

>>> patient_table = Table(atlas_id='atlas_1', table_name='patient')
>>> df = patient_table.to_df()

update_rows

update_rows(rows)

Update rows in the table based on provided row data.

Parameters:

rows (List[dict]) –

A list of dictionaries representing the rows to update.

Examples:

>>> patient_table = Table(atlas_id='atlas_1', table_name='patient')
>>> rows = [
>>>    {"patient_id": "P0311", "patient_age": 23},
>>>    {"patient_id": "P0322", "patient_age": 24},
>>> ]
>>> patient_table.update_rows(rows=rows)

Examples

A table is a collection of user data. The table represents a database table and stores the user's data.

You can access it through the Table class.

from polly.auth import Polly
from polly.atlas import Atlas, Table, Column

Polly.auth("<access_key>")

exposure_table = Table(atlas_id="atlas_1", table_name="patient_exposure")
print(exposure_table)

# Table(
#   table_name='patient_exposure', 
#   columns = [
#     Column(column_name="curated_patient_id", data_type="text"),
#     Column(column_name="alcohol_history", data_type="boolean"),
#     Column(column_name="alcohol_intensity", data_type="text"),
#     Column(column_name="tobacco_smoking", data_type="integer"),
#   ]     
# )

View first 5 rows of the table

df=exposure_table.head()
print(df)

# curated_patient_id  alcohol_history alcohol_intensity  tobacco_smoking
#              P0031             True              High               10
#              P0032            False              None                0
#              P0033             True          Moderate                5
#              P0034             True               Low                2
#              P0035            False              None                0

Add a new column

We can't add a new column as the primary key if the table already has one. Also, we can't delete the primary key column from the table.

bmi_column = exposure_table.add_column(Column(column_name="bmi", data_type="integer"))
print(bmi_column)

# Column(column_name='bmi', data_type='integer', primary_key=False)

Iterate over the rows of the table.

It iterates in batches of page_size records (500000 by default).

for page in exposure_table.iter_rows():
  for record in page:
    print(record)

#{'curated_patient_id': 'P0031', 'alcohol_history': True, 'alcohol_intensity': 'High', 'tobacco_smoking': 10, 'bmi': None}
#{'curated_patient_id': 'P0032', 'alcohol_history': False, 'alcohol_intensity': 'None', 'tobacco_smoking': 0, 'bmi': None}
#{'curated_patient_id': 'P0033', 'alcohol_history': True, 'alcohol_intensity': 'Moderate', 'tobacco_smoking': 5, 'bmi': None}
#{'curated_patient_id': 'P0034', 'alcohol_history': True, 'alcohol_intensity': 'Low', 'tobacco_smoking': 2, 'bmi': None}
#{'curated_patient_id': 'P0035', 'alcohol_history': False, 'alcohol_intensity': 'None', 'tobacco_smoking': 0, 'bmi': None}

Load the entire table data into a dataframe

df = exposure_table.to_df()

# curated_patient_id  alcohol_history alcohol_intensity  tobacco_smoking   bmi
#              P0031             True              High               10  None
#              P0032            False              None                0  None
#              P0033             True          Moderate                5  None
#              P0034             True               Low                2  None
#              P0035            False              None                0  None
#              P0036             True              None                2  None
#              P0037            False              None                0  None