From ecd70e58456c2fe5975739cb15ed7fdcfc674a03 Mon Sep 17 00:00:00 2001 From: Murdo Moyse Date: Wed, 17 Jul 2019 15:17:13 +0100 Subject: [PATCH 1/4] dateValidator and tableFromSql added --- codonPython/dateValidator.py | 26 ++++++++++++++++ codonPython/tableFromSql.py | 60 ++++++++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+) create mode 100644 codonPython/dateValidator.py create mode 100644 codonPython/tableFromSql.py diff --git a/codonPython/dateValidator.py b/codonPython/dateValidator.py new file mode 100644 index 0000000..7abb8fe --- /dev/null +++ b/codonPython/dateValidator.py @@ -0,0 +1,26 @@ +import re + +def validDate(string: str): + """Validates stringtype dates of type `dd/mm/yyyy`, `dd-mm-yyyy` or `dd.mm.yyyy` from years 1900 - 9999. Leap year support included. + + Arguments: + string: str + Date to be validated + + Outputs: + boolean + Whether the date is valid or not + + >>> validDate("11/02/1996") + True + + >>> validDate("29/02/2016") + True + + >>> validDate("43/01/1996") + False + """ + if re.match(r"^(?:(?:31(\/|-|\.)(?:0?[13578]|1[02]))\1|(?:(?:29|30)(\/|-|\.)(?:0?[13-9]|1[0-2])\2))(?:(?:1[6-9]|[2-9]\d)?\d{2})$|^(?:29(\/|-|\.)0?2\3(?:(?:(?:1[6-9]|[2-9]\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00))))$|^(?:0?[1-9]|1\d|2[0-8])(\/|-|\.)(?:(?:0?[1-9])|(?:1[0-2]))\4(?:(?:19|[2-9]\d)?\d{2})$", string, flags=0): + return True + else: + return False \ No newline at end of file diff --git a/codonPython/tableFromSql.py b/codonPython/tableFromSql.py new file mode 100644 index 0000000..a817262 --- /dev/null +++ b/codonPython/tableFromSql.py @@ -0,0 +1,60 @@ +from sqlalchemy import create_engine +import pandas as pd + +def tableFromSql(server, database, table_name, user="", password="", schema=None, index_col=None, coerce_float=True, parse_dates=None, columns=None, chunksize=None): + '''Returns a SQL table in a DataFrame. + + Convert a table stored in SQL Server 2016 into a pandas dataframe. 
Requires the odbc driver for SQL Server. + + Args: + server: string + Name of the SQL server + + database: string + Name of the SQL database + + user: string, default: "" + If verification is required, name of the user + + password: string, default: "" + If verification is required, password of the user + + table_name : string + Name of SQL table in database. + + schema : string, default None + Name of SQL schema in database to query (if database flavor supports this). Uses default schema if None (default). + + index_col : string or list of strings, optional, default: None + Column(s) to set as index(MultiIndex). + + coerce_float : boolean, default True + Attempts to convert values of non-string, non-numeric objects (like decimal.Decimal) to floating point. Can result in loss of Precision. + + parse_dates : list or dict, default: None + - List of column names to parse as dates. + - Dict of {column_name: format string} where format string is strftime compatible in case of parsing string times or is one of (D, s, ns, ms, us) in case of parsing integer timestamps. + - Dict of {column_name: arg dict}, where the arg dict corresponds to the keyword arguments of pandas.to_datetime() Especially useful with databases without native Datetime support, such as SQLite. + + columns : list, default: None + List of column names to select from SQL table + + chunksize : int, default None + If specified, returns an iterator where chunksize is the number of rows to include in each chunk. 
+ + + Returns: + pd.DataFrame + Dataframe of the table requested from sql server + + >>> tableFromSql("myServer2", "myDatabase2", "myTable2") + pd.DataFrame + + >>> tableFromSql("myServer", "myDatabase", "myTable", schema="specialSchema", columns=["col_1", "col_3"]) + pd.DataFrame + ''' + + uri = "mssql+pyodbc://{}:{}@{}/{}?driver=SQL Server Native Client 11.0".format(user, password, server, database) + engine = create_engine(uri) + + return pd.read_sql_table(table_name, engine, schema=None, index_col=None, coerce_float=True, parse_dates=None, columns=None, chunksize=None) \ No newline at end of file From 18f95f064dc36d24356b33255f47ea7c1c272211 Mon Sep 17 00:00:00 2001 From: Murdo Moyse Date: Thu, 18 Jul 2019 09:45:11 +0100 Subject: [PATCH 2/4] Delete files --- codonPython/dateValidator.py | 26 ---------------- codonPython/tableFromSql.py | 60 ------------------------------------ 2 files changed, 86 deletions(-) delete mode 100644 codonPython/dateValidator.py delete mode 100644 codonPython/tableFromSql.py diff --git a/codonPython/dateValidator.py b/codonPython/dateValidator.py deleted file mode 100644 index 7abb8fe..0000000 --- a/codonPython/dateValidator.py +++ /dev/null @@ -1,26 +0,0 @@ -import re - -def validDate(string: str): - """Validates stringtype dates of type `dd/mm/yyyy`, `dd-mm-yyyy` or `dd.mm.yyyy` from years 1900 - 9999. Leap year support included. 
- - Arguments: - string: str - Date to be validated - - Outputs: - boolean - Whether the date is valid or not - - >>> validDate("11/02/1996") - True - - >>> validDate("29/02/2016") - True - - >>> validDate("43/01/1996") - False - """ - if re.match(r"^(?:(?:31(\/|-|\.)(?:0?[13578]|1[02]))\1|(?:(?:29|30)(\/|-|\.)(?:0?[13-9]|1[0-2])\2))(?:(?:1[6-9]|[2-9]\d)?\d{2})$|^(?:29(\/|-|\.)0?2\3(?:(?:(?:1[6-9]|[2-9]\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00))))$|^(?:0?[1-9]|1\d|2[0-8])(\/|-|\.)(?:(?:0?[1-9])|(?:1[0-2]))\4(?:(?:19|[2-9]\d)?\d{2})$", string, flags=0): - return True - else: - return False \ No newline at end of file diff --git a/codonPython/tableFromSql.py b/codonPython/tableFromSql.py deleted file mode 100644 index a817262..0000000 --- a/codonPython/tableFromSql.py +++ /dev/null @@ -1,60 +0,0 @@ -from sqlalchemy import create_engine -import pandas as pd - -def tableFromSql(server, database, table_name, user="", password="", schema=None, index_col=None, coerce_float=True, parse_dates=None, columns=None, chunksize=None): - '''Returns a SQL table in a DataFrame. - - Convert a table stored in SQL Server 2016 into a pandas dataframe. Requires the odbc driver for SQL Server. - - Args: - server: string - Name of the SQL server - - database: string - Name of the SQL database - - user: string, default: "" - If verification is required, name of the user - - password: string, default: "" - If verification is required, password of the user - - table_name : string - Name of SQL table in database. - - schema : string, default None - Name of SQL schema in database to query (if database flavor supports this). Uses default schema if None (default). - - index_col : string or list of strings, optional, default: None - Column(s) to set as index(MultiIndex). - - coerce_float : boolean, default True - Attempts to convert values of non-string, non-numeric objects (like decimal.Decimal) to floating point. Can result in loss of Precision. 
- - parse_dates : list or dict, default: None - - List of column names to parse as dates. - - Dict of {column_name: format string} where format string is strftime compatible in case of parsing string times or is one of (D, s, ns, ms, us) in case of parsing integer timestamps. - - Dict of {column_name: arg dict}, where the arg dict corresponds to the keyword arguments of pandas.to_datetime() Especially useful with databases without native Datetime support, such as SQLite. - - columns : list, default: None - List of column names to select from SQL table - - chunksize : int, default None - If specified, returns an iterator where chunksize is the number of rows to include in each chunk. - - - Returns: - pd.DataFrame - Dataframe of the table requested from sql server - - >>> tableFromSql("myServer2", "myDatabase2", "myTable2") - pd.DataFrame - - >>> tableFromSql("myServer", "myDatabase", "myTable", schema="specialSchema", columns=["col_1", "col_3"]) - pd.DataFrame - ''' - - uri = "mssql+pyodbc://{}:{}@{}/{}?driver=SQL Server Native Client 11.0".format(user, password, server, database) - engine = create_engine(uri) - - return pd.read_sql_table(table_name, engine, schema=None, index_col=None, coerce_float=True, parse_dates=None, columns=None, chunksize=None) \ No newline at end of file From a8851ba2ecb49157b5aec9a48e0e01ee668946d2 Mon Sep 17 00:00:00 2001 From: Murdo Moyse Date: Thu, 18 Jul 2019 16:08:54 +0100 Subject: [PATCH 3/4] New functions added --- codonPython/dateValidator.py | 35 +++++++++++++++++ codonPython/nhsNumberGenerator.py | 39 +++++++++++++++++++ codonPython/tableFromSql.py | 65 +++++++++++++++++++++++++++++++ setup.py | 4 ++ 4 files changed, 143 insertions(+) create mode 100644 codonPython/dateValidator.py create mode 100644 codonPython/nhsNumberGenerator.py create mode 100644 codonPython/tableFromSql.py diff --git a/codonPython/dateValidator.py b/codonPython/dateValidator.py new file mode 100644 index 0000000..57893ea --- /dev/null +++ 
b/codonPython/dateValidator.py @@ -0,0 +1,35 @@ +import re + + +def validDate(date_string: str)->bool: + """ + Validates stringtype dates of type `dd/mm/yyyy`, `dd-mm-yyyy` or `dd.mm.yyyy` from + years 1900-9999. Leap year support included. + + Parameters + ---------- + date_string : str + Date to be validated + + Returns + ---------- + boolean + Whether the date is valid or not + + Examples + --------- + >>> validDate("11/02/1996") + True + >>> validDate("29/02/2016") + True + >>> validDate("43/01/1996") + False + """ + + # This regex string will validate dates of type `dd/mm/yyyy`, `dd-mm-yyyy` or `dd.mm.yyyy` + # from years 1900 - 9999. Leap year support included. Regex string from + # https://stackoverflow.com/questions/15491894/regex-to-validate-date-format-dd-mm-yyyy + if re.match(r"^(?:(?:31(\/|-|\.)(?:0?[13578]|1[02]))\1|(?:(?:29|30)(\/|-|\.)(?:0?[13-9]|1[0-2])\2))(?:(?:1[6-9]|[2-9]\d)?\d{2})$|^(?:29(\/|-|\.)0?2\3(?:(?:(?:1[6-9]|[2-9]\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00))))$|^(?:0?[1-9]|1\d|2[0-8])(\/|-|\.)(?:(?:0?[1-9])|(?:1[0-2]))\4(?:(?:19|[2-9]\d)?\d{2})$", date_string, flags=0): + return True + else: + return False \ No newline at end of file diff --git a/codonPython/nhsNumberGenerator.py b/codonPython/nhsNumberGenerator.py new file mode 100644 index 0000000..5d6d8b0 --- /dev/null +++ b/codonPython/nhsNumberGenerator.py @@ -0,0 +1,39 @@ +import random + + +def nhsNumberGenerator(to_generate: int)->list: + """ + Generates random NHS number(s) compliant with modulus 11 checks recorded + in the data dictionary. 
+ https://www.datadictionary.nhs.uk/data_dictionary/attributes/n/nhs/nhs_number_de.asp?shownav=1 + + Parameters + ---------- + to_generate : int + number of NHS numbers to generate + + Returns + ---------- + generated : list + List of randomly generated NHS numbers + + Examples + --------- + >>> random.seed(42) + >>> nhsNumberGenerator(2) + [7865793030, 1933498560] + """ + + generated = [] + while len(generated) < to_generate: + # Random 9 digit number starting with non-zero digit + number = random.randint(100000000, 999999999) + digits = [int(digit) for digit in str(number)] + # Apply weighting to digits + weighted_digits = [(10 - index) * digit for (index, digit) in enumerate(digits)] + # Sum of all weighted digits must be a multiple of 11 to be valid. + if sum(weighted_digits) % 11 == 0: + # Add check digit to valid number + number = int(str(number) + "0") + generated.append(number) + return generated diff --git a/codonPython/tableFromSql.py b/codonPython/tableFromSql.py new file mode 100644 index 0000000..e250c32 --- /dev/null +++ b/codonPython/tableFromSql.py @@ -0,0 +1,65 @@ +from sqlalchemy import create_engine +import pandas as pd + + +def tableFromSql(server: str, database: str, table_name: str, user: str = "", password: str = "", schema: str = None, index_col: str = None, coerce_float: bool = True, parse_dates: list = None, columns: list = None, chunksize: int = None): + ''' + Returns a SQL table in a DataFrame. + + Convert a table stored in SQL Server 2016 into a pandas dataframe. + Uses sqlalchemy and pandas. + + Parameters + ---------- + server : string + Name of the SQL server + database : string + Name of the SQL database + user : string, default: "" + If verification is required, name of the user + password : string, default: "" + If verification is required, password of the user + table_name : string + Name of SQL table in database. + schema : string, default : None + Name of SQL schema in database to query (if database flavor supports this). 
Uses + default schema if None (default). + index_col : string or list of strings, default : None + Column(s) to set as index(MultiIndex). + coerce_float : boolean, default : True + Attempts to convert values of non-string, non-numeric objects (like decimal.Decimal) + to floating point. Can result in loss of Precision. + parse_dates : list or dict, default : None + - List of column names to parse as dates. + - Dict of {column_name: format string} where format string is strftime compatible in + case of parsing string times or is one of (D, s, ns, ms, us) in case of parsing + integer timestamps. + - Dict of {column_name: arg dict}, where the arg dict corresponds to the keyword + arguments of pandas.to_datetime() Especially useful with databases without native + Datetime support, such as SQLite. + columns : list, default : None + List of column names to select from SQL table + chunksize : int, default : None + If specified, returns an iterator where chunksize is the number of rows to include + in each chunk. 
+ + Returns + ---------- + pd.DataFrame + Dataframe of the table requested from sql server + + Examples + --------- + # >>> tableFromSql("myServer2", "myDatabase2", "myTable2") + # pd.DataFrame + # >>> tableFromSql("myServer", "myDatabase", "myTable", schema="specialSchema", columns=["col_1", "col_3"]) + # pd.DataFrame + ''' + + try: + uri = "mssql+pyodbc://{}:{}@{}/{}?driver=SQL Server Native Client 11.0".format(user, password, server, database) + engine = create_engine(uri) + return pd.read_sql_table(table_name, engine, schema=schema, index_col=index_col, coerce_float=coerce_float, parse_dates=parse_dates, columns=columns, chunksize=chunksize) + except Exception as error: + raise error + diff --git a/setup.py b/setup.py index edda5a8..c2d62ea 100644 --- a/setup.py +++ b/setup.py @@ -7,6 +7,10 @@ packages=['codonPython',], install_required=[ 'numpy', + 're', + 'pandas', + 'random', + 'sqlalchemy' ], author='NHS Digital DIS Team', author_email='paul.ellingham@nhs.net', From d04029eab3707f5dbd1a7945839b609a662704fa Mon Sep 17 00:00:00 2001 From: Murdo <49711704+murd0@users.noreply.github.com> Date: Mon, 13 Jan 2020 16:05:54 +0000 Subject: [PATCH 4/4] Create pull_request_template.md --- .github/pull_request_template.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 .github/pull_request_template.md diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000..b75f15c --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1 @@ +# PR template..