# Reconstructed from a collapsed multi-file diff. Each section below is
# labelled with the path the diff added it to.

import datetime
import random
import re
from urllib.parse import quote

# ---------------------------------------------------------------------------
# .github/pull_request_template.md
# ---------------------------------------------------------------------------
# # PR template..


# ---------------------------------------------------------------------------
# codonPython/dateValidator.py
# ---------------------------------------------------------------------------

# Shape check only: 1-2 digit day and month, a single separator used for both
# positions (enforced by the back-reference), and a 4- or 2-digit year.
# Calendar rules are checked numerically afterwards.
_DATE_SHAPE = re.compile(
    r"([0-9]{1,2})([/.-])([0-9]{1,2})\2([0-9]{4}|[0-9]{2})"
)

# Earliest four-digit year accepted.
_MIN_YEAR = 1900


def validDate(date_string: str) -> bool:
    """
    Validate string dates of type `dd/mm/yyyy`, `dd-mm-yyyy` or `dd.mm.yyyy`
    for years 1900-9999. Leap years are handled.

    Leading zeros on day and month are optional. The same separator must be
    used in both positions. Two-digit years (`dd/mm/yy`) are also accepted;
    no century is inferred for them beyond treating `yy` as `19yy` for the
    29 February check.

    Parameters
    ----------
    date_string : str
        Date to be validated

    Returns
    ----------
    boolean
        Whether the date is valid or not

    Examples
    ---------
    >>> validDate("11/02/1996")
    True
    >>> validDate("29/02/2016")
    True
    >>> validDate("43/01/1996")
    False
    >>> validDate("31/01/1850")
    False
    """
    # fullmatch (rather than match with `$`) rejects a trailing newline.
    shape = _DATE_SHAPE.fullmatch(date_string)
    if shape is None:
        return False

    day_text, _separator, month_text, year_text = shape.groups()
    if len(year_text) == 4:
        year = int(year_text)
    else:
        # 19yy has the same leap pattern the two-digit form needs:
        # yy divisible by 4 is a leap year, except yy == 00 (1900).
        year = 1900 + int(year_text)
    if year < _MIN_YEAR:
        return False

    try:
        # datetime.date enforces month range, days per month and leap years.
        datetime.date(year, int(month_text), int(day_text))
    except ValueError:
        return False
    return True


# ---------------------------------------------------------------------------
# codonPython/nhsNumberGenerator.py
# ---------------------------------------------------------------------------

def nhsNumberGenerator(to_generate: int) -> list:
    """
    Generates random NHS number(s) compliant with modulus 11 checks recorded
    in the data dictionary.
    https://www.datadictionary.nhs.uk/data_dictionary/attributes/n/nhs/nhs_number_de.asp?shownav=1

    Candidates are drawn with `random.randint` and kept only when their
    weighted digit sum is a multiple of 11, so every number returned has a
    check digit of 0.

    Parameters
    ----------
    to_generate : int
        number of NHS numbers to generate

    Returns
    ----------
    generated : list
        List of randomly generated NHS numbers (as ints). Empty when
        `to_generate` is zero or negative.

    Examples
    ---------
    >>> random.seed(42)
    >>> nhsNumberGenerator(2)
    [7865793030, 1933498560]
    """
    generated = []
    while len(generated) < to_generate:
        # Random 9 digit number starting with a non-zero digit.
        candidate = random.randint(100000000, 999999999)
        # Weights run 10, 9, ..., 2 across the nine digits.
        weighted_sum = sum(
            weight * int(digit)
            for weight, digit in zip(range(10, 1, -1), str(candidate))
        )
        # A weighted sum that is a multiple of 11 corresponds to check digit 0.
        if weighted_sum % 11 == 0:
            # Appending the check digit "0" is the same as multiplying by 10.
            generated.append(candidate * 10)
    return generated


# ---------------------------------------------------------------------------
# codonPython/tableFromSql.py
# ---------------------------------------------------------------------------

def tableFromSql(
    server: str,
    database: str,
    table_name: str,
    user: str = "",
    password: str = "",
    schema: str = None,
    index_col: str = None,
    coerce_float: bool = True,
    parse_dates: list = None,
    columns: list = None,
    chunksize: int = None,
):
    """
    Returns a SQL table in a DataFrame.

    Convert a table stored in SQL Server 2016 into a pandas dataframe.
    Uses sqlalchemy and pandas.

    Parameters
    ----------
    server : string
        Name of the SQL server
    database : string
        Name of the SQL database
    table_name : string
        Name of SQL table in database.
    user : string, default: ""
        If verification is required, name of the user. Pass the raw value;
        it is percent-encoded before being placed in the connection URL.
    password : string, default: ""
        If verification is required, password of the user. Pass the raw
        value; it is percent-encoded before being placed in the connection
        URL.
    schema : string, default : None
        Name of SQL schema in database to query (if database flavor supports
        this). Uses default schema if None (default).
    index_col : string or list of strings, default : None
        Column(s) to set as index(MultiIndex).
    coerce_float : boolean, default : True
        Attempts to convert values of non-string, non-numeric objects (like
        decimal.Decimal) to floating point. Can result in loss of precision.
    parse_dates : list or dict, default : None
        - List of column names to parse as dates.
        - Dict of {column_name: format string} where format string is
          strftime compatible in case of parsing string times or is one of
          (D, s, ns, ms, us) in case of parsing integer timestamps.
        - Dict of {column_name: arg dict}, where the arg dict corresponds to
          the keyword arguments of pandas.to_datetime(). Especially useful
          with databases without native Datetime support, such as SQLite.
    columns : list, default : None
        List of column names to select from SQL table
    chunksize : int, default : None
        If specified, returns an iterator where chunksize is the number of
        rows to include in each chunk.

    Returns
    ----------
    pd.DataFrame
        Dataframe of the table requested from sql server (or an iterator of
        dataframes when `chunksize` is given).

    Raises
    ----------
    Any exception raised by sqlalchemy or pandas propagates unchanged.

    Examples
    ---------
    # >>> tableFromSql("myServer2", "myDatabase2", "myTable2")
    # pd.DataFrame
    # >>> tableFromSql("myServer", "myDatabase", "myTable", schema="specialSchema", columns=["col_1", "col_3"])
    # pd.DataFrame
    """
    # Imported here so the pure-Python helpers in this module stay importable
    # on machines without the database stack installed.
    import pandas as pd
    from sqlalchemy import create_engine

    # Percent-encode the credentials so characters such as '@', '/' or ':'
    # cannot be mistaken for URL delimiters.
    uri = "mssql+pyodbc://{}:{}@{}/{}?driver=SQL Server Native Client 11.0".format(
        quote(user, safe=""), quote(password, safe=""), server, database
    )
    engine = create_engine(uri)
    return pd.read_sql_table(
        table_name,
        engine,
        schema=schema,
        index_col=index_col,
        coerce_float=coerce_float,
        parse_dates=parse_dates,
        columns=columns,
        chunksize=chunksize,
    )


# ---------------------------------------------------------------------------
# setup.py (hunk @@ -7,6 +7,10 @@; the enclosing setup() call is outside this
# view, so the fragment is kept verbatim as comments)
# ---------------------------------------------------------------------------
#     packages=['codonPython',],
#     install_required=[
#         'numpy',
#         're',
#         'pandas',
#         'random',
#         'sqlalchemy'
#     ],
#     author='NHS Digital DIS Team',
#     author_email='paul.ellingham@nhs.net',
#
# NOTE(review): the setuptools keyword is `install_requires`;
# `install_required` is not a recognised option, so none of these packages
# are declared as dependencies. `re` and `random` are standard-library
# modules and should not be listed. Suggested replacement:
#     install_requires=['numpy', 'pandas', 'sqlalchemy'],
# NOTE(review): tableFromSql connects via "mssql+pyodbc", so `pyodbc` is
# presumably needed at runtime as well; confirm before adding it.