datasus_db.datasources.auxiliar
"""Auxiliary DATASUS lookup tables: CID-10 diseases, municipios and UFs."""
import logging
import polars as pl
from ..pl_utils import fill_empty, Column, to_schema
from ..ftp import fetch_from_zip
from ..cnv import to_dataframe
from ..datasus import import_from_ftp
from ..dbf import read_as_df


# Table names: passed to `import_from_ftp` and used as the keys of the dicts
# returned by the fetch functions below.
MUNICIPIO_TABLE = "AUX_MUNICIPIO"
UF_TABLE = "AUX_UF"
CID10_DOENCA_TABLE = "AUX_CID10_DOENCA"


def import_auxiliar_tables(db_file: str = "datasus.db") -> None:
    """Import auxiliary tables with DATASUS code definitions (e.g. municipios, doenças, ...).

    Delegates to `import_from_ftp`, handing it the three auxiliary table
    names, the ftp path pattern of the CID10 documentation archive and
    `fetch_sim_auxiliar` as the fetch callback.

    Args:
        db_file (str, optional): path to the duckdb file into which the data will be imported.

    ---

    Extra:
    - **Municipio data description**: https://github.com/mymatsubara/datasus-db/blob/main/docs/auxiliar/municipio.pdf
    - **ftp path**: ftp.datasus.gov.br/dissemin/publicos/SIM/CID10/DOCS/Docs_Tabs_CID10.zip
    """
    # The message has no placeholders, so a plain string literal is enough.
    logging.info("⏳ [AUX_TABLES] Starting import...")

    import_from_ftp(
        [CID10_DOENCA_TABLE, MUNICIPIO_TABLE, UF_TABLE],
        ["/dissemin/publicos/SIM/CID10/DOCS/Docs_Tabs_CID10.zip*"],
        fetch_sim_auxiliar,
        db_file=db_file,
    )


def fetch_sim_auxiliar(ftp_path: str):
    """Fetch the CID10, municipio and UF DBF files from the zip at `ftp_path`.

    Args:
        ftp_path (str): path of the zip archive, forwarded to `fetch_from_zip`.

    Returns:
        dict mapping each auxiliary table name to its mapped dataframe.
    """
    cid10_file = "TABELAS/CID10.DBF"
    municipio_file = "TABELAS/CADMUN.DBF"
    uf_file = "TABELAS/TABUF.DBF"
    files = fetch_from_zip(ftp_path, [cid10_file, municipio_file, uf_file])

    # All three DBF files are decoded as cp850.
    cid10_df = read_as_df(cid10_file, files[cid10_file], encoding="cp850")
    municipio_df = read_as_df(municipio_file, files[municipio_file], encoding="cp850")
    uf_df = read_as_df(uf_file, files[uf_file], encoding="cp850")

    return {
        CID10_DOENCA_TABLE: map_cid10(cid10_df),
        MUNICIPIO_TABLE: map_municipio(municipio_df),
        UF_TABLE: map_uf(uf_df),
    }


def map_cid10(df: pl.DataFrame):
    """Map the raw CID10 dataframe onto the CID10 table schema.

    Applies `fill_empty(None)` (presumably turning empty values into nulls;
    confirm in `pl_utils`) before handing the frame to `to_schema`.
    """
    df = df.with_columns(fill_empty(None))

    return to_schema(
        df,
        [
            Column("CID10", pl.Utf8),
            Column("OPC", pl.Utf8),
            Column("CAT", pl.Utf8),
            Column("SUBCAT", pl.Utf8),
            Column("DESCR", pl.Utf8),
            Column("RESTRSEXO", pl.UInt8),
        ],
    )


def map_municipio(df: pl.DataFrame):
    """Map the raw municipio dataframe onto the municipio table schema.

    Applies `fill_empty(None)` (presumably turning empty values into nulls;
    confirm in `pl_utils`) before handing the frame to `to_schema`.
    """
    df = df.with_columns(fill_empty(None))

    return to_schema(
        df,
        [
            Column("MUNCOD", pl.UInt32),
            Column("MUNCODDV", pl.UInt32),
            Column("SITUACAO", pl.Utf8),
            Column("MUNSINP", pl.UInt32),
            Column("MUNSIAFI", pl.UInt32),
            Column("MUNNOME", pl.Utf8),
            Column("MUNNOMEX", pl.Utf8),
            Column("OBSERV", pl.Utf8),
            Column("MUNSINON", pl.Utf8),
            Column("MUNSINONDV", pl.Utf8),
            Column("AMAZONIA", pl.Utf8),
            Column("FRONTEIRA", pl.Utf8),
            Column("CAPITAL", pl.Utf8),
            Column("UFCOD", pl.UInt8),
            Column("MESOCOD", pl.UInt16),
            Column("MICROCOD", pl.UInt16),
            Column("MSAUDCOD", pl.UInt16),
            Column("RSAUDCOD", pl.UInt16),
            Column("CSAUDCOD", pl.UInt16),
            Column("RMETRCOD", pl.UInt16),
            Column("AGLCOD", pl.UInt16),
            Column("ANOINST", pl.UInt16),
            Column("ANOEXT", pl.UInt16),
            Column("SUCESSOR", pl.UInt32),
            Column("LATITUDE", pl.Float64),
            Column("LONGITUDE", pl.Float64),
            Column("ALTITUDE", pl.Float64),
            Column("AREA", pl.Float64),
        ],
    )


def map_uf(df: pl.DataFrame):
    """Map the raw UF dataframe onto the UF table schema via `to_schema`."""
    return to_schema(
        df,
        [
            Column("SIGLA_UF", pl.Utf8),
            Column("CODIGO", pl.UInt8),
            Column("DESCRICAO", pl.Utf8),
        ],
    )


def fetch_painel_oncologia_auxiliar(ftp_path: str):
    """Fetch the municipio and UF CNV files from the zip at `ftp_path`.

    Args:
        ftp_path (str): path of the zip archive, forwarded to `fetch_from_zip`.

    Returns:
        dict mapping `MUNICIPIO_TABLE` and `UF_TABLE` to dataframes built by
        `to_dataframe`, with the first space-separated token of `NOME` removed.
    """
    municipio_file = "CNV/br_municip.cnv"
    uf_file = "CNV/br_uf.cnv"

    files = fetch_from_zip(ftp_path, [municipio_file, uf_file])

    # Named `load_cnv` so it does not shadow the module-level `read_as_df`
    # imported from `..dbf`. `id_dtype` is a polars dtype (UInt32 or UInt8 below).
    def load_cnv(file_name: str, id_dtype):
        cnv_bytes = files[file_name]
        df = to_dataframe(cnv_bytes, id_dtype=id_dtype)
        # Drop the first space-separated token of NOME and re-join the rest.
        return df.with_columns(
            pl.col("NOME").str.split(" ").list.slice(1).list.join(" ")
        )

    return {
        MUNICIPIO_TABLE: load_cnv(municipio_file, id_dtype=pl.UInt32),
        UF_TABLE: load_cnv(uf_file, id_dtype=pl.UInt8),
    }
MUNICIPIO_TABLE =
'AUX_MUNICIPIO'
UF_TABLE =
'AUX_UF'
CID10_DOENCA_TABLE =
'AUX_CID10_DOENCA'
def
import_auxiliar_tables(db_file='datasus.db'):
def import_auxiliar_tables(db_file: str = "datasus.db") -> None:
    """Import auxiliary tables with DATASUS code definitions (e.g. municipios, doenças, ...).

    Delegates to `import_from_ftp`, handing it the three auxiliary table
    names, the ftp path pattern of the CID10 documentation archive and
    `fetch_sim_auxiliar` as the fetch callback.

    Args:
        db_file (str, optional): path to the duckdb file into which the data will be imported.

    ---

    Extra:
    - **Municipio data description**: https://github.com/mymatsubara/datasus-db/blob/main/docs/auxiliar/municipio.pdf
    - **ftp path**: ftp.datasus.gov.br/dissemin/publicos/SIM/CID10/DOCS/Docs_Tabs_CID10.zip
    """
    # The message has no placeholders, so a plain string literal is enough.
    logging.info("⏳ [AUX_TABLES] Starting import...")

    import_from_ftp(
        [CID10_DOENCA_TABLE, MUNICIPIO_TABLE, UF_TABLE],
        ["/dissemin/publicos/SIM/CID10/DOCS/Docs_Tabs_CID10.zip*"],
        fetch_sim_auxiliar,
        db_file=db_file,
    )
Import auxiliary tables containing definitions of DATASUS codes (e.g. municipios, doenças, ...).
Arguments:
- db_file (str, optional): path to the DuckDB file into which the data will be imported.
Extra:
- Municipio data description: https://github.com/mymatsubara/datasus-db/blob/main/docs/auxiliar/municipio.pdf
- ftp path: ftp.datasus.gov.br/dissemin/publicos/SIM/CID10/DOCS/Docs_Tabs_CID10.zip
def
fetch_sim_auxiliar(ftp_path: str):
def fetch_sim_auxiliar(ftp_path: str):
    """Fetch the CID10, municipio and UF DBF files from the zip at `ftp_path`.

    Each DBF member is decoded as cp850 and passed through its mapper.

    Args:
        ftp_path (str): path of the zip archive, forwarded to `fetch_from_zip`.

    Returns:
        dict mapping each auxiliary table name to its mapped dataframe.
    """
    # (destination table, member inside the zip, mapper applied to the raw frame)
    sources = (
        (CID10_DOENCA_TABLE, "TABELAS/CID10.DBF", map_cid10),
        (MUNICIPIO_TABLE, "TABELAS/CADMUN.DBF", map_municipio),
        (UF_TABLE, "TABELAS/TABUF.DBF", map_uf),
    )
    archive = fetch_from_zip(ftp_path, [member for _, member, _ in sources])

    # Decode every DBF first, then map, so all reads happen before any mapping.
    raw_frames = [
        read_as_df(member, archive[member], encoding="cp850")
        for _, member, _ in sources
    ]

    return {
        table: mapper(frame)
        for (table, _, mapper), frame in zip(sources, raw_frames)
    }
def
map_cid10(df: polars.dataframe.frame.DataFrame):
def
map_municipio(df: polars.dataframe.frame.DataFrame):
def map_municipio(df: pl.DataFrame):
    """Map the raw municipio dataframe onto the municipio table schema.

    Applies `fill_empty(None)` (presumably turning empty values into nulls;
    confirm in `pl_utils`) and hands the result to `to_schema` together with
    one `Column` per expected field.
    """
    cleaned = df.with_columns(fill_empty(None))

    # (column name, polars dtype) pairs, in schema order.
    layout = (
        ("MUNCOD", pl.UInt32),
        ("MUNCODDV", pl.UInt32),
        ("SITUACAO", pl.Utf8),
        ("MUNSINP", pl.UInt32),
        ("MUNSIAFI", pl.UInt32),
        ("MUNNOME", pl.Utf8),
        ("MUNNOMEX", pl.Utf8),
        ("OBSERV", pl.Utf8),
        ("MUNSINON", pl.Utf8),
        ("MUNSINONDV", pl.Utf8),
        ("AMAZONIA", pl.Utf8),
        ("FRONTEIRA", pl.Utf8),
        ("CAPITAL", pl.Utf8),
        ("UFCOD", pl.UInt8),
        ("MESOCOD", pl.UInt16),
        ("MICROCOD", pl.UInt16),
        ("MSAUDCOD", pl.UInt16),
        ("RSAUDCOD", pl.UInt16),
        ("CSAUDCOD", pl.UInt16),
        ("RMETRCOD", pl.UInt16),
        ("AGLCOD", pl.UInt16),
        ("ANOINST", pl.UInt16),
        ("ANOEXT", pl.UInt16),
        ("SUCESSOR", pl.UInt32),
        ("LATITUDE", pl.Float64),
        ("LONGITUDE", pl.Float64),
        ("ALTITUDE", pl.Float64),
        ("AREA", pl.Float64),
    )
    schema = [Column(name, dtype) for name, dtype in layout]

    return to_schema(cleaned, schema)
def
map_uf(df: polars.dataframe.frame.DataFrame):
def
fetch_painel_oncologia_auxiliar(ftp_path: str):
def fetch_painel_oncologia_auxiliar(ftp_path: str):
    """Fetch the municipio and UF CNV files from the zip at `ftp_path`.

    Args:
        ftp_path (str): path of the zip archive, forwarded to `fetch_from_zip`.

    Returns:
        dict mapping `MUNICIPIO_TABLE` and `UF_TABLE` to dataframes built by
        `to_dataframe`, with the first space-separated token of `NOME` removed.
    """
    municipio_file = "CNV/br_municip.cnv"
    uf_file = "CNV/br_uf.cnv"

    files = fetch_from_zip(ftp_path, [municipio_file, uf_file])

    # Named `load_cnv` so it does not shadow the module-level `read_as_df`
    # imported from `..dbf`. `id_dtype` is a polars dtype (UInt32 or UInt8 below).
    def load_cnv(file_name: str, id_dtype):
        cnv_bytes = files[file_name]
        df = to_dataframe(cnv_bytes, id_dtype=id_dtype)
        # Drop the first space-separated token of NOME and re-join the rest.
        return df.with_columns(
            pl.col("NOME").str.split(" ").list.slice(1).list.join(" ")
        )

    return {
        MUNICIPIO_TABLE: load_cnv(municipio_file, id_dtype=pl.UInt32),
        UF_TABLE: load_cnv(uf_file, id_dtype=pl.UInt8),
    }