avwx.parsing.sanitization.cleaners.wind
Cleaners for wind elements.
"""Cleaners for wind elements."""

import re

from avwx.parsing.core import is_unknown
from avwx.parsing.sanitization.base import CleanItem, RemoveItem

# Substrings that sanitize_wind deletes outright
WIND_REMV = ("/", "-", "{", "}", "(N)", "(E)", "(S)", "(W)")

# Mistyped substring -> replacement. sanitize_wind applies these in insertion
# order, so entries that must run before others are placed earlier
WIND_REPL = {
    "O": "0",
    "|": "1",
    "MPSM": "MPS",  # conflict with SM
    "FG": "G",
    "GG": "G",
    "GT": "G",
    "GS": "G",
    "SQ": "G",
    "CT": "KT",
    "JT": "KT",
    "SM": "KT",
    "KTKT": "KT",  # Must come before TK
    "TK": "KT",
    "LKT": "KT",
    "ZKT": "KT",
    "KKT": "KT",
    "JKT": "KT",
    "KLT": "KT",
    "TKT": "KT",
    "GKT": "KT",
    "PKT": "KT",
    "XKT": "KT",
    "VRBL": "VRB",  # Not caught in WIND_VRB
}

# Leading mistypes of VRB that the two-letter heuristic in sanitize_wind misses
WIND_VRB = ("WBB",)

# Element ending in two digits, a K, and one character that is not T
KT_PATTERN = re.compile(r"\b[\w\d]*\d{2}K[^T]\b")


def sanitize_wind(text: str) -> str:
    """Fix rare wind issues that may be too broad otherwise.

    Steps run in order: delete every WIND_REMV substring, apply each WIND_REPL
    substitution, rebuild a mangled leading "VRB", then repair a trailing "KT".

    Args:
        text: Raw wind element text.

    Returns:
        The element after all substitutions have been applied.
    """
    for rep in WIND_REMV:
        text = text.replace(rep, "")
    for key, rep in WIND_REPL.items():
        text = text.replace(key, rep)
    if len(text) > 4 and not text.startswith("VRB") and not text[:3].isdigit():
        # Catches majority of cases where at least two valid letters are found
        if len(set(text[:4]).intersection({"V", "R", "B"})) > 1:
            # Everything before the first digit is replaced by VRB
            for i, char in enumerate(text):
                if char.isdigit():
                    text = f"VRB{text[i:]}"
                    break
        else:
            for key in WIND_VRB:
                if text.startswith(key):
                    zero = "0" if key[-1] == "0" else ""
                    # Only the matched prefix is the mistype, so replace once
                    text = text.replace(key, f"VRB{zero}", 1)
                    break
    # Final check for end units. Remainder of string would be fixed at this point if valid
    # For now, it's only checking for K(T) since that is most instances
    # The parser can still handle/expect missing and separated units
    # fullmatch: the last character is overwritten, so the pattern must reach it
    if KT_PATTERN.fullmatch(text):
        text = f"{text[:-1]}T"
    if text.endswith("K"):
        text += "T"
    return text


class EmptyWind(RemoveItem):
    """Remove empty wind /////KT."""

    def can_handle(self, item: str) -> bool:
        """Return True if item ends with KT and the rest passes is_unknown."""
        return item.endswith("KT") and is_unknown(item[:-2])


# TODO: Generalize to find anywhere in wind. Maybe add to other wind sans?
class MisplaceWindKT(CleanItem):
    """Fix misplaced KT 22022KTG40."""

    def can_handle(self, item: str) -> bool:
        """Return True for a 10-character item with five leading digits and KTG."""
        return len(item) == 10 and "KTG" in item and item[:5].isdigit()

    def clean(self, item: str) -> str:
        """Collapse KTG to G and append KT at the end."""
        return item.replace("KTG", "G") + "KT"


class DoubleGust(CleanItem):
    """Fix gust double G.
    Ex: 360G17G32KT
    """

    def can_handle(self, item: str) -> bool:
        """Return True if item is longer than 10, ends with KT, and has G at index 3."""
        return len(item) > 10 and item.endswith("KT") and item[3] == "G"

    def clean(self, item: str) -> str:
        """Drop the character at index 3."""
        return item[:3] + item[4:]


class WindLeadingMistype(CleanItem):
    """Fix leading character mistypes in wind."""

    def can_handle(self, item: str) -> bool:
        """Return True for a KT item longer than 7 that starts with a stray character.

        Items starting with a digit, VRB, or WS are left alone.
        """
        return (
            len(item) > 7
            and not item[0].isdigit()
            and not item.startswith("VRB")
            and item.endswith("KT")
            and not item.startswith("WS")
        )

    def clean(self, item: str) -> str:
        """Strip leading characters until a digit or VRB starts the item."""
        while item and not item[0].isdigit() and not item.startswith("VRB"):
            item = item[1:]
        return item


class NonGGust(CleanItem):
    """Fix non-G gust.
    Ex: 14010-15KT
    """

    def can_handle(self, item: str) -> bool:
        """Return True for a 10-character KT item whose index 5 is not G."""
        return len(item) == 10 and item.endswith("KT") and item[5] != "G"

    def clean(self, item: str) -> str:
        """Replace the character at index 5 with G."""
        return f"{item[:5]}G{item[6:]}"


class RemoveVrbLeadingDigits(CleanItem):
    """Fix leading digits on VRB wind.
    Ex: 2VRB02KT
    """

    def can_handle(self, item: str) -> bool:
        """Return True for a KT item longer than 7 that has VRB, a leading digit, and no Z."""
        return len(item) > 7 and item.endswith("KT") and "VRB" in item and item[0].isdigit() and "Z" not in item

    def clean(self, item: str) -> str:
        """Strip leading digits; an empty or all-digit item yields an empty string."""
        # Guard against running off the end, matching WindLeadingMistype.clean
        while item and item[0].isdigit():
            item = item[1:]
        return item
WIND_REMV =
('/', '-', '{', '}', '(N)', '(E)', '(S)', '(W)')
WIND_REPL =
{'O': '0', '|': '1', 'MPSM': 'MPS', 'FG': 'G', 'GG': 'G', 'GT': 'G', 'GS': 'G', 'SQ': 'G', 'CT': 'KT', 'JT': 'KT', 'SM': 'KT', 'KTKT': 'KT', 'TK': 'KT', 'LKT': 'KT', 'ZKT': 'KT', 'KKT': 'KT', 'JKT': 'KT', 'KLT': 'KT', 'TKT': 'KT', 'GKT': 'KT', 'PKT': 'KT', 'XKT': 'KT', 'VRBL': 'VRB'}
WIND_VRB =
('WBB',)
KT_PATTERN =
re.compile('\\b[\\w\\d]*\\d{2}K[^T]\\b')
def
sanitize_wind(text: str) -> str:
def sanitize_wind(text: str) -> str:
    """Fix rare wind issues that may be too broad otherwise.

    Steps run in order: delete every WIND_REMV substring, apply each WIND_REPL
    substitution, rebuild a mangled leading "VRB", then repair a trailing "KT".

    Args:
        text: Raw wind element text.

    Returns:
        The element after all substitutions have been applied.
    """
    for rep in WIND_REMV:
        text = text.replace(rep, "")
    # Applied in the dict's insertion order
    for key, rep in WIND_REPL.items():
        text = text.replace(key, rep)
    if len(text) > 4 and not text.startswith("VRB") and not text[:3].isdigit():
        # Catches majority of cases where at least two valid letters are found
        if len(set(text[:4]).intersection({"V", "R", "B"})) > 1:
            # Everything before the first digit is replaced by VRB
            for i, char in enumerate(text):
                if char.isdigit():
                    text = f"VRB{text[i:]}"
                    break
        else:
            for key in WIND_VRB:
                if text.startswith(key):
                    zero = "0" if key[-1] == "0" else ""
                    # Only the matched prefix is the mistype, so replace once
                    text = text.replace(key, f"VRB{zero}", 1)
                    break
    # Final check for end units. Remainder of string would be fixed at this point if valid
    # For now, it's only checking for K(T) since that is most instances
    # The parser can still handle/expect missing and separated units
    # fullmatch: the last character is overwritten, so the pattern must reach it
    if KT_PATTERN.fullmatch(text):
        text = f"{text[:-1]}T"
    if text.endswith("K"):
        text += "T"
    return text
Fix rare wind issues that may be too broad otherwise.
class EmptyWind(RemoveItem):
    """Remove empty wind elements such as /////KT."""

    def can_handle(self, item: str) -> bool:
        """Return the is_unknown verdict on the text before a trailing KT."""
        if not item.endswith("KT"):
            return False
        body = item[:-2]
        return is_unknown(body)
Remove empty wind /////KT.
Inherited Members
class MisplaceWindKT(CleanItem):
    """Move a KT that was typed before the gust, e.g. 22022KTG40."""

    def can_handle(self, item: str) -> bool:
        """Return True for a 10-character item with five leading digits and KTG."""
        if len(item) != 10 or "KTG" not in item:
            return False
        return item[:5].isdigit()

    def clean(self, item: str) -> str:
        """Collapse KTG to G and append KT at the end."""
        without_unit = item.replace("KTG", "G")
        return f"{without_unit}KT"
Fix misplaced KT 22022KTG40.
def
can_handle(self, item: str) -> bool:
def can_handle(self, item: str) -> bool:
    """Return True for a 10-character item with five leading digits and KTG."""
    if len(item) != 10 or "KTG" not in item:
        return False
    return item[:5].isdigit()
Return True if the element can and needs to be cleaned.
Inherited Members
class DoubleGust(CleanItem):
    """Fix a gust written with two G markers.
    Ex: 360G17G32KT
    """

    def can_handle(self, item: str) -> bool:
        """Return True if item is longer than 10, ends with KT, and has G at index 3."""
        if len(item) <= 10:
            return False
        return item[3] == "G" and item.endswith("KT")

    def clean(self, item: str) -> str:
        """Drop the character at index 3."""
        head, tail = item[:3], item[4:]
        return f"{head}{tail}"
Fix gust double G. Ex: 360G17G32KT
def
can_handle(self, item: str) -> bool:
def can_handle(self, item: str) -> bool:
    """Return True if item is longer than 10, ends with KT, and has G at index 3."""
    if len(item) <= 10:
        return False
    return item[3] == "G" and item.endswith("KT")
Return True if the element can and needs to be cleaned.
Inherited Members
class WindLeadingMistype(CleanItem):
    """Strip mistyped characters from the front of a wind element."""

    def can_handle(self, item: str) -> bool:
        """Return True for a KT item longer than 7 that starts with a stray character.

        Items starting with a digit, VRB, or WS are left alone.
        """
        if len(item) <= 7 or not item.endswith("KT"):
            return False
        if item.startswith(("VRB", "WS")):
            return False
        return not item[0].isdigit()

    def clean(self, item: str) -> str:
        """Return item from the first digit or VRB onward, or "" if neither occurs."""
        for start, char in enumerate(item):
            if char.isdigit() or item.startswith("VRB", start):
                return item[start:]
        return ""
Fix leading character mistypes in wind.
def
can_handle(self, item: str) -> bool:
def can_handle(self, item: str) -> bool:
    """Return True for a KT item longer than 7 that starts with a stray character.

    Items starting with a digit, VRB, or WS are left alone.
    """
    if len(item) <= 7 or not item.endswith("KT"):
        return False
    if item.startswith(("VRB", "WS")):
        return False
    return not item[0].isdigit()
Return True if the element can and needs to be cleaned.
def
clean(self, item: str) -> str:
def clean(self, item: str) -> str:
    """Return item from the first digit or VRB onward, or "" if neither occurs."""
    for start, char in enumerate(item):
        if char.isdigit() or item.startswith("VRB", start):
            return item[start:]
    return ""
Clean the raw string.
Inherited Members
class NonGGust(CleanItem):
    """Fix a gust whose separator is not G.
    Ex: 14010-15KT
    """

    def can_handle(self, item: str) -> bool:
        """Return True for a 10-character KT item whose index 5 is not G."""
        if len(item) != 10:
            return False
        return item.endswith("KT") and item[5] != "G"

    def clean(self, item: str) -> str:
        """Replace the character at index 5 with G."""
        return item[:5] + "G" + item[6:]
Fix non-G gust. Ex: 14010-15KT
def
can_handle(self, item: str) -> bool:
def can_handle(self, item: str) -> bool:
    """Return True for a 10-character KT item whose index 5 is not G."""
    if len(item) != 10:
        return False
    return item.endswith("KT") and item[5] != "G"
Return True if the element can and needs to be cleaned.
Inherited Members
class RemoveVrbLeadingDigits(CleanItem):
    """Fix leading digits on VRB wind.
    Ex: 2VRB02KT
    """

    def can_handle(self, item: str) -> bool:
        """Return True for a KT item longer than 7 that has VRB, a leading digit, and no Z."""
        return len(item) > 7 and item.endswith("KT") and "VRB" in item and item[0].isdigit() and "Z" not in item

    def clean(self, item: str) -> str:
        """Strip leading digits; an empty or all-digit item yields an empty string."""
        # Without the emptiness guard, item[0] raises IndexError once every
        # character has been stripped
        while item and item[0].isdigit():
            item = item[1:]
        return item
Fix leading digits on VRB wind. Ex: 2VRB02KT
def
can_handle(self, item: str) -> bool:
def can_handle(self, item: str) -> bool:
    """Return True for a KT item longer than 7 that has VRB, a leading digit, and no Z."""
    if len(item) <= 7 or not item.endswith("KT"):
        return False
    if "VRB" not in item or "Z" in item:
        return False
    return item[0].isdigit()
Return True if the element can and needs to be cleaned.
def
clean(self, item: str) -> str:
def clean(self, item: str) -> str:
    """Strip leading digits; an empty or all-digit item yields an empty string."""
    # Without the emptiness guard, item[0] raises IndexError once every
    # character has been stripped
    while item and item[0].isdigit():
        item = item[1:]
    return item
Clean the raw string.