avwx.parsing.sanitization.cleaners.wind

Cleaners for wind elements.

  1"""Cleaners for wind elements."""
  2
  3import re
  4
  5from avwx.parsing.core import is_unknown
  6from avwx.parsing.sanitization.base import CleanItem, RemoveItem
  7
# Substrings that sanitize_wind strips from the wind text before any other fix
WIND_REMV = ("/", "-", "{", "}", "(N)", "(E)", "(S)", "(W)")

# Typo -> correction pairs that sanitize_wind applies with str.replace.
# The dict is walked in insertion order and every replacement operates on the
# result of the previous ones, so the ordering of the entries is significant.
WIND_REPL = {
    "O": "0",
    "|": "1",
    "MPSM": "MPS",  # conflict with SM
    "FG": "G",
    "GG": "G",
    "GT": "G",
    "GS": "G",
    "SQ": "G",
    "CT": "KT",
    "JT": "KT",
    "SM": "KT",
    "KTKT": "KT",  # Must come before TK
    "TK": "KT",
    "LKT": "KT",
    "ZKT": "KT",
    "KKT": "KT",
    "JKT": "KT",
    "KLT": "KT",
    "TKT": "KT",
    "GKT": "KT",
    "PKT": "KT",
    "XKT": "KT",
    "VRBL": "VRB",  # Not caught in WIND_VRB
}

# Mistyped leading strings that sanitize_wind swaps for VRB. Only consulted
# when fewer than two of the letters V/R/B appear in the first four characters.
WIND_VRB = ("WBB",)

# Matches, from the start of the text, word characters ending in two digits,
# a "K", and a single character other than "T" (e.g. a mistyped KT unit).
KT_PATTERN = re.compile(r"\b[\w\d]*\d{2}K[^T]\b")
 39
 40
 41def sanitize_wind(text: str) -> str:
 42    """Fix rare wind issues that may be too broad otherwise."""
 43    for rep in WIND_REMV:
 44        text = text.replace(rep, "")
 45    for key, rep in WIND_REPL.items():
 46        text = text.replace(key, rep)
 47    if len(text) > 4 and not text.startswith("VRB") and not text[:3].isdigit():
 48        # Catches majority of cases where at least two valid letters are found
 49        if len(set(text[:4]).intersection({"V", "R", "B"})) > 1:
 50            for i, char in enumerate(text):
 51                if char.isdigit():
 52                    text = f"VRB{text[i:]}"
 53                    break
 54        else:
 55            for key in WIND_VRB:
 56                if text.startswith(key):
 57                    zero = "0" if key[-1] == "0" else ""
 58                    text = text.replace(key, f"VRB{zero}")
 59                    break
 60    # Final check for end units. Remainder of string would be fixed at this point if valid
 61    # For now, it's only checking for K(T) since that is most instances
 62    # The parser can still handle/expect missing and spearated units
 63    if KT_PATTERN.match(text):
 64        text = f"{text[:-1]}T"
 65    if text.endswith("K"):
 66        text += "T"
 67    return text
 68
 69
 70class EmptyWind(RemoveItem):
 71    """Remove empty wind /////KT."""
 72
 73    def can_handle(self, item: str) -> bool:
 74        return item.endswith("KT") and is_unknown(item[:-2])
 75
 76
 77# TODO: Generalize to find anywhere in wind. Maybe add to other wind sans?
 78class MisplaceWindKT(CleanItem):
 79    """Fix misplaced KT 22022KTG40."""
 80
 81    def can_handle(self, item: str) -> bool:
 82        return len(item) == 10 and "KTG" in item and item[:5].isdigit()
 83
 84    def clean(self, item: str) -> str:
 85        return item.replace("KTG", "G") + "KT"
 86
 87
 88class DoubleGust(CleanItem):
 89    """Fix gust double G.
 90    Ex: 360G17G32KT
 91    """
 92
 93    def can_handle(self, item: str) -> bool:
 94        return len(item) > 10 and item.endswith("KT") and item[3] == "G"
 95
 96    def clean(self, item: str) -> str:
 97        return item[:3] + item[4:]
 98
 99
class WindLeadingMistype(CleanItem):
    """Strip stray characters typed in front of a wind group."""

    def can_handle(self, item: str) -> bool:
        """Return whether item is a long KT item with an unexpected first character.

        Items that already start with a digit, VRB or WS are left alone.
        """
        if len(item) <= 7 or not item.endswith("KT"):
            return False
        if item[0].isdigit():
            return False
        return not item.startswith(("VRB", "WS"))

    def clean(self, item: str) -> str:
        """Return item from its first digit or VRB onward; empty if neither exists."""
        for start, char in enumerate(item):
            if char.isdigit() or item.startswith("VRB", start):
                return item[start:]
        return ""
116
117
class NonGGust(CleanItem):
    """Put a G where another character separates speed and gust.
    Ex: 14010-15KT
    """

    def can_handle(self, item: str) -> bool:
        """Return whether item is 10 chars, ends in KT and lacks G at index 5."""
        has_wind_shape = len(item) == 10 and item.endswith("KT")
        return has_wind_shape and item[5] != "G"

    def clean(self, item: str) -> str:
        """Return item with the character at index 5 replaced by G."""
        before, after = item[:5], item[6:]
        return "G".join((before, after))
128
129
class RemoveVrbLeadingDigits(CleanItem):
    """Fix leading digits on VRB wind.
    Ex: 2VRB02KT
    """

    def can_handle(self, item: str) -> bool:
        """Return whether item is a KT item containing VRB that starts with a digit.

        Items of 7 characters or fewer, and items containing a Z, are skipped.
        """
        return (
            len(item) > 7
            and item.endswith("KT")
            and "VRB" in item
            and item[0].isdigit()
            and "Z" not in item
        )

    def clean(self, item: str) -> str:
        """Return item with every leading digit removed.

        An empty or all-digit string yields "" rather than raising.
        """
        # The emptiness guard mirrors WindLeadingMistype.clean: without it,
        # consuming the whole string makes item[0] raise IndexError.
        while item and item[0].isdigit():
            item = item[1:]
        return item
WIND_REMV = ('/', '-', '{', '}', '(N)', '(E)', '(S)', '(W)')
WIND_REPL = {'O': '0', '|': '1', 'MPSM': 'MPS', 'FG': 'G', 'GG': 'G', 'GT': 'G', 'GS': 'G', 'SQ': 'G', 'CT': 'KT', 'JT': 'KT', 'SM': 'KT', 'KTKT': 'KT', 'TK': 'KT', 'LKT': 'KT', 'ZKT': 'KT', 'KKT': 'KT', 'JKT': 'KT', 'KLT': 'KT', 'TKT': 'KT', 'GKT': 'KT', 'PKT': 'KT', 'XKT': 'KT', 'VRBL': 'VRB'}
WIND_VRB = ('WBB',)
KT_PATTERN = re.compile('\\b[\\w\\d]*\\d{2}K[^T]\\b')
def sanitize_wind(text: str) -> str:
42def sanitize_wind(text: str) -> str:
43    """Fix rare wind issues that may be too broad otherwise."""
44    for rep in WIND_REMV:
45        text = text.replace(rep, "")
46    for key, rep in WIND_REPL.items():
47        text = text.replace(key, rep)
48    if len(text) > 4 and not text.startswith("VRB") and not text[:3].isdigit():
49        # Catches majority of cases where at least two valid letters are found
50        if len(set(text[:4]).intersection({"V", "R", "B"})) > 1:
51            for i, char in enumerate(text):
52                if char.isdigit():
53                    text = f"VRB{text[i:]}"
54                    break
55        else:
56            for key in WIND_VRB:
57                if text.startswith(key):
58                    zero = "0" if key[-1] == "0" else ""
59                    text = text.replace(key, f"VRB{zero}")
60                    break
61    # Final check for end units. Remainder of string would be fixed at this point if valid
62    # For now, it's only checking for K(T) since that is most instances
63    # The parser can still handle/expect missing and separated units
64    if KT_PATTERN.match(text):
65        text = f"{text[:-1]}T"
66    if text.endswith("K"):
67        text += "T"
68    return text

Fix rare wind issues that may be too broad otherwise.

71class EmptyWind(RemoveItem):
72    """Remove empty wind /////KT."""
73
74    def can_handle(self, item: str) -> bool:
75        return item.endswith("KT") and is_unknown(item[:-2])

Remove empty wind /////KT.

def can_handle(self, item: str) -> bool:
74    def can_handle(self, item: str) -> bool:
75        return item.endswith("KT") and is_unknown(item[:-2])

Return True if the element can and needs to be cleaned.

class MisplaceWindKT(avwx.parsing.sanitization.cleaners.base.CleanItem):
79class MisplaceWindKT(CleanItem):
80    """Fix misplaced KT 22022KTG40."""
81
82    def can_handle(self, item: str) -> bool:
83        return len(item) == 10 and "KTG" in item and item[:5].isdigit()
84
85    def clean(self, item: str) -> str:
86        return item.replace("KTG", "G") + "KT"

Fix misplaced KT 22022KTG40.

def can_handle(self, item: str) -> bool:
82    def can_handle(self, item: str) -> bool:
83        return len(item) == 10 and "KTG" in item and item[:5].isdigit()

Return True if the element can and needs to be cleaned.

def clean(self, item: str) -> str:
85    def clean(self, item: str) -> str:
86        return item.replace("KTG", "G") + "KT"

Clean the raw string.

class DoubleGust(avwx.parsing.sanitization.cleaners.base.CleanItem):
89class DoubleGust(CleanItem):
90    """Fix gust double G.
91    Ex: 360G17G32KT
92    """
93
94    def can_handle(self, item: str) -> bool:
95        return len(item) > 10 and item.endswith("KT") and item[3] == "G"
96
97    def clean(self, item: str) -> str:
98        return item[:3] + item[4:]

Fix gust double G. Ex: 360G17G32KT

def can_handle(self, item: str) -> bool:
94    def can_handle(self, item: str) -> bool:
95        return len(item) > 10 and item.endswith("KT") and item[3] == "G"

Return True if the element can and needs to be cleaned.

def clean(self, item: str) -> str:
97    def clean(self, item: str) -> str:
98        return item[:3] + item[4:]

Clean the raw string.

class WindLeadingMistype(avwx.parsing.sanitization.cleaners.base.CleanItem):
101class WindLeadingMistype(CleanItem):
102    """Fix leading character mistypes in wind."""
103
104    def can_handle(self, item: str) -> bool:
105        return (
106            len(item) > 7
107            and not item[0].isdigit()
108            and not item.startswith("VRB")
109            and item.endswith("KT")
110            and not item.startswith("WS")
111        )
112
113    def clean(self, item: str) -> str:
114        while item and not item[0].isdigit() and not item.startswith("VRB"):
115            item = item[1:]
116        return item

Fix leading character mistypes in wind.

def can_handle(self, item: str) -> bool:
104    def can_handle(self, item: str) -> bool:
105        return (
106            len(item) > 7
107            and not item[0].isdigit()
108            and not item.startswith("VRB")
109            and item.endswith("KT")
110            and not item.startswith("WS")
111        )

Return True if the element can and needs to be cleaned.

def clean(self, item: str) -> str:
113    def clean(self, item: str) -> str:
114        while item and not item[0].isdigit() and not item.startswith("VRB"):
115            item = item[1:]
116        return item

Clean the raw string.

119class NonGGust(CleanItem):
120    """Fix non-G gust.
121    Ex: 14010-15KT
122    """
123
124    def can_handle(self, item: str) -> bool:
125        return len(item) == 10 and item.endswith("KT") and item[5] != "G"
126
127    def clean(self, item: str) -> str:
128        return f"{item[:5]}G{item[6:]}"

Fix non-G gust. Ex: 14010-15KT

def can_handle(self, item: str) -> bool:
124    def can_handle(self, item: str) -> bool:
125        return len(item) == 10 and item.endswith("KT") and item[5] != "G"

Return True if the element can and needs to be cleaned.

def clean(self, item: str) -> str:
127    def clean(self, item: str) -> str:
128        return f"{item[:5]}G{item[6:]}"

Clean the raw string.

class RemoveVrbLeadingDigits(avwx.parsing.sanitization.cleaners.base.CleanItem):
131class RemoveVrbLeadingDigits(CleanItem):
132    """Fix leading digits on VRB wind.
133    Ex: 2VRB02KT
134    """
135
136    def can_handle(self, item: str) -> bool:
137        return len(item) > 7 and item.endswith("KT") and "VRB" in item and item[0].isdigit() and "Z" not in item
138
139    def clean(self, item: str) -> str:
140        while item[0].isdigit():
141            item = item[1:]
142        return item

Fix leading digits on VRB wind. Ex: 2VRB02KT

def can_handle(self, item: str) -> bool:
136    def can_handle(self, item: str) -> bool:
137        return len(item) > 7 and item.endswith("KT") and "VRB" in item and item[0].isdigit() and "Z" not in item

Return True if the element can and needs to be cleaned.

def clean(self, item: str) -> str:
139    def clean(self, item: str) -> str:
140        while item[0].isdigit():
141            item = item[1:]
142        return item

Clean the raw string.