If you want to separate ALLCaps into all_caps and expect numbers in your string, you still don’t need two separate passes; just use | to combine the patterns. The expression ((?<=[a-z0-9])[A-Z]|(?!^)[A-Z](?=[a-z])) can handle just about every scenario in the book.
def to_snake_case(not_snake_case):
    """Convert a CamelCase, space- or underscore-separated name to snake_case.

    Every uppercase letter becomes "_" followed by its lowercase form. A space
    or underscore becomes a single "_", unless the next character is another
    separator or an uppercase letter (which contributes its own "_"), or the
    separator is the last character; in those cases it is dropped. One leading
    underscore is removed from the result.

    Returns "" for empty input.

    >>> to_snake_case("RegularExpressionsAreFunky")
    'regular_expressions_are_funky'
    >>> to_snake_case("RegularExpressionsAre Funky")
    'regular_expressions_are_funky'
    >>> to_snake_case("RegularExpressionsAre_Funky")
    'regular_expressions_are_funky'
    """
    separators = (" ", "_")
    last_index = len(not_snake_case) - 1
    pieces = []
    for i, item in enumerate(not_snake_case):
        if item in separators:
            # A trailing separator has nothing to separate.
            if i == last_index:
                continue
            following = not_snake_case[i + 1]
            # Skip it when the next character will emit an underscore anyway.
            if following in separators or following.isupper():
                continue
            pieces.append("_")
        elif item.isupper():
            pieces.append("_" + item.lower())
        else:
            pieces.append(item)
    final = "".join(pieces)
    # startswith() is safe on "", unlike indexing final[0].
    return final[1:] if final.startswith("_") else final
>>> to_snake_case("RegularExpressionsAreFunky")
'regular_expressions_are_funky'
>>> to_snake_case("RegularExpressionsAre Funky")
'regular_expressions_are_funky'
>>> to_snake_case("RegularExpressionsAre_Funky")
'regular_expressions_are_funky'
Personally I am not sure how anything using regular expressions in python can be described as elegant. Most answers here are just doing “code golf” type RE tricks. Elegant coding is supposed to be easily understood.
def to_snake_case(not_snake_case):
    """Convert a CamelCase, space- or underscore-separated name to snake_case.

    Every uppercase letter becomes "_" followed by its lowercase form. A space
    or underscore becomes a single "_", unless the next character is another
    separator or an uppercase letter (which contributes its own "_"), or the
    separator is the last character; in those cases it is dropped. One leading
    underscore is removed from the result.

    Returns "" for empty input.

    >>> to_snake_case("RegularExpressionsAreFunky")
    'regular_expressions_are_funky'
    >>> to_snake_case("RegularExpressionsAre Funky")
    'regular_expressions_are_funky'
    >>> to_snake_case("RegularExpressionsAre_Funky")
    'regular_expressions_are_funky'
    """
    separators = (" ", "_")
    last_index = len(not_snake_case) - 1
    pieces = []
    for i, item in enumerate(not_snake_case):
        if item in separators:
            # A trailing separator has nothing to separate.
            if i == last_index:
                continue
            following = not_snake_case[i + 1]
            # Skip it when the next character will emit an underscore anyway.
            if following in separators or following.isupper():
                continue
            pieces.append("_")
        elif item.isupper():
            pieces.append("_" + item.lower())
        else:
            pieces.append(item)
    final = "".join(pieces)
    # startswith() is safe on "", unlike indexing final[0].
    return final[1:] if final.startswith("_") else final
>>> to_snake_case("RegularExpressionsAreFunky")
'regular_expressions_are_funky'
>>> to_snake_case("RegularExpressionsAre Funky")
'regular_expressions_are_funky'
>>> to_snake_case("RegularExpressionsAre_Funky")
'regular_expressions_are_funky'
回答 5
re如果可能,我宁愿避免:
def to_camelcase(s):
    """Return ``s`` with each uppercase letter replaced by "_" + its lowercase form.

    Despite the name, the result is snake_case ("DeathToCamelCase" ->
    "death_to_camel_case"). All leading underscores are stripped from the
    result, including any that were already present in ``s``.
    """
    converted = []
    for ch in s:
        if ch.isupper():
            converted.append('_')
            converted.append(ch.lower())
        else:
            converted.append(ch)
    return ''.join(converted).lstrip('_')
# Generator-expression variant: prefix every uppercase letter with "_" and
# lowercase it, then strip underscores from both ends of the result.
''.join('_'+c.lower() if c.isupper() else c for c in "DeathToCamelCase").strip('_')
# Regex variant: insert "_" between a character and the uppercase letter that
# follows it, then lowercase the whole string.
re.sub("(.)([A-Z])", r'\1_\2', 'DeathToCamelCase').lower()
回答 7
我认为此解决方案比以前的答案更直接:
import re
def convert(camel_input):
    """Split ``camel_input`` into words and join them, lowercased, with "_".

    Words are found with ``re.findall`` using these alternatives, in order:

    * ``[A-Z]?[a-z]+`` - lowercase run, optionally led by one capital;
    * ``[A-Z]{2,}(?=[A-Z][a-z]|\\d|\\W|$)`` - an acronym, stopping before the
      capital that starts the next word;
    * ``\\d+`` - a run of digits;
    * ``[A-Z]+(?![a-z])`` - fallback for capitals that none of the above
      accept (a lone capital such as the "X" in "getX", or an acronym followed
      by "_"), which would otherwise be silently dropped from the output.

    Characters that match no alternative (punctuation, underscores) act as
    separators and do not appear in the result.
    """
    words = re.findall(
        r'[A-Z]?[a-z]+'
        r'|[A-Z]{2,}(?=[A-Z][a-z]|\d|\W|$)'
        r'|\d+'
        r'|[A-Z]+(?![a-z])',
        camel_input,
    )
    return '_'.join(map(str.lower, words))


# Let's test it
# Sample identifiers covering acronyms, digits and punctuation.
test_strings = [
    'CamelCase',
    'camelCamelCase',
    'Camel2Camel2Case',
    'getHTTPResponseCode',
    'get200HTTPResponseCode',
    'getHTTP200ResponseCode',
    'HTTPResponseCode',
    'ResponseHTTP',
    'ResponseHTTP2',
    'Fun?!awesome',
    'Fun?!Awesome',
    '10CoolDudes',
    '20coolDudes',
]

# Print the converted form of each sample, one per line.
for test_string in test_strings:
    print(convert(test_string))
[A-Z]?[a-z]+: Consecutive lower-case letters that optionally start with an upper-case letter.
[A-Z]{2,}(?=[A-Z][a-z]|\d|\W|$): Two or more consecutive upper-case letters. It uses a lookahead to exclude the last upper-case letter if it is followed by a lower-case letter.
\d+: Consecutive numbers.
By using re.findall we get a list of individual “words” that can be converted to lower-case and joined with underscores.
I don’t understand why both .sub() calls are used. :) I’m no regex guru, but I simplified the function to this one, which suits my particular needs; I just needed a solution to convert camelCasedVars from a POST request to vars_with_underscore:
It does not work with names like getHTTPResponse, because I have heard that this is a bad naming convention (it should be getHttpResponse; that form is obviously much easier to memorize).
回答 9
这是我的解决方案:
def un_camel(text):
    """ Converts a CamelCase name into an under_score name.

        An uppercase letter (other than the first character) gets a leading
        underscore when it follows a lowercase letter or is itself followed
        by a lowercase letter; every uppercase letter is lowercased.

        >>> un_camel('CamelCase')
        'camel_case'
        >>> un_camel('getHTTPResponseCode')
        'get_http_response_code'
        >>> un_camel('iPhone')
        'i_phone'
    """
    result = []
    last = len(text) - 1
    for pos, char in enumerate(text):
        if not char.isupper():
            result.append(char)
            continue
        # pos > 0 (not pos - 1 > 0): a capital at index 1 is a boundary too.
        follows_lower = pos > 0 and text[pos - 1].islower()
        # End of an acronym: this capital starts the next word ("HTTPRe...").
        starts_word = 0 < pos < last and text[pos + 1].islower()
        if follows_lower or starts_word:
            result.append("_" + char.lower())
        else:
            result.append(char.lower())
    return "".join(result)
def un_camel(text):
    """ Converts a CamelCase name into an under_score name.

        An uppercase letter (other than the first character) gets a leading
        underscore when it follows a lowercase letter or is itself followed
        by a lowercase letter; every uppercase letter is lowercased.

        >>> un_camel('CamelCase')
        'camel_case'
        >>> un_camel('getHTTPResponseCode')
        'get_http_response_code'
        >>> un_camel('iPhone')
        'i_phone'
    """
    result = []
    last = len(text) - 1
    for pos, char in enumerate(text):
        if not char.isupper():
            result.append(char)
            continue
        # pos > 0 (not pos - 1 > 0): a capital at index 1 is a boundary too.
        follows_lower = pos > 0 and text[pos - 1].islower()
        # End of an acronym: this capital starts the next word ("HTTPRe...").
        starts_word = 0 < pos < last and text[pos + 1].islower()
        if follows_lower or starts_word:
            result.append("_" + char.lower())
        else:
            result.append(char.lower())
    return "".join(result)
It supports those corner cases discussed in the comments. For instance, it’ll convert getHTTPResponseCode to get_http_response_code like it should.
回答 10
有趣的是:
>>> def un_camel(input):
...     output = [input[0].lower()]
...     for c in input[1:]:
...         if c in ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'):
...             output.append('_')
...             output.append(c.lower())
...         else:
...             output.append(c)
...     return str.join('', output)
...
>>> un_camel("camel_case")
'camel_case'
>>> un_camel("CamelCase")
'camel_case'
或者,更多乐趣在于:
>>> un_camel = lambda i: i[0].lower() + str.join('', ("_" + c.lower() if c in "ABCDEFGHIJKLMNOPQRSTUVWXYZ" else c for c in i[1:]))
>>> un_camel("camel_case")
'camel_case'
>>> un_camel("CamelCase")
'camel_case'
>>> def un_camel(input):
... output = [input[0].lower()]
... for c in input[1:]:
... if c in ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'):
... output.append('_')
... output.append(c.lower())
... else:
... output.append(c)
... return str.join('', output)
...
>>> un_camel("camel_case")
'camel_case'
>>> un_camel("CamelCase")
'camel_case'
Or, more for the fun of it:
>>> un_camel = lambda i: i[0].lower() + str.join('', ("_" + c.lower() if c in "ABCDEFGHIJKLMNOPQRSTUVWXYZ" else c for c in i[1:]))
>>> un_camel("camel_case")
'camel_case'
>>> un_camel("CamelCase")
'camel_case'
回答 11
使用正则表达式可能是最短的,但是此解决方案更具可读性:
def to_snake_case(s):
    """Return ``s`` with every uppercase letter replaced by "_" + its lowercase form.

    A single leading underscore, if the result starts with one, is removed.
    """
    pieces = []
    for ch in s:
        pieces.append("_" + ch.lower() if ch.isupper() else ch)
    snake = "".join(pieces)
    if snake.startswith("_"):
        return snake[1:]
    return snake
Using regexes may be the shortest, but this solution is way more readable:
def to_snake_case(s):
    """Return ``s`` with every uppercase letter replaced by "_" + its lowercase form.

    A single leading underscore, if the result starts with one, is removed.
    """
    pieces = []
    for ch in s:
        pieces.append("_" + ch.lower() if ch.isupper() else ch)
    snake = "".join(pieces)
    if snake.startswith("_"):
        return snake[1:]
    return snake
回答 12
如此众多的复杂方法…只需找到所有“ Titled”组并将其小写变体加下划线即可。
>>> import re
>>> def camel_to_snake(string):
...     groups = re.findall('([A-z0-9][a-z]*)', string)
...     return '_'.join([i.lower() for i in groups])
...
>>> camel_to_snake('ABCPingPongByTheWay2KWhereIsOurBorderlands3???')
'a_b_c_ping_pong_by_the_way_2_k_where_is_our_borderlands_3'
So many complicated methods… Just find every “Titled” group and join the lower-cased groups with underscores.
>>> import re
>>> def camel_to_snake(string):
... groups = re.findall('([A-z0-9][a-z]*)', string)
... return '_'.join([i.lower() for i in groups])
...
>>> camel_to_snake('ABCPingPongByTheWay2KWhereIsOurBorderlands3???')
'a_b_c_ping_pong_by_the_way_2_k_where_is_our_borderlands_3'
If you don’t want digits to start a group or to form a separate group, you can use the ([A-z][a-z0-9]*) mask instead.
def splitSymbol(s):
    """Yield the words that make up the identifier ``s``, in order, case preserved.

    A new word starts at a lowercase-to-uppercase change, at the last capital
    of an uppercase run that is followed by a lowercase letter
    ("HTTPResponse" -> "HTTP", "Response"), and wherever letters and digits
    meet. Any other character (underscore, space, punctuation) ends the
    current word and is itself discarded.

    Implemented as a bit-field state machine: ``state`` records which kind of
    next character closes the word currently being collected.
    """
    # State bits.
    LOWER_ENDS_WORD = 1        # a lowercase letter closes the word
    LOWER_ENDS_WORD_BACK = 2   # ...and the word ends one character earlier
    UPPER_ENDS_WORD = 4        # an uppercase letter closes the word
    DIGIT_ENDS_WORD = 8        # a digit closes the word
    OTHER_ENDS_WORD = 16       # any other character closes the word
    IN_UPPER_RUN = 32          # the previous character was uppercase

    si = 0      # start index of the word being collected
    state = 0   # 0 means no word is open
    for ci, c in enumerate(s):
        if c.islower():
            if state & LOWER_ENDS_WORD:
                yield s[si:ci]
                si = ci
            elif state & LOWER_ENDS_WORD_BACK:
                # The capital just before this letter starts the new word.
                yield s[si:ci - 1]
                si = ci - 1
            state = UPPER_ENDS_WORD | DIGIT_ENDS_WORD | OTHER_ENDS_WORD
        elif c.isupper():
            if state & UPPER_ENDS_WORD:
                yield s[si:ci]
                si = ci
            if state & IN_UPPER_RUN:
                state = (LOWER_ENDS_WORD_BACK | DIGIT_ENDS_WORD
                         | OTHER_ENDS_WORD | IN_UPPER_RUN)
            else:
                state = DIGIT_ENDS_WORD | OTHER_ENDS_WORD | IN_UPPER_RUN
        elif c.isdigit():
            if state & DIGIT_ENDS_WORD:
                yield s[si:ci]
                si = ci
            state = LOWER_ENDS_WORD | UPPER_ENDS_WORD | OTHER_ENDS_WORD
        else:
            if state & OTHER_ENDS_WORD:
                yield s[si:ci]
            state = 0
            si = ci + 1  # skip the separator itself
    if state:
        yield s[si:]


def camelcaseToUnderscore(s):
    """Join the words yielded by ``splitSymbol(s)`` with "_"; case is left as-is."""
    return '_'.join(splitSymbol(s))
This is not an elegant method; it is a very low-level implementation of a simple state machine (a bit-field state machine), and possibly the most un-Pythonic way to solve this. However, the re module also implements an overly complex state machine for this simple task, so I think this is a good solution.
def splitSymbol(s):
    """Yield the words that make up the identifier ``s``, in order, case preserved.

    A new word starts at a lowercase-to-uppercase change, at the last capital
    of an uppercase run that is followed by a lowercase letter
    ("HTTPResponse" -> "HTTP", "Response"), and wherever letters and digits
    meet. Any other character (underscore, space, punctuation) ends the
    current word and is itself discarded.

    Implemented as a bit-field state machine: ``state`` records which kind of
    next character closes the word currently being collected.
    """
    # State bits.
    LOWER_ENDS_WORD = 1        # a lowercase letter closes the word
    LOWER_ENDS_WORD_BACK = 2   # ...and the word ends one character earlier
    UPPER_ENDS_WORD = 4        # an uppercase letter closes the word
    DIGIT_ENDS_WORD = 8        # a digit closes the word
    OTHER_ENDS_WORD = 16       # any other character closes the word
    IN_UPPER_RUN = 32          # the previous character was uppercase

    si = 0      # start index of the word being collected
    state = 0   # 0 means no word is open
    for ci, c in enumerate(s):
        if c.islower():
            if state & LOWER_ENDS_WORD:
                yield s[si:ci]
                si = ci
            elif state & LOWER_ENDS_WORD_BACK:
                # The capital just before this letter starts the new word.
                yield s[si:ci - 1]
                si = ci - 1
            state = UPPER_ENDS_WORD | DIGIT_ENDS_WORD | OTHER_ENDS_WORD
        elif c.isupper():
            if state & UPPER_ENDS_WORD:
                yield s[si:ci]
                si = ci
            if state & IN_UPPER_RUN:
                state = (LOWER_ENDS_WORD_BACK | DIGIT_ENDS_WORD
                         | OTHER_ENDS_WORD | IN_UPPER_RUN)
            else:
                state = DIGIT_ENDS_WORD | OTHER_ENDS_WORD | IN_UPPER_RUN
        elif c.isdigit():
            if state & DIGIT_ENDS_WORD:
                yield s[si:ci]
                si = ci
            state = LOWER_ENDS_WORD | UPPER_ENDS_WORD | OTHER_ENDS_WORD
        else:
            if state & OTHER_ENDS_WORD:
                yield s[si:ci]
            state = 0
            si = ci + 1  # skip the separator itself
    if state:
        yield s[si:]
def camelcaseToUnderscore(s):
    """Join the words yielded by ``splitSymbol(s)`` with "_"; case is left as-is."""
    words = splitSymbol(s)
    return '_'.join(words)
splitSymbol can parse all case types: UpperSEQUENCEInterleaved, under_score, BIG_SYMBOLS and camelCasedMethods.
def uncamelize(s):
    """Split ``s`` before every uppercase letter and join the lowercased parts with "_".

    Each uppercase letter starts a new part, so a run of capitals is split
    letter by letter ("HTTPCode" -> "h_t_t_p_code").
    """
    words = []
    current = ''
    for ch in s:
        # Close the part collected so far when a capital begins a new one.
        if ch.isupper() and current:
            words.append(current)
            current = ''
        current += ch
    words.append(current)
    return '_'.join(words).lower()
import re
def convert(name):
    """Convert a CamelCase ``name`` to lowercase words separated by "_".

    Each match of ``([A-Z]*)([A-Z][a-z]+)`` is an optional acronym followed by
    a capitalised word. Underscores are inserted:

    * before the match, when it directly follows unmatched text ending in a
      lowercase letter or digit (e.g. the "get" in "getHTTPResponse");
    * between the acronym and the word, when an acronym is present;
    * after the match, unless it ends the string or is already followed by "_".

    Trailing underscores are stripped and the result is lowercased.
    """
    pieces = []
    cursor = 0  # end of the previous match
    for match in re.finditer(r'([A-Z]*)([A-Z][a-z]+)', name):
        acronym, word = match.groups()
        start, end = match.span()
        gap = name[cursor:start]
        pieces.append(gap)
        # Without this, "camelCase" would come out as "camelcase".
        if gap and (gap[-1].islower() or gap[-1].isdigit()):
            pieces.append('_')
        if acronym:
            pieces.append(acronym + '_')
        pieces.append(word)
        # Avoid doubling an underscore that is already in the input.
        if end < len(name) and name[end] != '_':
            pieces.append('_')
        cursor = end
    pieces.append(name[cursor:])
    return ''.join(pieces).rstrip('_').lower()
We look for a capital letter that is preceded by any number of (or zero) capital letters and followed by one or more lowercase characters.
An underscore is placed just before the occurrence of the last capital letter found in the group when that capital letter is preceded by other capital letters, and another is placed after the group.
If there are trailing underscores, remove those.
Finally, the whole result string is changed to lower case.
EDIT: It should also be pretty easy to see that there’s room for improvement for cases like “Test”, because the underscore is unconditionally inserted.
Here’s something I did to change the headers on a tab-delimited file. I’m omitting the part where I only edited the first line of the file. You could adapt it to Python pretty easily with the re library. This also includes separating out numbers (but keeps the digits together). I did it in two steps because that was easier than telling it not to put an underscore at the start of a line or tab.
Step One…find uppercase letters or integers preceded by lowercase letters, and precede them with an underscore:
Search:
([a-z]+)([A-Z]|[0-9]+)
Replacement:
\1_\l\2/
Step Two…take the above and run it again to convert all caps to lowercase:
Concise without regular expressions, but HTTPResponseCode=> httpresponse_code:
def from_camel(name):
    """
    ThisIsCamelCase ==> this_is_camel_case

    Existing underscores are removed first. An underscore is then inserted
    after every non-uppercase character that is immediately followed by an
    uppercase one, and the result is lowercased. Runs of capitals are not
    split (HTTPResponseCode ==> httpresponse_code), and a boundary after the
    very first character is not split either (only indices >= 1 are examined).
    """
    name = name.replace("_", "")
    # Indices whose character closes a word: not uppercase, next one uppercase.
    boundaries = {
        pos
        for pos in range(1, len(name) - 1)
        if not name[pos].isupper() and name[pos + 1].isupper()
    }
    return "".join(
        ch + "_" if pos in boundaries else ch
        for pos, ch in enumerate(name.lower())
    )
def camelify(out):
    """Return ``out`` lowercased, with "_" inserted at CamelCase word boundaries.

    A capital followed by a lowercase letter gets "_" in front of it; a
    lowercase letter followed by a capital gets "_" after it. Leading
    underscores are then stripped and each "__" pair is replaced by "_".
    """
    final_index = len(out) - 1
    pieces = []
    for i, x in enumerate(out):
        has_next = i < final_index
        if has_next and x.isupper() and out[i + 1].islower():
            pieces.append("_" + x.lower())
        elif has_next and x.islower() and out[i + 1].isupper():
            pieces.append(x.lower() + "_")
        else:
            pieces.append(x.lower())
    return "".join(pieces).lstrip('_').replace('__', '_')
import re
def camelcase_to_underscore(s):
    """Convert a CamelCase string to lowercase words separated by "_".

    An underscore is inserted between a lowercase letter and the capital that
    follows it, then the whole string is lowercased so that capitals not
    preceded by a lowercase letter (acronyms, capitals after digits) do not
    survive in the output. Runs of capitals are not split
    ("getHTTPResponse" -> "get_httpresponse").
    """
    separated = re.sub(r'([a-z])([A-Z])', r'\1_\2', s)
    return separated.lower()
如果您愿意的话,显然可以对速度进行一点点优化。
import re
# A capital letter at the start of the string or right after a lowercase letter.
CC2US_RE = re.compile(r'(^|[a-z])([A-Z])')


def _replace(match):
    """Lowercase the non-empty groups of ``match`` and join them with "_"."""
    return '_'.join(part.lower() for part in match.groups() if part)


def camelcase_to_underscores(s):
    """Convert a CamelCase string to lowercase words separated by "_".

    The final ``lower()`` covers capitals the pattern never matches (those
    not preceded by a lowercase letter), so no uppercase letter is left in
    the result. Runs of capitals are not split
    ("getHTTPResponse" -> "get_httpresponse").
    """
    return CC2US_RE.sub(_replace, s).lower()
import re
def camelcase_to_underscore(s):
    """Convert a CamelCase string to lowercase words separated by "_".

    An underscore is inserted between a lowercase letter and the capital that
    follows it, then the whole string is lowercased so that capitals not
    preceded by a lowercase letter (acronyms, capitals after digits) do not
    survive in the output. Runs of capitals are not split
    ("getHTTPResponse" -> "get_httpresponse").
    """
    separated = re.sub(r'([a-z])([A-Z])', r'\1_\2', s)
    return separated.lower()
This could obviously be optimized for speed a tiny bit if you want to.
import re
# A capital letter at the start of the string or right after a lowercase letter.
CC2US_RE = re.compile(r'(^|[a-z])([A-Z])')


def _replace(match):
    """Lowercase the non-empty groups of ``match`` and join them with "_"."""
    return '_'.join(part.lower() for part in match.groups() if part)


def camelcase_to_underscores(s):
    """Convert a CamelCase string to lowercase words separated by "_".

    The final ``lower()`` covers capitals the pattern never matches (those
    not preceded by a lowercase letter), so no uppercase letter is left in
    the result. Runs of capitals are not split
    ("getHTTPResponse" -> "get_httpresponse").
    """
    return CC2US_RE.sub(_replace, s).lower()
回答 28
def convert(camel_str):
temp_list =[]for letter in camel_str:if letter.islower():
temp_list.append(letter)else:
temp_list.append('_')
temp_list.append(letter)
result ="".join(temp_list)return result.lower()
def convert(camel_str):
    """Convert a camelCase or CamelCase string to snake_case.

    An underscore is inserted before each uppercase letter, except at the
    start of the string or directly after an existing underscore. Digits,
    underscores and other non-letters are passed through unchanged. The
    result is lowercased.
    """
    pieces = []
    for index, letter in enumerate(camel_str):
        # Only capitals mark a word boundary; never emit a leading or doubled "_".
        if letter.isupper() and index and camel_str[index - 1] != '_':
            pieces.append('_')
        pieces.append(letter)
    return "".join(pieces).lower()