본문 바로가기

[IT/Programming]/Algorithm/Database

Regular Expression (정규 표현식), and match/replace method in JavaScript, JAVA, and Python

반응형
# Regular Expression (정규 표현식), and match/replace method in JavaScript, JAVA, and Python
정규 표현식 테스트. replace, match, split 등에 쓰임. 꽤나 유용. 새로운 프로그래밍 언어(?)를 만들 때에도 필수적으로 쓰일 듯함. JavaScript 말고도 대부분의 언어 (Java, C, C++, LabVIEW, PHP, Perl 등등) 에서 이런 게 다들 비슷하게 구현되어 있음. 그런데 비슷하긴 한데, 다들 조금씩은 달라서 헷갈리는 게 많긴 한 듯. 특히 JAVA, Python 이... JavaScript 가 제일 깔끔한 듯한 느낌적인 느낌.
## PH
  • 2024-02-24 : Python 추가.
  • 2024-01-07 : Small edit. (?:)
  • 2015-12-15 : ref 하나 추가. To SEE (version up).
  • 2015-01-20 : First posting. (이 이전에 작성한거 같긴 한데... 정확한 날짜 찾기 귀찮.)
## TOC
## JavaScript
### match, replace




### exec, replace

```[.linenums.lang-html#pre-code-regex1]
```/



#### Example



### 캡처하지 않는 그룹화 (?:), Lookahead (?=), Negative Lookahead (?!), Lookbehind (?<=), Negative Lookbehind (?<!) ```[.linenums.lang-html#pre-code-regex-not-group] ```/ ## JAVA ```[.linenums.lang-java] import java.util.regex.Pattern; import java.util.regex.Matcher; public class RegExExample { public static void main(String... args) { Pattern p=Pattern.compile("a*b"); Matcher m=p.matcher("aaaaab"); boolean b=m.matches(); System.out.println(b); String fileStr="This is [--something--] stupid."; Pattern ptnReplacer=Pattern.compile("\\[--[\\s\\S]+?--\\]"); Matcher matchReplacer=ptnReplacer.matcher(fileStr); int start=0; while (start<fileStr.length()) { if (matchReplacer.find(start)) { System.out.println( "before : "+fileStr.substring(start, matchReplacer.start()) ); System.out.println( "matched : "+matchReplacer.group() ); start=matchReplacer.end(); } else { System.out.println( "before : "+fileStr.substring(start) ); System.out.println( "No match." ); start=fileStr.length(); } } // replaced=replaced.replaceAll(Pattern.quote(langMap.get(k,0)), Matcher.quoteReplacement(langMap.get(k,i))); } } ```/ ### FileMap.java ```[.scrollable.lang-java] package recoeve.db; import java.lang.StringBuilder; import java.util.Set; import java.util.HashSet; import java.util.ArrayList; import java.util.Map; import java.util.HashMap; import java.util.regex.Pattern; import java.util.regex.Matcher; import java.io.File; import java.io.FileReader; import java.io.IOException; public class FileMap { private static final String[] referersAllowed={ "localhost" , "recoeve.net" // , "www.recoeve.net" // , "127.0.0.1" // , "172.30.1.18" , "kipid.tistory.com", "tistory1.daumcdn.net" }; private static final String filePath="C:/Recoeve/CDN/"; private static final String[] fileNames={ "favicon.ico" , "jquery.js" , "link.png", "icon-Twitter.png", "icon-Facebook.png", "icon-Kakao.png", "icon-Recoeve.png", "icon-Whatsapp.png" , "cc.png", "by.png", "nc.png", "nd.png" , "docuK-2.3.css", "docuK-prepare-2.3.js", 
"docuK-postProcess-2.3.js" , "right-to-commit-suicide.html" }; private static final String txtFilePath="C:/Recoeve/sources/recoeve/db/html/"; private static final String[] txtFileNames={ "jquery.js", "prepare.js" , "robots.txt" , "log-in.html", "verify.html", "changePwd.html", "log-out.html" , "user-page.html", "remember-me.html" }; private static final int txtFileMapSize=100; private static final int fileLangMapSize=100; // # of languages translated to support. public static Set<String> refererSet; public static Map<String, String> fileMap; // fileMap.get("fileName") public static Map<String, Map<String, String>> txtFileMap; // txtFileMap.get("txtFileName").get("lang") public static StrArray langMap; public static final Pattern ptnReplacer=Pattern.compile("\\[--[\\s\\S]+?--\\]"); static { refererSet=new HashSet<String>(); for (String referer: referersAllowed) { refererSet.add(referer); } fileMap=new HashMap<String, String>(); for (String fileName: fileNames) { fileMap.put(fileName, filePath+fileName); } txtFileMap=new HashMap<String, Map<String, String>>(txtFileMapSize); File file=null; String fileStr=null; file=new File(txtFilePath+"lang.txt"); if (file.exists()) { try { StringBuilder sb=new StringBuilder(); int ch; FileReader reader=new FileReader(file); while((ch=reader.read())!=-1) { sb.append((char)ch); } reader.close(); fileStr=sb.toString(); } catch (IOException e) { System.out.println(e); } finally { file=null; } } langMap=new StrArray(fileStr, true, true); // System.out.println(langMap); fileStr=null; for (String txtFileName: txtFileNames) { file=new File(txtFilePath+txtFileName); if (file.exists()) { try { StringBuilder sb=new StringBuilder(); int ch; FileReader reader=new FileReader(file); while((ch=reader.read())!=-1) { sb.append((char)ch); } reader.close(); fileStr=sb.toString(); } catch (IOException e) { System.out.println(e); } finally { file=null; } } if (fileStr!=null) { txtFileMap.put(txtFileName, new HashMap<String, String>(fileLangMapSize)); 
Map<String, String> fileLangMap=txtFileMap.get(txtFileName); fileLangMap.put("df", fileStr); // default. ArrayList<String> strList=strToList(fileStr); if (strList.size()>1) { int colSize=langMap.getColSizeAtRow(0); for (int k=2;k<colSize;k++) { String lang=langMap.get(0,k); if (!lang.equals("desc")) { fileLangMap.put(lang, replaceStr(strList, lang)); // after replacing langMap. } } } fileStr=null; } } } public FileMap() {} public static boolean refererAllowed(String host) { // return refererSet.contains(host); return true; } public static String getCDNFile(String fileName) { return fileMap.get(fileName); } public static ArrayList<String> strToList(String fileStr) { if (fileStr==null) { return null; } int start=0; Matcher matchReplacer=ptnReplacer.matcher(fileStr); ArrayList<String> strList=new ArrayList<String>(); while (start<fileStr.length()) { if (matchReplacer.find(start)) { strList.add(fileStr.substring(start, matchReplacer.start())); strList.add(matchReplacer.group()); start=matchReplacer.end(); } else { strList.add(fileStr.substring(start)); start=fileStr.length(); } } return strList; } public static String replaceStr(ArrayList<String> strList, String lang) { String strReplaced=""; String replaced=null; for (int i=0;i<strList.size();i++) { if (i%2==0) { strReplaced+=strList.get(i); } else { replaced=langMap.get(strList.get(i), lang); if (replaced==null||replaced.isEmpty()||replaced.equals("-")) { replaced=langMap.get(strList.get(i), "en"); // "en" is default lang. } if (replaced==null) { replaced=strList.get(i); } strReplaced+=replaced; } } return strReplaced; } public static String replaceStr(String str, String lang) { return replaceStr(strToList(str), lang); } public static String get(String txtFileName, String lang) { Map<String, String> fileLangMap=txtFileMap.get(txtFileName); if (fileLangMap==null) {return null;} String res=fileLangMap.get(lang); if (res==null) { res=fileLangMap.get("df"); } return res; } public static void main(String... 
args) { System.out.println(FileMap.replaceStr("[--Reco--] [--Edit--]", "ko")); // System.out.println(FileMap.refererAllowed("localhost")); // System.out.println(Pattern.quote("[a-d]")); } } ```/ ### FileMapWithVar.java ```[.scrollable.lang-java] package recoeve.db; import java.lang.StringBuilder; import java.util.ArrayList; import java.util.Map; import java.util.HashMap; import java.util.regex.Pattern; import java.util.regex.Matcher; import java.io.File; import java.io.FileReader; import java.io.IOException; public class FileMapWithVar { private static String filePath="C:/Recoeve/sources/recoeve/db/html/"; private static final String[] fileNames={ "user-page.html" , "signed-up.html" , "reco.html" , "multireco.html" // , "redirect.html", "remember-me.html" }; private static final int fileMapSize=50; private static final int fileLangMapSize=10; public static Map<String, Map<String, ArrayList<String>>> fileMap; // fileMap.get("fileName").get("lang") public static final Pattern ptnReplacer=Pattern.compile("\\[--[\\s\\S]+?--\\]"); public static final Pattern ptnVariable=Pattern.compile("\\{--[\\s\\S]+?--\\}"); static { fileMap=new HashMap<String, Map<String, ArrayList<String>>>(fileMapSize); File file=null; String fileStr=null; file=new File(filePath+"lang.txt"); if (file.exists()) { try { StringBuilder sb=new StringBuilder(); int ch; FileReader reader=new FileReader(file); while((ch=reader.read())!=-1) { sb.append((char)ch); } reader.close(); fileStr=sb.toString(); } catch (IOException e) { System.out.println(e); } finally { file=null; } } StrArray langMap=new StrArray(fileStr, true, true); // System.out.println(langMap); fileStr=null; for (String fileName: fileNames) { file=new File(filePath+fileName); if (file.exists()) { try { StringBuilder sb=new StringBuilder(); int ch; FileReader reader=new FileReader(file); while((ch=reader.read())!=-1) { sb.append((char)ch); } reader.close(); fileStr=sb.toString(); } catch (IOException e) { System.out.println(e); } finally { 
file=null; } } if (fileStr!=null) { // System.out.println("\nfileName : "+fileName); fileMap.put(fileName, new HashMap<String, ArrayList<String>>(fileLangMapSize)); Map<String, ArrayList<String>> fileLangMap=fileMap.get(fileName); ArrayList<String> strListVars=new ArrayList<String>(); Matcher matchVariable=ptnVariable.matcher(fileStr); // default int start=0; while (start<fileStr.length()) { if (matchVariable.find(start)) { strListVars.add(fileStr.substring(start, matchVariable.start())); strListVars.add(matchVariable.group()); start=matchVariable.end(); } else { strListVars.add(fileStr.substring(start)); start=fileStr.length(); } } fileLangMap.put("df", strListVars); // default. ArrayList<String> strList=new ArrayList<String>(); Matcher matchReplacer=ptnReplacer.matcher(fileStr); start=0; while (start<fileStr.length()) { if (matchReplacer.find(start)) { strList.add(fileStr.substring(start, matchReplacer.start())); strList.add(matchReplacer.group()); start=matchReplacer.end(); } else { strList.add(fileStr.substring(start)); start=fileStr.length(); } } if (strList.size()>1) { int colSize=langMap.getColSizeAtRow(0); for (int k=2;k<colSize;k++) { String lang=langMap.get(0,k); if (!lang.equals("desc")) { String strReplaced=""; String replaced=null; for (int i=0;i<strList.size();i++) { if (i%2==0) { strReplaced+=strList.get(i); } else { replaced=langMap.get(strList.get(i), lang); if (replaced==null||replaced.isEmpty()||replaced.equals("-")) { replaced=langMap.get(strList.get(i), "en"); // "en" is default lang. 
} if (replaced==null) { replaced=strList.get(i); } strReplaced+=replaced; } } strListVars=new ArrayList<String>(); matchVariable=ptnVariable.matcher(strReplaced); // [--lang--] replaced start=0; while (start<strReplaced.length()) { if (matchVariable.find(start)) { strListVars.add(strReplaced.substring(start, matchVariable.start())); strListVars.add(matchVariable.group()); start=matchVariable.end(); } else { strListVars.add(strReplaced.substring(start)); start=strReplaced.length(); } } fileLangMap.put(lang, strListVars); // after replacing langMap. } } } fileStr=null; } } } public FileMapWithVar() {} public static String get(String fileName, String lang, Map<String,String> varMap) { Map<String, ArrayList<String>> fileLangMap=fileMap.get(fileName); if (fileLangMap==null) {return null;} ArrayList<String> strList=fileLangMap.get(lang); if (strList==null) { strList=fileLangMap.get("df"); } String res=""; String replaced=null; for (int i=0;i<strList.size();i++) { if (i%2==0) { res+=strList.get(i); } else { replaced=varMap.get(strList.get(i)); if (replaced==null) { replaced=strList.get(i); } res+=replaced; } } return res; } public static void main(String... args) { Map<String,String> varMap=new HashMap<String,String>(); varMap.put("{--userIndex--}", "10000"); varMap.put("{--userId--}", "id"); varMap.put("{--user email--}", "id@email.com"); // System.out.println(FileMapWithVar.get("multireco.html", "df", null)); } } ```/ ### Recoeve.net - Lang.txt 출처: Google docu :: Recoeve.net - Lang.txt
## Python ```[.scrollable.lang-py] # Regular Expressions in Python import re pattern = re.compile("^[A-Z]+$") print(pattern.search("Hello World")) print(pattern.search("HELLO WORLD")) print(pattern.search("HELLOWORLD")) match = pattern.search("HELLOWORLD") print(match) # <re.Match object; span=(0, 10), match='HELLOWORLD'> print(match.group(0)) # HELLOWORLD # print(match.group(1)) # IndexError: no such group print() pattern = re.compile("[A-Z]{3}") matches = pattern.finditer("HELLOWORLD") print(matches) # <callable_iterator object at 0x000001CBF4327E20> print() for m in matches: print(m) """ <re.Match object; span=(0, 3), match='HEL'> <re.Match object; span=(3, 6), match='LOW'> <re.Match object; span=(6, 9), match='ORL'> """ print() findall = pattern.findall("HELLOWORLD") print(findall) # ['HEL', 'LOW', 'ORL'] pattern1 = re.compile("[^\\w]+") findall = pattern1.findall("This: is a pencil.") print(findall) # [': ', ' ', ' ', '.'] str = "This: is a pencil_." strReplaced = re.sub(r"[^\w]+", "", str).lower() # \w = [a-zA-Z0-9_] print(strReplaced) # thisisapencil_ print(strReplaced[::-1]) # _licnepasisiht print() p = "a***" p = re.sub(r"\*+", "*", p) print(p) # a* ```/ ## RRA

    Regular Expression

  1. Wiki - Regular Expressions (정규 표현식)
  2. opentutorials.org - 정규 표현식
  3. www.regexper.com; 정규 표현식을 시각화해서 보여주는 도구
  4. JavaScript

  5. inpa.tistory.com :: 📚 JavaScript 정규 표현식 문법 총정리 + 응용 예제
  6. MDN - JavaScript Guide - Regular Expressions; and MDN - RegExp.exec()
  7. w3schools.com - JavaScript RegExp Object
  8. Stackoverflow - How do you pass a variable to a Regular Expression JavaScript?, 2009-01-30, asked by JC Grubbs
  9. JAVA

  10. JAVA SE 8 API - Class java.util.regex.Pattern
  11. JAVA SE 8 API - Class java.util.regex.Matcher
반응형