1). solr 웹 루트 경로의 라이브러리 폴더에 다음 jar 파일을 복사하여 넣는다.
solr 웹 루트 라이브러리 경로: /BiO/program/solr/current/server/solr-webapp/webapp/WEB-INF/lib
복사 jar 파일
- solr-dataimporthandler-7.4.0.jar
- solr-dataimporthandler-extras-7.4.0.jar
- mysql-connector-java-5.1.38-bin.jar
2). data-config.xml 작성
[MySQL 데이터 연결]
<?xml version="1.0" encoding="UTF-8"?>
<!-- DataImportHandler configuration: reads rows of ksso.kuser over JDBC. -->
<dataConfig>
  <!-- JDBC connection; entities refer to it via dataSource="ksso".
       Host and credentials are masked in this example. -->
  <dataSource name="ksso"
              type="JdbcDataSource"
              driver="com.mysql.jdbc.Driver"
              url="jdbc:mysql://192.168.***.***:3306/ksso"
              user="***"
              password="***"/>

  <document name="headwords">
    <!-- One entity over the kuser table, keyed by user_id. -->
    <entity name="ksso_user"
            pk="user_id"
            dataSource="ksso"
            transformer="RegexTransformer"
            query="select * from ksso.kuser">
      <!-- Each result column is mapped to a field of the same name. -->
      <field column="user_name"       name="user_name"/>
      <field column="user_id"         name="user_id"/>
      <field column="organization"    name="organization"/>
      <field column="position"        name="position"/>
      <field column="email_adress"    name="email_adress"/>
      <field column="regdate"         name="regdate"/>
      <field column="last_login"      name="last_login"/>
      <field column="identity_number" name="identity_number"/>
    </entity>
  </document>
</dataConfig>
[파일 데이터 연결]
<!-- DataImportHandler configuration: reads a text file line by line. -->
<dataConfig>
  <dataSource name="ds1" type="FileDataSource"/>
  <document>
    <!-- The entity reads the file at `url` through data source ds1 and
         applies RegexTransformer to each row. -->
    <entity name="ngrams"
            processor="LineEntityProcessor"
            url="E:/Projects/Data/words-txt.csv"
            dataSource="ds1"
            transformer="RegexTransformer">
      <!-- The pattern is ^"(.*)"\t(.*)$ : a double-quoted value, a tab, then
           the rest of the line; the two groups are named name and count.
           The literal double quotes are written as &quot; because a raw "
           inside a double-quoted attribute value ends the attribute and
           makes the file not well-formed. -->
      <field column="rawLine" regex="^&quot;(.*)&quot;\t(.*)$" groupNames="name,count"/>
    </entity>
  </document>
</dataConfig>
3). solrconfig.xml 설정
[라이브러리 추가]
<!-- Load every jar under contrib/dataimporthandler/lib of the Solr install dir. -->
<lib dir="${solr.install.dir:../../../..}/contrib/dataimporthandler/lib"
     regex=".*\.jar"/>
<!-- Load the solr-dataimporthandler-<version> jars from dist/. -->
<lib dir="${solr.install.dir:../../../..}/dist/"
     regex="solr-dataimporthandler-\d.*\.jar"/>
[HDFS 연결 설정]
<!-- Use solr.HdfsDirectoryFactory as the directory factory. -->
<directoryFactory name="DirectoryFactory" class="solr.HdfsDirectoryFactory">
  <!-- HDFS home location for Solr. -->
  <str name="solr.hdfs.home">hdfs://localhost:9000/solr</str>

  <!-- Block cache settings. -->
  <bool name="solr.hdfs.blockcache.enabled">true</bool>
  <int  name="solr.hdfs.blockcache.slab.count">1</int>
  <bool name="solr.hdfs.blockcache.direct.memory.allocation">true</bool>
  <int  name="solr.hdfs.blockcache.blocksperbank">16384</int>
  <bool name="solr.hdfs.blockcache.read.enabled">true</bool>

  <!-- NRTCachingDirectory settings (sizes in MB). -->
  <bool name="solr.hdfs.nrtcachingdirectory.enable">true</bool>
  <int  name="solr.hdfs.nrtcachingdirectory.maxmergesizemb">16</int>
  <int  name="solr.hdfs.nrtcachingdirectory.maxcachedmb">192</int>
</directoryFactory>
[데이터 핸들러 설정]
<!-- Register DataImportHandler at /dataimport; its default "config"
     parameter points at data-config.xml. -->
<requestHandler name="/dataimport"
                class="org.apache.solr.handler.dataimport.DataImportHandler">
  <lst name="defaults">
    <str name="config">data-config.xml</str>
  </lst>
</requestHandler>
3-1). solr.in.sh 설정
[Solr 구동 시 HDFS 연결 설정을 위하여 solr.in.sh 파일 설정 추가]
# Append the commit-interval, clustering and HDFS system properties to
# SOLR_OPTS in a single assignment. Inside double quotes a backslash-newline
# is a line continuation, so the resulting value is the same options, in the
# same order, separated by single spaces.
SOLR_OPTS="$SOLR_OPTS \
-Dsolr.autoSoftCommit.maxTime=3000 \
-Dsolr.autoCommit.maxTime=60000 \
-Dsolr.clustering.enabled=true \
-Dsolr.directoryFactory=HdfsDirectoryFactory \
-Dsolr.lock.type=hdfs \
-Dsolr.hdfs.home=hdfs://localhost:9000/solr \
-Dsolr.updatelog=hdfs://localhost:9000/solr/log \
-Dsolr.hdfs.confdir=/BiO/program/hadoop/current/conf"
4). managed-schema 설정
<!-- Schema fields for the imported user records. The text_ko field type is
     not defined in this snippet and must exist elsewhere in the schema. -->
<fields>
  <!-- Fields of type text_ko. -->
  <field name="user_name"       type="text_ko" indexed="true" stored="true"/>
  <field name="organization"    type="text_ko" indexed="true" stored="true"/>
  <field name="position"        type="text_ko" indexed="true" stored="true"/>

  <!-- Fields of type string; user_id is single-valued. -->
  <field name="user_id"         type="string"  indexed="true" stored="true" multiValued="false"/>
  <field name="email_adress"    type="string"  indexed="true" stored="true"/>
  <field name="regdate"         type="string"  indexed="true" stored="true"/>
  <field name="last_login"      type="string"  indexed="true" stored="true"/>
  <field name="identity_number" type="string"  indexed="true" stored="true"/>
</fields>
<!-- user_id is the unique key of each document. -->
<uniqueKey>user_id</uniqueKey>
Korean BioInformation Center(KOBIC) Korea Research Institute of Bioscience & Biotechnology Address: #52 Eoeun-dong, Yuseong-gu, Daejeon, 305-806, KOREA +82-10-9936-2261 e-mail: kogun82@kribb.re.kr Blog: kogun82.tistory.com Homepage: www.kobic.re.kr
포스팅이 좋았다면 "좋아요❤️" 또는 "구독👍🏻" 해주세요!