|
|
|
One thread in:
Thread[JMS SessionPool Worker-5056,5,ASF Session Pool Threads] java.net.SocketInputStream.socketRead0(Native Method) java.net.SocketInputStream.read(Unknown Source) org.postgresql.core.VisibleBufferedInputStream.readMore(VisibleBufferedInputStream.java:135) org.postgresql.core.VisibleBufferedInputStream.ensureBytes(VisibleBufferedInputStream.java:104) org.postgresql.core.VisibleBufferedInputStream.read(VisibleBufferedInputStream.java:73) org.postgresql.core.PGStream.ReceiveChar(PGStream.java:255) org.postgresql.core.v2.QueryExecutorImpl.processResults(QueryExecutorImpl.java:408) org.postgresql.core.v2.QueryExecutorImpl.execute(QueryExecutorImpl.java:364) org.postgresql.core.v2.QueryExecutorImpl.execute(QueryExecutorImpl.java:291) org.postgresql.jdbc2.AbstractJdbc2Statement.executeBatch(AbstractJdbc2Statement.java:2592) org.jboss.resource.adapter.jdbc.CachedPreparedStatement.executeBatch(CachedPreparedStatement.java:461) org.jboss.resource.adapter.jdbc.WrappedStatement.executeBatch(WrappedStatement.java:487) org.hibernate.jdbc.BatchingBatcher.doExecuteBatch(BatchingBatcher.java:48) org.hibernate.jdbc.AbstractBatcher.executeBatch(AbstractBatcher.java:246) org.hibernate.engine.ActionQueue.executeActions(ActionQueue.java:235) org.hibernate.engine.ActionQueue.executeActions(ActionQueue.java:140) org.hibernate.event.def.AbstractFlushingEventListener.performExecutions(AbstractFlushingEventListener.java:298) org.hibernate.event.def.DefaultFlushEventListener.onFlush(DefaultFlushEventListener.java:27) org.hibernate.impl.SessionImpl.flush(SessionImpl.java:1000) org.hyperic.hq.events.server.session.AlertDefinitionManagerEJBImpl.updateAlertDefinitionInternalEnableForceNewTxn(AlertDefinitionManagerEJBImpl.java:502) sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) sun.reflect.NativeMethodAccessorImpl.invoke(Unknown Source) sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source) java.lang.reflect.Method.invoke(Unknown Source) org.jboss.invocation.Invocation.performCall(Invocation.java:345) org.jboss.ejb.StatelessSessionContainer$ContainerInterceptor.invoke(StatelessSessionContainer.java:214) org.jboss.resource.connectionmanager.CachedConnectionInterceptor.invoke(CachedConnectionInterceptor.java:149) org.jboss.ejb.plugins.StatelessSessionInstanceInterceptor.invoke(StatelessSessionInstanceInterceptor.java:154) org.jboss.webservice.server.ServiceEndpointInterceptor.invoke(ServiceEndpointInterceptor.java:54) org.jboss.ejb.plugins.CallValidationInterceptor.invoke(CallValidationInterceptor.java:48) org.hyperic.hq.application.HQApp$Snatcher.invokeNextBoth(HQApp.java:416) org.hyperic.hq.application.HQApp$Snatcher.invokeNext(HQApp.java:468) org.hyperic.txsnatch.TxSnatch.invoke(TxSnatch.java:71) org.jboss.ejb.plugins.AbstractTxInterceptor.invokeNext(AbstractTxInterceptor.java:106) org.jboss.ejb.plugins.TxInterceptorCMT.runWithTransactions(TxInterceptorCMT.java:389) org.jboss.ejb.plugins.TxInterceptorCMT.invoke(TxInterceptorCMT.java:166) org.hyperic.hq.application.HQApp$Snatcher.invokeNextBoth(HQApp.java:416) org.hyperic.hq.application.HQApp$Snatcher.invokeNext(HQApp.java:468) org.hyperic.txsnatch.TxSnatch.invoke(TxSnatch.java:71) org.jboss.ejb.plugins.SecurityInterceptor.invoke(SecurityInterceptor.java:153) org.jboss.ejb.plugins.LogInterceptor.invoke(LogInterceptor.java:192) org.jboss.ejb.plugins.ProxyFactoryFinderInterceptor.invoke(ProxyFactoryFinderInterceptor.java:122) org.jboss.ejb.SessionContainer.internalInvoke(SessionContainer.java:624) org.jboss.ejb.Container.invoke(Container.java:873) org.jboss.ejb.plugins.local.BaseLocalProxyFactory.invoke(BaseLocalProxyFactory.java:415) org.jboss.ejb.plugins.local.StatelessSessionProxy.invoke(StatelessSessionProxy.java:88) $Proxy301.updateAlertDefinitionInternalEnableForceNewTxn(Unknown Source) com.hyperic.hq.bizapp.server.action.alert.EnableAlertDefAction.enableAlertDefinitionNow(EnableAlertDefAction.java:143) com.hyperic.hq.bizapp.server.action.alert.EnableAlertDefAction.onFailure(EnableAlertDefAction.java:129) org.hyperic.hq.events.ext.AbstractTrigger.invokeTriggerFireActionsFailureHandlers(AbstractTrigger.java:213) org.hyperic.hq.events.ext.AbstractTrigger.fireActions(AbstractTrigger.java:187) org.hyperic.hq.bizapp.server.trigger.conditional.MultiConditionTrigger.processEvent(MultiConditionTrigger.java:360) I believe the problem is in the AbstractTrigger where every time an event is processed by a trigger, a TriggerFireActionsFailureHandler is added as a post commit listener. Adding a post commit listener flushes the session, an operation that is inherently slow. When an exception occurs flushing that session (as we see in the stack trace above), the flush is very slow. This cause all threads attempting to acquired the shared lock to pile up.
The solution should be 2 fold: 1) Only register to invoke the TriggerFireActionsFailureHandlers for each recovery alert in the batch after all messages in the batch have been processed. There will only be a one time session flush hit here. Also, make sure if this handler executes, it occurs in its own new session and txn. 2) The exclusive lock held by the multicondition trigger should be released before the actions are executed. There's no need to hold onto this lock once the trigger has decided that it's time to fire. The scope of the exclusive lock held by the multi condition trigger was reduced such that the lock is released before the actions are executed.
Also, I removed the TriggerFireActionsFailureHandler that is supposed to force a reenable of a primary alert if recovery alert processing fails. This feature did not work correctly and is slowing down the alert processing, causing these back ups. Instead, I've added a new HQ Health database query to return a resource alert definitions that are active but disabled. If recovery alert processing fails this query will help support determine which alert definitions need to be reactivated manually from the UI. Otherwise, if a primary alert is disabled and the recovery alert has already fired, the primary alert will never fire again (even if the triggering conditions are met). Looks good. I was able to reproduce large resource alert backup in jms queue before this fix. With 4000 active alerts, half of which are firing every 5 minutes, I saw jms queue always going up and reached over 19K in 12 hours.
With this fix the jms queue has not gone up beyond 380 and its coming back to low numbers soon after it goes up. Verified few alert definitions from HQ Health db query and they are disabled correctly. |
|||||||||||||||||||||||||||||||||||||||||||||
Thread[JMS SessionPool Worker-5153,5,ASF Session Pool Threads]
java.lang.Object.wait(Native Method)
java.lang.Object.wait(Unknown Source)
EDU.oswego.cs.dl.util.concurrent.WriterPreferenceReadWriteLock$ReaderLock.acquire(WriterPreferenceReadWriteLock.java:163)
org.hyperic.hq.bizapp.server.trigger.conditional.MultiConditionTrigger.acquireSharedLock(MultiConditionTrigger.java:134)
org.hyperic.hq.bizapp.server.mdb.RegisteredDispatcherEJBImpl.updateVisitedMCTriggersSet(RegisteredDispatcherEJBImpl.java:127)
org.hyperic.hq.bizapp.server.mdb.RegisteredDispatcherEJBImpl.dispatchEvent(RegisteredDispatcherEJBImpl.java:102)
org.hyperic.hq.bizapp.server.mdb.RegisteredDispatcherEJBImpl.onMessage(RegisteredDispatcherEJBImpl.java:160)
sun.reflect.GeneratedMethodAccessor758.invoke(Unknown Source)
sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source)
java.lang.reflect.Method.invoke(Unknown Source)
org.jboss.invocation.Invocation.performCall(Invocation.java:345)
org.jboss.ejb.MessageDrivenContainer$ContainerInterceptor.invoke(MessageDrivenContainer.java:475)
org.jboss.resource.connectionmanager.CachedConnectionInterceptor.invoke(CachedConnectionInterceptor.java:149)
org.jboss.ejb.plugins.MessageDrivenInstanceInterceptor.invoke(MessageDrivenInstanceInterceptor.java:101)
org.jboss.ejb.plugins.CallValidationInterceptor.invoke(CallValidationInterceptor.java:48)
org.hyperic.hq.application.HQApp$Snatcher.invokeNextBoth(HQApp.java:416)
org.hyperic.hq.application.HQApp$Snatcher.invokeNext(HQApp.java:468)
org.hyperic.txsnatch.TxSnatch.invoke(TxSnatch.java:71)
org.jboss.ejb.plugins.AbstractTxInterceptor.invokeNext(AbstractTxInterceptor.java:106)
org.jboss.ejb.plugins.TxInterceptorCMT.runWithTransactions(TxInterceptorCMT.java:335)
org.jboss.ejb.plugins.TxInterceptorCMT.invoke(TxInterceptorCMT.java:166)
org.hyperic.hq.application.HQApp$Snatcher.invokeNextBoth(HQApp.java:416)
org.hyperic.hq.application.HQApp$Snatcher.invokeNext(HQApp.java:468)
org.hyperic.txsnatch.TxSnatch.invoke(TxSnatch.java:71)
org.jboss.ejb.plugins.RunAsSecurityInterceptor.invoke(RunAsSecurityInterceptor.java:94)
org.jboss.ejb.plugins.LogInterceptor.invoke(LogInterceptor.java:192)
org.jboss.ejb.plugins.ProxyFactoryFinderInterceptor.invoke(ProxyFactoryFinderInterceptor.java:122)
org.jboss.ejb.MessageDrivenContainer.internalInvoke(MessageDrivenContainer.java:389)
org.jboss.ejb.Container.invoke(Container.java:873)
org.jboss.ejb.plugins.jms.JMSContainerInvoker.invoke(JMSContainerInvoker.java:1077)
org.jboss.ejb.plugins.jms.JMSContainerInvoker$MessageListenerImpl.onMessage(JMSContainerInvoker.java:1379)
org.jboss.jms.asf.StdServerSession.onMessage(StdServerSession.java:256)
org.jboss.mq.SpyMessageConsumer.sessionConsumerProcessMessage(SpyMessageConsumer.java:904)
org.jboss.mq.SpyMessageConsumer.addMessage(SpyMessageConsumer.java:160)
org.jboss.mq.SpySession.run(SpySession.java:333)
org.jboss.jms.asf.StdServerSession.run(StdServerSession.java:180)
EDU.oswego.cs.dl.util.concurrent.PooledExecutor$Worker.run(PooledExecutor.java:748)
java.lang.Thread.run(Unknown Source)